Browse Source

Updated FLAC to 1.3.3

master
Christopher Snowhill 4 weeks ago
parent
commit
04686e999f
100 changed files with 65473 additions and 2578 deletions
  1. +0
    -212
      Frameworks/FLAC/flac-1.2.1/include/share/alloc.h
  2. +0
    -149
      Frameworks/FLAC/flac-1.2.1/src/libFLAC/bitmath.c
  3. +0
    -420
      Frameworks/FLAC/flac-1.2.1/src/libFLAC/cpu.c
  4. +0
    -142
      Frameworks/FLAC/flac-1.2.1/src/libFLAC/crc.c
  5. +0
    -568
      Frameworks/FLAC/flac-1.2.1/src/libFLAC/ia32/bitreader_asm.nasm
  6. +0
    -159
      Frameworks/FLAC/flac-1.2.1/src/libFLAC/ia32/stream_encoder_asm.nasm
  7. +0
    -429
      Frameworks/FLAC/flac-1.2.1/src/libFLAC/ppc/as/lpc_asm.s
  8. +0
    -431
      Frameworks/FLAC/flac-1.2.1/src/libFLAC/ppc/gas/lpc_asm.s
  9. +21
    -4
      Frameworks/FLAC/flac-1.3.3/AUTHORS
  10. +0
    -0
      Frameworks/FLAC/flac-1.3.3/COPYING.FDL
  11. +21
    -22
      Frameworks/FLAC/flac-1.3.3/COPYING.GPL
  12. +0
    -0
      Frameworks/FLAC/flac-1.3.3/COPYING.LGPL
  13. +2
    -1
      Frameworks/FLAC/flac-1.3.3/COPYING.Xiph
  14. +249
    -0
      Frameworks/FLAC/flac-1.3.3/FLAC-vs2005.sln
  15. +278
    -0
      Frameworks/FLAC/flac-1.3.3/FLAC.sln
  16. +55
    -0
      Frameworks/FLAC/flac-1.3.3/Makefile.am
  17. +39
    -0
      Frameworks/FLAC/flac-1.3.3/Makefile.deps
  18. +911
    -0
      Frameworks/FLAC/flac-1.3.3/Makefile.in
  19. +77
    -0
      Frameworks/FLAC/flac-1.3.3/Makefile.lite
  20. +123
    -41
      Frameworks/FLAC/flac-1.3.3/README
  21. +1237
    -0
      Frameworks/FLAC/flac-1.3.3/aclocal.m4
  22. +270
    -0
      Frameworks/FLAC/flac-1.3.3/ar-lib
  23. +66
    -0
      Frameworks/FLAC/flac-1.3.3/autogen.sh
  24. +348
    -0
      Frameworks/FLAC/flac-1.3.3/compile
  25. +1480
    -0
      Frameworks/FLAC/flac-1.3.3/config.guess
  26. +251
    -0
      Frameworks/FLAC/flac-1.3.3/config.h.in
  27. +0
    -0
      Frameworks/FLAC/flac-1.3.3/config.rpath
  28. +1801
    -0
      Frameworks/FLAC/flac-1.3.3/config.sub
  29. +23951
    -0
      Frameworks/FLAC/flac-1.3.3/configure
  30. +584
    -0
      Frameworks/FLAC/flac-1.3.3/configure.ac
  31. +791
    -0
      Frameworks/FLAC/flac-1.3.3/depcomp
  32. +1220
    -0
      Frameworks/FLAC/flac-1.3.3/doc/Doxyfile.in
  33. +12312
    -0
      Frameworks/FLAC/flac-1.3.3/doc/FLAC.tag
  34. +41
    -0
      Frameworks/FLAC/flac-1.3.3/doc/Makefile.am
  35. +762
    -0
      Frameworks/FLAC/flac-1.3.3/doc/Makefile.in
  36. +29
    -0
      Frameworks/FLAC/flac-1.3.3/doc/Makefile.lite
  37. +25
    -0
      Frameworks/FLAC/flac-1.3.3/doc/doxygen.footer.html
  38. +7
    -0
      Frameworks/FLAC/flac-1.3.3/doc/doxygen.header.html
  39. +53
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/Makefile.am
  40. +775
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/Makefile.in
  41. +78
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/_09_2all_8h_source.html
  42. +97
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/_09_2export_8h.html
  43. +74
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/_09_2export_8h_source.html
  44. +161
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/_09_2metadata_8h.html
  45. +238
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/_09_2metadata_8h_source.html
  46. +80
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/all_8h_source.html
  47. +127
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/annotated.html
  48. +74
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/assert_8h_source.html
  49. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/bc_s.png
  50. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/bdwn.png
  51. +107
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/callback_8h.html
  52. +82
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/callback_8h_source.html
  53. +133
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Decoder_1_1File-members.html
  54. +1381
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Decoder_1_1File.html
  55. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Decoder_1_1File.png
  56. +125
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Decoder_1_1Stream-members.html
  57. +1223
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Decoder_1_1Stream.html
  58. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Decoder_1_1Stream.png
  59. +82
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Decoder_1_1Stream_1_1State-members.html
  60. +107
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Decoder_1_1Stream_1_1State.html
  61. +148
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Encoder_1_1File-members.html
  62. +2018
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Encoder_1_1File.html
  63. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Encoder_1_1File.png
  64. +139
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Encoder_1_1Stream-members.html
  65. +1816
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Encoder_1_1Stream.html
  66. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Encoder_1_1Stream.png
  67. +82
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Encoder_1_1Stream_1_1State-members.html
  68. +107
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Encoder_1_1Stream_1_1State.html
  69. +115
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Application-members.html
  70. +801
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Application.html
  71. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Application.png
  72. +92
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Chain-members.html
  73. +412
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Chain.html
  74. +81
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Chain_1_1Status-members.html
  75. +104
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Chain_1_1Status.html
  76. +130
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1CueSheet-members.html
  77. +1107
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1CueSheet.html
  78. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1CueSheet.png
  79. +98
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1CueSheet_1_1Track-members.html
  80. +177
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1CueSheet_1_1Track.html
  81. +91
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Iterator-members.html
  82. +318
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Iterator.html
  83. +112
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Padding-members.html
  84. +799
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Padding.html
  85. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Padding.png
  86. +127
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Picture-members.html
  87. +905
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Picture.html
  88. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Picture.png
  89. +102
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Prototype-members.html
  90. +466
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Prototype.html
  91. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1Prototype.png
  92. +123
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1SeekTable-members.html
  93. +1054
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1SeekTable.html
  94. BIN
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1SeekTable.png
  95. +96
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1SimpleIterator-members.html
  96. +465
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1SimpleIterator.html
  97. +81
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1SimpleIterator_1_1Status-members.html
  98. +104
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1SimpleIterator_1_1Status.html
  99. +128
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1StreamInfo-members.html
  100. +1127
    -0
      Frameworks/FLAC/flac-1.3.3/doc/html/api/classFLAC_1_1Metadata_1_1StreamInfo.html

+ 0
- 212
Frameworks/FLAC/flac-1.2.1/include/share/alloc.h View File

@@ -1,212 +0,0 @@
/* alloc - Convenience routines for safely allocating memory
* Copyright (C) 2007 Josh Coalson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#ifndef FLAC__SHARE__ALLOC_H
#define FLAC__SHARE__ALLOC_H

#if HAVE_CONFIG_H
# include <config.h>
#endif

/* WATCHOUT: for c++ you may have to #define __STDC_LIMIT_MACROS 1 real early
* before #including this file, otherwise SIZE_MAX might not be defined
*/

#include <limits.h> /* for SIZE_MAX */
#if !defined _MSC_VER && !defined __MINGW32__ && !defined __EMX__
#include <stdint.h> /* for SIZE_MAX in case limits.h didn't get it */
#endif
#include <stdlib.h> /* for size_t, malloc(), etc */

#ifndef SIZE_MAX
# ifndef SIZE_T_MAX
# ifdef _MSC_VER
# define SIZE_T_MAX UINT_MAX
# else
# error
# endif
# endif
# define SIZE_MAX SIZE_T_MAX
#endif

#ifndef FLaC__INLINE
#define FLaC__INLINE
#endif

/* Allocate `size` bytes, never asking malloc() for zero bytes.  See:
 * https://www.securecoding.cert.org/confluence/display/seccode/MEM04-A.+Do+not+make+assumptions+about+the+result+of+allocating+0+bytes?focusedCommentId=5407003
 *
 * A request for 0 bytes is turned into a request for 1 byte, so a
 * successful call always yields a pointer that can be passed to free().
 * Returns whatever malloc() returns (NULL when the allocation fails).
 */
static FLaC__INLINE void *safe_malloc_(size_t size)
{
	/* the result of malloc(0) is not portable; FLAC src convention is to always allocate */
	return malloc(size ? size : 1);
}

/* calloc() wrapper that never requests a zero-sized block.
 *
 * When both counts are non-zero this is exactly calloc(nmemb, size).
 * When either is zero a 1-byte block is obtained with malloc(1) instead;
 * note that this byte is NOT zero-initialised.
 * Returns NULL when the underlying allocation fails.
 */
static FLaC__INLINE void *safe_calloc_(size_t nmemb, size_t size)
{
	if(nmemb != 0 && size != 0)
		return calloc(nmemb, size);

	/* the result of a zero-sized request is not portable; FLAC src convention is to always allocate */
	return malloc(1);
}

/*@@@@ there's probably a better way to prevent overflows when allocating untrusted sums but this works for now */

/* Allocate size1 + size2 bytes.
 * Returns 0 without allocating if the sum does not fit in a size_t;
 * otherwise returns the result of safe_malloc_() on the sum.
 */
static FLaC__INLINE void *safe_malloc_add_2op_(size_t size1, size_t size2)
{
	/* reject before adding: the sum would wrap around */
	if(size2 > SIZE_MAX - size1)
		return 0;
	return safe_malloc_(size1 + size2);
}

/* Allocate size1 + size2 + size3 bytes.
 * Returns 0 without allocating if any partial sum does not fit in a
 * size_t; otherwise returns the result of safe_malloc_() on the total.
 */
static FLaC__INLINE void *safe_malloc_add_3op_(size_t size1, size_t size2, size_t size3)
{
	size_t total = size1;

	if(size2 > SIZE_MAX - total)
		return 0;
	total += size2;

	if(size3 > SIZE_MAX - total)
		return 0;
	total += size3;

	return safe_malloc_(total);
}

/* Allocate size1 + size2 + size3 + size4 bytes.
 * Returns 0 without allocating if any partial sum does not fit in a
 * size_t; otherwise returns the result of safe_malloc_() on the total.
 */
static FLaC__INLINE void *safe_malloc_add_4op_(size_t size1, size_t size2, size_t size3, size_t size4)
{
	size_t total = size1;

	if(size2 > SIZE_MAX - total)
		return 0;
	total += size2;

	if(size3 > SIZE_MAX - total)
		return 0;
	total += size3;

	if(size4 > SIZE_MAX - total)
		return 0;
	total += size4;

	return safe_malloc_(total);
}

/* Allocate size1 * size2 bytes.
 *
 * A zero factor yields a 1-byte block (FLAC src convention is to always
 * allocate).  Returns 0 without allocating if the product does not fit
 * in a size_t.  (A disabled floating-point variant that only handled
 * sizeof(size_t) == 4 used to sit here under "#if 0".)
 */
static FLaC__INLINE void *safe_malloc_mul_2op_(size_t size1, size_t size2)
{
	if(size1 == 0 || size2 == 0)
		return malloc(1); /* the result of malloc(0) is not portable */

	/* size1 * size2 <= SIZE_MAX  <=>  size1 <= SIZE_MAX / size2 */
	if(size1 <= SIZE_MAX / size2)
		return malloc(size1 * size2);

	return 0;
}

/* Allocate size1 * size2 * size3 bytes.
 *
 * A zero factor yields a 1-byte block (FLAC src convention is to always
 * allocate).  Returns 0 without allocating if either the partial
 * product size1*size2 or the full product does not fit in a size_t.
 */
static FLaC__INLINE void *safe_malloc_mul_3op_(size_t size1, size_t size2, size_t size3)
{
	size_t partial;

	if(!(size1 && size2 && size3))
		return malloc(1); /* the result of malloc(0) is not portable */

	if(size1 > SIZE_MAX / size2)
		return 0;
	partial = size1 * size2;

	if(partial > SIZE_MAX / size3)
		return 0;
	return malloc(partial * size3);
}

/* Allocate size1*size2 + size3 bytes.
 *
 * When the product is zero only size3 bytes are requested (through
 * safe_malloc_()).  Returns 0 without allocating if the product does
 * not fit in a size_t; overflow of the final addition is checked by
 * safe_malloc_add_2op_().
 */
static FLaC__INLINE void *safe_malloc_mul2add_(size_t size1, size_t size2, size_t size3)
{
	if(size1 == 0 || size2 == 0)
		return safe_malloc_(size3);

	if(size1 <= SIZE_MAX / size2)
		return safe_malloc_add_2op_(size1 * size2, size3);

	return 0;
}

/* Allocate size1 * (size2 + size3) bytes.
 *
 * If size1 is zero, or size2 and size3 are both zero, a 1-byte block is
 * returned (FLAC src convention is to always allocate).  Returns 0
 * without allocating if size2 + size3 does not fit in a size_t; overflow
 * of the multiplication is checked by safe_malloc_mul_2op_().
 */
static FLaC__INLINE void *safe_malloc_muladd2_(size_t size1, size_t size2, size_t size3)
{
	if(size1 == 0 || (size2 == 0 && size3 == 0))
		return malloc(1); /* the result of malloc(0) is not portable */

	/* reject before adding: the sum would wrap around */
	if(size2 > SIZE_MAX - size3)
		return 0;

	return safe_malloc_mul_2op_(size1, size2 + size3);
}

/* Resize `ptr` to size1 + size2 bytes via realloc().
 * Returns 0 without calling realloc() (so `ptr` is left untouched) if
 * the sum does not fit in a size_t; otherwise returns realloc()'s result.
 */
static FLaC__INLINE void *safe_realloc_add_2op_(void *ptr, size_t size1, size_t size2)
{
	const size_t total = size1 + size2;

	/* an unsigned sum smaller than one of its operands has wrapped */
	if(total < size1)
		return 0;
	return realloc(ptr, total);
}

/* Resize `ptr` to size1 + size2 + size3 bytes via realloc().
 * Returns 0 without calling realloc() (so `ptr` is left untouched) if
 * any partial sum does not fit in a size_t.
 */
static FLaC__INLINE void *safe_realloc_add_3op_(void *ptr, size_t size1, size_t size2, size_t size3)
{
	size_t total = size1;

	if(size2 > SIZE_MAX - total)
		return 0;
	total += size2;

	if(size3 > SIZE_MAX - total)
		return 0;
	total += size3;

	return realloc(ptr, total);
}

/* Resize `ptr` to size1 + size2 + size3 + size4 bytes via realloc().
 * Returns 0 without calling realloc() (so `ptr` is left untouched) if
 * any partial sum does not fit in a size_t.
 */
static FLaC__INLINE void *safe_realloc_add_4op_(void *ptr, size_t size1, size_t size2, size_t size3, size_t size4)
{
	size_t total = size1;

	if(size2 > SIZE_MAX - total)
		return 0;
	total += size2;

	if(size3 > SIZE_MAX - total)
		return 0;
	total += size3;

	if(size4 > SIZE_MAX - total)
		return 0;
	total += size4;

	return realloc(ptr, total);
}

/* Resize `ptr` to size1 * size2 bytes via realloc().
 *
 * A zero factor is forwarded as realloc(ptr, 0).  Returns 0 without
 * calling realloc() (so `ptr` is left untouched) if the product does
 * not fit in a size_t.
 */
static FLaC__INLINE void *safe_realloc_mul_2op_(void *ptr, size_t size1, size_t size2)
{
	if(size1 == 0 || size2 == 0)
		return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */

	/* size1 * size2 <= SIZE_MAX  <=>  size1 <= SIZE_MAX / size2 */
	if(size1 <= SIZE_MAX / size2)
		return realloc(ptr, size1 * size2);

	return 0;
}

/* Resize `ptr` to size1 * (size2 + size3) bytes.
 *
 * If size1 is zero, or size2 and size3 are both zero, the request is
 * forwarded as realloc(ptr, 0).  Returns 0 without reallocating if
 * size2 + size3 does not fit in a size_t; overflow of the multiplication
 * is checked by safe_realloc_mul_2op_().
 */
static FLaC__INLINE void *safe_realloc_muladd2_(void *ptr, size_t size1, size_t size2, size_t size3)
{
	if(size1 == 0 || (size2 == 0 && size3 == 0))
		return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */

	/* reject before adding: the sum would wrap around */
	if(size2 > SIZE_MAX - size3)
		return 0;

	return safe_realloc_mul_2op_(ptr, size1, size2 + size3);
}

#endif

+ 0
- 149
Frameworks/FLAC/flac-1.2.1/src/libFLAC/bitmath.c View File

@@ -1,149 +0,0 @@
/* libFLAC - Free Lossless Audio Codec library
* Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the Xiph.org Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#if HAVE_CONFIG_H
# include <config.h>
#endif

#include "private/bitmath.h"
#include "FLAC/FLAC_assert.h"

/* An example of what FLAC__bitmath_ilog2() computes:
*
* ilog2( 0) = assertion failure
* ilog2( 1) = 0
* ilog2( 2) = 1
* ilog2( 3) = 1
* ilog2( 4) = 2
* ilog2( 5) = 2
* ilog2( 6) = 2
* ilog2( 7) = 2
* ilog2( 8) = 3
* ilog2( 9) = 3
* ilog2(10) = 3
* ilog2(11) = 3
* ilog2(12) = 3
* ilog2(13) = 3
* ilog2(14) = 3
* ilog2(15) = 3
* ilog2(16) = 4
* ilog2(17) = 4
* ilog2(18) = 4
*/
unsigned FLAC__bitmath_ilog2(FLAC__uint32 v)
{
	/* Returns the index of the highest set bit of v, i.e. floor(log2(v)).
	 * v must be non-zero (asserted).
	 */
	unsigned shifts;

	FLAC__ASSERT(v > 0);

	/* count how many right-shifts it takes to reduce v to zero, minus one */
	for(shifts = 0; (v >>= 1) != 0; shifts++)
		;
	return shifts;
}

unsigned FLAC__bitmath_ilog2_wide(FLAC__uint64 v)
{
	/* 64-bit counterpart of FLAC__bitmath_ilog2(): returns the index of
	 * the highest set bit of v.  v must be non-zero (asserted).
	 */
	unsigned shifts;

	FLAC__ASSERT(v > 0);

	/* count how many right-shifts it takes to reduce v to zero, minus one */
	for(shifts = 0; (v >>= 1) != 0; shifts++)
		;
	return shifts;
}

/* An example of what FLAC__bitmath_silog2() computes:
*
* silog2(-10) = 5
* silog2(- 9) = 5
* silog2(- 8) = 4
* silog2(- 7) = 4
* silog2(- 6) = 4
* silog2(- 5) = 4
* silog2(- 4) = 3
* silog2(- 3) = 3
* silog2(- 2) = 2
* silog2(- 1) = 2
* silog2( 0) = 0
* silog2( 1) = 2
* silog2( 2) = 3
* silog2( 3) = 3
* silog2( 4) = 4
* silog2( 5) = 4
* silog2( 6) = 4
* silog2( 7) = 4
* silog2( 8) = 5
* silog2( 9) = 5
* silog2( 10) = 5
*/
unsigned FLAC__bitmath_silog2(int v)
{
	/* Returns 0 for v == 0 and 2 for v == -1.  For any other v it returns
	 * one more than the number of significant bits of v (v > 0) or of
	 * -(v+1) (v < -1), matching the table above.
	 */
	unsigned bits;

	if(v == 0)
		return 0;
	if(v == -1)
		return 2;

	/* fold a negative value onto its non-negative counterpart;
	 * adding 1 before negating keeps the most negative int in range */
	if(v < 0)
		v = -(v + 1);

	/* start at 1 to account for the extra bit, then add one per significant bit */
	for(bits = 1; v != 0; v >>= 1)
		bits++;
	return bits;
}

unsigned FLAC__bitmath_silog2_wide(FLAC__int64 v)
{
	/* 64-bit counterpart of FLAC__bitmath_silog2(): returns 0 for v == 0,
	 * 2 for v == -1, and otherwise one more than the number of
	 * significant bits of v (v > 0) or of -(v+1) (v < -1).
	 */
	unsigned bits;

	if(v == 0)
		return 0;
	if(v == -1)
		return 2;

	/* fold a negative value onto its non-negative counterpart;
	 * adding 1 before negating keeps the most negative value in range */
	if(v < 0)
		v = -(v + 1);

	/* start at 1 to account for the extra bit, then add one per significant bit */
	for(bits = 1; v != 0; v >>= 1)
		bits++;
	return bits;
}

+ 0
- 420
Frameworks/FLAC/flac-1.2.1/src/libFLAC/cpu.c View File

@@ -1,420 +0,0 @@
/* libFLAC - Free Lossless Audio Codec library
* Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the Xiph.org Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#if HAVE_CONFIG_H
# include <config.h>
#endif

#include "private/cpu.h"
#include <stdlib.h>
#include <stdio.h>

#if defined FLAC__CPU_IA32
# include <signal.h>
#elif defined FLAC__CPU_PPC
# if !defined FLAC__NO_ASM
# if defined FLAC__SYS_DARWIN
# include <sys/sysctl.h>
# include <mach/mach.h>
# include <mach/mach_host.h>
# include <mach/host_info.h>
# include <mach/machine.h>
# ifndef CPU_SUBTYPE_POWERPC_970
# define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100)
# endif
# else /* FLAC__SYS_DARWIN */

# include <signal.h>
# include <setjmp.h>

/* Jump target that sigill_handler() returns to via siglongjmp(). */
static sigjmp_buf jmpbuf;
/* Non-zero only while a probe has armed jmpbuf and expects a possible SIGILL. */
static volatile sig_atomic_t canjump = 0;

/*
 * Signal handler installed for SIGILL by the non-Darwin PPC feature probes.
 *
 * If no probe is armed (canjump == 0) the default disposition is restored
 * and the signal is re-raised.  Otherwise canjump is cleared and control
 * returns to the sigsetjmp() site with a value of 1.
 */
static void sigill_handler (int sig)
{
if (!canjump) {
/* not ours: restore the default action and re-deliver the signal */
signal (sig, SIG_DFL);
raise (sig);
}
/* NOTE(review): if raise() returns here (e.g. the signal is blocked while
 * the handler runs), execution falls through to siglongjmp() — confirm
 * this is acceptable when jmpbuf has not been armed. */
canjump = 0;
siglongjmp (jmpbuf, 1);
}
# endif /* FLAC__SYS_DARWIN */
# endif /* FLAC__NO_ASM */
#endif /* FLAC__CPU_PPC */

#if defined (__NetBSD__) || defined(__OpenBSD__)
#include <sys/param.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#endif

#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif

#if defined(__APPLE__)
/* how to get sysctlbyname()? */
#endif

/* these are flags in EDX of CPUID AX=00000001 */
/* NOTE(review): this whole table is compiled out by "#if 0", yet
 * FLAC__cpu_info() in this file still tests these names in its
 * FLAC__CPU_IA32 + FLAC__HAS_NASM branch.  Presumably the IA-32 asm
 * configuration is never built in this tree, or the masks are provided
 * by private/cpu.h — confirm before enabling that configuration. */
#if 0
static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000;
/* these are flags in ECX of CPUID AX=00000001 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200;
/* these are flags in EDX of CPUID AX=80000001 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000;
#endif


/*
* Extra stuff needed for detection of OS support for SSE on IA-32
*/
#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && defined FLAC__HAS_NASM && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS
# if defined(__linux__)
/*
* If the OS doesn't support SSE, we will get here with a SIGILL. We
* modify the return address to jump over the offending SSE instruction
* and also the operation following it that indicates the instruction
* executed successfully. In this way we use no global variables and
* stay thread-safe.
*
* 3 + 3 + 6:
* 3 bytes for "xorps xmm0,xmm0"
* 3 bytes for estimate of how long the following "inc var" instruction is
* 6 bytes extra in case our estimate is wrong
* 12 bytes puts us in the NOP "landing zone"
*/
# undef USE_OBSOLETE_SIGCONTEXT_FLAVOR /* #define this to use the older signal handler method */
# ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR
/*
 * SIGILL handler, older sigcontext flavor (only compiled when
 * USE_OBSOLETE_SIGCONTEXT_FLAVOR is defined).  Advances the saved
 * instruction pointer by 3 + 3 + 6 = 12 bytes so execution resumes in the
 * NOP landing zone after the probed SSE instruction.
 *
 * NOTE(review): `sc` is received by value; this presumably relies on the
 * legacy Linux signal frame placing the live sigcontext where this
 * argument sits, so that the write affects the resumed context — confirm.
 */
static void sigill_handler_sse_os(int signal, struct sigcontext sc)
{
(void)signal;
/* skip "xorps" (3) + estimated "inc" (3) + safety margin (6) */
sc.eip += 3 + 3 + 6;
}
# else
# include <sys/ucontext.h>
/*
 * SIGILL handler (SA_SIGINFO flavor) used while probing for OS support of
 * SSE.  Advances the saved instruction pointer in the interrupted context
 * by 3 + 3 + 6 = 12 bytes so that execution resumes in the NOP landing
 * zone after the probed instruction; `signal` and `si` are unused.
 */
static void sigill_handler_sse_os(int signal, siginfo_t *si, void *uc)
{
	ucontext_t *context = (ucontext_t*)uc;

	(void)signal;
	(void)si;

	/* skip "xorps" (3) + estimated "inc" (3) + safety margin (6) */
	context->uc_mcontext.gregs[14/*REG_EIP*/] += 3 + 3 + 6;
}
# endif
# elif defined(_MSC_VER)
# include <windows.h>
# undef USE_TRY_CATCH_FLAVOR /* #define this to use the try/catch method for catching illegal opcode exception */
# ifdef USE_TRY_CATCH_FLAVOR
# else
/*
 * Exception filter used on MSVC while probing for OS support of SSE.
 * For an illegal-instruction exception it advances the saved Eip by
 * 3 + 3 + 6 = 12 bytes and asks for execution to continue; any other
 * exception is passed on to the next handler.
 */
LONG CALLBACK sigill_handler_sse_os(EXCEPTION_POINTERS *ep)
{
	if(ep->ExceptionRecord->ExceptionCode != EXCEPTION_ILLEGAL_INSTRUCTION)
		return EXCEPTION_CONTINUE_SEARCH;

	/* skip the probed instruction, the "inc" after it, and a safety margin */
	ep->ContextRecord->Eip += 3 + 3 + 6;
	return EXCEPTION_CONTINUE_EXECUTION;
}
# endif
# endif
#endif


/*
 * Fill in *info with the CPU family and the optional instruction-set
 * features that may be used on this machine.
 *
 * - FLAC__CPU_IA32: sets type to IA32.  With NASM asm enabled, use_asm is
 *   set, every feature flag starts out false, and if CPUID is available
 *   the flags are read through the asm helpers.  When FXSR/SSE/SSE2 is
 *   reported, OS support is then verified with a platform-specific method
 *   (sysctl, or executing an SSE instruction with a SIGILL / exception
 *   handler installed); if it cannot be confirmed, fxsr, sse, sse2, sse3
 *   and ssse3 are cleared.  Without asm, only use_asm = false is set.
 * - FLAC__CPU_PPC: sets type to PPC and, with asm and AltiVec enabled,
 *   detects altivec and ppc64 (sysctl/host_info on Darwin, SIGILL probing
 *   elsewhere).
 * - otherwise: type is UNKNOWN and use_asm is false.
 */
void FLAC__cpu_info(FLAC__CPUInfo *info)
{
/*
 * IA32-specific
 */
#ifdef FLAC__CPU_IA32
	info->type = FLAC__CPUINFO_TYPE_IA32;
#if !defined FLAC__NO_ASM && defined FLAC__HAS_NASM
	info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */
	info->data.ia32.cpuid = FLAC__cpu_have_cpuid_asm_ia32()? true : false;
	info->data.ia32.bswap = info->data.ia32.cpuid; /* CPUID => BSWAP since it came after */
	info->data.ia32.cmov = false;
	info->data.ia32.mmx = false;
	info->data.ia32.fxsr = false;
	info->data.ia32.sse = false;
	info->data.ia32.sse2 = false;
	info->data.ia32.sse3 = false;
	info->data.ia32.ssse3 = false;
	info->data.ia32._3dnow = false;
	info->data.ia32.ext3dnow = false;
	info->data.ia32.extmmx = false;
	if(info->data.ia32.cpuid) {
		/* http://www.sandpile.org/ia32/cpuid.htm */
		FLAC__uint32 flags_edx, flags_ecx;
		FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx);
		info->data.ia32.cmov = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false;
		info->data.ia32.mmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX )? true : false;
		info->data.ia32.fxsr = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false;
		info->data.ia32.sse = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE )? true : false;
		info->data.ia32.sse2 = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false;
		info->data.ia32.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
		info->data.ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;

#ifdef FLAC__USE_3DNOW
		flags_edx = FLAC__cpu_info_extended_amd_asm_ia32();
		info->data.ia32._3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW )? true : false;
		info->data.ia32.ext3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false;
		info->data.ia32.extmmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX )? true : false;
#else
		info->data.ia32._3dnow = info->data.ia32.ext3dnow = info->data.ia32.extmmx = false;
#endif

#ifdef DEBUG
		fprintf(stderr, "CPU info (IA-32):\n");
		fprintf(stderr, " CPUID ...... %c\n", info->data.ia32.cpuid ? 'Y' : 'n');
		fprintf(stderr, " BSWAP ...... %c\n", info->data.ia32.bswap ? 'Y' : 'n');
		fprintf(stderr, " CMOV ....... %c\n", info->data.ia32.cmov ? 'Y' : 'n');
		fprintf(stderr, " MMX ........ %c\n", info->data.ia32.mmx ? 'Y' : 'n');
		fprintf(stderr, " FXSR ....... %c\n", info->data.ia32.fxsr ? 'Y' : 'n');
		fprintf(stderr, " SSE ........ %c\n", info->data.ia32.sse ? 'Y' : 'n');
		fprintf(stderr, " SSE2 ....... %c\n", info->data.ia32.sse2 ? 'Y' : 'n');
		fprintf(stderr, " SSE3 ....... %c\n", info->data.ia32.sse3 ? 'Y' : 'n');
		fprintf(stderr, " SSSE3 ...... %c\n", info->data.ia32.ssse3 ? 'Y' : 'n');
		fprintf(stderr, " 3DNow! ..... %c\n", info->data.ia32._3dnow ? 'Y' : 'n');
		fprintf(stderr, " 3DNow!-ext . %c\n", info->data.ia32.ext3dnow? 'Y' : 'n');
		fprintf(stderr, " 3DNow!-MMX . %c\n", info->data.ia32.extmmx ? 'Y' : 'n');
#endif

		/*
		 * now have to check for OS support of SSE/SSE2
		 */
		if(info->data.ia32.fxsr || info->data.ia32.sse || info->data.ia32.sse2) {
#if defined FLAC__NO_SSE_OS
			/* assume user knows better than us; turn it off */
			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
#elif defined FLAC__SSE_OS
			/* assume user knows better than us; leave as detected above */
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
			int sse = 0;
			size_t len;
			/* at least one of these must work: */
			len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse);
			len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse" , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */
			if(!sse)
				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
#elif defined(__NetBSD__) || defined (__OpenBSD__)
# if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__)
			int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE };
			size_t len = sizeof(val);
			if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
			else { /* double-check SSE2 */
				mib[1] = CPU_SSE2;
				len = sizeof(val);
				if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
					info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
			}
# else
			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
# endif
#elif defined(__linux__)
			int sse = 0;
			struct sigaction sigill_save;
#ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR
			if(0 == sigaction(SIGILL, NULL, &sigill_save) && signal(SIGILL, (void (*)(int))sigill_handler_sse_os) != SIG_ERR)
#else
			struct sigaction sigill_sse;
			sigill_sse.sa_sigaction = sigill_handler_sse_os;
			/* use the public POSIX sigemptyset(); the glibc-internal
			 * __sigemptyset is not exposed by newer glibc releases */
			sigemptyset(&sigill_sse.sa_mask);
			sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */
			if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save))
#endif
			{
				/* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */
				/* see sigill_handler_sse_os() for an explanation of the following: */
				asm volatile (
					"xorl %0,%0\n\t" /* for some reason, still need to do this to clear 'sse' var */
					"xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */
					"incl %0\n\t" /* SIGILL handler will jump over this */
					/* landing zone */
					"nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */
					"nop\n\t"
					"nop\n\t"
					"nop\n\t"
					"nop\n\t"
					"nop\n\t"
					"nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */
					"nop\n\t"
					"nop" /* SIGILL jump lands here if "inc" is 1 byte */
					: "=r"(sse)
					: "r"(sse)
				);

				/* put back whatever SIGILL disposition was installed before the probe */
				sigaction(SIGILL, &sigill_save, NULL);
			}

			if(!sse)
				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
#elif defined(_MSC_VER)
# ifdef USE_TRY_CATCH_FLAVOR
			_try {
				__asm {
# if _MSC_VER <= 1200
					/* VC6 assembler doesn't know SSE, have to emit bytecode instead */
					_emit 0x0F
					_emit 0x57
					_emit 0xC0
# else
					xorps xmm0,xmm0
# endif
				}
			}
			_except(EXCEPTION_EXECUTE_HANDLER) {
				if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
					info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
			}
# else
			int sse = 0;
			LPTOP_LEVEL_EXCEPTION_FILTER save = SetUnhandledExceptionFilter(sigill_handler_sse_os);
			/* see GCC version above for explanation */
			/* http://msdn2.microsoft.com/en-us/library/4ks26t93.aspx */
			/* http://www.codeproject.com/cpp/gccasm.asp */
			/* http://www.hick.org/~mmiller/msvc_inline_asm.html */
			__asm {
# if _MSC_VER <= 1200
				/* VC6 assembler doesn't know SSE, have to emit bytecode instead */
				_emit 0x0F
				_emit 0x57
				_emit 0xC0
# else
				xorps xmm0,xmm0
# endif
				inc sse
				nop
				nop
				nop
				nop
				nop
				nop
				nop
				nop
				nop
			}
			SetUnhandledExceptionFilter(save);
			if(!sse)
				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
# endif
#else
			/* no way to test, disable to be safe */
			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
#endif
#ifdef DEBUG
			fprintf(stderr, " SSE OS sup . %c\n", info->data.ia32.sse ? 'Y' : 'n');
#endif

		}
	}
#else
	info->use_asm = false;
#endif

/*
 * PPC-specific
 */
#elif defined FLAC__CPU_PPC
	info->type = FLAC__CPUINFO_TYPE_PPC;
# if !defined FLAC__NO_ASM
	info->use_asm = true;
# ifdef FLAC__USE_ALTIVEC
# if defined FLAC__SYS_DARWIN
	{
		int val = 0, mib[2] = { CTL_HW, HW_VECTORUNIT };
		size_t len = sizeof(val);
		/* altivec is set only if the sysctl succeeds and reports a non-zero value */
		info->data.ppc.altivec = !(sysctl(mib, 2, &val, &len, NULL, 0) || !val);
	}
	{
		host_basic_info_data_t hostInfo;
		mach_msg_type_number_t infoCount;

		infoCount = HOST_BASIC_INFO_COUNT;
		host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCount);

		info->data.ppc.ppc64 = (hostInfo.cpu_type == CPU_TYPE_POWERPC) && (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_970);
	}
# else /* FLAC__USE_ALTIVEC && !FLAC__SYS_DARWIN */
	{
		/* no Darwin, do it the brute-force way */
		/* @@@@@@ this is not thread-safe; replace with SSE OS method above or remove */
		info->data.ppc.altivec = 0;
		info->data.ppc.ppc64 = 0;

		signal (SIGILL, sigill_handler);
		canjump = 0;
		if (!sigsetjmp (jmpbuf, 1)) {
			canjump = 1;

			asm volatile (
				"mtspr 256, %0\n\t"
				"vand %%v0, %%v0, %%v0"
				:
				: "r" (-1)
			);

			/* only reached if the instructions above did not raise SIGILL */
			info->data.ppc.altivec = 1;
		}
		canjump = 0;
		if (!sigsetjmp (jmpbuf, 1)) {
			int x = 0;
			canjump = 1;

			/* PPC64 hardware implements the cntlzd instruction */
			asm volatile ("cntlzd %0, %1" : "=r" (x) : "r" (x) );

			/* only reached if cntlzd did not raise SIGILL */
			info->data.ppc.ppc64 = 1;
		}
		signal (SIGILL, SIG_DFL); /*@@@@@@ should save and restore old signal */
	}
# endif
# else /* !FLAC__USE_ALTIVEC */
	info->data.ppc.altivec = 0;
	info->data.ppc.ppc64 = 0;
# endif
# else
	info->use_asm = false;
# endif

/*
 * unknown CPU
 */
#else
	info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
	info->use_asm = false;
#endif
}

+ 0
- 142
Frameworks/FLAC/flac-1.2.1/src/libFLAC/crc.c View File

@@ -1,142 +0,0 @@
/* libFLAC - Free Lossless Audio Codec library
* Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007 Josh Coalson
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the Xiph.org Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#if HAVE_CONFIG_H
# include <config.h>
#endif

#include "private/crc.h"

/* CRC-8, poly = x^8 + x^2 + x^1 + x^0, init = 0
 *
 * Byte-indexed lookup table used by the FLAC__crc8*() routines below.
 * Entry i is the CRC-8 of the single byte i starting from a zero CRC, with
 * bits processed most-significant first (hence entry 1 is the polynomial
 * itself, 0x07).  One step of the CRC is: crc = FLAC__crc8_table[crc ^ byte].
 */

FLAC__byte const FLAC__crc8_table[256] = {
0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15,
0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65,
0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5,
0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85,
0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2,
0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2,
0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32,
0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42,
0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C,
0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC,
0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C,
0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C,
0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B,
0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B,
0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB,
0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB,
0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
};

/* CRC-16, poly = x^16 + x^15 + x^2 + x^0, init = 0
 *
 * Byte-indexed lookup table used by FLAC__crc16() below.  Entry i is the
 * CRC-16 of the single byte i starting from a zero CRC, with bits processed
 * most-significant first (hence entry 1 is the polynomial itself, 0x8005).
 * One step of the CRC is:
 *   crc = ((crc << 8) ^ FLAC__crc16_table[(crc >> 8) ^ byte]) & 0xffff
 *
 * Each entry occupies a full 'unsigned' even though only the low 16 bits are
 * used.  The IA-32 bitreader assembly imports this symbol and indexes it with
 * a stride of 4 bytes, so the element type must not be narrowed.
 */

unsigned FLAC__crc16_table[256] = {
0x0000, 0x8005, 0x800f, 0x000a, 0x801b, 0x001e, 0x0014, 0x8011,
0x8033, 0x0036, 0x003c, 0x8039, 0x0028, 0x802d, 0x8027, 0x0022,
0x8063, 0x0066, 0x006c, 0x8069, 0x0078, 0x807d, 0x8077, 0x0072,
0x0050, 0x8055, 0x805f, 0x005a, 0x804b, 0x004e, 0x0044, 0x8041,
0x80c3, 0x00c6, 0x00cc, 0x80c9, 0x00d8, 0x80dd, 0x80d7, 0x00d2,
0x00f0, 0x80f5, 0x80ff, 0x00fa, 0x80eb, 0x00ee, 0x00e4, 0x80e1,
0x00a0, 0x80a5, 0x80af, 0x00aa, 0x80bb, 0x00be, 0x00b4, 0x80b1,
0x8093, 0x0096, 0x009c, 0x8099, 0x0088, 0x808d, 0x8087, 0x0082,
0x8183, 0x0186, 0x018c, 0x8189, 0x0198, 0x819d, 0x8197, 0x0192,
0x01b0, 0x81b5, 0x81bf, 0x01ba, 0x81ab, 0x01ae, 0x01a4, 0x81a1,
0x01e0, 0x81e5, 0x81ef, 0x01ea, 0x81fb, 0x01fe, 0x01f4, 0x81f1,
0x81d3, 0x01d6, 0x01dc, 0x81d9, 0x01c8, 0x81cd, 0x81c7, 0x01c2,
0x0140, 0x8145, 0x814f, 0x014a, 0x815b, 0x015e, 0x0154, 0x8151,
0x8173, 0x0176, 0x017c, 0x8179, 0x0168, 0x816d, 0x8167, 0x0162,
0x8123, 0x0126, 0x012c, 0x8129, 0x0138, 0x813d, 0x8137, 0x0132,
0x0110, 0x8115, 0x811f, 0x011a, 0x810b, 0x010e, 0x0104, 0x8101,
0x8303, 0x0306, 0x030c, 0x8309, 0x0318, 0x831d, 0x8317, 0x0312,
0x0330, 0x8335, 0x833f, 0x033a, 0x832b, 0x032e, 0x0324, 0x8321,
0x0360, 0x8365, 0x836f, 0x036a, 0x837b, 0x037e, 0x0374, 0x8371,
0x8353, 0x0356, 0x035c, 0x8359, 0x0348, 0x834d, 0x8347, 0x0342,
0x03c0, 0x83c5, 0x83cf, 0x03ca, 0x83db, 0x03de, 0x03d4, 0x83d1,
0x83f3, 0x03f6, 0x03fc, 0x83f9, 0x03e8, 0x83ed, 0x83e7, 0x03e2,
0x83a3, 0x03a6, 0x03ac, 0x83a9, 0x03b8, 0x83bd, 0x83b7, 0x03b2,
0x0390, 0x8395, 0x839f, 0x039a, 0x838b, 0x038e, 0x0384, 0x8381,
0x0280, 0x8285, 0x828f, 0x028a, 0x829b, 0x029e, 0x0294, 0x8291,
0x82b3, 0x02b6, 0x02bc, 0x82b9, 0x02a8, 0x82ad, 0x82a7, 0x02a2,
0x82e3, 0x02e6, 0x02ec, 0x82e9, 0x02f8, 0x82fd, 0x82f7, 0x02f2,
0x02d0, 0x82d5, 0x82df, 0x02da, 0x82cb, 0x02ce, 0x02c4, 0x82c1,
0x8243, 0x0246, 0x024c, 0x8249, 0x0258, 0x825d, 0x8257, 0x0252,
0x0270, 0x8275, 0x827f, 0x027a, 0x826b, 0x026e, 0x0264, 0x8261,
0x0220, 0x8225, 0x822f, 0x022a, 0x823b, 0x023e, 0x0234, 0x8231,
0x8213, 0x0216, 0x021c, 0x8219, 0x0208, 0x820d, 0x8207, 0x0202
};


/* Fold a single byte into the running CRC-8 stored at *crc. */
void FLAC__crc8_update(const FLAC__byte data, FLAC__uint8 *crc)
{
	/* the table is indexed by the current CRC combined with the new byte */
	const unsigned slot = *crc ^ data;

	*crc = FLAC__crc8_table[slot];
}

/* Fold 'len' bytes starting at 'data' into the running CRC-8 stored at *crc.
 * With len == 0 nothing is read and *crc is left untouched.
 */
void FLAC__crc8_update_block(const FLAC__byte *data, unsigned len, FLAC__uint8 *crc)
{
	unsigned pos;

	/* *crc is written back after every byte, exactly one table step each */
	for(pos = 0; pos < len; pos++)
		*crc = FLAC__crc8_table[*crc ^ data[pos]];
}

/* Compute the CRC-8 (initial value 0) of the 'len' bytes starting at 'data'
 * and return it.  An empty block yields 0.
 */
FLAC__uint8 FLAC__crc8(const FLAC__byte *data, unsigned len)
{
	FLAC__uint8 running = 0;
	unsigned pos;

	for(pos = 0; pos < len; pos++)
		running = FLAC__crc8_table[running ^ data[pos]];

	return running;
}

/* Compute the CRC-16 (initial value 0) of the 'len' bytes starting at 'data'.
 * The result is always confined to the low 16 bits; an empty block yields 0.
 */
unsigned FLAC__crc16(const FLAC__byte *data, unsigned len)
{
	unsigned running = 0;
	unsigned pos;

	for(pos = 0; pos < len; pos++) {
		/* high byte of the running CRC combined with the next input byte */
		const unsigned slot = (running >> 8) ^ data[pos];

		running = ((running << 8) ^ FLAC__crc16_table[slot]) & 0xffff;
	}

	return running;
}

+ 0
- 568
Frameworks/FLAC/flac-1.2.1/src/libFLAC/ia32/bitreader_asm.nasm View File

@@ -1,568 +0,0 @@
; vim:filetype=nasm ts=8

; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "nasm.h"

data_section

cextern FLAC__crc16_table ; unsigned FLAC__crc16_table[256];
cextern bitreader_read_from_client_ ; FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br);

cglobal FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap

code_section


; **********************************************************************
;
; FLAC__bool FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
;
; Decodes 'nvals' Rice-coded signed integers from the bit reader 'br' into
; vals[].  Each value is read as a unary part (a run of 0 bits ended by a 1
; "stop" bit) followed by a binary part of 'parameter' bits; the combined
; unsigned value 'uval' is then mapped to signed as (uval >> 1) ^ -(uval & 1).
;
; In:    all four arguments are read from the stack (see the [esp + N] keys).
; Out:   eax = 1 on success (including nvals == 0), or 0 as soon as
;        bitreader_read_from_client_() returns 0.
; Saves: ebp, ebx, esi, edi are pushed on entry and restored at .end.
; Stack: one 4-byte local, 'ucbits' (count of unconsumed bits in the buffer).
; Side effects: br->consumed_words and br->consumed_bits are written back
;        before every refill call and at .finished; br->read_crc (and
;        br->crc16_align) are updated each time a whole word is consumed.
;
; Some details like assertions and other checking are performed by the caller.
ALIGN 16
cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap

;ASSERT(0 != br);
;ASSERT(0 != br->buffer);
; WATCHOUT: code only works if sizeof(brword)==32; we can make things much faster with this assertion
;ASSERT(FLAC__BITS_PER_WORD == 32);
;ASSERT(parameter < 32);
; the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it

;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
;; [esp + 16] unsigned parameter
;; [esp + 12] unsigned nvals
;; [esp + 8] int vals[]
;; [esp + 4] FLAC__BitReader *br
mov eax, [esp + 12] ; if(nvals == 0)
test eax, eax
ja .nvals_gt_0 ; (test clears CF, so 'ja' here is equivalent to 'jnz')
mov eax, 1 ; return true;
ret

.nvals_gt_0:
push ebp
push ebx
push esi
push edi
sub esp, 4
;; [esp + 36] unsigned parameter
;; [esp + 32] unsigned nvals
;; [esp + 28] int vals[]
;; [esp + 24] FLAC__BitReader *br
;; [esp] ucbits
mov ebp, [esp + 24] ; ebp <- br (br->buffer is the field at offset 0, i.e. [ebp])
mov esi, [ebp + 16] ; esi <- br->consumed_words (aka 'cwords' in the C version)
mov ecx, [ebp + 20] ; ecx <- br->consumed_bits (aka 'cbits' in the C version)
xor edi, edi ; edi <- 0 'uval'
;; ecx cbits
;; esi cwords
;; edi uval
;; ebp br
;; [ebp] br->buffer
;; [ebp + 8] br->words
;; [ebp + 12] br->bytes
;; [ebp + 16] br->consumed_words
;; [ebp + 20] br->consumed_bits
;; [ebp + 24] br->read_crc
;; [ebp + 28] br->crc16_align

; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
mov eax, [ebp + 8] ; eax <- br->words
sub eax, esi ; eax <- br->words-cwords
shl eax, 2 ; eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD
add eax, [ebp + 12] ; eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
shl eax, 3 ; eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
sub eax, ecx ; eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
mov [esp], eax ; ucbits <- eax

ALIGN 16
.val_loop: ; while(1) {

;
; read unary part
;
.unary_loop: ; while(1) {
;; ecx cbits
;; esi cwords
;; edi uval
;; ebp br
cmp esi, [ebp + 8] ; while(cwords < br->words) /* if we've not consumed up to a partial tail word... */
jae near .c1_next1
.c1_loop: ; {
mov ebx, [ebp]
mov eax, [ebx + 4*esi] ; b = br->buffer[cwords]
mov edx, eax ; edx = br->buffer[cwords] (saved for later use)
shl eax, cl ; b = br->buffer[cwords] << cbits
test eax, eax ; (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
jz near .c1_next2 ; if(b) {
; bsr yields the index of the highest set bit; (~index) & 31 == 31 - index
bsr ebx, eax
not ebx
and ebx, 31 ; ebx = 'i' = # of leading 0 bits in 'b' (eax)
add ecx, ebx ; cbits += i;
add edi, ebx ; uval += i;
add ecx, byte 1 ; cbits++; /* skip over stop bit */
test ecx, ~31 ; ZF is set iff cbits < 32, i.e. the current word is not yet used up
jz near .break1 ; if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
; crc16_update_word_(br, br->buffer[cwords]);
push edi ; [need more registers]
bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
mov ecx, [ebp + 28] ; ecx <- br->crc16_align
mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc)
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
mov edi, _FLAC__crc16_table
%else
mov edi, FLAC__crc16_table
%endif
;; eax (ax) crc a.k.a. br->read_crc
;; ebx (bl) intermediate result index into FLAC__crc16_table[]
;; ecx br->crc16_align
;; edx byteswapped brword to CRC
;; esi cwords
;; edi unsigned FLAC__crc16_table[]
;; ebp br
test ecx, ecx ; switch(br->crc16_align) ...
jnz .c0b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c0b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
movzx ebx, dl
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
.c0b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff))
movzx ebx, dh
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
shr edx, 16
.c0b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff))
movzx ebx, dl
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
.c0b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff)
movzx ebx, dh
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
movzx eax, ax ; keep only the low 16 bits of the accumulated crc
mov [ebp + 24], eax ; br->read_crc <- crc
pop edi

add esi, byte 1 ; cwords++;
xor ecx, ecx ; cbits = 0;
; }
jmp near .break1 ; goto break1;
;; this section relocated out of the way for performance
;; (crc16_align != 0: clear it, then enter the CRC byte chain part-way through)
.c0b4:
mov [ebp + 28], dword 0 ; br->crc16_align <- 0
cmp ecx, 8
je .c0b1
shr edx, 16
cmp ecx, 16
je .c0b2
jmp .c0b3

;; this section relocated out of the way for performance
;; (crc16_align != 0: clear it, then enter the CRC byte chain part-way through)
.c1b4:
mov [ebp + 28], dword 0 ; br->crc16_align <- 0
cmp ecx, 8
je .c1b1
shr edx, 16
cmp ecx, 16
je .c1b2
jmp .c1b3

.c1_next2: ; } else {
;; ecx cbits
;; edx current brword 'b'
;; esi cwords
;; edi uval
;; ebp br
add edi, 32
sub edi, ecx ; uval += FLAC__BITS_PER_WORD - cbits;
; crc16_update_word_(br, br->buffer[cwords]);
push edi ; [need more registers]
bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
mov ecx, [ebp + 28] ; ecx <- br->crc16_align
mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc)
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
mov edi, _FLAC__crc16_table
%else
mov edi, FLAC__crc16_table
%endif
;; eax (ax) crc a.k.a. br->read_crc
;; ebx (bl) intermediate result index into FLAC__crc16_table[]
;; ecx br->crc16_align
;; edx byteswapped brword to CRC
;; esi cwords
;; edi unsigned FLAC__crc16_table[]
;; ebp br
test ecx, ecx ; switch(br->crc16_align) ...
jnz .c1b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c1b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
movzx ebx, dl
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
.c1b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff))
movzx ebx, dh
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
shr edx, 16
.c1b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff))
movzx ebx, dl
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
.c1b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff)
movzx ebx, dh
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
movzx eax, ax ; keep only the low 16 bits of the accumulated crc
mov [ebp + 24], eax ; br->read_crc <- crc
pop edi

add esi, byte 1 ; cwords++;
xor ecx, ecx ; cbits = 0;
; /* didn't find stop bit yet, have to keep going... */
; }

cmp esi, [ebp + 8] ; } while(cwords < br->words) /* if we've not consumed up to a partial tail word... */
jb near .c1_loop

.c1_next1:
; at this point we've eaten up all the whole words; have to try
; reading through any tail bytes before calling the read callback.
; this is a repeat of the above logic adjusted for the fact we
; don't have a whole word. note though if the client is feeding
; us data a byte at a time (unlikely), br->consumed_bits may not
; be zero.
;; ecx cbits
;; esi cwords
;; edi uval
;; ebp br
mov edx, [ebp + 12] ; edx <- br->bytes
test edx, edx
jz .read1 ; if(br->bytes) { [NOTE: this case is rare so it doesn't have to be all that fast ]
mov ebx, [ebp]
shl edx, 3 ; edx <- const unsigned end = br->bytes * 8;
mov eax, [ebx + 4*esi] ; b = br->buffer[cwords]
xchg edx, ecx ; [edx <- cbits , ecx <- end]
mov ebx, 0xffffffff ; ebx <- FLAC__WORD_ALL_ONES
shr ebx, cl ; ebx <- FLAC__WORD_ALL_ONES >> end
not ebx ; ebx <- ~(FLAC__WORD_ALL_ONES >> end)
xchg edx, ecx ; [edx <- end , ecx <- cbits]
and eax, ebx ; b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end));
shl eax, cl ; b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
test eax, eax ; (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
jz .c1_next3 ; if(b) {
bsr ebx, eax
not ebx
and ebx, 31 ; ebx = 'i' = # of leading 0 bits in 'b' (eax)
add ecx, ebx ; cbits += i;
add edi, ebx ; uval += i;
add ecx, byte 1 ; cbits++; /* skip over stop bit */
jmp short .break1 ; goto break1;
.c1_next3: ; } else {
sub edi, ecx
add edi, edx ; uval += end - cbits;
add ecx, edx ; cbits += end
; /* didn't find stop bit yet, have to keep going... */
; }
; }
.read1:
; flush registers and read; bitreader_read_from_client_() does
; not touch br->consumed_bits at all but we still need to set
; it in case it fails and we have to return false.
;; ecx cbits
;; esi cwords
;; edi uval
;; ebp br
mov [ebp + 16], esi ; br->consumed_words = cwords;
mov [ebp + 20], ecx ; br->consumed_bits = cbits;
push ecx ; /* save */
push ebp ; /* push br argument */
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
call _bitreader_read_from_client_
%else
call bitreader_read_from_client_
%endif
pop edx ; /* discard, unused */
pop ecx ; /* restore */
mov esi, [ebp + 16] ; cwords = br->consumed_words;
; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
mov ebx, [ebp + 8] ; ebx <- br->words
sub ebx, esi ; ebx <- br->words-cwords
shl ebx, 2 ; ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
add ebx, [ebp + 12] ; ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
shl ebx, 3 ; ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
sub ebx, ecx ; ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
add ebx, edi ; ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval
; + uval to offset our count by the # of unary bits already
; consumed before the read, because we will add these back
; in all at once at break1
mov [esp], ebx ; ucbits <- ebx
test eax, eax ; if(!bitreader_read_from_client_(br))
jnz near .unary_loop
jmp .end ; return false; /* eax (the return value) is already 0 */
; } /* end while(1) unary part */

ALIGN 16
.break1:
;; ecx cbits
;; esi cwords
;; edi uval
;; ebp br
;; [esp] ucbits
sub [esp], edi ; ucbits -= uval;
sub dword [esp], byte 1 ; ucbits--; /* account for stop bit */

;
; read binary part
;
mov ebx, [esp + 36] ; ebx <- parameter
test ebx, ebx ; if(parameter) {
jz near .break2
.read2:
cmp [esp], ebx ; while(ucbits < parameter) {
jae .c2_next1
; flush registers and read; bitreader_read_from_client_() does
; not touch br->consumed_bits at all but we still need to set
; it in case it fails and we have to return false.
mov [ebp + 16], esi ; br->consumed_words = cwords;
mov [ebp + 20], ecx ; br->consumed_bits = cbits;
push ecx ; /* save */
push ebp ; /* push br argument */
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
call _bitreader_read_from_client_
%else
call bitreader_read_from_client_
%endif
pop edx ; /* discard, unused */
pop ecx ; /* restore */
mov esi, [ebp + 16] ; cwords = br->consumed_words;
; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
mov edx, [ebp + 8] ; edx <- br->words
sub edx, esi ; edx <- br->words-cwords
shl edx, 2 ; edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
add edx, [ebp + 12] ; edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
shl edx, 3 ; edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
sub edx, ecx ; edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
mov [esp], edx ; ucbits <- edx
test eax, eax ; if(!bitreader_read_from_client_(br))
jnz .read2
jmp .end ; return false; /* eax (the return value) is already 0 */
; }
.c2_next1:
;; ebx parameter
;; ecx cbits
;; esi cwords
;; edi uval
;; ebp br
;; [esp] ucbits
cmp esi, [ebp + 8] ; if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
jae near .c2_next2
test ecx, ecx ; if(cbits) {
jz near .c2_next3 ; /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
mov eax, 32
mov edx, [ebp]
sub eax, ecx ; const unsigned n = FLAC__BITS_PER_WORD - cbits;
mov edx, [edx + 4*esi] ; const brword word = br->buffer[cwords];
cmp ebx, eax ; if(parameter < n) {
jae .c2_next4
; uval <<= parameter;
; uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter);
shl edx, cl
xchg ebx, ecx
shld edi, edx, cl
add ebx, ecx ; cbits += parameter;
xchg ebx, ecx ; ebx <- parameter, ecx <- cbits
jmp .break2 ; goto break2;
; }
.c2_next4:
; uval <<= n;
; uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
%if 1
rol edx, cl ; @@@@@@OPT: may be faster to use rol to save edx so we can restore it for CRC'ing
; @@@@@@OPT: or put parameter in ch instead and free up ebx completely again
%else
shl edx, cl
%endif
xchg eax, ecx
shld edi, edx, cl
xchg eax, ecx
%if 1
ror edx, cl ; restored.
%else
mov edx, [ebp]
mov edx, [edx + 4*esi]
%endif
; crc16_update_word_(br, br->buffer[cwords]);
push edi ; [need more registers]
push ebx ; [need more registers]
push eax ; [need more registers]
bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
mov ecx, [ebp + 28] ; ecx <- br->crc16_align
mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc)
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
mov edi, _FLAC__crc16_table
%else
mov edi, FLAC__crc16_table
%endif
;; eax (ax) crc a.k.a. br->read_crc
;; ebx (bl) intermediate result index into FLAC__crc16_table[]
;; ecx br->crc16_align
;; edx byteswapped brword to CRC
;; esi cwords
;; edi unsigned FLAC__crc16_table[]
;; ebp br
test ecx, ecx ; switch(br->crc16_align) ...
jnz .c2b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c2b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
movzx ebx, dl
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
.c2b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff))
movzx ebx, dh
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
shr edx, 16
.c2b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff))
movzx ebx, dl
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
.c2b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff)
movzx ebx, dh
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
movzx eax, ax ; keep only the low 16 bits of the accumulated crc
mov [ebp + 24], eax ; br->read_crc <- crc
pop eax
pop ebx
pop edi
add esi, byte 1 ; cwords++;
mov ecx, ebx
sub ecx, eax ; cbits = parameter - n;
jz .break2 ; if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
; uval <<= cbits;
; uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
mov eax, [ebp]
mov eax, [eax + 4*esi]
shld edi, eax, cl
; }
jmp .break2 ; goto break2;

;; this section relocated out of the way for performance
;; (crc16_align != 0: clear it, then enter the CRC byte chain part-way through)
.c2b4:
mov [ebp + 28], dword 0 ; br->crc16_align <- 0
cmp ecx, 8
je .c2b1
shr edx, 16
cmp ecx, 16
je .c2b2
jmp .c2b3

.c2_next3: ; } else {
mov ecx, ebx ; cbits = parameter;
; uval <<= cbits;
; uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
mov eax, [ebp]
mov eax, [eax + 4*esi]
shld edi, eax, cl
jmp .break2 ; goto break2;
; }
.c2_next2: ; } else {
; in this case we're starting our read at a partial tail word;
; the reader has guaranteed that we have at least 'parameter'
; bits available to read, which makes this case simpler.
; uval <<= parameter;
; if(cbits) {
; /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
; uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter);
; cbits += parameter;
; goto break2;
; } else {
; cbits = parameter;
; uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
; goto break2;
; }
; the above is much shorter in assembly:
mov eax, [ebp]
mov eax, [eax + 4*esi] ; eax <- br->buffer[cwords]
shl eax, cl ; eax <- br->buffer[cwords] << cbits
add ecx, ebx ; cbits += parameter
xchg ebx, ecx ; ebx <- cbits, ecx <- parameter
shld edi, eax, cl ; uval <<= parameter <<< 'parameter' bits of tail word
xchg ebx, ecx ; ebx <- parameter, ecx <- cbits
; }
; }
.break2:
sub [esp], ebx ; ucbits -= parameter;

;
; compose the value
;
; (fold the unsigned 'uval' to signed: even -> uval/2, odd -> ~(uval/2))
mov ebx, [esp + 28] ; ebx <- vals
mov edx, edi ; edx <- uval
and edi, 1 ; edi <- uval & 1
shr edx, 1 ; edx <- uval >> 1
neg edi ; edi <- -(int)(uval & 1)
xor edx, edi ; edx <- (uval >> 1 ^ -(int)(uval & 1))
mov [ebx], edx ; *vals <- edx
sub dword [esp + 32], byte 1 ; --nvals;
jz .finished ; if(nvals == 0) /* jump to finish */
xor edi, edi ; uval = 0;
add dword [esp + 28], 4 ; ++vals
jmp .val_loop ; }

.finished:
mov [ebp + 16], esi ; br->consumed_words = cwords;
mov [ebp + 20], ecx ; br->consumed_bits = cbits;
mov eax, 1
.end:
; common exit: eax already holds the return value (1 from .finished, 0 on a failed read)
add esp, 4
pop edi
pop esi
pop ebx
pop ebp
ret

end

%ifdef OBJ_FORMAT_elf
section .note.GNU-stack noalloc
%endif

+ 0
- 159
Frameworks/FLAC/flac-1.2.1/src/libFLAC/ia32/stream_encoder_asm.nasm View File

@@ -1,159 +0,0 @@
; vim:filetype=nasm ts=8

; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "nasm.h"

data_section

cglobal precompute_partition_info_sums_32bit_asm_ia32_

code_section


; **********************************************************************
;
; Assembly version (exported as precompute_partition_info_sums_32bit_asm_ia32_) of:
; void precompute_partition_info_sums_32bit_(
; const FLAC__int32 residual[],
; FLAC__uint64 abs_residual_partition_sums[],
; unsigned blocksize,
; unsigned predictor_order,
; unsigned min_partition_order,
; unsigned max_partition_order
; )
;
; Step 1: for each of the (1 << max_partition_order) partitions, sum
; abs(residual[i]) over the partition and store it in
; abs_residual_partition_sums[partition].  The sum is accumulated in a
; 32-bit register and the high dword of each stored entry is written as 0.
; Step 2: for each lower partition order down to min_partition_order, append
; a new run of sums, each being the 64-bit sum of two adjacent entries
; of the previous (higher) order.
;
; In:    all six arguments are read from the stack (see the [esp + N] keys).
; Out:   no return value is set; results are written to abs_residual_partition_sums[].
; Saves: ebp, ebx, esi, edi are pushed on entry and restored before ret.
; Stack: 8 bytes of locals ('partitions' and 'default_partition_samples').
;
ALIGN 16
cident precompute_partition_info_sums_32bit_asm_ia32_

;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
;; [esp + 4] const FLAC__int32 residual[]
;; [esp + 8] FLAC__uint64 abs_residual_partition_sums[]
;; [esp + 12] unsigned blocksize
;; [esp + 16] unsigned predictor_order
;; [esp + 20] unsigned min_partition_order
;; [esp + 24] unsigned max_partition_order
push ebp
push ebx
push esi
push edi
sub esp, 8
;; [esp + 28] const FLAC__int32 residual[]
;; [esp + 32] FLAC__uint64 abs_residual_partition_sums[]
;; [esp + 36] unsigned blocksize
;; [esp + 40] unsigned predictor_order
;; [esp + 44] unsigned min_partition_order
;; [esp + 48] unsigned max_partition_order
;; [esp] partitions
;; [esp + 4] default_partition_samples

mov ecx, [esp + 48]
mov eax, 1
shl eax, cl
mov [esp], eax ; [esp] <- partitions = 1u << max_partition_order;
mov eax, [esp + 36]
shr eax, cl
mov [esp + 4], eax ; [esp + 4] <- default_partition_samples = blocksize >> max_partition_order;

;
; first do max_partition_order
;
mov edi, [esp + 4]
sub edi, [esp + 40] ; edi <- end = (unsigned)(-(int)predictor_order) + default_partition_samples
xor esi, esi ; esi <- residual_sample = 0
xor ecx, ecx ; ecx <- partition = 0
mov ebp, [esp + 28] ; ebp <- residual[]
xor ebx, ebx ; ebx <- abs_residual_partition_sum = 0;
; note we put the updates to 'end' and 'abs_residual_partition_sum' at the end of loop0 and in the initialization above so we could align loop0 and loop1
ALIGN 16
.loop0: ; for(partition = residual_sample = 0; partition < partitions; partition++) {
.loop1: ; for( ; residual_sample < end; residual_sample++)
mov eax, [ebp + esi * 4]
; branch-free abs(): edx = sign mask (0 or -1); (eax ^ edx) - edx
cdq
xor eax, edx
sub eax, edx
add ebx, eax ; abs_residual_partition_sum += abs(residual[residual_sample]);
;@@@@@@ check overflow flag and abort here?
add esi, byte 1
cmp esi, edi ; /* since the loop will always run at least once, we can put the loop check down here */
jb .loop1
.next1:
add edi, [esp + 4] ; end += default_partition_samples;
mov eax, [esp + 32]
mov [eax + ecx * 8], ebx ; abs_residual_partition_sums[partition] = abs_residual_partition_sum;
mov [eax + ecx * 8 + 4], dword 0 ; high dword of the 64-bit entry is always 0 here
xor ebx, ebx ; abs_residual_partition_sum = 0;
add ecx, byte 1
cmp ecx, [esp] ; /* since the loop will always run at least once, we can put the loop check down here */
jb .loop0
.next0: ; }
;
; now merge partitions for lower orders
;
mov esi, [esp + 32] ; esi <- abs_residual_partition_sums[from_partition==0];
mov eax, [esp]
lea edi, [esi + eax * 8] ; edi <- abs_residual_partition_sums[to_partition==partitions];
mov ecx, [esp + 48]
sub ecx, byte 1 ; ecx <- partition_order = (int)max_partition_order - 1;
ALIGN 16
.loop2: ; for(; partition_order >= (int)min_partition_order; partition_order--) {
cmp ecx, [esp + 44]
jl .next2 ; signed compare: partition_order can reach -1 when min_partition_order is 0
mov edx, 1
shl edx, cl ; const unsigned partitions = 1u << partition_order;
ALIGN 16
.loop3: ; for(i = 0; i < partitions; i++) {
; 64-bit add of two adjacent entries: low dwords with add, high dwords with adc
mov eax, [esi]
mov ebx, [esi + 4]
add eax, [esi + 8]
adc ebx, [esi + 12]
mov [edi], eax
mov [edi + 4], ebx ; a_r_p_s[to_partition] = a_r_p_s[from_partition] + a_r_p_s[from_partition+1];
add esi, byte 16
add edi, byte 8
sub edx, byte 1
jnz .loop3 ; }
sub ecx, byte 1
jmp .loop2 ; }
.next2:

add esp, 8
pop edi
pop esi
pop ebx
pop ebp
ret

end

%ifdef OBJ_FORMAT_elf
section .note.GNU-stack noalloc
%endif

+ 0
- 429
Frameworks/FLAC/flac-1.2.1/src/libFLAC/ppc/as/lpc_asm.s View File

@@ -1,429 +0,0 @@
; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2004,2005,2006,2007 Josh Coalson
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.text
.align 2
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16

.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8

_FLAC__lpc_restore_signal_asm_ppc_altivec_16:
; Rebuilds a signal from its LPC residual: each pass of the main loop (L1200)
; computes *residual + (sum >> lp_quantization) and stores one 32-bit word to
; data[], where sum is the dot product of the coefficient vectors with the
; most recent history words.
;
; Arguments on entry:
; r3: residual[]
; r4: data_len
; r5: qlp_coeff[]
; r6: order
; r7: lp_quantization
; r8: data[]
;
; Register use after setup:
; r0: VRSAVE value read on entry, written back at L1400
; r4: data + data_len*4 (end pointer; r8 is compared against it)
; r9: 16-byte-aligned scratch address below r1, used to move GPR values
; into vector registers via stw + lvewx
; r10: qlp_coeff + order*4 while loading coefficients, then a small offset
; r11: walks backwards from data[] while loading history
; r31: scratch; stored at -4(r1) on entry and reloaded at L1400
; ctr: entry point into the unrolled chain L1300..L1307
; v0-v7: coefficient vectors (word-reversed), v8-v15: history vectors
; v16-v19: alignment/mask vectors during setup, then zero / shift /
; lp_quantization vectors (see L1199)
; v20-v21: per-sample temporaries

; see src/libFLAC/lpc.c:FLAC__lpc_restore_signal()
; this is a PowerPC/Altivec assembly version which requires bps<=16 (or actual
; bps<=15 for mid-side coding, since that uses an extra bit)

; these should be fast; the inner loop is unrolled (it takes no more than
; 3*(order%4) instructions, all of which are arithmetic), and all of the
; coefficients and all relevant history stay in registers, so the outer loop
; has only one load from memory (the residual)
; NOTE(review): each unrolled stage below is 3 instructions per extra vector of
; four coefficients, so "order%4" above probably means "order/4" -- confirm

; I have not yet run this through simg4, so there may be some avoidable stalls,
; and there may be a somewhat more clever way to do the outer loop

; the branch mechanism may prevent dynamic loading; I still need to examine
; this issue, and there may be a more elegant method
; (the branch targets L1300..L1307 are loaded as absolute addresses via
; hi16()/lo16() before L1199)

stmw r31,-4(r1) ; save r31 just below r1; r1 itself is not moved

addi r9,r1,-28
li r31,0xf
andc r9,r9,r31 ; r9 = (r1-28) & ~0xf, for quadword-aligned stack data

slwi r6,r6,2 ; adjust for word size: r6 = order*4
slwi r4,r4,2 ; r4 = data_len*4
add r4,r4,r8 ; r4 = data+data_len

mfspr r0,256 ; cache old vrsave
addis r31,0,hi16(0xfffffc00)
ori r31,r31,lo16(0xfffffc00) ; 22 most significant bits set
mtspr 256,r31 ; declare VRs in vrsave
; NOTE(review): presumably one bit per register for v0-v21, which are the
; vector registers this routine touches -- confirm against the VRSAVE layout

cmplw cr0,r8,r4 ; i<data_len
bc 4,0,L1400 ; leave immediately unless data < data+data_len

; load coefficients into v0-v7 and initial history into v8-v15
; First build v18, the mask that is ANDed onto the last coefficient vector
; loaded (the vand before each "b L1199").
; NOTE(review): the mask width comes from the alignment of data[], not from
; order; presumably the caller guarantees (data & 0xf) == (order*4) & 0xf,
; e.g. data = quadword-aligned base + order -- confirm against the caller
li r31,0xf
and r31,r8,r31 ; r31 = data & 0xf (byte offset of data within its quadword)
li r11,16
subf r31,r31,r11 ; r31 = 16 - (data & 0xf)
slwi r31,r31,3 ; convert to bits for vsro
li r10,-4
stw r31,-4(r9) ; pass the count through the scratch word at r9-4
lvewx v0,r10,r9 ; reload that same word (r9 + r10 = r9-4) into v0
vspltisb v18,-1 ; v18 = all ones
vsro v18,v18,v0 ; v18: mask vector

; Build in v0 a permute control that reverses the order of the four words of
; a vector (used once, to word-reverse v17 below).
li r31,0x8
lvsl v0,0,r31
vsldoi v0,v0,v0,12
li r31,0xc
lvsl v1,0,r31
vspltisb v2,0
vspltisb v3,-1
vmrglw v2,v2,v3 ; v2 = word pattern 0,-1,0,-1: select mask for vsel
vsel v0,v1,v0,v2 ; v0: reversal permutation vector

add r10,r5,r6 ; r10 = qlp_coeff + order*4 (end of coefficients)
lvsl v17,0,r5 ; v17: coefficient alignment permutation vector
vperm v17,v17,v17,v0 ; v17: reversal coefficient alignment permutation vector

mr r11,r8
lvsl v16,0,r11 ; v16: history alignment permutation vector

; First stage: coefficients 0-3 into v0, and into v8 the 16 bytes that end
; just before data[0] (the four newest history words).
lvx v0,0,r5
addi r5,r5,16
lvx v1,0,r5
vperm v0,v0,v1,v17
lvx v8,0,r11
addi r11,r11,-16
lvx v9,0,r11
vperm v8,v9,v8,v16
cmplw cr0,r5,r10
bc 12,0,L1101 ; more coefficients remain (r5 < r10): load the next vector
vand v0,v0,v18 ; last vector: apply the mask
addis r31,0,hi16(L1307)
ori r31,r31,lo16(L1307) ; r31 = L1307: no extra multiply stages needed
b L1199

; L1101-L1107 repeat the stage above for the next four coefficients and the
; next-older four history words. Each stage that turns out to be the last one
; masks its coefficient vector and selects an entry label one stage earlier
; in the chain L1300..L1307.
L1101:
addi r5,r5,16
lvx v2,0,r5
vperm v1,v1,v2,v17
addi r11,r11,-16
lvx v10,0,r11
vperm v9,v10,v9,v16
cmplw cr0,r5,r10
bc 12,0,L1102
vand v1,v1,v18
addis r31,0,hi16(L1306)
ori r31,r31,lo16(L1306)
b L1199

L1102:
addi r5,r5,16
lvx v3,0,r5
vperm v2,v2,v3,v17
addi r11,r11,-16
lvx v11,0,r11
vperm v10,v11,v10,v16
cmplw cr0,r5,r10
bc 12,0,L1103
vand v2,v2,v18
addis r31,0,hi16(L1305)
ori r31,r31,lo16(L1305)
b L1199

L1103:
addi r5,r5,16
lvx v4,0,r5
vperm v3,v3,v4,v17
addi r11,r11,-16
lvx v12,0,r11
vperm v11,v12,v11,v16
cmplw cr0,r5,r10
bc 12,0,L1104
vand v3,v3,v18
addis r31,0,hi16(L1304)
ori r31,r31,lo16(L1304)
b L1199

L1104:
addi r5,r5,16
lvx v5,0,r5
vperm v4,v4,v5,v17
addi r11,r11,-16
lvx v13,0,r11
vperm v12,v13,v12,v16
cmplw cr0,r5,r10
bc 12,0,L1105
vand v4,v4,v18
addis r31,0,hi16(L1303)
ori r31,r31,lo16(L1303)
b L1199

L1105:
addi r5,r5,16
lvx v6,0,r5
vperm v5,v5,v6,v17
addi r11,r11,-16
lvx v14,0,r11
vperm v13,v14,v13,v16
cmplw cr0,r5,r10
bc 12,0,L1106
vand v5,v5,v18
addis r31,0,hi16(L1302)
ori r31,r31,lo16(L1302)
b L1199

L1106:
addi r5,r5,16
lvx v7,0,r5
vperm v6,v6,v7,v17
addi r11,r11,-16
lvx v15,0,r11
vperm v14,v15,v14,v16
cmplw cr0,r5,r10
bc 12,0,L1107
vand v6,v6,v18
addis r31,0,hi16(L1301)
ori r31,r31,lo16(L1301)
b L1199

; Final stage: v7/v15 are the last register pair, so there is no further
; length check; v19 is only a temporary here and is reloaded after L1199.
L1107:
addi r5,r5,16
lvx v19,0,r5
vperm v7,v7,v19,v17
addi r11,r11,-16
lvx v19,0,r11
vperm v15,v19,v15,v16
vand v7,v7,v18
addis r31,0,hi16(L1300)
ori r31,r31,lo16(L1300)

L1199:
mtctr r31 ; ctr = chosen entry point; bcctr at L1200 jumps there every sample

; set up invariant vectors
; (v16-v18 held setup-only values until here and are now reused)
vspltish v16,0 ; v16: zero vector

li r10,-12
lvsr v17,r10,r8 ; v17: result shift vector
lvsl v18,r10,r3 ; v18: residual shift back vector

li r10,-4
stw r7,-4(r9) ; pass lp_quantization through the scratch word at r9-4
lvewx v19,r10,r9 ; v19: lp_quantization vector

; Main loop: one output word per iteration.
L1200:
vmulosh v20,v0,v8 ; v20: sum vector
bcctr 20,0 ; unconditional jump to the entry point held in ctr

; Unrolled chain. Each stage multiplies one coefficient vector by its history
; vector, shifts that history vector left by one word (pulling in the first
; word of the next-newer vector), and adds the products into v20 with
; saturation. Execution falls through from the entry point down to L1307.
L1300:
vmulosh v21,v7,v15
vsldoi v15,v15,v14,4 ; increment history
vaddsws v20,v20,v21

L1301:
vmulosh v21,v6,v14
vsldoi v14,v14,v13,4
vaddsws v20,v20,v21

L1302:
vmulosh v21,v5,v13
vsldoi v13,v13,v12,4
vaddsws v20,v20,v21

L1303:
vmulosh v21,v4,v12
vsldoi v12,v12,v11,4
vaddsws v20,v20,v21

L1304:
vmulosh v21,v3,v11
vsldoi v11,v11,v10,4
vaddsws v20,v20,v21

L1305:
vmulosh v21,v2,v10
vsldoi v10,v10,v9,4
vaddsws v20,v20,v21

L1306:
vmulosh v21,v1,v9
vsldoi v9,v9,v8,4 ; uses v8 before the new sample is appended below
vaddsws v20,v20,v21

L1307:
vsumsws v20,v20,v16 ; v20[3]: sum
vsraw v20,v20,v19 ; v20[3]: sum >> lp_quantization

lvewx v21,0,r3 ; v21[n]: *residual
vperm v21,v21,v21,v18 ; v21[3]: *residual
vaddsws v20,v21,v20 ; v20[3]: *residual + (sum >> lp_quantization)
vsldoi v18,v18,v18,4 ; increment shift vector

vperm v21,v20,v20,v17 ; v21[n]: shift for storage
vsldoi v17,v17,v17,12 ; increment shift vector
stvewx v21,0,r8 ; store the new sample word at the current data pointer

vsldoi v20,v20,v20,12 ; rotate the new sample from word 3 to word 0
vsldoi v8,v8,v20,4 ; insert value onto history

addi r3,r3,4 ; next residual word
addi r8,r8,4 ; next data word
cmplw cr0,r8,r4 ; i<data_len
bc 12,0,L1200 ; loop while r8 < data+data_len

L1400:
mtspr 256,r0 ; restore old vrsave
lmw r31,-4(r1) ; reload the r31 value stored on entry
blr

_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8:
; r3: residual[]
; r4: data_len
; r5: qlp_coeff[]
; r6: order
; r7: lp_quantization
; r8: data[]

; see _FLAC__lpc_restore_signal_asm_ppc_altivec_16() above
; this version assumes order<=8; it uses fewer vector registers, which should
; save time in context switches, and has less code, which may improve
; instruction caching

stmw r31,-4(r1)

addi r9,r1,-28
li r31,0xf
andc r9,r9,r31 ; for quadword-aligned stack data

slwi r6,r6,2 ; adjust for word size
slwi r4,r4,2
add r4,r4,r8 ; r4 = data+data_len

mfspr r0,256 ; cache old vrsave
addis r31,0,hi16(0xffc00000)
ori r31,r31,lo16(0xffc00000)
mtspr 256,r31 ; declare VRs in vrsave

cmplw cr0,r8,r4 ; i<data_len
bc 4,0,L2400

; load coefficients into v0-v1 and initial history into v2-v3
li r31,0xf
and r31,r8,r31 ; r31: data%4
li r11,16
subf r31,r31,r11 ; r31: 4-(data%4)
slwi r31,r31,3 ; convert to bits for vsro
li r10,-4
stw r31,-4(r9)
lvewx v0,r10,r9
vspltisb v6,-1
vsro v6,v6,v0 ; v6: mask vector

li r31,0x8
lvsl v0,0,r31
vsldoi v0,v0,v0,12
li r31,0xc
lvsl v1,0,r31
vspltisb v2,0
vspltisb v3,-1
vmrglw v2,v2,v3
vsel v0,v1,v0,v2 ; v0: reversal permutation vector

add r10,r5,r6
lvsl v5,0,r5 ; v5: coefficient alignment permutation vector
vperm v5,v5,v5,v0 ; v5: reversal coefficient alignment permutation vector

mr r11,r8
lvsl v4,0,r11 ; v4: history alignment permutation vector

lvx v0,0,r5
addi r5,r5,16
lvx v1,0,r5
vperm v0,v0,v1,v5
lvx v2,0,r11
addi r11,r11,-16
lvx v3,0,r11
vperm v2,v3,v2,v4
cmplw cr0,r5,r10
bc 12,0,L2101