Mozzi  version 2015-05-11-20:23
sound synthesis library for Arduino
 All Classes Functions Typedefs Groups
cogl_sqrti.h
1 
18 #include "mozzi_fixmath.h"
19 
20 
21 //http://www.codecodex.com/wiki/Calculate_an_integer_square_root
22 //see Integer Square Roots by Jack W. Crenshaw, figure 2, http://www.embedded.com/electronics-blogs/programmer-s-toolbox/4219659/Integer-Square-Roots
23  typedef uint8_t uint8_t;
24  typedef unsigned short int uint16;
25  typedef unsigned long int uint32;
26 
27 
28 
29  uint32 // OR uint16 OR uint8_t
30  isqrt32 (uint32 n) // OR isqrt16 ( uint16 n ) OR isqrt8 ( uint8_t n ) - respectively [ OR overloaded as isqrt (uint16_t?? n) in C++ ]
31  {
32  register uint32 // OR register uint16 OR register uint8_t - respectively
33  root, remainder, place;
34 
35  root = 0;
36  remainder = n;
37  place = 0x40000000; // OR place = 0x4000; OR place = 0x40; - respectively
38 
39  while (place > remainder)
40  place = place >> 2;
41  while (place)
42  {
43  if (remainder >= root + place)
44  {
45  remainder = remainder - root - place;
46  root = root + (place << 1);
47  }
48  root = root >> 1;
49  place = place >> 2;
50  }
51  return root;
52  }
53 
54  //http://www.codecodex.com/wiki/Calculate_an_integer_square_root
55  uint16 // OR uint16 OR uint8_t
56  isqrt16 (uint16 n) // OR isqrt16 ( uint16 n ) OR isqrt8 ( uint8_t n ) - respectively [ OR overloaded as isqrt (uint16_t?? n) in C++ ]
57  {
58  register uint16 // OR register uint16 OR register uint8_t - respectively
59  root, remainder, place;
60 
61  root = 0;
62  remainder = n;
63  place = 0x4000; // OR place = 0x4000; OR place = 0x40; - respectively
64 
65  while (place > remainder)
66  place = place >> 2;
67  while (place)
68  {
69  if (remainder >= root + place)
70  {
71  remainder = remainder - root - place;
72  root = root + (place << 1);
73  }
74  root = root >> 1;
75  place = place >> 2;
76  }
77  return root;
78  }
79 
80 
81 
82 
83 
84 /*-- isqrt -----------------------------------------------------------------*/
85 
86 unsigned long isqrt(unsigned long n) {
87  unsigned long s, t;
88 
89 #define sqrtBit(k) \
90  t = s+(1UL<<(k-1)); t <<= k+1; if (n >= t) { n -= t; s |= 1UL<<k; }
91 
92  s = 0UL;
93 #ifdef __alpha
94  if (n >= 1UL<<62) { n -= 1UL<<62; s = 1UL<<31; }
95  sqrtBit(30); sqrtBit(29); sqrtBit(28); sqrtBit(27); sqrtBit(26);
96  sqrtBit(25); sqrtBit(24); sqrtBit(23); sqrtBit(22); sqrtBit(21);
97  sqrtBit(20); sqrtBit(19); sqrtBit(18); sqrtBit(17); sqrtBit(16);
98  sqrtBit(15);
99 #else
100  if (n >= 1UL<<30) { n -= 1UL<<30; s = 1UL<<15; }
101 #endif
102  sqrtBit(14); sqrtBit(13); sqrtBit(12); sqrtBit(11); sqrtBit(10);
103  sqrtBit(9); sqrtBit(8); sqrtBit(7); sqrtBit(6); sqrtBit(5);
104  sqrtBit(4); sqrtBit(3); sqrtBit(2); sqrtBit(1);
105  if (n > s<<1) s |= 1UL;
106 
107 #undef sqrtBit
108 
109  return s;
110 } /* end isqrt */
111 
112 
132 uint16_t SquareRoot(uint32_t a_nInput)
133 {
134  uint32_t op = a_nInput;
135  uint32_t res = 0;
136  uint32_t one = 1uL << 30; // The second-to-top bit is set: use 1u << 14 for uint16_t type; use 1uL<<30 for uint32_t type
137 
138 
139  // "one" starts at the highest power of four <= than the argument.
140  while (one > op)
141  {
142  one >>= 2;
143  }
144 
145  while (one != 0)
146  {
147  if (op >= res + one)
148  {
149  op = op - (res + one);
150  res = res + 2 * one;
151  }
152  res >>= 1;
153  one >>= 2;
154  }
155  return res;
156 }
157 
158 
159 
160 
161 int cogl_sqrti (int number)
162 {
163 
164  /* This is a fixed point implementation of the Quake III sqrt algorithm,
165  * described, for example, at
166  * http://www.codemaestro.com/reviews/review00000105.html
167  *
168  * While the original QIII is extremely fast, the use of floating division
169  * and multiplication makes it perform very on arm processors without FPU.
170  *
171  * The key to successfully replacing the floating point operations with
172  * fixed point is in the choice of the fixed point format. The QIII
173  * algorithm does not calculate the square root, but its reciprocal ('y'
174  * below), which is only at the end turned to the inverse value. In order
175  * for the algorithm to produce satisfactory results, the reciprocal value
176  * must be represented with sufficient precission; the 16.16 we use
177  * elsewhere in clutter is not good enough, and 10.22 is used instead.
178  */
179  //CoglFixed x;
180  long x;
181  uint32_t y_1; /* 10.22 fixed point */
182  uint32_t f = 0x600000; /* '1.5' as 10.22 fixed */
183 
184  union
185  {
186  float f;
187  uint32_t i;
188  } flt, flt2;
189 
190  flt.f = number;
191 
192  x = Q15n0_to_Q15n16(number) / 2;
193 
194  /* The QIII initial estimate */
195  flt.i = 0x5f3759df - ( flt.i >> 1 );
196 
197  /* Now, we convert the float to 10.22 fixed. We exploit the mechanism
198  * described at http://www.d6.com/users/checker/pdfs/gdmfp.pdf.
199  *
200  * We want 22 bit fraction; a single precission float uses 23 bit
201  * mantisa, so we only need to add 2^(23-22) (no need for the 1.5
202  * multiplier as we are only dealing with positive numbers).
203  *
204  * Note: we have to use two separate variables here -- for some reason,
205  * if we try to use just the flt variable, gcc on ARM optimises the whole
206  * addition out, and it all goes pear shape, since without it, the bits
207  * in the float will not be correctly aligned.
208  */
209  flt2.f = flt.f + 2.0;
210  flt2.i &= 0x7FFFFF;
211 
212  /* Now we correct the estimate */
213  y_1 = (flt2.i >> 11) * (flt2.i >> 11);
214  y_1 = (y_1 >> 8) * (x >> 8);
215 
216  y_1 = f - y_1;
217  flt2.i = (flt2.i >> 11) * (y_1 >> 11);
218 
219  /* If the original argument is less than 342, we do another
220  * iteration to improve precission (for arguments >= 342, the single
221  * iteration produces generally better results).
222  */
223  if (x < 171)
224  {
225  y_1 = (flt2.i >> 11) * (flt2.i >> 11);
226  y_1 = (y_1 >> 8) * (x >> 8);
227 
228  y_1 = f - y_1;
229  flt2.i = (flt2.i >> 11) * (y_1 >> 11);
230  }
231 
232  /* Invert, round and convert from 10.22 to an integer
233  * 0x1e3c68 is a magical rounding constant that produces slightly
234  * better results than 0x200000.
235  */
236  return (number * flt2.i + 0x1e3c68) >> 22;
237 
238 }
Q15n16 Q15n0_to_Q15n16(Q15n0 a)
Convert Q15n0 int16_t to Q15n16 fix.