Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_fft_float32.c
1/*
2 * Copyright 2013-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/* license of Kiss FFT */
29/*
30Copyright (c) 2003-2010, Mark Borgerding
31
32All rights reserved.
33
34Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35
36 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39
40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41*/
42
43/*
44 * NE10 Library : dsp/NE10_fft_float32.c
45 */
46
47#include "NE10_types.h"
48#include "NE10_macros.h"
49#include "NE10_fft.h"
50
51static void ne10_mixed_radix_butterfly_float32_c (ne10_fft_cpx_float32_t * Fout,
53 ne10_int32_t * factors,
54 ne10_fft_cpx_float32_t * twiddles,
56{
57 ne10_int32_t fstride, mstride, N;
58 ne10_int32_t fstride1;
59 ne10_int32_t f_count, m_count;
60 ne10_int32_t stage_count;
61
62 ne10_fft_cpx_float32_t scratch_in[8];
63 ne10_fft_cpx_float32_t scratch_out[8];
64 ne10_fft_cpx_float32_t scratch[16];
65 ne10_fft_cpx_float32_t scratch_tw[6];
66
67 ne10_fft_cpx_float32_t *Fin1, *Fin2, *Fout1, *Fout2;
68 ne10_fft_cpx_float32_t *Fout_ls = Fout;
70 ne10_fft_cpx_float32_t *tw, *tw1, *tw2;
71 const ne10_float32_t TW_81 = 0.70710678;
72 const ne10_float32_t TW_81N = -0.70710678;
73
74 // init fstride, mstride, N, tw
75 stage_count = factors[0];
76 fstride = factors[1];
77 mstride = factors[ (stage_count << 1) - 1 ];
78 N = factors[ stage_count << 1 ]; // radix
79 tw = twiddles;
80
81 // the first stage
82 Fin1 = Fin;
83 Fout1 = Fout;
84 if (N == 2) // length of FFT is 2^n (n is odd)
85 {
86 // radix 8
87 N = fstride >> 1; // 1/4 of length of FFT
88 fstride1 = fstride >> 2;
89
90 Fin1 = Fin;
91 for (f_count = 0; f_count < fstride1; f_count ++)
92 {
93 Fout1 = & Fout[ f_count * 8 ];
94
95 scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
96 scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
97 scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
98 scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
99 scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
100 scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
101 scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
102 scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
103 scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
104 scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
105 scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
106 scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
107 scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
108 scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
109 scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
110 scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
111
112 // radix 4 butterfly without twiddles
113 scratch[0] = scratch_in[0];
114 scratch[1] = scratch_in[1];
115
116 scratch[2] = scratch_in[2];
117 scratch[3].r = (scratch_in[3].r + scratch_in[3].i) * TW_81;
118 scratch[3].i = (scratch_in[3].i - scratch_in[3].r) * TW_81;
119
120 scratch[4] = scratch_in[4];
121 scratch[5].r = scratch_in[5].i;
122 scratch[5].i = -scratch_in[5].r;
123
124 scratch[6].r = scratch_in[6].r;
125 scratch[6].i = scratch_in[6].i;
126 scratch[7].r = (scratch_in[7].r - scratch_in[7].i) * TW_81N;
127 scratch[7].i = (scratch_in[7].i + scratch_in[7].r) * TW_81N;
128
129 // radix 2 butterfly
130 scratch[8].r = scratch[0].r + scratch[4].r;
131 scratch[8].i = scratch[0].i + scratch[4].i;
132 scratch[9].r = scratch[1].r + scratch[5].r;
133 scratch[9].i = scratch[1].i + scratch[5].i;
134
135 scratch[10].r = scratch[0].r - scratch[4].r;
136 scratch[10].i = scratch[0].i - scratch[4].i;
137 scratch[11].r = scratch[1].r - scratch[5].r;
138 scratch[11].i = scratch[1].i - scratch[5].i;
139
140 // radix 2 butterfly
141 scratch[12].r = scratch[2].r + scratch[6].r;
142 scratch[12].i = scratch[2].i + scratch[6].i;
143 scratch[13].r = scratch[3].r + scratch[7].r;
144 scratch[13].i = scratch[3].i + scratch[7].i;
145
146 scratch[14].r = scratch[2].r - scratch[6].r;
147 scratch[14].i = scratch[2].i - scratch[6].i;
148 scratch[15].r = scratch[3].r - scratch[7].r;
149 scratch[15].i = scratch[3].i - scratch[7].i;
150
151 // third result
152 scratch_out[4].r = scratch[8].r - scratch[12].r;
153 scratch_out[4].i = scratch[8].i - scratch[12].i;
154 scratch_out[5].r = scratch[9].r - scratch[13].r;
155 scratch_out[5].i = scratch[9].i - scratch[13].i;
156
157 // first result
158 scratch_out[0].r = scratch[8].r + scratch[12].r;
159 scratch_out[0].i = scratch[8].i + scratch[12].i;
160 scratch_out[1].r = scratch[9].r + scratch[13].r;
161 scratch_out[1].i = scratch[9].i + scratch[13].i;
162
163 // second result
164 scratch_out[2].r = scratch[10].r + scratch[14].i;
165 scratch_out[2].i = scratch[10].i - scratch[14].r;
166 scratch_out[3].r = scratch[11].r + scratch[15].i;
167 scratch_out[3].i = scratch[11].i - scratch[15].r;
168
169 // forth result
170 scratch_out[6].r = scratch[10].r - scratch[14].i;
171 scratch_out[6].i = scratch[10].i + scratch[14].r;
172 scratch_out[7].r = scratch[11].r - scratch[15].i;
173 scratch_out[7].i = scratch[11].i + scratch[15].r;
174
175 // store
176 Fout1[0] = scratch_out[0];
177 Fout1[1] = scratch_out[1];
178 Fout1[2] = scratch_out[2];
179 Fout1[3] = scratch_out[3];
180 Fout1[4] = scratch_out[4];
181 Fout1[5] = scratch_out[5];
182 Fout1[6] = scratch_out[6];
183 Fout1[7] = scratch_out[7];
184
185 Fin1 += 1;
186 } // f_count
187 tw += 6;
188 mstride <<= 2;
189 fstride >>= 4;
190 stage_count -= 2;
191
192 // swap
193 Ftmp = buffer;
194 buffer = Fout;
195 Fout = Ftmp;
196 }
197 else if (N == 4) // length of FFT is 2^n (n is even)
198 {
199 //fstride is nfft>>2
200 for (f_count = fstride; f_count ; f_count --)
201 {
202 // load
203 scratch_in[0] = *Fin1;
204 Fin2 = Fin1 + fstride;
205 scratch_in[1] = *Fin2;
206 Fin2 = Fin2 + fstride;
207 scratch_in[2] = *Fin2;
208 Fin2 = Fin2 + fstride;
209 scratch_in[3] = *Fin2;
210
211 // radix 4 butterfly without twiddles
212
213 // radix 2 butterfly
214 scratch[0].r = scratch_in[0].r + scratch_in[2].r;
215 scratch[0].i = scratch_in[0].i + scratch_in[2].i;
216
217 scratch[1].r = scratch_in[0].r - scratch_in[2].r;
218 scratch[1].i = scratch_in[0].i - scratch_in[2].i;
219
220 // radix 2 butterfly
221 scratch[2].r = scratch_in[1].r + scratch_in[3].r;
222 scratch[2].i = scratch_in[1].i + scratch_in[3].i;
223
224 scratch[3].r = scratch_in[1].r - scratch_in[3].r;
225 scratch[3].i = scratch_in[1].i - scratch_in[3].i;
226
227 // third result
228 scratch_out[2].r = scratch[0].r - scratch[2].r;
229 scratch_out[2].i = scratch[0].i - scratch[2].i;
230
231 // first result
232 scratch_out[0].r = scratch[0].r + scratch[2].r;
233 scratch_out[0].i = scratch[0].i + scratch[2].i;
234
235 // second result
236 scratch_out[1].r = scratch[1].r + scratch[3].i;
237 scratch_out[1].i = scratch[1].i - scratch[3].r;
238
239 // forth result
240 scratch_out[3].r = scratch[1].r - scratch[3].i;
241 scratch_out[3].i = scratch[1].i + scratch[3].r;
242
243 // store
244 * Fout1 ++ = scratch_out[0];
245 * Fout1 ++ = scratch_out[1];
246 * Fout1 ++ = scratch_out[2];
247 * Fout1 ++ = scratch_out[3];
248
249 Fin1++;
250 } // f_count
251
252 N = fstride; // 1/4 of length of FFT
253
254 // update address for other stages
255 stage_count--;
256 fstride >>= 2;
257
258 // swap
259 Ftmp = buffer;
260 buffer = Fout;
261 Fout = Ftmp;
262 // end of first stage
263 }
264
265
266 // others but the last one
267 for (; stage_count > 1 ; stage_count--)
268 {
269 Fin1 = buffer;
270 for (f_count = 0; f_count < fstride; f_count ++)
271 {
272 Fout1 = & Fout[ f_count * mstride << 2 ];
273 tw1 = tw;
274 for (m_count = mstride; m_count ; m_count --)
275 {
276 // load
277 scratch_tw[0] = *tw1;
278 tw2 = tw1 + mstride;
279 scratch_tw[1] = *tw2;
280 tw2 += mstride;
281 scratch_tw[2] = *tw2;
282 scratch_in[0] = * Fin1;
283 Fin2 = Fin1 + N;
284 scratch_in[1] = * Fin2;
285 Fin2 += N;
286 scratch_in[2] = * Fin2;
287 Fin2 += N;
288 scratch_in[3] = * Fin2;
289
290 // radix 4 butterfly with twiddles
291
292 scratch[0] = scratch_in[0];
293 scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
294 scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
295
296 scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
297 scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
298
299 scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
300 scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
301
302 // radix 2 butterfly
303 scratch[4].r = scratch[0].r + scratch[2].r;
304 scratch[4].i = scratch[0].i + scratch[2].i;
305
306 scratch[5].r = scratch[0].r - scratch[2].r;
307 scratch[5].i = scratch[0].i - scratch[2].i;
308
309 // radix 2 butterfly
310 scratch[6].r = scratch[1].r + scratch[3].r;
311 scratch[6].i = scratch[1].i + scratch[3].i;
312
313 scratch[7].r = scratch[1].r - scratch[3].r;
314 scratch[7].i = scratch[1].i - scratch[3].i;
315
316 // third result
317 scratch_out[2].r = scratch[4].r - scratch[6].r;
318 scratch_out[2].i = scratch[4].i - scratch[6].i;
319
320 // first result
321 scratch_out[0].r = scratch[4].r + scratch[6].r;
322 scratch_out[0].i = scratch[4].i + scratch[6].i;
323
324 // second result
325 scratch_out[1].r = scratch[5].r + scratch[7].i;
326 scratch_out[1].i = scratch[5].i - scratch[7].r;
327
328 // forth result
329 scratch_out[3].r = scratch[5].r - scratch[7].i;
330 scratch_out[3].i = scratch[5].i + scratch[7].r;
331
332 // store
333 *Fout1 = scratch_out[0];
334 Fout2 = Fout1 + mstride;
335 *Fout2 = scratch_out[1];
336 Fout2 += mstride;
337 *Fout2 = scratch_out[2];
338 Fout2 += mstride;
339 *Fout2 = scratch_out[3];
340
341 tw1++;
342 Fin1 ++;
343 Fout1 ++;
344 } // m_count
345 } // f_count
346 tw += mstride * 3;
347 mstride <<= 2;
348 fstride >>= 2;
349
350 // swap
351 Ftmp = buffer;
352 buffer = Fout;
353 Fout = Ftmp;
354 } // stage_count
355
356 // the last one
357 if (stage_count)
358 {
359 Fin1 = buffer;
360 // if stage count is even, output to the input array
361 Fout1 = Fout_ls;
362
363 for (f_count = 0; f_count < fstride; f_count ++)
364 {
365 tw1 = tw;
366 for (m_count = mstride; m_count ; m_count --)
367 {
368 // load
369 scratch_tw[0] = *tw1;
370 tw2 = tw1 + mstride;
371 scratch_tw[1] = *tw2;
372 tw2 += mstride;
373 scratch_tw[2] = *tw2;
374 scratch_in[0] = * Fin1;
375 Fin2 = Fin1 + N;
376 scratch_in[1] = * Fin2;
377 Fin2 += N;
378 scratch_in[2] = * Fin2;
379 Fin2 += N;
380 scratch_in[3] = * Fin2;
381
382 // radix 4 butterfly with twiddles
383
384 scratch[0] = scratch_in[0];
385 scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
386 scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
387
388 scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
389 scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
390
391 scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
392 scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
393
394 // radix 2 butterfly
395 scratch[4].r = scratch[0].r + scratch[2].r;
396 scratch[4].i = scratch[0].i + scratch[2].i;
397
398 scratch[5].r = scratch[0].r - scratch[2].r;
399 scratch[5].i = scratch[0].i - scratch[2].i;
400
401 // radix 2 butterfly
402 scratch[6].r = scratch[1].r + scratch[3].r;
403 scratch[6].i = scratch[1].i + scratch[3].i;
404
405 scratch[7].r = scratch[1].r - scratch[3].r;
406 scratch[7].i = scratch[1].i - scratch[3].i;
407
408 // third result
409 scratch_out[2].r = scratch[4].r - scratch[6].r;
410 scratch_out[2].i = scratch[4].i - scratch[6].i;
411
412 // first result
413 scratch_out[0].r = scratch[4].r + scratch[6].r;
414 scratch_out[0].i = scratch[4].i + scratch[6].i;
415
416 // second result
417 scratch_out[1].r = scratch[5].r + scratch[7].i;
418 scratch_out[1].i = scratch[5].i - scratch[7].r;
419
420 // forth result
421 scratch_out[3].r = scratch[5].r - scratch[7].i;
422 scratch_out[3].i = scratch[5].i + scratch[7].r;
423
424 // store
425 *Fout1 = scratch_out[0];
426 Fout2 = Fout1 + N;
427 *Fout2 = scratch_out[1];
428 Fout2 += N;
429 *Fout2 = scratch_out[2];
430 Fout2 += N;
431 *Fout2 = scratch_out[3];
432
433 tw1 ++;
434 Fin1 ++;
435 Fout1 ++;
436 } // m_count
437 } // f_count
438 } // last stage
439}
440
441static void ne10_mixed_radix_butterfly_inverse_float32_c (ne10_fft_cpx_float32_t * Fout,
443 ne10_int32_t * factors,
444 ne10_fft_cpx_float32_t * twiddles,
445 ne10_fft_cpx_float32_t * buffer)
446{
447 ne10_int32_t fstride, mstride, N;
448 ne10_int32_t fstride1;
449 ne10_int32_t f_count, m_count;
450 ne10_int32_t stage_count;
451 ne10_float32_t one_by_nfft;
452
453 ne10_fft_cpx_float32_t scratch_in[8];
454 ne10_fft_cpx_float32_t scratch_out[8];
455 ne10_fft_cpx_float32_t scratch[16];
456 ne10_fft_cpx_float32_t scratch_tw[6];
457
458 ne10_fft_cpx_float32_t *Fin1, *Fin2, *Fout1, *Fout2;
459 ne10_fft_cpx_float32_t *Fout_ls = Fout;
461 ne10_fft_cpx_float32_t *tw, *tw1, *tw2;
462 const ne10_float32_t TW_81 = 0.70710678;
463 const ne10_float32_t TW_81N = -0.70710678;
464
465 // init fstride, mstride, N, one_by_nfft, tw
466 stage_count = factors[0];
467 fstride = factors[1];
468 mstride = factors[ (stage_count << 1) - 1 ];
469 N = factors[ stage_count << 1 ]; // radix
470 one_by_nfft = (1.0f / (ne10_float32_t) (fstride * N));
471 tw = twiddles;
472
473 // the first stage
474 Fin1 = Fin;
475 Fout1 = Fout;
476 if (N == 2) // length of FFT is 2^n (n is odd)
477 {
478 // radix 8
479 N = fstride >> 1; // 1/4 of length of FFT
480 fstride1 = fstride >> 2;
481
482 Fin1 = Fin;
483 for (f_count = 0; f_count < fstride1; f_count ++)
484 {
485 Fout1 = & Fout[ f_count * 8 ];
486
487 scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
488 scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
489 scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
490 scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
491 scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
492 scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
493 scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
494 scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
495 scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
496 scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
497 scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
498 scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
499 scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
500 scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
501 scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
502 scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
503
504 // radix 4 butterfly with twiddles
505
506 scratch[0] = scratch_in[0];
507 scratch[1] = scratch_in[1];
508
509 scratch[2] = scratch_in[2];
510 scratch[3].r = (scratch_in[3].r - scratch_in[3].i) * TW_81;
511 scratch[3].i = (scratch_in[3].i + scratch_in[3].r) * TW_81;
512
513 scratch[4] = scratch_in[4];
514 scratch[5].r = -scratch_in[5].i;
515 scratch[5].i = scratch_in[5].r;
516
517 scratch[6].r = scratch_in[6].r;
518 scratch[6].i = scratch_in[6].i;
519 scratch[7].r = (scratch_in[7].r + scratch_in[7].i) * TW_81N;
520 scratch[7].i = (scratch_in[7].i - scratch_in[7].r) * TW_81N;
521
522 // radix 2 butterfly
523 scratch[8].r = scratch[0].r + scratch[4].r;
524 scratch[8].i = scratch[0].i + scratch[4].i;
525 scratch[9].r = scratch[1].r + scratch[5].r;
526 scratch[9].i = scratch[1].i + scratch[5].i;
527
528 scratch[10].r = scratch[0].r - scratch[4].r;
529 scratch[10].i = scratch[0].i - scratch[4].i;
530 scratch[11].r = scratch[1].r - scratch[5].r;
531 scratch[11].i = scratch[1].i - scratch[5].i;
532
533 // radix 2 butterfly
534 scratch[12].r = scratch[2].r + scratch[6].r;
535 scratch[12].i = scratch[2].i + scratch[6].i;
536 scratch[13].r = scratch[3].r + scratch[7].r;
537 scratch[13].i = scratch[3].i + scratch[7].i;
538
539 scratch[14].r = scratch[2].r - scratch[6].r;
540 scratch[14].i = scratch[2].i - scratch[6].i;
541 scratch[15].r = scratch[3].r - scratch[7].r;
542 scratch[15].i = scratch[3].i - scratch[7].i;
543
544 // third result
545 scratch_out[4].r = scratch[8].r - scratch[12].r;
546 scratch_out[4].i = scratch[8].i - scratch[12].i;
547 scratch_out[5].r = scratch[9].r - scratch[13].r;
548 scratch_out[5].i = scratch[9].i - scratch[13].i;
549
550 // first result
551 scratch_out[0].r = scratch[8].r + scratch[12].r;
552 scratch_out[0].i = scratch[8].i + scratch[12].i;
553 scratch_out[1].r = scratch[9].r + scratch[13].r;
554 scratch_out[1].i = scratch[9].i + scratch[13].i;
555
556 // second result
557 scratch_out[2].r = scratch[10].r - scratch[14].i;
558 scratch_out[2].i = scratch[10].i + scratch[14].r;
559 scratch_out[3].r = scratch[11].r - scratch[15].i;
560 scratch_out[3].i = scratch[11].i + scratch[15].r;
561
562 // forth result
563 scratch_out[6].r = scratch[10].r + scratch[14].i;
564 scratch_out[6].i = scratch[10].i - scratch[14].r;
565 scratch_out[7].r = scratch[11].r + scratch[15].i;
566 scratch_out[7].i = scratch[11].i - scratch[15].r;
567
568 // store
569 Fout1[0] = scratch_out[0];
570 Fout1[1] = scratch_out[1];
571 Fout1[2] = scratch_out[2];
572 Fout1[3] = scratch_out[3];
573 Fout1[4] = scratch_out[4];
574 Fout1[5] = scratch_out[5];
575 Fout1[6] = scratch_out[6];
576 Fout1[7] = scratch_out[7];
577
578 Fin1 += 1;
579 } // f_count
580 tw += 6;
581 mstride <<= 2;
582 fstride >>= 4;
583 stage_count -= 2;
584
585 if (stage_count == 0)
586 {
587 for (f_count = 0; f_count < 8; f_count++)
588 {
589 Fout[f_count].r *= one_by_nfft;
590 Fout[f_count].i *= one_by_nfft;
591 }
592 }
593
594 // swap
595 Ftmp = buffer;
596 buffer = Fout;
597 Fout = Ftmp;
598 }
599 else if (N == 4) // length of FFT is 2^n (n is even)
600 {
601 //fstride is nfft>>2
602 for (f_count = fstride; f_count ; f_count --)
603 {
604 // load
605 scratch_in[0] = *Fin1;
606 Fin2 = Fin1 + fstride;
607 scratch_in[1] = *Fin2;
608 Fin2 = Fin2 + fstride;
609 scratch_in[2] = *Fin2;
610 Fin2 = Fin2 + fstride;
611 scratch_in[3] = *Fin2;
612
613 // radix 4 butterfly without twiddles
614
615 // radix 2 butterfly
616 scratch[0].r = scratch_in[0].r + scratch_in[2].r;
617 scratch[0].i = scratch_in[0].i + scratch_in[2].i;
618
619 scratch[1].r = scratch_in[0].r - scratch_in[2].r;
620 scratch[1].i = scratch_in[0].i - scratch_in[2].i;
621
622 // radix 2 butterfly
623 scratch[2].r = scratch_in[1].r + scratch_in[3].r;
624 scratch[2].i = scratch_in[1].i + scratch_in[3].i;
625
626 scratch[3].r = scratch_in[1].r - scratch_in[3].r;
627 scratch[3].i = scratch_in[1].i - scratch_in[3].i;
628
629 // third result
630 scratch_out[2].r = scratch[0].r - scratch[2].r;
631 scratch_out[2].i = scratch[0].i - scratch[2].i;
632
633 // first result
634 scratch_out[0].r = scratch[0].r + scratch[2].r;
635 scratch_out[0].i = scratch[0].i + scratch[2].i;
636
637 // second result
638 scratch_out[1].r = scratch[1].r - scratch[3].i;
639 scratch_out[1].i = scratch[1].i + scratch[3].r;
640
641 // forth result
642 scratch_out[3].r = scratch[1].r + scratch[3].i;
643 scratch_out[3].i = scratch[1].i - scratch[3].r;
644
645 // store
646 * Fout1 ++ = scratch_out[0];
647 * Fout1 ++ = scratch_out[1];
648 * Fout1 ++ = scratch_out[2];
649 * Fout1 ++ = scratch_out[3];
650
651 Fin1++;
652 } // f_count
653
654 N = fstride; // 1/4 of length of FFT
655
656 // update address for other stages
657 stage_count--;
658 fstride >>= 2;
659
660 if (stage_count == 0)
661 {
662 for (f_count = 0; f_count < 4; f_count++)
663 {
664 Fout[f_count].r *= one_by_nfft;
665 Fout[f_count].i *= one_by_nfft;
666 }
667 }
668 // swap
669 Ftmp = buffer;
670 buffer = Fout;
671 Fout = Ftmp;
672 // end of first stage
673 }
674
675
676 // others but the last one
677 for (; stage_count > 1 ; stage_count--)
678 {
679 Fin1 = buffer;
680 for (f_count = 0; f_count < fstride; f_count ++)
681 {
682 Fout1 = & Fout[ f_count * mstride << 2 ];
683 tw1 = tw;
684 for (m_count = mstride; m_count ; m_count --)
685 {
686 // load
687 scratch_tw[0] = *tw1;
688 tw2 = tw1 + mstride;
689 scratch_tw[1] = *tw2;
690 tw2 += mstride;
691 scratch_tw[2] = *tw2;
692 scratch_in[0] = * Fin1;
693 Fin2 = Fin1 + N;
694 scratch_in[1] = * Fin2;
695 Fin2 += N;
696 scratch_in[2] = * Fin2;
697 Fin2 += N;
698 scratch_in[3] = * Fin2;
699
700 // radix 4 butterfly with twiddles
701
702 scratch[0] = scratch_in[0];
703 scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
704 scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
705
706 scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
707 scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
708
709 scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
710 scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
711
712 // radix 2 butterfly
713 scratch[4].r = scratch[0].r + scratch[2].r;
714 scratch[4].i = scratch[0].i + scratch[2].i;
715
716 scratch[5].r = scratch[0].r - scratch[2].r;
717 scratch[5].i = scratch[0].i - scratch[2].i;
718
719 // radix 2 butterfly
720 scratch[6].r = scratch[1].r + scratch[3].r;
721 scratch[6].i = scratch[1].i + scratch[3].i;
722
723 scratch[7].r = scratch[1].r - scratch[3].r;
724 scratch[7].i = scratch[1].i - scratch[3].i;
725
726 // third result
727 scratch_out[2].r = scratch[4].r - scratch[6].r;
728 scratch_out[2].i = scratch[4].i - scratch[6].i;
729
730 // first result
731 scratch_out[0].r = scratch[4].r + scratch[6].r;
732 scratch_out[0].i = scratch[4].i + scratch[6].i;
733
734 // second result
735 scratch_out[1].r = scratch[5].r - scratch[7].i;
736 scratch_out[1].i = scratch[5].i + scratch[7].r;
737
738 // forth result
739 scratch_out[3].r = scratch[5].r + scratch[7].i;
740 scratch_out[3].i = scratch[5].i - scratch[7].r;
741
742 // store
743 *Fout1 = scratch_out[0];
744 Fout2 = Fout1 + mstride;
745 *Fout2 = scratch_out[1];
746 Fout2 += mstride;
747 *Fout2 = scratch_out[2];
748 Fout2 += mstride;
749 *Fout2 = scratch_out[3];
750
751 tw1++;
752 Fin1 ++;
753 Fout1 ++;
754 } // m_count
755 } // f_count
756 tw += mstride * 3;
757 mstride <<= 2;
758 fstride >>= 2;
759
760 // swap
761 Ftmp = buffer;
762 buffer = Fout;
763 Fout = Ftmp;
764 } // stage_count
765
766 // the last one
767 if (stage_count)
768 {
769 Fin1 = buffer;
770 // if stage count is even, output to the input array
771 Fout1 = Fout_ls;
772
773 for (f_count = 0; f_count < fstride; f_count ++)
774 {
775 tw1 = tw;
776 for (m_count = mstride; m_count ; m_count --)
777 {
778 // load
779 scratch_tw[0] = *tw1;
780 tw2 = tw1 + mstride;
781 scratch_tw[1] = *tw2;
782 tw2 += mstride;
783 scratch_tw[2] = *tw2;
784 scratch_in[0] = * Fin1;
785 Fin2 = Fin1 + N;
786 scratch_in[1] = * Fin2;
787 Fin2 += N;
788 scratch_in[2] = * Fin2;
789 Fin2 += N;
790 scratch_in[3] = * Fin2;
791
792 // radix 4 butterfly with twiddles
793
794 scratch[0] = scratch_in[0];
795 scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
796 scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
797
798 scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
799 scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
800
801 scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
802 scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
803
804 // radix 2 butterfly
805 scratch[4].r = scratch[0].r + scratch[2].r;
806 scratch[4].i = scratch[0].i + scratch[2].i;
807
808 scratch[5].r = scratch[0].r - scratch[2].r;
809 scratch[5].i = scratch[0].i - scratch[2].i;
810
811 // radix 2 butterfly
812 scratch[6].r = scratch[1].r + scratch[3].r;
813 scratch[6].i = scratch[1].i + scratch[3].i;
814
815 scratch[7].r = scratch[1].r - scratch[3].r;
816 scratch[7].i = scratch[1].i - scratch[3].i;
817
818 // third result
819 scratch_out[2].r = (scratch[4].r - scratch[6].r) * one_by_nfft;
820 scratch_out[2].i = (scratch[4].i - scratch[6].i) * one_by_nfft;
821
822 // first result
823 scratch_out[0].r = (scratch[4].r + scratch[6].r) * one_by_nfft;
824 scratch_out[0].i = (scratch[4].i + scratch[6].i) * one_by_nfft;
825
826 // second result
827 scratch_out[1].r = (scratch[5].r - scratch[7].i) * one_by_nfft;
828 scratch_out[1].i = (scratch[5].i + scratch[7].r) * one_by_nfft;
829
830 // forth result
831 scratch_out[3].r = (scratch[5].r + scratch[7].i) * one_by_nfft;
832 scratch_out[3].i = (scratch[5].i - scratch[7].r) * one_by_nfft;
833
834 // store
835 *Fout1 = scratch_out[0];
836 Fout2 = Fout1 + N;
837 *Fout2 = scratch_out[1];
838 Fout2 += N;
839 *Fout2 = scratch_out[2];
840 Fout2 += N;
841 *Fout2 = scratch_out[3];
842
843 tw1 ++;
844 Fin1 ++;
845 Fout1 ++;
846 } // m_count
847 } // f_count
848 } // last stage
849}
850
851static void ne10_fft_split_r2c_1d_float32 (ne10_fft_cpx_float32_t *dst,
852 const ne10_fft_cpx_float32_t *src,
853 ne10_fft_cpx_float32_t *twiddles,
854 ne10_int32_t ncfft)
855{
856 ne10_int32_t k;
857 ne10_fft_cpx_float32_t fpnk, fpk, f1k, f2k, tw, tdc;
858
859 tdc.r = src[0].r;
860 tdc.i = src[0].i;
861
862 dst[0].r = tdc.r + tdc.i;
863 dst[ncfft].r = tdc.r - tdc.i;
864 dst[ncfft].i = dst[0].i = 0;
865
866 for (k = 1; k <= ncfft / 2 ; ++k)
867 {
868 fpk = src[k];
869 fpnk.r = src[ncfft - k].r;
870 fpnk.i = - src[ncfft - k].i;
871
872 f1k.r = fpk.r + fpnk.r;
873 f1k.i = fpk.i + fpnk.i;
874
875 f2k.r = fpk.r - fpnk.r;
876 f2k.i = fpk.i - fpnk.i;
877
878 tw.r = f2k.r * (twiddles[k - 1]).r - f2k.i * (twiddles[k - 1]).i;
879 tw.i = f2k.r * (twiddles[k - 1]).i + f2k.i * (twiddles[k - 1]).r;
880
881 dst[k].r = (f1k.r + tw.r) * 0.5f;
882 dst[k].i = (f1k.i + tw.i) * 0.5f;
883 dst[ncfft - k].r = (f1k.r - tw.r) * 0.5f;
884 dst[ncfft - k].i = (tw.i - f1k.i) * 0.5f;
885 }
886}
887
888static void ne10_fft_split_c2r_1d_float32 (ne10_fft_cpx_float32_t *dst,
889 const ne10_fft_cpx_float32_t *src,
890 ne10_fft_cpx_float32_t *twiddles,
891 ne10_int32_t ncfft)
892{
893
894 ne10_int32_t k;
895 ne10_fft_cpx_float32_t fk, fnkc, fek, fok, tmp;
896
897
898 dst[0].r = (src[0].r + src[ncfft].r) * 0.5f;
899 dst[0].i = (src[0].r - src[ncfft].r) * 0.5f;
900
901 for (k = 1; k <= ncfft / 2; k++)
902 {
903 fk = src[k];
904 fnkc.r = src[ncfft - k].r;
905 fnkc.i = -src[ncfft - k].i;
906
907 fek.r = fk.r + fnkc.r;
908 fek.i = fk.i + fnkc.i;
909
910 tmp.r = fk.r - fnkc.r;
911 tmp.i = fk.i - fnkc.i;
912
913 fok.r = tmp.r * twiddles[k - 1].r + tmp.i * twiddles[k - 1].i;
914 fok.i = tmp.i * twiddles[k - 1].r - tmp.r * twiddles[k - 1].i;
915
916 dst[k].r = (fek.r + fok.r) * 0.5f;
917 dst[k].i = (fek.i + fok.i) * 0.5f;
918
919 dst[ncfft - k].r = (fek.r - fok.r) * 0.5f;
920 dst[ncfft - k].i = (fok.i - fek.i) * 0.5f;
921 }
922}
923
998{
999 ne10_fft_cfg_float32_t st = NULL;
1000 ne10_uint32_t memneeded = sizeof (ne10_fft_state_float32_t)
1001 + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors*/
1002 + sizeof (ne10_fft_cpx_float32_t) * nfft /* twiddle*/
1003 + sizeof (ne10_fft_cpx_float32_t) * nfft /* buffer*/
1004 + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
1005
1006 st = (ne10_fft_cfg_float32_t) NE10_MALLOC (memneeded);
1007
1008 // Only backward FFT is scaled by default.
1009 st->is_forward_scaled = 0;
1010 st->is_backward_scaled = 1;
1011
1012 if (st == NULL)
1013 {
1014 return st;
1015 }
1016
1017 uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_state_float32_t);
1018 NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1019 st->factors = (ne10_int32_t*) address;
1020 st->twiddles = (ne10_fft_cpx_float32_t*) (st->factors + (NE10_MAXFACTORS * 2));
1021 st->buffer = st->twiddles + nfft;
1022 st->nfft = nfft;
1023
1024 ne10_int32_t result = ne10_factor (nfft, st->factors, NE10_FACTOR_DEFAULT);
1025 if (result == NE10_ERR)
1026 {
1027 NE10_FREE (st);
1028 return st;
1029 }
1030
1031 // Check if ALGORITHM FLAG is NE10_FFT_ALG_ANY.
1032 {
1033 ne10_int32_t stage_count = st->factors[0];
1034 ne10_int32_t algorithm_flag = st->factors[2 * (stage_count + 1)];
1035
1036 // Enable radix-8.
1037 if (algorithm_flag == NE10_FFT_ALG_ANY)
1038 {
1039 result = ne10_factor (st->nfft, st->factors, NE10_FACTOR_EIGHT);
1040 if (result == NE10_ERR)
1041 {
1042 PRINT_HIT;
1043 NE10_FREE (st);
1044 return st;
1045 }
1046 }
1047 }
1048
1049 ne10_fft_generate_twiddles_float32 (st->twiddles, st->factors, nfft);
1050
1051 return st;
1052}
1053
1068 ne10_int32_t inverse_fft)
1069{
1070 ne10_int32_t stage_count = cfg->factors[0];
1071 ne10_int32_t algorithm_flag = cfg->factors[2 * (stage_count + 1)];
1072
1073 assert ((algorithm_flag == NE10_FFT_ALG_24)
1074 || (algorithm_flag == NE10_FFT_ALG_ANY));
1075
1076 switch (algorithm_flag)
1077 {
1078 case NE10_FFT_ALG_24:
1079 if (inverse_fft)
1080 {
1081 ne10_mixed_radix_butterfly_inverse_float32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
1082 }
1083 else
1084 {
1085 ne10_mixed_radix_butterfly_float32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
1086 }
1087 break;
1088 case NE10_FFT_ALG_ANY:
1089 if (inverse_fft)
1090 {
1091 ne10_mixed_radix_generic_butterfly_inverse_float32_c (fout, fin,
1092 cfg->factors, cfg->twiddles, cfg->buffer, cfg->is_backward_scaled);
1093 }
1094 else
1095 {
1096 ne10_mixed_radix_generic_butterfly_float32_c (fout, fin,
1097 cfg->factors, cfg->twiddles, cfg->buffer, cfg->is_forward_scaled);
1098 }
1099 break;
1100 }
1101}
1102
//end of C2C_FFT_IFFT group
1106
1184// For NE10_UNROLL_LEVEL > 0, please refer to NE10_rfft_float.c
1185#if (NE10_UNROLL_LEVEL == 0)
1186
1194{
 // Allocates one contiguous block that holds the r2c state struct followed
 // by the factors table, the twiddles, the super twiddles and the work
 // buffer, then fills in the factors and both twiddle tables. Returns st.
 // NOTE(review): the declarations of `st` and `tw` do not appear in this
 // listing; they are used below as the state pointer and a twiddle cursor.

 // The complex transform length used throughout is nfft shifted right by 1.
1196 ne10_int32_t ncfft = nfft >> 1;
1197
 // Byte count for the single allocation. The trailing
 // NE10_FFT_BYTE_ALIGNMENT term leaves slack for the alignment step applied
 // to the address of the factors table below.
1198 ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_float32_t)
1199 + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
1200 + sizeof (ne10_fft_cpx_float32_t) * ncfft /* twiddle*/
1201 + sizeof (ne10_fft_cpx_float32_t) * (ncfft / 2) /* super twiddles*/
1202 + sizeof (ne10_fft_cpx_float32_t) * nfft /* buffer*/
1203 + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
1204
1205 st = (ne10_fft_r2c_cfg_float32_t) NE10_MALLOC (memneeded);
1206
 // If the allocation failed, all setup is skipped and st is returned as is.
1207 if (st)
1208 {
 // Carve the sub-arrays out of the block in the same order as the size
 // computation: factors, twiddles, super twiddles, buffer. The starting
 // address is the first byte past the state struct, passed through
 // NE10_BYTE_ALIGNMENT (presumably rounding it up to the requested
 // alignment - confirm against the macro definition).
1209 uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_r2c_state_float32_t);
1210 NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1211 st->factors = (ne10_int32_t*) address;
1212 st->twiddles = (ne10_fft_cpx_float32_t*) (st->factors + (NE10_MAXFACTORS * 2));
1213 st->super_twiddles = st->twiddles + ncfft;
1214 st->buffer = st->super_twiddles + (ncfft / 2);
1215 st->ncfft = ncfft;
1216
 // Factor the half-length ncfft (not nfft) into st->factors.
1217 ne10_int32_t result = ne10_factor (ncfft, st->factors, NE10_FACTOR_DEFAULT);
1218 if (result == NE10_ERR)
1219 {
 // Factorisation failed: release the block.
 // NOTE(review): st is returned immediately after NE10_FREE (st);
 // confirm the macro resets st so callers do not receive a freed pointer.
1220 NE10_FREE (st);
1221 return st;
1222 }
1223
1224 ne10_int32_t i, j;
1225 ne10_int32_t *factors = st->factors;
1226 ne10_fft_cpx_float32_t *twiddles = st->twiddles;
 // factors[0] is read as the stage count and factors[1] as the initial
 // stride; the x2 and x3 strides are derived from it.
1228 ne10_int32_t stage_count = factors[0];
1229 ne10_int32_t fstride1 = factors[1];
1230 ne10_int32_t fstride2 = fstride1 * 2;
1231 ne10_int32_t fstride3 = fstride1 * 3;
1232 ne10_int32_t m;
1233
 // pi is held as ne10_float32_t, so the phase expressions below are
 // evaluated in single precision before being passed to cos/sin.
1234 const ne10_float32_t pi = NE10_PI;
1235 ne10_float32_t phase1;
1236 ne10_float32_t phase2;
1237 ne10_float32_t phase3;
1238
 // Walk the stages from stage_count - 1 down to 1 (index 0 is not
 // visited). Each pass shifts the three strides right by 2 and reads m
 // from factors[2 * i + 1].
1239 for (i = stage_count - 1; i > 0; i--)
1240 {
1241 fstride1 >>= 2;
1242 fstride2 >>= 2;
1243 fstride3 >>= 2;
1244 m = factors[2 * i + 1];
1245 tw = twiddles;
 // For each j in [0, m) three (cos, sin) pairs are stored m entries
 // apart: at tw, tw + m and tw + 2 * m, one per stride.
1246 for (j = 0; j < m; j++)
1247 {
1248 phase1 = -2 * pi * fstride1 * j / ncfft;
1249 phase2 = -2 * pi * fstride2 * j / ncfft;
1250 phase3 = -2 * pi * fstride3 * j / ncfft;
1251 tw->r = (ne10_float32_t) cos (phase1);
1252 tw->i = (ne10_float32_t) sin (phase1);
1253 (tw + m)->r = (ne10_float32_t) cos (phase2);
1254 (tw + m)->i = (ne10_float32_t) sin (phase2);
1255 (tw + m * 2)->r = (ne10_float32_t) cos (phase3);
1256 (tw + m * 2)->i = (ne10_float32_t) sin (phase3);
1257 tw++;
1258 }
 // This stage wrote 3 * m entries; move to the next stage's table.
1259 twiddles += m * 3;
1260 }
1261
 // Super twiddles: ncfft / 2 entries, entry i holding the (cos, sin) of
 // -pi * ((i + 1) / ncfft + 0.5).
1262 tw = st->super_twiddles;
1263 for (i = 0; i < ncfft / 2; i++)
1264 {
1265 phase1 = -pi * ( (ne10_float32_t) (i + 1) / ncfft + 0.5f);
1266 tw->r = (ne10_float32_t) cos (phase1);
1267 tw->i = (ne10_float32_t) sin (phase1);
1268 tw++;
1269 }
1270
1271 }
1272 return st;
1273}
1274
1286 ne10_float32_t *fin,
1288{
 // Real-to-complex transform in two steps:
 //  1. run the forward mixed-radix butterfly on fin reinterpreted as an
 //     array of complex values, writing the result into cfg->buffer;
 //  2. call ne10_fft_split_r2c_1d_float32 with fout as its first argument,
 //     the buffer, the super twiddles and cfg->ncfft.
 // In step 1 fout is passed as the butterfly's final argument, the slot the
 // complex transform in this file fills with cfg->buffer (presumably scratch
 // space - confirm against the butterfly's definition).
1289 ne10_fft_cpx_float32_t * tmpbuf = cfg->buffer;
1290
1291 ne10_mixed_radix_butterfly_float32_c (tmpbuf, (ne10_fft_cpx_float32_t*) fin, cfg->factors, cfg->twiddles, fout);
1292 ne10_fft_split_r2c_1d_float32 (fout, tmpbuf, cfg->super_twiddles, cfg->ncfft);
1293}
1294
/*
 * Complex-to-real transform in two steps:
 *  1. ne10_fft_split_c2r_1d_float32 is called with the start of cfg->buffer
 *     as its first argument, followed by fin, the super twiddles and
 *     cfg->ncfft;
 *  2. the inverse mixed-radix butterfly takes that region as input and
 *     writes to fout reinterpreted as an array of complex values.
 * cfg->buffer is used as two regions: one starting at offset 0 and one
 * starting at offset cfg->ncfft, the latter passed as the butterfly's final
 * argument.
 */
1305void ne10_fft_c2r_1d_float32_c (ne10_float32_t *fout,
1308{
1309 ne10_fft_cpx_float32_t * tmpbuf1 = cfg->buffer;
1310 ne10_fft_cpx_float32_t * tmpbuf2 = cfg->buffer + cfg->ncfft;
1311
1312 ne10_fft_split_c2r_1d_float32 (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft);
1313 ne10_mixed_radix_butterfly_inverse_float32_c ( (ne10_fft_cpx_float32_t*) fout, tmpbuf1, cfg->factors, cfg->twiddles, tmpbuf2);
1314}
1315
1319#endif // NE10_UNROLL_LEVEL
void ne10_fft_c2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Mixed radix-2/3/4/5 complex FFT/IFFT of float(32-bit) data.
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
void ne10_fft_r2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 FFT (real to complex) of float(32-bit) data.
void ne10_fft_c2r_1d_float32_c(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 IFFT (complex to real) of float(32-bit) data.
structure for the floating point FFT state
Definition NE10_types.h:241
ne10_int32_t is_forward_scaled
@brief Flag to control scaling behaviour in forward floating point complex FFT.
Definition NE10_types.h:255
ne10_int32_t is_backward_scaled
@brief Flag to control scaling behaviour in backward floating point complex FFT.
Definition NE10_types.h:264