Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_fft_generic_int32.h
1/*
2 * Copyright 2015 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/* license of Kiss FFT */
29/*
30Copyright (c) 2003-2010, Mark Borgerding
31
32All rights reserved.
33
34Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35
36 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39
40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41*/
42
43/*
44 * NE10 Library : dsp/NE10_fft_generic_int32.h
45 */
46
47#ifndef NE10_FFT_GENERIC_INT32_H
48#define NE10_FFT_GENERIC_INT32_H
49
50#include "NE10_types.h"
51#include "NE10_macros.h"
52#include "NE10_fft.h"
53
54#define NE10_CPX_MUL_S32(Z,A,B) \
55 do { \
56 ne10_int32_t ARBR = ((NE10_F2I32_SAMPPROD) A.r * B.r) >> 31; \
57 ne10_int32_t ARBI = ((NE10_F2I32_SAMPPROD) A.r * B.i) >> 31; \
58 ne10_int32_t AIBR = ((NE10_F2I32_SAMPPROD) A.i * B.r) >> 31; \
59 ne10_int32_t AIBI = ((NE10_F2I32_SAMPPROD) A.i * B.i) >> 31; \
60 Z.r = ARBR - AIBI; \
61 Z.i = ARBI + AIBR; \
62 } while (0)
63
64#define NE10_S_MUL_S32(A,S) (((NE10_F2I32_SAMPPROD) (A) * (S)) >> 31)
65
73template<int RADIX>
74inline void FFT_MUL_TW (ne10_fft_cpx_int32_t out[RADIX],
75 const ne10_fft_cpx_int32_t in[RADIX],
76 const ne10_fft_cpx_int32_t tw[RADIX]);
77
78template<>
79inline void FFT_MUL_TW<2> (ne10_fft_cpx_int32_t out[2],
80 const ne10_fft_cpx_int32_t in[2],
81 const ne10_fft_cpx_int32_t tw[1])
82{
83 out[0] = in[0];
84 NE10_CPX_MUL_S32 (out[1], in[1], tw[0]);
85}
86
87template<int RADIX>
88inline void FFT_MUL_TW (ne10_fft_cpx_int32_t out[RADIX],
89 const ne10_fft_cpx_int32_t in[RADIX],
90 const ne10_fft_cpx_int32_t tw[RADIX - 1])
91{
92 FFT_MUL_TW<RADIX - 1> (out, in, tw);
93 NE10_CPX_MUL_S32 (out[RADIX - 1], in[RADIX - 1], tw[RADIX - 2]);
94}
95
97// FFT Kernel
98// F: Forward
99// C: Complex
100// U: Unscaled
102
108template<int RADIX>
109inline void FFT_FCU (ne10_fft_cpx_int32_t scratch_out[RADIX],
110 const ne10_fft_cpx_int32_t scratch_in[RADIX]);
111
117template<>
118inline void FFT_FCU<2> (ne10_fft_cpx_int32_t scratch_out[2],
119 const ne10_fft_cpx_int32_t scratch_in[2])
120{
121 NE10_CPX_ADD (scratch_out[0], scratch_in[0], scratch_in[1]);
122 NE10_CPX_SUB (scratch_out[1], scratch_in[0], scratch_in[1]);
123}
124
130template<>
131inline void FFT_FCU<3> (ne10_fft_cpx_int32_t Fout[3],
132 const ne10_fft_cpx_int32_t Fin[3])
133{
134 ne10_fft_cpx_int32_t scratch[4];
135 ne10_fft_cpx_int32_t scratch_in[3];
136
137 scratch_in[0] = Fin[0];
138 scratch_in[1] = Fin[1];
139 scratch_in[2] = Fin[2];
140
141 scratch[1] = scratch_in[1];
142 scratch[2] = scratch_in[2];
143
144 NE10_CPX_ADD (scratch[3], scratch[1], scratch[2]);
145 NE10_CPX_SUB (scratch[0], scratch[1], scratch[2]);
146
147 scratch_in[1].r = scratch_in[0].r - (scratch[3].r >> 1);
148 scratch_in[1].i = scratch_in[0].i - (scratch[3].i >> 1);
149
150 scratch[0].r = NE10_S_MUL_S32 (scratch[0].r , -TW_3I_S32);
151 scratch[0].i = NE10_S_MUL_S32 (scratch[0].i , -TW_3I_S32);
152
153 scratch_in[0].r += scratch[3].r;
154 scratch_in[0].i += scratch[3].i;
155
156 scratch_in[2].r = scratch_in[1].r + scratch[0].i;
157 scratch_in[2].i = scratch_in[1].i - scratch[0].r;
158
159 scratch_in[1].r -= scratch[0].i;
160 scratch_in[1].i += scratch[0].r;
161
162 Fout[0] = scratch_in[0];
163 Fout[1] = scratch_in[1];
164 Fout[2] = scratch_in[2];
165}
166
172template<>
173inline void FFT_FCU<4> (ne10_fft_cpx_int32_t scratch_out[4],
174 const ne10_fft_cpx_int32_t scratch_in[4])
175{
176 ne10_fft_cpx_int32_t scratch[4];
177
178 NE10_CPX_ADD (scratch[0], scratch_in[0], scratch_in[2]);
179 NE10_CPX_SUB (scratch[1], scratch_in[0], scratch_in[2]);
180 NE10_CPX_ADD (scratch[2], scratch_in[1], scratch_in[3]);
181 NE10_CPX_SUB (scratch[3], scratch_in[1], scratch_in[3]);
182
183 NE10_CPX_SUB (scratch_out[2], scratch[0], scratch[2]);
184 NE10_CPX_ADD (scratch_out[0], scratch[0], scratch[2]);
185
186 scratch_out[1].r = scratch[1].r + scratch[3].i;
187 scratch_out[1].i = scratch[1].i - scratch[3].r;
188 scratch_out[3].r = scratch[1].r - scratch[3].i;
189 scratch_out[3].i = scratch[1].i + scratch[3].r;
190}
191
197template<>
198inline void FFT_FCU<5> (ne10_fft_cpx_int32_t Fout[5],
199 const ne10_fft_cpx_int32_t Fin[5])
200{
201 ne10_fft_cpx_int32_t scratch[13], scratch_in[5];
202
203 scratch_in[0] = Fin[0];
204 scratch_in[1] = Fin[1];
205 scratch_in[2] = Fin[2];
206 scratch_in[3] = Fin[3];
207 scratch_in[4] = Fin[4];
208
209 scratch[0] = scratch_in[0];
210 scratch[1] = scratch_in[1];
211 scratch[2] = scratch_in[2];
212 scratch[3] = scratch_in[3];
213 scratch[4] = scratch_in[4];
214
215 NE10_CPX_ADD (scratch[ 7], scratch[1], scratch[4]);
216 NE10_CPX_SUB (scratch[10], scratch[1], scratch[4]);
217 NE10_CPX_ADD (scratch[ 8], scratch[2], scratch[3]);
218 NE10_CPX_SUB (scratch[ 9], scratch[2], scratch[3]);
219
220 scratch_in[0].r += scratch[7].r + scratch[8].r;
221 scratch_in[0].i += scratch[7].i + scratch[8].i;
222
223 scratch[5].r = scratch[0].r
224 + NE10_S_MUL_S32 (scratch[7].r, TW_5A_S32.r)
225 + NE10_S_MUL_S32 (scratch[8].r, TW_5B_S32.r);
226 scratch[5].i = scratch[0].i
227 + NE10_S_MUL_S32 (scratch[7].i, TW_5A_S32.r)
228 + NE10_S_MUL_S32 (scratch[8].i, TW_5B_S32.r);
229
230 scratch[6].r = NE10_S_MUL_S32 (scratch[10].i, TW_5A_S32.i)
231 + NE10_S_MUL_S32 (scratch[9].i, TW_5B_S32.i);
232 scratch[6].i = -NE10_S_MUL_S32 (scratch[10].r, TW_5A_S32.i)
233 - NE10_S_MUL_S32 (scratch[9].r, TW_5B_S32.i);
234
235 NE10_CPX_SUB (scratch_in[1], scratch[5], scratch[6]);
236 NE10_CPX_ADD (scratch_in[4], scratch[5], scratch[6]);
237
238 scratch[11].r = scratch[0].r
239 + NE10_S_MUL_S32 (scratch[7].r, TW_5B_S32.r)
240 + NE10_S_MUL_S32 (scratch[8].r, TW_5A_S32.r);
241 scratch[11].i = scratch[0].i
242 + NE10_S_MUL_S32 (scratch[7].i, TW_5B_S32.r)
243 + NE10_S_MUL_S32 (scratch[8].i, TW_5A_S32.r);
244
245 scratch[12].r = -NE10_S_MUL_S32 (scratch[10].i, TW_5B_S32.i)
246 + NE10_S_MUL_S32 (scratch[9].i, TW_5A_S32.i);
247 scratch[12].i = NE10_S_MUL_S32 (scratch[10].r, TW_5B_S32.i)
248 - NE10_S_MUL_S32 (scratch[9].r, TW_5A_S32.i);
249
250 NE10_CPX_ADD (scratch_in[2], scratch[11], scratch[12]);
251 NE10_CPX_SUB (scratch_in[3], scratch[11], scratch[12]);
252
253 Fout[0] = scratch_in[0];
254 Fout[1] = scratch_in[1];
255 Fout[2] = scratch_in[2];
256 Fout[3] = scratch_in[3];
257 Fout[4] = scratch_in[4];
258}
259
263template<class T>
264inline void NE10_CONJ_S (T &);
265
266template<>
267inline void NE10_CONJ_S<ne10_fft_cpx_int32_t> (ne10_fft_cpx_int32_t &scalar)
268{
269 scalar.i = -scalar.i;
270}
271
277template<int RADIX, class T>
278inline void NE10_CONJ (T in[RADIX])
279{
280 NE10_CONJ<RADIX - 1> (in);
281 NE10_CONJ_S<T> (in[RADIX - 1]);
282}
283
284template<>
285inline void NE10_CONJ<1, ne10_fft_cpx_int32_t> (ne10_fft_cpx_int32_t in[1])
286{
287 NE10_CONJ_S<ne10_fft_cpx_int32_t> (in[0]);
288}
289
290template<class T>
291inline T NE10_CPX_LOAD_S (const T *ptr)
292{
293 return *ptr;
294}
295
296template<class T>
297inline void NE10_CPX_STORE_S (T *Fout, const T in)
298{
299 *Fout = in;
300}
301
309template<int RADIX, class T>
310inline void NE10_LOAD_BY_STEP (T out[RADIX],
311 const T *Fin,
312 const ne10_int32_t in_step);
313
314template<>
315inline void NE10_LOAD_BY_STEP<1, ne10_fft_cpx_int32_t> (
317 const ne10_fft_cpx_int32_t *Fin,
318 const ne10_int32_t)
319{
320 out[0] = NE10_CPX_LOAD_S<ne10_fft_cpx_int32_t> (Fin);
321}
322
323template<int RADIX, class T>
324inline void NE10_LOAD_BY_STEP (T out[RADIX],
325 const T *Fin,
326 const ne10_int32_t in_step)
327{
328 out[0] = NE10_CPX_LOAD_S<T> (Fin);
329 NE10_LOAD_BY_STEP<RADIX - 1, T> (out + 1, Fin + in_step, in_step);
330}
331
339template<int RADIX, class T>
340inline void NE10_STORE_BY_STEP (T *Fout,
341 const T in[RADIX],
342 const ne10_int32_t out_step)
343{
344 NE10_CPX_STORE_S<T> (Fout, in[0]);
345 NE10_STORE_BY_STEP<RADIX - 1, T> (Fout + out_step, in + 1, out_step);
346}
347
348template<>
349inline void NE10_STORE_BY_STEP<1, ne10_fft_cpx_int32_t> (
351 const ne10_fft_cpx_int32_t in[1],
352 const ne10_int32_t)
353{
354 Fout[0] = in[0];
355}
356
363template<int RADIX>
364inline void NE10_SCALED (ne10_fft_cpx_int32_t out[RADIX],
365 const ne10_int32_t scaling)
366{
367 NE10_F2I32_FIXDIV (out[0], scaling);
368 NE10_SCALED<RADIX - 1> (out + 1, scaling);
369}
370
371template<>
372inline void NE10_SCALED<1> (ne10_fft_cpx_int32_t out[1],
373 const ne10_int32_t scaling)
374{
375 NE10_F2I32_FIXDIV (out[0], scaling);
376}
377
378#endif // NE10_FFT_GENERIC_INT32_H
structure for the 32 bits fixed point FFT function.
Definition NE10_types.h:329