Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
test_suite_fft_int32.c
1/*
2 * Copyright 2013-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : test_suite_fft_int32.c
30 */
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <math.h>
35#include <string.h>
36
37#include "NE10_dsp.h"
38#include "seatest.h"
39#include "unit_test_common.h"
40
41
42/* ----------------------------------------------------------------------
43** Global defines
44** ------------------------------------------------------------------- */
45
46/* Max FFT Length and double buffer for real and imag */
47#define TEST_LENGTH_SAMPLES (32768)
48#define MIN_LENGTH_SAMPLES_CPX (4)
49#define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX*2)
50
51#define SNR_THRESHOLD_INT32 25.0f
52
53#define TEST_COUNT 250000
54
55/* ----------------------------------------------------------------------
56** Defines each of the tests performed
57** ------------------------------------------------------------------- */
58
59//input and output
60static ne10_int32_t testInput_i32_unscaled[TEST_LENGTH_SAMPLES * 2];
61static ne10_int32_t testInput_i32_scaled[TEST_LENGTH_SAMPLES * 2];
62static ne10_int32_t * guarded_in_c = NULL;
63static ne10_int32_t * guarded_in_neon = NULL;
64static ne10_int32_t * in_c = NULL;
65static ne10_int32_t * in_neon = NULL;
66
67static ne10_int32_t * guarded_out_c = NULL;
68static ne10_int32_t * guarded_out_neon = NULL;
69static ne10_int32_t * out_c = NULL;
70static ne10_int32_t * out_neon = NULL;
71
72static ne10_float32_t snr = 0.0f;
73
74static ne10_int64_t time_c = 0;
75static ne10_int64_t time_neon = 0;
76static ne10_float32_t time_speedup = 0.0f;
77static ne10_float32_t time_savings = 0.0f;
78
79void test_fft_c2c_1d_int32_conformance()
80{
81
82 ne10_int32_t i = 0;
83 ne10_int32_t fftSize = 0;
85 ne10_fft_cfg_int32_t cfg_neon;
86 ne10_float32_t * out_c_tmp = NULL;
87 ne10_float32_t * out_neon_tmp = NULL;
88
89 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
90
91 /* init input memory */
92 guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
93 guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
94 in_c = guarded_in_c + ARRAY_GUARD_LEN;
95 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
96
97 /* init dst memory */
98 guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
99 guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
100 out_c = guarded_out_c + ARRAY_GUARD_LEN;
101 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
102
103 out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
104 out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
105
106 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
107 {
108 testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
109 testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
110 }
111 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
112 {
113 fprintf (stdout, "FFT size %d\n", fftSize);
114 /* FFT init */
115 cfg_c = ne10_fft_alloc_c2c_int32_c (fftSize);
116 if (cfg_c == NULL)
117 {
118 fprintf (stdout, "======ERROR, FFT alloc fails\n");
119 return;
120 }
121
122 cfg_neon = ne10_fft_alloc_c2c_int32_neon (fftSize);
123 if (cfg_neon == NULL)
124 {
125 NE10_FREE (cfg_c);
126 fprintf (stdout, "======ERROR, FFT alloc fails\n");
127 return;
128 }
129
130 /* unscaled FFT test */
131 memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
132 memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
133
134 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
135 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
136 ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 0);
137 ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 0, 0);
138 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
139 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
140
141 //conformance test
142 for (i = 0; i < fftSize * 2; i++)
143 {
144 out_c_tmp[i] = (ne10_float32_t) out_c[i];
145 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
146 }
147 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
148 assert_false ( (snr < SNR_THRESHOLD_INT32));
149
150 /* IFFT test */
151 memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
152 memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
153
154 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
155 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
156 ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 0);
157 ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 0);
158 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
159 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
160
161 //conformance test
162 for (i = 0; i < fftSize * 2; i++)
163 {
164 out_c_tmp[i] = (ne10_float32_t) out_c[i];
165 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
166 }
167 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
168 assert_false ( (snr < SNR_THRESHOLD_INT32));
169
170 /* scaled FFT test */
171 memcpy (in_c, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
172 memcpy (in_neon, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
173
174 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
175 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
176 ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 1);
177 ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 0, 1);
178 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
179 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
180
181 //conformance test
182 for (i = 0; i < fftSize * 2; i++)
183 {
184 out_c_tmp[i] = (ne10_float32_t) out_c[i];
185 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
186 }
187 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
188 assert_false ( (snr < SNR_THRESHOLD_INT32));
189
190 /* IFFT test */
191 memcpy (in_c, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
192 memcpy (in_neon, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
193
194 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
195 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
196 ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 1);
197 ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 1);
198 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
199 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
200
201 //conformance test
202 for (i = 0; i < fftSize * 2; i++)
203 {
204 out_c_tmp[i] = (ne10_float32_t) out_c[i];
205 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
206 }
207 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
208 assert_false ( (snr < SNR_THRESHOLD_INT32));
209
210 NE10_FREE (cfg_c);
211 NE10_FREE (cfg_neon);
212 }
213
214 NE10_FREE (guarded_in_c);
215 NE10_FREE (guarded_in_neon);
216 NE10_FREE (guarded_out_c);
217 NE10_FREE (guarded_out_neon);
218 NE10_FREE (out_c_tmp);
219 NE10_FREE (out_neon_tmp);
220}
221
222void test_fft_c2c_1d_int32_performance()
223{
224
225 ne10_int32_t i = 0;
226 ne10_int32_t fftSize = 0;
228 ne10_fft_cfg_int32_t cfg_neon;
229 ne10_int32_t test_loop = 0;
230
231 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
232 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
233
234 /* init input memory */
235 guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
236 guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
237 in_c = guarded_in_c + ARRAY_GUARD_LEN;
238 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
239
240 /* init dst memory */
241 guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
242 guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
243 out_c = guarded_out_c + ARRAY_GUARD_LEN;
244 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
245
246 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
247 {
248 testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
249 testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
250 }
251 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
252 {
253 fprintf (stdout, "FFT size %d\n", fftSize);
254
255 /* FFT test */
256 memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
257 memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
258 cfg_c = ne10_fft_alloc_c2c_int32_c (fftSize);
259 if (cfg_c == NULL)
260 {
261 fprintf (stdout, "======ERROR, FFT alloc fails\n");
262 return;
263 }
264
265 cfg_neon = ne10_fft_alloc_c2c_int32_neon (fftSize);
266 if (cfg_neon == NULL)
267 {
268 NE10_FREE (cfg_c);
269 fprintf (stdout, "======ERROR, FFT alloc fails\n");
270 return;
271 }
272
273 test_loop = TEST_COUNT / fftSize;
274
275 GET_TIME
276 (
277 time_c,
278 {
279 for (i = 0; i < test_loop; i++)
280 ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 0);
281 }
282 );
283 GET_TIME
284 (
285 time_neon,
286 {
287 for (i = 0; i < test_loop; i++)
288 ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_neon, 0, 0);
289 }
290 );
291 time_speedup = (ne10_float32_t) time_c / time_neon;
292 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
293 ne10_log (__FUNCTION__, " unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
294
295 /* IFFT test */
296 memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int32_t));
297 memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int32_t));
298
299 GET_TIME
300 (
301 time_c,
302 {
303 for (i = 0; i < test_loop; i++)
304 ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 0);
305 }
306 );
307 GET_TIME
308 (
309 time_neon,
310 {
311 for (i = 0; i < test_loop; i++)
312 ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 0);
313 }
314 );
315
316 time_speedup = (ne10_float32_t) time_c / time_neon;
317 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
318 ne10_log (__FUNCTION__, "unscaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
319
320 /* FFT test */
321 memcpy (in_c, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
322 memcpy (in_neon, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
323
324 GET_TIME
325 (
326 time_c,
327 {
328 for (i = 0; i < test_loop; i++)
329 ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 1);
330 }
331 );
332 GET_TIME
333 (
334 time_neon,
335 {
336 for (i = 0; i < test_loop; i++)
337 ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_neon, 0, 1);
338 }
339 );
340 time_speedup = (ne10_float32_t) time_c / time_neon;
341 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
342 ne10_log (__FUNCTION__, " scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
343
344 /* IFFT test */
345 memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int32_t));
346 memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int32_t));
347
348 GET_TIME
349 (
350 time_c,
351 {
352 for (i = 0; i < test_loop; i++)
353 ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 1);
354 }
355 );
356 GET_TIME
357 (
358 time_neon,
359 {
360 for (i = 0; i < test_loop; i++)
361 ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 1);
362 }
363 );
364
365 time_speedup = (ne10_float32_t) time_c / time_neon;
366 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
367 ne10_log (__FUNCTION__, " scaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
368
369 NE10_FREE (cfg_c);
370 NE10_FREE (cfg_neon);
371 }
372
373 NE10_FREE (guarded_in_c);
374 NE10_FREE (guarded_in_neon);
375 NE10_FREE (guarded_out_c);
376 NE10_FREE (guarded_out_neon);
377}
378
379void test_fft_r2c_1d_int32_conformance()
380{
381
382 ne10_int32_t i = 0;
383 ne10_int32_t fftSize = 0;
385 ne10_float32_t * out_c_tmp = NULL;
386 ne10_float32_t * out_neon_tmp = NULL;
387
388 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
389
390 /* init input memory */
391 guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
392 guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
393 in_c = guarded_in_c + ARRAY_GUARD_LEN;
394 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
395
396 /* init dst memory */
397 guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
398 guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
399 out_c = guarded_out_c + ARRAY_GUARD_LEN;
400 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
401
402 out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
403 out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
404
405 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
406 {
407 testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
408 testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
409 }
410 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
411 {
412 fprintf (stdout, "FFT size %d\n", fftSize);
413 /* FFT init */
414 cfg = ne10_fft_alloc_r2c_int32 (fftSize);
415 if (cfg == NULL)
416 {
417 fprintf (stdout, "======ERROR, FFT alloc fails\n");
418 return;
419 }
420
421 /* unscaled FFT test */
422 memcpy (in_c, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
423 memcpy (in_neon, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
424
425 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
426 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
427
428 ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 0);
429 ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 0);
430
431 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
432 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
433
434 //conformance test
435 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
436 {
437 out_c_tmp[i] = (ne10_float32_t) out_c[i];
438 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
439 }
440 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
441 assert_false ( (snr < SNR_THRESHOLD_INT32));
442
443 /* IFFT test */
444 for (i = 1; i < (fftSize / 2); i++)
445 {
446 in_c[2 * i] = testInput_i32_unscaled[2 * i];
447 in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
448 in_c[2 * (fftSize - i)] = in_c[2 * i];
449 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
450 }
451 in_c[0] = testInput_i32_unscaled[0];
452 in_c[1] = 0;
453 in_c[fftSize] = testInput_i32_unscaled[1];
454 in_c[fftSize + 1] = 0;
455 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
456
457 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
458 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
459
460 ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0);
461 ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 0);
462
463 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
464 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
465
466 //conformance test
467 for (i = 0; i < fftSize; i++)
468 {
469 out_c_tmp[i] = (ne10_float32_t) out_c[i];
470 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
471 }
472 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
473 assert_false ( (snr < SNR_THRESHOLD_INT32));
474
475 /* scaled FFT test */
476 memcpy (in_c, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
477 memcpy (in_neon, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
478
479 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
480 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
481
482 ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 1);
483 ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 1);
484
485 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
486 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
487
488 //conformance test
489 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
490 {
491 out_c_tmp[i] = (ne10_float32_t) out_c[i];
492 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
493 }
494 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
495 assert_false ( (snr < SNR_THRESHOLD_INT32));
496
497 /* IFFT test */
498 for (i = 1; i < (fftSize / 2); i++)
499 {
500 in_c[2 * i] = testInput_i32_scaled[2 * i];
501 in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
502 in_c[2 * (fftSize - i)] = in_c[2 * i];
503 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
504 }
505 in_c[0] = testInput_i32_scaled[0];
506 in_c[1] = 0;
507 in_c[fftSize] = testInput_i32_scaled[1];
508 in_c[fftSize + 1] = 0;
509 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
510
511 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
512 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
513
514 ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 1);
515 ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1);
516
517 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
518 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
519
520 //conformance test
521 for (i = 0; i < fftSize; i++)
522 {
523 out_c_tmp[i] = (ne10_float32_t) out_c[i];
524 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
525 }
526 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
527 assert_false ( (snr < SNR_THRESHOLD_INT32));
528
529
530 NE10_FREE (cfg);
531 }
532
533 NE10_FREE (guarded_in_c);
534 NE10_FREE (guarded_in_neon);
535 NE10_FREE (guarded_out_c);
536 NE10_FREE (guarded_out_neon);
537 NE10_FREE (out_c_tmp);
538 NE10_FREE (out_neon_tmp);
539}
540
541void test_fft_r2c_1d_int32_performance()
542{
543
544 ne10_int32_t i = 0;
545 ne10_int32_t fftSize = 0;
547 ne10_int32_t test_loop = 0;
548
549 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
550 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
551
552 /* init input memory */
553 guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
554 guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
555 in_c = guarded_in_c + ARRAY_GUARD_LEN;
556 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
557
558 /* init dst memory */
559 guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
560 guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
561 out_c = guarded_out_c + ARRAY_GUARD_LEN;
562 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
563
564 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
565 {
566 testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
567 testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
568 }
569 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
570 {
571 fprintf (stdout, "FFT size %d\n", fftSize);
572
573 cfg = ne10_fft_alloc_r2c_int32 (fftSize);
574 if (cfg == NULL)
575 {
576 fprintf (stdout, "======ERROR, FFT alloc fails\n");
577 return;
578 }
579 test_loop = TEST_COUNT / fftSize;
580 /* unscaled FFT test */
581 memcpy (in_c, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
582 memcpy (in_neon, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
583
584 GET_TIME
585 (
586 time_c,
587 {
588 for (i = 0; i < test_loop; i++)
589 ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 0);
590 }
591 );
592 GET_TIME
593 (
594 time_neon,
595 {
596 for (i = 0; i < test_loop; i++)
597 ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 0);
598 }
599 );
600
601 time_speedup = (ne10_float32_t) time_c / time_neon;
602 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
603 ne10_log (__FUNCTION__, "Int32 unscaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
604
605 /* IFFT test */
606 for (i = 1; i < (fftSize / 2); i++)
607 {
608 in_c[2 * i] = testInput_i32_unscaled[2 * i];
609 in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
610 in_c[2 * (fftSize - i)] = in_c[2 * i];
611 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
612 }
613 in_c[0] = testInput_i32_unscaled[0];
614 in_c[1] = 0;
615 in_c[fftSize] = testInput_i32_unscaled[1];
616 in_c[fftSize + 1] = 0;
617 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
618
619 GET_TIME
620 (
621 time_c,
622 {
623 for (i = 0; i < test_loop; i++)
624 ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0);
625 }
626 );
627 GET_TIME
628 (
629 time_neon,
630 {
631 for (i = 0; i < test_loop; i++)
632 ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 0);
633 }
634 );
635
636 time_speedup = (ne10_float32_t) time_c / time_neon;
637 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
638 ne10_log (__FUNCTION__, "Int32 unscaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
639
640 /* scaled FFT test */
641 memcpy (in_c, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
642 memcpy (in_neon, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
643
644 GET_TIME
645 (
646 time_c,
647 {
648 for (i = 0; i < test_loop; i++)
649 ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 1);
650 }
651 );
652 GET_TIME
653 (
654 time_neon,
655 {
656 for (i = 0; i < test_loop; i++)
657 ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 1);
658 }
659 );
660
661 time_speedup = (ne10_float32_t) time_c / time_neon;
662 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
663 ne10_log (__FUNCTION__, "Int32 scaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
664
665 /* IFFT test */
666 for (i = 1; i < (fftSize / 2); i++)
667 {
668 in_c[2 * i] = testInput_i32_scaled[2 * i];
669 in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
670 in_c[2 * (fftSize - i)] = in_c[2 * i];
671 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
672 }
673 in_c[0] = testInput_i32_scaled[0];
674 in_c[1] = 0;
675 in_c[fftSize] = testInput_i32_scaled[1];
676 in_c[fftSize + 1] = 0;
677 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
678
679 GET_TIME
680 (
681 time_c,
682 {
683 for (i = 0; i < test_loop; i++)
684 ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 1);
685 }
686 );
687 GET_TIME
688 (
689 time_neon,
690 {
691 for (i = 0; i < test_loop; i++)
692 ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1);
693 }
694 );
695
696 time_speedup = (ne10_float32_t) time_c / time_neon;
697 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
698 ne10_log (__FUNCTION__, "Int32 scaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
699
700 NE10_FREE (cfg);
701 }
702
703 NE10_FREE (guarded_in_c);
704 NE10_FREE (guarded_in_neon);
705 NE10_FREE (guarded_out_c);
706 NE10_FREE (guarded_out_neon);
707}
708
/*
 * Runs the complex-to-complex int32 FFT tests selected at build time:
 * the conformance test when SMOKE_TEST or REGRESSION_TEST is defined, and
 * the performance test when PERFORMANCE_TEST is defined.
 */
void test_fft_c2c_1d_int32()
{
    /* `defined` is applied to both macros so that a value-less
     * `#define REGRESSION_TEST` no longer breaks the #if expression */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_int32_performance();
#endif
}
719
/*
 * Runs the real-to-complex int32 FFT tests selected at build time:
 * the conformance test when SMOKE_TEST or REGRESSION_TEST is defined, and
 * the performance test when PERFORMANCE_TEST is defined.
 */
void test_fft_r2c_1d_int32()
{
    /* `defined` is applied to both macros so that a value-less
     * `#define REGRESSION_TEST` no longer breaks the #if expression */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_int32_performance();
#endif
}
730
731static void my_test_setup (void)
732{
733 ne10_log_buffer_ptr = ne10_log_buffer;
734}
735
736void test_fixture_fft_c2c_1d_int32 (void)
737{
738 test_fixture_start(); // starts a fixture
739
740 fixture_setup (my_test_setup);
741
742 run_test (test_fft_c2c_1d_int32); // run tests
743
744 test_fixture_end(); // ends a fixture
745}
746
747void test_fixture_fft_r2c_1d_int32 (void)
748{
749 test_fixture_start(); // starts a fixture
750
751 fixture_setup (my_test_setup);
752
753 run_test (test_fft_r2c_1d_int32); // run tests
754
755 test_fixture_end(); // ends a fixture
756}
void ne10_fft_c2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 32-bit fixed point data.
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition NE10_fft.c:435
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
void ne10_fft_c2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 32-bit fixed point data.
void ne10_fft_r2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int32 data.
void ne10_fft_c2r_1d_int32_neon(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int32 data.
void ne10_fft_r2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int32 data.
void ne10_fft_c2r_1d_int32_c(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int32 data.
ne10_fft_r2c_cfg_int32_t ne10_fft_alloc_r2c_int32(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
Structure for the 32-bit fixed-point FFT function.
Definition NE10_types.h:329