Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
test_suite_fft_int16.c
1/*
2 * Copyright 2013-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : test_suite_fft_int16.c
30 */
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <math.h>
35#include <string.h>
36
37#include "NE10_dsp.h"
38#include "seatest.h"
39#include "unit_test_common.h"
40
41
/* ----------------------------------------------------------------------
** Global defines
** ------------------------------------------------------------------- */

/* Largest FFT length exercised. Every test buffer holds twice this many
 * int16 values (real and imaginary parts). */
#define TEST_LENGTH_SAMPLES      (4096)

/* Smallest complex FFT length exercised. */
#define MIN_LENGTH_SAMPLES_CPX   (4)

/* Smallest real FFT length exercised: twice the complex minimum. */
#define MIN_LENGTH_SAMPLES_REAL  (MIN_LENGTH_SAMPLES_CPX * 2)

/* A conformance check fails when the SNR between the C and NEON outputs
 * falls below this value. */
#define SNR_THRESHOLD_INT16      15.0f

/* Each timing measurement runs TEST_COUNT / fftSize transform calls. */
#define TEST_COUNT               250000
54
55/* ----------------------------------------------------------------------
56** Defines each of the tests performed
57** ------------------------------------------------------------------- */
58
59//input and output
60static ne10_int32_t testInput_i16_unscaled[TEST_LENGTH_SAMPLES * 2];
61static ne10_int32_t testInput_i16_scaled[TEST_LENGTH_SAMPLES * 2];
62static ne10_int16_t * guarded_in_c = NULL;
63static ne10_int16_t * guarded_in_neon = NULL;
64static ne10_int16_t * in_c = NULL;
65static ne10_int16_t * in_neon = NULL;
66
67static ne10_int16_t * guarded_out_c = NULL;
68static ne10_int16_t * guarded_out_neon = NULL;
69static ne10_int16_t * out_c = NULL;
70static ne10_int16_t * out_neon = NULL;
71
72static ne10_float32_t snr = 0.0f;
73
74static ne10_int64_t time_c = 0;
75static ne10_int64_t time_neon = 0;
76static ne10_float32_t time_speedup = 0.0f;
77static ne10_float32_t time_savings = 0.0f;
78
79void test_fft_c2c_1d_int16_conformance()
80{
81
82 ne10_int32_t i = 0;
83 ne10_int32_t fftSize = 0;
85 ne10_float32_t * out_c_tmp = NULL;
86 ne10_float32_t * out_neon_tmp = NULL;
87
88 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
89
90 /* init input memory */
91 guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
92 guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
93 in_c = guarded_in_c + ARRAY_GUARD_LEN;
94 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
95
96 /* init dst memory */
97 guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
98 guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
99 out_c = guarded_out_c + ARRAY_GUARD_LEN;
100 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
101
102 out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
103 out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
104
105 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
106 {
107 testInput_i16_unscaled[i] = (ne10_int32_t) (drand48() * 1024) - 512;
108 testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
109 }
110 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
111 {
112 fprintf (stdout, "FFT size %d\n", fftSize);
113 cfg = ne10_fft_alloc_c2c_int16 (fftSize);
114 if (cfg == NULL)
115 {
116 fprintf (stdout, "======ERROR, FFT alloc fails\n");
117 return;
118 }
119
120 /* unscaled FFT test */
121 memcpy (in_c, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
122 memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
123
124 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
125 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
126
128 ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0, 0);
129
130 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
131 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
132
133 //conformance test
134 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
135 {
136 out_c_tmp[i] = (ne10_float32_t) out_c[i];
137 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
138 }
139 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
140 assert_false ( (snr < SNR_THRESHOLD_INT16));
141
142 /* IFFT test */
143 memcpy (in_c, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
144 memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
145
146 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
147 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
148
150 ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1, 0);
151
152 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
153 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
154
155 //conformance test
156 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
157 {
158 out_c_tmp[i] = (ne10_float32_t) out_c[i];
159 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
160 }
161 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
162 assert_false ( (snr < SNR_THRESHOLD_INT16));
163
164 /* scaled FFT test */
165 memcpy (in_c, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
166 memcpy (in_neon, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
167
168 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
169 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
170
172 ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0, 1);
173
174 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
175 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
176
177 //conformance test
178 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
179 {
180 out_c_tmp[i] = (ne10_float32_t) out_c[i];
181 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
182 }
183 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
184 assert_false ( (snr < SNR_THRESHOLD_INT16));
185
186 /* IFFT test */
187 memcpy (in_c, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
188 memcpy (in_neon, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
189
190 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
191 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
192
194 ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1, 1);
195
196 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
197 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
198
199 //conformance test
200 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
201 {
202 out_c_tmp[i] = (ne10_float32_t) out_c[i];
203 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
204 }
205 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
206 assert_false ( (snr < SNR_THRESHOLD_INT16));
207
208 NE10_FREE (cfg);
209 }
210
211 NE10_FREE (guarded_in_c);
212 NE10_FREE (guarded_in_neon);
213 NE10_FREE (guarded_out_c);
214 NE10_FREE (guarded_out_neon);
215 NE10_FREE (out_c_tmp);
216 NE10_FREE (out_neon_tmp);
217}
218
219void test_fft_c2c_1d_int16_performance()
220{
221
222 ne10_int32_t i = 0;
223 ne10_int32_t fftSize = 0;
225 ne10_int32_t test_loop = 0;
226
227 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
228 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
229
230 /* init input memory */
231 guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
232 guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
233 in_c = guarded_in_c + ARRAY_GUARD_LEN;
234 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
235
236 /* init dst memory */
237 guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
238 guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
239 out_c = guarded_out_c + ARRAY_GUARD_LEN;
240 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
241
242 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
243 {
244 testInput_i16_unscaled[i] = (ne10_int16_t) (drand48() * 1024) - 512;
245 testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
246 }
247 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
248 {
249 fprintf (stdout, "FFT size %d\n", fftSize);
250 cfg = ne10_fft_alloc_c2c_int16 (fftSize);
251 if (cfg == NULL)
252 {
253 fprintf (stdout, "======ERROR, FFT alloc fails\n");
254 return;
255 }
256 test_loop = TEST_COUNT / fftSize;
257
258 /* unscaled FFT test */
259 memcpy (in_c, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
260 memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
261
262 GET_TIME
263 (
264 time_c,
265 {
266 for (i = 0; i < test_loop; i++)
268 }
269 );
270 GET_TIME
271 (
272 time_neon,
273 {
274 for (i = 0; i < test_loop; i++)
275 ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0, 0);
276 }
277 );
278
279 time_speedup = (ne10_float32_t) time_c / time_neon;
280 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
281 ne10_log (__FUNCTION__, "Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
282
283 /* IFFT test */
284 memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int16_t));
285 memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int16_t));
286
287 GET_TIME
288 (
289 time_c,
290 {
291 for (i = 0; i < test_loop; i++)
293 }
294 );
295 GET_TIME
296 (
297 time_neon,
298 {
299 for (i = 0; i < test_loop; i++)
300 ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1, 0);
301 }
302 );
303
304 time_speedup = (ne10_float32_t) time_c / time_neon;
305 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
306 ne10_log (__FUNCTION__, "Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
307 /* scaled FFT test */
308 memcpy (in_c, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
309 memcpy (in_neon, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
310
311 GET_TIME
312 (
313 time_c,
314 {
315 for (i = 0; i < test_loop; i++)
317 }
318 );
319 GET_TIME
320 (
321 time_neon,
322 {
323 for (i = 0; i < test_loop; i++)
324 ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0, 1);
325 }
326 );
327
328 time_speedup = (ne10_float32_t) time_c / time_neon;
329 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
330 ne10_log (__FUNCTION__, "Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
331
332 /* IFFT test */
333 memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int16_t));
334 memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int16_t));
335
336 GET_TIME
337 (
338 time_c,
339 {
340 for (i = 0; i < test_loop; i++)
342 }
343 );
344 GET_TIME
345 (
346 time_neon,
347 {
348 for (i = 0; i < test_loop; i++)
349 ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1, 1);
350 }
351 );
352
353 time_speedup = (ne10_float32_t) time_c / time_neon;
354 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
355 ne10_log (__FUNCTION__, "Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
356
357 NE10_FREE (cfg);
358 }
359
360 NE10_FREE (guarded_in_c);
361 NE10_FREE (guarded_in_neon);
362 NE10_FREE (guarded_out_c);
363 NE10_FREE (guarded_out_neon);
364}
365
366void test_fft_r2c_1d_int16_conformance()
367{
368
369 ne10_int32_t i = 0;
370 ne10_int32_t fftSize = 0;
372 ne10_float32_t * out_c_tmp = NULL;
373 ne10_float32_t * out_neon_tmp = NULL;
374
375 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
376
377 /* init input memory */
378 guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
379 guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
380 in_c = guarded_in_c + ARRAY_GUARD_LEN;
381 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
382
383 /* init dst memory */
384 guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
385 guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
386 out_c = guarded_out_c + ARRAY_GUARD_LEN;
387 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
388
389 out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
390 out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
391
392 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
393 {
394 testInput_i16_unscaled[i] = (ne10_int16_t) (drand48() * 1024) - 512;
395 testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
396 }
397 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
398 {
399 fprintf (stdout, "RFFT size %d\n", fftSize);
400 cfg = ne10_fft_alloc_r2c_int16 (fftSize);
401 if (cfg == NULL)
402 {
403 fprintf (stdout, "======ERROR, FFT alloc fails\n");
404 return;
405 }
406
407 /* unscaled FFT test */
408 memcpy (in_c, testInput_i16_unscaled, fftSize * sizeof (ne10_int16_t));
409 memcpy (in_neon, testInput_i16_unscaled, fftSize * sizeof (ne10_int16_t));
410
411 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
412 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
413
414 ne10_fft_r2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, in_c, cfg, 0);
415 ne10_fft_r2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, in_neon, cfg, 0);
416
417 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
418 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
419
420 //conformance test
421 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
422 {
423 out_c_tmp[i] = (ne10_float32_t) out_c[i];
424 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
425 }
426 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
427 assert_false ( (snr < SNR_THRESHOLD_INT16));
428
429 /* IFFT test */
430 for (i = 1; i < (fftSize / 2); i++)
431 {
432 in_c[2 * i] = testInput_i16_unscaled[2 * i];
433 in_c[2 * i + 1] = testInput_i16_unscaled[2 * i + 1];
434 in_c[2 * (fftSize - i)] = in_c[2 * i];
435 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
436 }
437 in_c[0] = testInput_i16_unscaled[0];
438 in_c[1] = 0;
439 in_c[fftSize] = testInput_i16_unscaled[1];
440 in_c[fftSize + 1] = 0;
441 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int16_t));
442
443 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int16_t));
444 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int16_t));
445
446 ne10_fft_c2r_1d_int16_c (out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 0);
447 ne10_fft_c2r_1d_int16_neon (out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0);
448
449 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int16_t));
450 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int16_t));
451
452 //conformance test
453 for (i = 0; i < fftSize; i++)
454 {
455 out_c_tmp[i] = (ne10_float32_t) out_c[i];
456 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
457 }
458 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
459 assert_false ( (snr < SNR_THRESHOLD_INT16));
460
461 /* scaled FFT test */
462 memcpy (in_c, testInput_i16_scaled, fftSize * sizeof (ne10_int16_t));
463 memcpy (in_neon, testInput_i16_scaled, fftSize * sizeof (ne10_int16_t));
464
465 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
466 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
467
468 ne10_fft_r2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, in_c, cfg, 1);
469 ne10_fft_r2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, in_neon, cfg, 1);
470
471 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
472 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
473
474 //conformance test
475 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
476 {
477 out_c_tmp[i] = (ne10_float32_t) out_c[i];
478 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
479 }
480 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
481 assert_false ( (snr < SNR_THRESHOLD_INT16));
482
483 /* IFFT test */
484 for (i = 1; i < (fftSize / 2); i++)
485 {
486 in_c[2 * i] = testInput_i16_scaled[2 * i];
487 in_c[2 * i + 1] = testInput_i16_scaled[2 * i + 1];
488 in_c[2 * (fftSize - i)] = in_c[2 * i];
489 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
490 }
491 in_c[0] = testInput_i16_scaled[0];
492 in_c[1] = 0;
493 in_c[fftSize] = testInput_i16_scaled[1];
494 in_c[fftSize + 1] = 0;
495 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int16_t));
496
497 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int16_t));
498 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int16_t));
499
500 ne10_fft_c2r_1d_int16_c (out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 1);
501 ne10_fft_c2r_1d_int16_neon (out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1);
502
503 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int16_t));
504 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int16_t));
505
506 //conformance test
507 for (i = 0; i < fftSize; i++)
508 {
509 out_c_tmp[i] = (ne10_float32_t) out_c[i];
510 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
511 }
512 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
513 assert_false ( (snr < SNR_THRESHOLD_INT16));
514
515 NE10_FREE (cfg);
516 }
517
518 NE10_FREE (guarded_in_c);
519 NE10_FREE (guarded_in_neon);
520 NE10_FREE (guarded_out_c);
521 NE10_FREE (guarded_out_neon);
522 NE10_FREE (out_c_tmp);
523 NE10_FREE (out_neon_tmp);
524}
525
526void test_fft_r2c_1d_int16_performance()
527{
528
529 ne10_int32_t i = 0;
530 ne10_int32_t fftSize = 0;
532 ne10_int32_t test_loop = 0;
533
534 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
535 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
536
537 /* init input memory */
538 guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
539 guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
540 in_c = guarded_in_c + ARRAY_GUARD_LEN;
541 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
542
543 /* init dst memory */
544 guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
545 guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
546 out_c = guarded_out_c + ARRAY_GUARD_LEN;
547 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
548
549 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
550 {
551 testInput_i16_unscaled[i] = (ne10_int16_t) (drand48() * 1024) - 512;
552 testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
553 }
554 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
555 {
556 fprintf (stdout, "FFT size %d\n", fftSize);
557 cfg = ne10_fft_alloc_r2c_int16 (fftSize);
558 if (cfg == NULL)
559 {
560 fprintf (stdout, "======ERROR, FFT alloc fails\n");
561 return;
562 }
563 test_loop = TEST_COUNT / fftSize;
564
565 /* unscaled FFT test */
566 memcpy (in_c, testInput_i16_unscaled , fftSize * sizeof (ne10_int16_t));
567 memcpy (in_neon, testInput_i16_unscaled , fftSize * sizeof (ne10_int16_t));
568
569 GET_TIME
570 (
571 time_c,
572 {
573 for (i = 0; i < test_loop; i++)
574 ne10_fft_r2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, in_c, cfg, 0);
575 }
576 );
577 GET_TIME
578 (
579 time_neon,
580 {
581 for (i = 0; i < test_loop; i++)
582 ne10_fft_r2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, in_neon, cfg, 0);
583 }
584 );
585
586 time_speedup = (ne10_float32_t) time_c / time_neon;
587 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
588 ne10_log (__FUNCTION__, "Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
589
590 /* IFFT test */
591 for (i = 1; i < (fftSize / 2); i++)
592 {
593 in_c[2 * i] = testInput_i16_unscaled[2 * i];
594 in_c[2 * i + 1] = testInput_i16_unscaled[2 * i + 1];
595 in_c[2 * (fftSize - i)] = in_c[2 * i];
596 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
597 }
598 in_c[0] = testInput_i16_unscaled[0];
599 in_c[1] = 0;
600 in_c[fftSize] = testInput_i16_unscaled[1];
601 in_c[fftSize + 1] = 0;
602 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int16_t));
603
604 GET_TIME
605 (
606 time_c,
607 {
608 for (i = 0; i < test_loop; i++)
609 ne10_fft_c2r_1d_int16_c (out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 0);
610 }
611 );
612 GET_TIME
613 (
614 time_neon,
615 {
616 for (i = 0; i < test_loop; i++)
617 ne10_fft_c2r_1d_int16_neon (out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0);
618 }
619 );
620
621 time_speedup = (ne10_float32_t) time_c / time_neon;
622 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
623 ne10_log (__FUNCTION__, "Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
624
625 /* scaled FFT test */
626 memcpy (in_c, testInput_i16_scaled , fftSize * sizeof (ne10_int16_t));
627 memcpy (in_neon, testInput_i16_scaled , fftSize * sizeof (ne10_int16_t));
628
629 GET_TIME
630 (
631 time_c,
632 {
633 for (i = 0; i < test_loop; i++)
634 ne10_fft_r2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, in_c, cfg, 1);
635 }
636 );
637 GET_TIME
638 (
639 time_neon,
640 {
641 for (i = 0; i < test_loop; i++)
642 ne10_fft_r2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, in_neon, cfg, 1);
643 }
644 );
645
646 time_speedup = (ne10_float32_t) time_c / time_neon;
647 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
648 ne10_log (__FUNCTION__, "Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
649
650 /* IFFT test */
651 for (i = 1; i < (fftSize / 2); i++)
652 {
653 in_c[2 * i] = testInput_i16_scaled[2 * i];
654 in_c[2 * i + 1] = testInput_i16_scaled[2 * i + 1];
655 in_c[2 * (fftSize - i)] = in_c[2 * i];
656 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
657 }
658 in_c[0] = testInput_i16_scaled[0];
659 in_c[1] = 0;
660 in_c[fftSize] = testInput_i16_scaled[1];
661 in_c[fftSize + 1] = 0;
662 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int16_t));
663
664 GET_TIME
665 (
666 time_c,
667 {
668 for (i = 0; i < test_loop; i++)
669 ne10_fft_c2r_1d_int16_c (out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 1);
670 }
671 );
672 GET_TIME
673 (
674 time_neon,
675 {
676 for (i = 0; i < test_loop; i++)
677 ne10_fft_c2r_1d_int16_neon (out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1);
678 }
679 );
680
681 time_speedup = (ne10_float32_t) time_c / time_neon;
682 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
683 ne10_log (__FUNCTION__, "Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
684
685 NE10_FREE (cfg);
686 }
687
688 NE10_FREE (guarded_in_c);
689 NE10_FREE (guarded_in_neon);
690 NE10_FREE (guarded_out_c);
691 NE10_FREE (guarded_out_neon);
692}
693
/*
 * Entry point for the complex-to-complex int16 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined
 * and the performance test when PERFORMANCE_TEST is defined. Both macros in
 * the first condition are tested with `defined`, so a macro defined with an
 * empty value no longer breaks preprocessing.
 */
void test_fft_c2c_1d_int16 (void)
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_int16_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_int16_performance();
#endif
}
704
/*
 * Entry point for the real-to-complex / complex-to-real int16 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined
 * and the performance test when PERFORMANCE_TEST is defined. Both macros in
 * the first condition are tested with `defined`, so a macro defined with an
 * empty value no longer breaks preprocessing.
 */
void test_fft_r2c_1d_int16 (void)
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_int16_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_int16_performance();
#endif
}
715
716static void my_test_setup (void)
717{
718 ne10_log_buffer_ptr = ne10_log_buffer;
719}
720
721void test_fixture_fft_c2c_1d_int16 (void)
722{
723 test_fixture_start(); // starts a fixture
724
725 fixture_setup (my_test_setup);
726
727 run_test (test_fft_c2c_1d_int16); // run tests
728
729 test_fixture_end(); // ends a fixture
730}
731
732void test_fixture_fft_r2c_1d_int16 (void)
733{
734 test_fixture_start(); // starts a fixture
735
736 fixture_setup (my_test_setup);
737
738 run_test (test_fft_r2c_1d_int16); // run tests
739
740 test_fixture_end(); // ends a fixture
741}
void ne10_fft_c2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 16-bit fixed point data.
void ne10_fft_c2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 16-bit fixed point data.
ne10_fft_cfg_int16_t ne10_fft_alloc_c2c_int16(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
void ne10_fft_r2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int16 data.
void ne10_fft_c2r_1d_int16_c(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int16 data.
ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
void ne10_fft_r2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int16 data.
void ne10_fft_c2r_1d_int16_neon(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int16 data.
Structure for the 16-bit fixed-point FFT function.
Definition NE10_types.h:298