Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_fft_bfly.h
1/*
2 * Copyright 2014-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : dsp/NE10_fft_bfly.h
30 */
31
32#include "NE10_types.h"
33#include "NE10_fft_cplx_ops.h"
34#include "NE10_fft_common_varibles.h"
35
36#ifndef NE10_FFT_BFLY_H
37#define NE10_FFT_BFLY_H
38
// R2C FFT size==4: forward 4-point butterfly on real input.
// IN[4]  R[0],R[1],R[2],R[3]   (four real samples)
// OUT[4] R[0],R[1],I[1],R[2]   (packed spectrum)
//
//   OUT[0] = (IN[0] + IN[2]) + (IN[1] + IN[3])
//   OUT[1] =  IN[0] - IN[2]
//   OUT[2] =  IN[3] - IN[1]
//   OUT[3] = (IN[0] + IN[2]) - (IN[1] + IN[3])
//
// OUT and IN may be any pointer/array expression (e.g. `buf + 4`); both are
// expanded several times, so they must be free of side effects.
// OUT[0] is stored before IN[0] is read for the last time, so OUT must not
// overlap IN.
#define NE10_FFT_R2C_4R_RCR(OUT,IN) \
    do { \
        ne10_float32_t ne10_bfly_scratch_[2]; \
        ne10_bfly_scratch_[0] = (IN)[0] + (IN)[2]; \
        ne10_bfly_scratch_[1] = (IN)[1] + (IN)[3]; \
        (OUT)[0] = ne10_bfly_scratch_[0] + ne10_bfly_scratch_[1]; \
        (OUT)[1] = (IN)[0] - (IN)[2]; \
        (OUT)[2] = (IN)[3] - (IN)[1]; \
        (OUT)[3] = ne10_bfly_scratch_[0] - ne10_bfly_scratch_[1]; \
    } while (0)
52
// C2R FFT size==4: inverse of the R2C butterfly that produces the
// R[0],R[1],I[1],R[2] packing.
// IN[4]  R[0],R[1],I[1],R[2]   (packed spectrum)
// OUT[4] R[0],R[1],R[2],R[3]   (four real samples)
//
//   OUT[0] = (IN[0] + IN[3]) + 2*IN[1]
//   OUT[1] = (IN[0] - IN[3]) - 2*IN[2]
//   OUT[2] = (IN[0] + IN[3]) - 2*IN[1]
//   OUT[3] = (IN[0] - IN[3]) + 2*IN[2]
//
// No 1/4 scaling is applied: feeding the packed spectrum of x[0..3] yields
// 4*x[0..3].
// OUT and IN may be any pointer/array expression (e.g. `buf + 4`); both are
// expanded several times, so they must be free of side effects.
// All four inputs are read before the first store to OUT.
#define NE10_FFT_C2R_RCR_4R(OUT,IN) \
    do { \
        ne10_float32_t ne10_bfly_scratch_[4]; \
        ne10_bfly_scratch_[0] = ((IN)[0] + (IN)[3]); \
        ne10_bfly_scratch_[1] = ((IN)[0] - (IN)[3]); \
        ne10_bfly_scratch_[2] = (IN)[1] + (IN)[1]; \
        ne10_bfly_scratch_[3] = (IN)[2] + (IN)[2]; \
        (OUT)[0] = ne10_bfly_scratch_[0] + ne10_bfly_scratch_[2]; \
        (OUT)[1] = ne10_bfly_scratch_[1] - ne10_bfly_scratch_[3]; \
        (OUT)[2] = ne10_bfly_scratch_[0] - ne10_bfly_scratch_[2]; \
        (OUT)[3] = ne10_bfly_scratch_[1] + ne10_bfly_scratch_[3]; \
    } while (0)
68
// R2C FFT size==4: forward butterfly producing two complex outputs.
// IN[4]  R[0],R[1],R[2],R[3]   (four real samples)
// OUT[4] R[0],I[0],R[1],I[1]   (two interleaved complex values)
//
//   OUT[0] = IN[0] + (IN[3] - IN[1]) * TW_81N_F32
//   OUT[1] =         (IN[3] + IN[1]) * TW_81N_F32 - IN[2]
//   OUT[2] = IN[0] - (IN[3] - IN[1]) * TW_81N_F32
//   OUT[3] =         (IN[3] + IN[1]) * TW_81N_F32 + IN[2]
//
// TW_81N_F32 is defined outside this header (presumably -sqrt(2)/2; confirm
// in NE10_fft_common_varibles.h).
// OUT and IN may be any pointer/array expression (e.g. `buf + 4`); both are
// expanded several times, so they must be free of side effects.
// OUT[0] is stored before IN[0] is read for the last time, so OUT must not
// overlap IN.
#define NE10_FFT_R2C_4R_CC(OUT,IN) \
    do { \
        ne10_float32_t ne10_bfly_scratch_[2]; \
        ne10_bfly_scratch_[0] = ((IN)[3] - (IN)[1]) * TW_81N_F32; \
        ne10_bfly_scratch_[1] = ((IN)[3] + (IN)[1]) * TW_81N_F32; \
        (OUT)[0] = (IN)[0] + ne10_bfly_scratch_[0]; \
        (OUT)[2] = (IN)[0] - ne10_bfly_scratch_[0]; \
        (OUT)[1] = ne10_bfly_scratch_[1] - (IN)[2]; \
        (OUT)[3] = ne10_bfly_scratch_[1] + (IN)[2]; \
    } while (0)
83
// C2R FFT size==4: inverse of the R2C butterfly that produces the
// R[0],I[0],R[1],I[1] packing.
// IN[4]  R[0],I[0],R[1],I[1]   (two interleaved complex values)
// OUT[4] R[0],R[1],R[2],R[3]   (four real samples)
//
//   OUT[0] = 2 * (IN[0] + IN[2])
//   OUT[2] = 2 * (IN[3] - IN[1])
//   OUT[3] = ((IN[0] - IN[2]) + (IN[1] + IN[3])) / TW_81N_F32
//   OUT[1] = ((IN[0] - IN[2]) - (IN[1] + IN[3])) / TW_81_F32
//
// TW_81_F32 and TW_81N_F32 are defined outside this header. The result is
// 4x the original samples only if TW_81_F32 == -TW_81N_F32 (TODO confirm in
// NE10_fft_common_varibles.h); no 1/4 scaling is applied.
// OUT and IN may be any pointer/array expression (e.g. `buf + 4`); both are
// expanded several times, so they must be free of side effects.
// OUT[0] and OUT[2] are used as accumulators before IN has been fully read,
// so OUT must not overlap IN.
#define NE10_FFT_C2R_CC_4R(OUT,IN) \
    do { \
        ne10_float32_t ne10_bfly_scratch_[4]; \
        (OUT)[0] = ( (IN)[0] + (IN)[2]); \
        (OUT)[2] = (-(IN)[1] + (IN)[3]); \
        (OUT)[0] = (OUT)[0] + (OUT)[0]; \
        (OUT)[2] = (OUT)[2] + (OUT)[2]; \
        ne10_bfly_scratch_[0] = ((IN)[0] - (IN)[2]); \
        ne10_bfly_scratch_[1] = ((IN)[1] + (IN)[3]); \
        ne10_bfly_scratch_[2] = (ne10_bfly_scratch_[0] + ne10_bfly_scratch_[1]); \
        ne10_bfly_scratch_[3] = (ne10_bfly_scratch_[0] - ne10_bfly_scratch_[1]); \
        (OUT)[3] = ne10_bfly_scratch_[2] / TW_81N_F32; \
        (OUT)[1] = ne10_bfly_scratch_[3] / TW_81_F32; \
    } while (0)
101
// R2C FFT size==4: radix-4 butterfly on four complex elements.
// IN[4]  four complex values, each with .r and .i members
// OUT[4] four complex values, each with .r and .i members
//
// Assuming NE10_CPX_ADD(Z,A,B) / NE10_CPX_SUB(Z,A,B) store the
// component-wise A+B / A-B into Z (they are defined outside this header;
// confirm in NE10_fft_cplx_ops.h), the macro computes:
//
//   S0 = IN[0] + IN[2]      S1 = IN[0] - IN[2]
//   S2 = IN[1] + IN[3]      S3 = IN[1] - IN[3]
//   S3 = (S3.i, -S3.r)                 (real/imag swapped, new imag negated)
//   OUT[0] =       S0 + S2
//   OUT[1] =       S1 + S3
//   OUT[2] = conj (S0 - S2)            (imaginary part negated)
//   OUT[3] = conj (S1 - S3)            (imaginary part negated)
//
// OUT and IN may be any pointer/array expression (e.g. `buf + 4`); both are
// expanded several times, so they must be free of side effects.
#define NE10_FFT_R2C_CC_CC(OUT,IN) \
    do { \
        ne10_fft_cpx_float32_t ne10_bfly_tmp_[4]; \
        ne10_float32_t ne10_bfly_swap_; \
        NE10_CPX_ADD (ne10_bfly_tmp_[0], (IN)[0], (IN)[2]); \
        NE10_CPX_SUB (ne10_bfly_tmp_[1], (IN)[0], (IN)[2]); \
        NE10_CPX_ADD (ne10_bfly_tmp_[2], (IN)[1], (IN)[3]); \
        NE10_CPX_SUB (ne10_bfly_tmp_[3], (IN)[1], (IN)[3]); \
        ne10_bfly_swap_ = ne10_bfly_tmp_[3].i; \
        ne10_bfly_tmp_[3].i = - ne10_bfly_tmp_[3].r; \
        ne10_bfly_tmp_[3].r = ne10_bfly_swap_; \
        (OUT)[0].r = ne10_bfly_tmp_[0].r + ne10_bfly_tmp_[2].r; \
        (OUT)[0].i = ne10_bfly_tmp_[0].i + ne10_bfly_tmp_[2].i; \
        (OUT)[2].r = ne10_bfly_tmp_[0].r - ne10_bfly_tmp_[2].r; \
        (OUT)[2].i = -(ne10_bfly_tmp_[0].i - ne10_bfly_tmp_[2].i); \
        (OUT)[1].r = ne10_bfly_tmp_[1].r + ne10_bfly_tmp_[3].r; \
        (OUT)[1].i = ne10_bfly_tmp_[1].i + ne10_bfly_tmp_[3].i; \
        (OUT)[3].r = ne10_bfly_tmp_[1].r - ne10_bfly_tmp_[3].r; \
        (OUT)[3].i = -(ne10_bfly_tmp_[1].i - ne10_bfly_tmp_[3].i); \
    } while (0)
125
// C2R FFT size==4: inverse radix-4 butterfly on four complex elements.
// IN[4]  four complex values, each with .r and .i members
// OUT[4] four complex values, each with .r and .i members
//
//   A = (IN[0].r + IN[1].r,  IN[0].i - IN[1].i)
//   B = (IN[2].r + IN[3].r,  IN[2].i - IN[3].i)
//   C = (IN[0].r - IN[1].r,  IN[0].i + IN[1].i)
//   D = (-(IN[2].i + IN[3].i),  IN[2].r - IN[3].r)
//   OUT[0] = A + B      OUT[2] = A - B
//   OUT[1] = C + D      OUT[3] = C - D
//
// Inputs are combined as the pairs (IN[0], IN[1]) and (IN[2], IN[3]).
// NOTE(review): by the algebra, this undoes NE10_FFT_R2C_CC_CC (scaled by 4)
// when IN holds that macro's outputs in the order 0, 2, 1, 3; verify the
// expected layout against the callers.
// No 1/4 scaling is applied.
// OUT and IN may be any pointer/array expression (e.g. `buf + 4`); both are
// expanded several times, so they must be free of side effects.
// All inputs are read before the first store to OUT.
#define NE10_FFT_C2R_CC_CC(OUT,IN) \
    do { \
        ne10_fft_cpx_float32_t ne10_bfly_scratch_[4]; \
        ne10_bfly_scratch_[0].r = ((IN)[0].r + (IN)[1].r); \
        ne10_bfly_scratch_[2].r = ((IN)[0].r - (IN)[1].r); \
        ne10_bfly_scratch_[2].i = ((IN)[0].i + (IN)[1].i); \
        ne10_bfly_scratch_[0].i = ((IN)[0].i - (IN)[1].i); \
        ne10_bfly_scratch_[1].r = ((IN)[2].r + (IN)[3].r); \
        ne10_bfly_scratch_[3].i = ((IN)[2].r - (IN)[3].r); \
        ne10_bfly_scratch_[3].r = ((IN)[2].i + (IN)[3].i) * -1.0f; \
        ne10_bfly_scratch_[1].i = ((IN)[2].i - (IN)[3].i); \
        (OUT)[0].r = (ne10_bfly_scratch_[0].r + ne10_bfly_scratch_[1].r); \
        (OUT)[2].r = (ne10_bfly_scratch_[0].r - ne10_bfly_scratch_[1].r); \
        (OUT)[0].i = (ne10_bfly_scratch_[0].i + ne10_bfly_scratch_[1].i); \
        (OUT)[2].i = (ne10_bfly_scratch_[0].i - ne10_bfly_scratch_[1].i); \
        (OUT)[1].r = (ne10_bfly_scratch_[2].r + ne10_bfly_scratch_[3].r); \
        (OUT)[3].r = (ne10_bfly_scratch_[2].r - ne10_bfly_scratch_[3].r); \
        (OUT)[1].i = (ne10_bfly_scratch_[2].i + ne10_bfly_scratch_[3].i); \
        (OUT)[3].i = (ne10_bfly_scratch_[2].i - ne10_bfly_scratch_[3].i); \
    } while (0)
149
150#endif // NE10_FFT_BFLY_H