// Horizontal bitwise OR of the four 32-bit lanes read from `address`.
// Load 128 bits (four 32-bit lanes a0..a3) into x0.
59 v128_t x1, x0 = wasm_v128_load(address);
// x1 = [a2, a3, a2, a3]: bring the upper two lanes down to lanes 0 and 1.
60 x1 = wasm_i32x4_shuffle(x0, x0, 2, 3, 2, 3);
// Lane 0 is now a0|a2 and lane 1 is a1|a3.
61 x0 = wasm_v128_or(x0, x1);
// Broadcast lane 1 (a1|a3) to every lane.
62 x1 = wasm_i32x4_shuffle(x0, x0, 1, 1, 1, 1);
// Lane 0 is now a0|a1|a2|a3.
63 x0 = wasm_v128_or(x0, x1);
// Pull the combined value out of lane 0 as an unsigned 32-bit scalar.
64 ui32 t = (
ui32)wasm_i32x4_extract_lane(x0, 0);
// Horizontal bitwise OR of the two 64-bit lanes read from `address`.
// Load 128 bits (two 64-bit lanes a0, a1) into x0.
71 v128_t x1, x0 = wasm_v128_load(address);
// x1 = [a1, a1]: copy the upper lane into both lanes.
72 x1 = wasm_i64x2_shuffle(x0, x0, 1, 1);
// Lane 0 is now a0|a1.
73 x0 = wasm_v128_or(x0, x1);
// Pull the combined value out of lane 0 as an unsigned 64-bit scalar.
74 ui64 t = (
ui64)wasm_i64x2_extract_lane(x0, 0);
// Converts 32-bit two's-complement integers, four per iteration, into a
// sign-magnitude form: magnitude shifted left by (31 - K_max), sign in bit 31.
// The shifted magnitudes are also OR-accumulated into the 128 bits at max_val.
// delta_inv is not used by any of the visible statements.
// NOTE(review): the start of the signature and the declarations of `p`,
// `dp` and `K_max` are not visible here; confirm them against the full file.
80 float delta_inv,
ui32 count,
ui32* max_val)
// Left-shift amount applied to every magnitude.
85 ui32 shift = 31 - K_max;
// m0 has only the most significant bit set in each lane (sign-bit mask).
86 v128_t m0 = wasm_i32x4_splat(INT_MIN);
87 v128_t zero = wasm_i32x4_splat(0);
88 v128_t one = wasm_i32x4_splat(1);
// A full 128-bit load: max_val must reference 16 accessible bytes
// (four ui32 accumulators, one per lane).
89 v128_t tmax = wasm_v128_load(max_val);
// Each iteration loads and stores a whole 128-bit vector, so when count is
// not a multiple of 4 the last iteration touches elements past `count`.
// NOTE(review): presumably the buffers are padded accordingly; verify.
91 for (
ui32 i = 0; i < count; i += 4, p += 4, dp += 4)
93 v128_t v = wasm_v128_load(p);
// sign is all-ones in lanes where v < 0 and zero elsewhere.
94 v128_t sign = wasm_i32x4_lt(v, zero);
// (v ^ sign) + (sign & 1) negates the negative lanes and leaves the others
// untouched, i.e. it yields the magnitude of v.
95 v128_t val = wasm_v128_xor(v, sign);
96 v128_t ones = wasm_v128_and(sign, one);
97 val = wasm_i32x4_add(val, ones);
// Reduce the all-ones mask to just the sign bit.
98 sign = wasm_v128_and(sign, m0);
99 val = wasm_i32x4_shl(val, shift);
// Accumulate the shifted magnitude before the sign bit is merged in.
100 tmax = wasm_v128_or(tmax, val);
101 val = wasm_v128_or(val, sign);
102 wasm_v128_store(dp, val);
// Write the four per-lane accumulators back to max_val.
104 wasm_v128_store(max_val, tmax);
// Scales floats by delta_inv, truncates them to 32-bit integers, and stores
// them in sign-magnitude form (magnitude in the low bits, sign in bit 31),
// four elements per iteration. The magnitudes are also OR-accumulated into
// the 128 bits at max_val.
// NOTE(review): the start of the signature and the declarations of `sp` and
// `dp` are not visible here; confirm them against the full file.
109 float delta_inv,
ui32 count,
ui32* max_val)
// delta_inv replicated into all four float lanes.
115 v128_t d = wasm_f32x4_splat(delta_inv);
116 v128_t zero = wasm_i32x4_splat(0);
117 v128_t one = wasm_i32x4_splat(1);
// A full 128-bit load: max_val must reference 16 accessible bytes
// (four ui32 accumulators, one per lane).
118 v128_t tmax = wasm_v128_load(max_val);
// The source buffer is read as floats.
119 float *p = (
float*)sp;
// Each iteration loads and stores a whole 128-bit vector, so when count is
// not a multiple of 4 the last iteration touches elements past `count`.
// NOTE(review): presumably the buffers are padded accordingly; verify.
120 for (
ui32 i = 0; i < count; i += 4, p += 4, dp += 4)
122 v128_t vf = wasm_v128_load(p);
123 vf = wasm_f32x4_mul(vf, d);
// Saturating float -> signed 32-bit conversion, truncating toward zero.
124 v128_t val = wasm_i32x4_trunc_sat_f32x4(vf);
// The sign comes from the truncated integer, so a negative input that
// truncates to 0 ends up with a clear sign bit.
125 v128_t sign = wasm_i32x4_lt(val, zero);
// (val ^ sign) + (sign & 1) negates the negative lanes, giving the magnitude.
126 val = wasm_v128_xor(val, sign);
127 v128_t ones = wasm_v128_and(sign, one);
128 val = wasm_i32x4_add(val, ones);
// Accumulate the magnitude before the sign bit is merged in.
129 tmax = wasm_v128_or(tmax, val);
// Shifting the all-ones mask left by 31 leaves only bit 31 set.
130 sign = wasm_i32x4_shl(sign, 31);
131 val = wasm_v128_or(val, sign);
132 wasm_v128_store(dp, val);
// Write the four per-lane accumulators back to max_val.
134 wasm_v128_store(max_val, tmax);
// Converts 32-bit sign-magnitude values (sign in bit 31, magnitude shifted
// left by 31 - K_max) back to two's-complement integers, four per iteration.
// delta is not used by any of the visible statements.
// NOTE(review): the start of the signature and the declarations of `sp`, `p`
// and `K_max` are not visible here; confirm them against the full file.
139 float delta,
ui32 count)
// Right-shift amount that brings the magnitude down to the low bits.
142 ui32 shift = 31 - K_max;
// m1 has every bit set except the sign bit (magnitude mask).
143 v128_t m1 = wasm_i32x4_splat(INT_MAX);
144 v128_t zero = wasm_i32x4_splat(0);
145 v128_t one = wasm_i32x4_splat(1);
// Each iteration loads and stores a whole 128-bit vector, so when count is
// not a multiple of 4 the last iteration touches elements past `count`.
// NOTE(review): presumably the buffers are padded accordingly; verify.
147 for (
ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
149 v128_t v = wasm_v128_load((v128_t*)sp);
// Strip the sign bit, leaving the shifted magnitude.
150 v128_t val = wasm_v128_and(v, m1);
// The sign bit is already clear, so this signed shift brings in zeros.
151 val = wasm_i32x4_shr(val, shift);
// sign is all-ones in lanes whose bit 31 was set in the input.
152 v128_t sign = wasm_i32x4_lt(v, zero);
// (val ^ sign) + (sign & 1) negates the magnitude in lanes flagged negative.
153 val = wasm_v128_xor(val, sign);
154 v128_t ones = wasm_v128_and(sign, one);
155 val = wasm_i32x4_add(val, ones);
156 wasm_v128_store(p, val);
// Converts 32-bit sign-magnitude integers (sign in bit 31) into floats scaled
// by delta, four elements per iteration.
// NOTE(review): the start of the signature and the declarations of `sp` and
// `dp` are not visible here; confirm them against the full file.
162 float delta,
ui32 count)
// m1 has every bit set except the sign bit (magnitude mask).
165 v128_t m1 = wasm_i32x4_splat(INT_MAX);
// delta replicated into all four float lanes.
166 v128_t d = wasm_f32x4_splat(delta);
// The destination buffer is written as floats.
167 float *p = (
float*)dp;
// Each iteration loads and stores a whole 128-bit vector, so when count is
// not a multiple of 4 the last iteration touches elements past `count`.
// NOTE(review): presumably the buffers are padded accordingly; verify.
168 for (
ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
170 v128_t v = wasm_v128_load((v128_t*)sp);
// Magnitude: the input with the sign bit cleared.
171 v128_t vali = wasm_v128_and(v, m1);
172 v128_t valf = wasm_f32x4_convert_i32x4(vali);
173 valf = wasm_f32x4_mul(valf, d);
// v & ~m1 keeps only bit 31 of the input.
174 v128_t sign = wasm_v128_andnot(v, m1);
// OR-ing bit 31 into the float sets its sign bit, negating the value
// without an arithmetic operation.
175 valf = wasm_v128_or(valf, sign);
176 wasm_v128_store(p, valf);
// Converts 64-bit two's-complement integers, two per iteration, into a
// sign-magnitude form: magnitude shifted left by (63 - K_max), sign in bit 63.
// The shifted magnitudes are also OR-accumulated into the 128 bits at max_val.
// delta_inv is not used by any of the visible statements.
// NOTE(review): the start of the signature and the declarations of `p`,
// `dp` and `K_max` are not visible here; confirm them against the full file.
182 float delta_inv,
ui32 count,
ui64* max_val)
// Left-shift amount applied to every magnitude.
187 ui32 shift = 63 - K_max;
// m0 has only the most significant bit set in each 64-bit lane.
188 v128_t m0 = wasm_i64x2_splat(LLONG_MIN);
189 v128_t zero = wasm_i64x2_splat(0);
190 v128_t one = wasm_i64x2_splat(1);
// A full 128-bit load: max_val must reference 16 accessible bytes
// (two ui64 accumulators, one per lane).
191 v128_t tmax = wasm_v128_load(max_val);
// Each iteration loads and stores a whole 128-bit vector, so when count is
// odd the last iteration touches one element past `count`.
// NOTE(review): presumably the buffers are padded accordingly; verify.
193 for (
ui32 i = 0; i < count; i += 2, p += 2, dp += 2)
195 v128_t v = wasm_v128_load(p);
// sign is all-ones in lanes where v < 0 and zero elsewhere.
196 v128_t sign = wasm_i64x2_lt(v, zero);
// (v ^ sign) + (sign & 1) negates the negative lanes, giving the magnitude.
197 v128_t val = wasm_v128_xor(v, sign);
198 v128_t ones = wasm_v128_and(sign, one);
199 val = wasm_i64x2_add(val, ones);
// Reduce the all-ones mask to just the sign bit.
200 sign = wasm_v128_and(sign, m0);
201 val = wasm_i64x2_shl(val, shift);
// Accumulate the shifted magnitude before the sign bit is merged in.
202 tmax = wasm_v128_or(tmax, val);
203 val = wasm_v128_or(val, sign);
204 wasm_v128_store(dp, val);
// Write the two per-lane accumulators back to max_val.
206 wasm_v128_store(max_val, tmax);
// Converts 64-bit sign-magnitude values (sign in bit 63, magnitude shifted
// left by 63 - K_max) back to two's-complement integers, two per iteration.
// delta is not used by any of the visible statements.
// NOTE(review): the start of the signature and the declarations of `sp`, `p`
// and `K_max` are not visible here; confirm them against the full file.
211 float delta,
ui32 count)
// Right-shift amount that brings the magnitude down to the low bits.
214 ui32 shift = 63 - K_max;
// m1 has every bit set except the sign bit (magnitude mask).
215 v128_t m1 = wasm_i64x2_splat(LLONG_MAX);
216 v128_t zero = wasm_i64x2_splat(0);
217 v128_t one = wasm_i64x2_splat(1);
// Each iteration loads and stores a whole 128-bit vector, so when count is
// odd the last iteration touches one element past `count`.
// NOTE(review): presumably the buffers are padded accordingly; verify.
219 for (
ui32 i = 0; i < count; i += 2, sp += 2, p += 2)
221 v128_t v = wasm_v128_load((v128_t*)sp);
// Strip the sign bit, leaving the shifted magnitude.
222 v128_t val = wasm_v128_and(v, m1);
// The sign bit is already clear, so this signed shift brings in zeros.
223 val = wasm_i64x2_shr(val, shift);
// sign is all-ones in lanes whose bit 63 was set in the input.
224 v128_t sign = wasm_i64x2_lt(v, zero);
// (val ^ sign) + (sign & 1) negates the magnitude in lanes flagged negative.
225 val = wasm_v128_xor(val, sign);
226 v128_t ones = wasm_v128_and(sign, one);
227 val = wasm_i64x2_add(val, ones);
228 wasm_v128_store(p, val);