OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_transform_wasm.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2021, Aous Naman
6// Copyright (c) 2021, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2021, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_transform_wasm.cpp
34// Author: Aous Naman
35// Date: 09 February 2021
36//***************************************************************************/
37
38#include <cstdio>
39#include <wasm_simd128.h>
40
41#include "ojph_defs.h"
42#include "ojph_arch.h"
43#include "ojph_mem.h"
44#include "ojph_transform.h"
46
47namespace ojph {
48 namespace local {
49
52 const line_buf* line_src2,
54 {
55 si32 *dst = line_dst->i32;
56 const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
57
58 for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4)
59 {
64 d = wasm_i32x4_sub(d, s1);
66 }
67 }
68
71 const line_buf* line_src2,
73 {
74 si32 *dst = line_dst->i32;
75 const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
76
78 for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4)
79 {
87 }
88 }
89
92 line_buf *line_hdst, ui32 width, bool even)
93 {
94 if (width > 1)
95 {
96 si32 *src = line_src->i32;
97 si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32;
98
99 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
100 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
101
102 // extension
103 src[-1] = src[1];
104 src[width] = src[width-2];
105 // predict
106 const si32* sp = src + (even ? 1 : 0);
107 si32 *dph = hdst;
108 for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4)
109 { //this is doing twice the work it needs to do
110 //it can definitely be written better
111 v128_t s1 = wasm_v128_load(sp - 1);
112 v128_t s2 = wasm_v128_load(sp + 1);
116 sp += 4;
117 s1 = wasm_v128_load(sp - 1);
118 s2 = wasm_v128_load(sp + 1);
122 sp += 4;
123 d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6);
125 }
126
127 // extension
128 hdst[-1] = hdst[0];
129 hdst[H_width] = hdst[H_width-1];
130 // update
131 sp = src + (even ? 0 : 1);
132 const si32* sph = hdst + (even ? 0 : 1);
133 si32 *dpl = ldst;
135 for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4)
136 {
142 v128_t d2 = wasm_v128_load(sp + 4);
143 v128_t d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6);
146 }
147 }
148 else
149 {
150 if (even)
151 line_ldst->i32[0] = line_src->i32[0];
152 else
153 line_hdst->i32[0] = line_src->i32[0] << 1;
154 }
155 }
156
159 const line_buf *line_src2,
161 {
162 si32 *dst = line_dst->i32;
163 const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
164
165 for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4)
166 {
171 d = wasm_i32x4_add(d, s1);
173 }
174 }
175
178 const line_buf *line_src2,
180 {
181 si32 *dst = line_dst->i32;
182 const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
183
185 for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4)
186 {
194 }
195 }
196
199 line_buf *line_hsrc, ui32 width, bool even)
200 {
201 if (width > 1)
202 {
203 si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32;
204 si32 *dst = line_dst->i32;
205
206 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
207 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
208
209 // extension
210 hsrc[-1] = hsrc[0];
211 hsrc[H_width] = hsrc[H_width-1];
212 //inverse update
213 const si32 *sph = hsrc + (even ? 0 : 1);
214 si32 *spl = lsrc;
216 for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, spl+=4)
217 {
225 }
226
227 // extension
228 lsrc[-1] = lsrc[0];
229 lsrc[L_width] = lsrc[L_width - 1];
230 // inverse predict and combine
231 si32 *dp = dst + (even ? 0 : -1);
232 spl = lsrc + (even ? 0 : -1);
233 sph = hsrc;
234 ui32 width = L_width + (even ? 0 : 1);
235 for (ui32 i = (width + 3) >> 2; i > 0; --i, sph+=4, spl+=4, dp+=8)
236 {
241 d = wasm_i32x4_add(d, s2);
242 wasm_v128_store(dp, wasm_i32x4_shuffle(s1, d, 0, 4, 1, 5));
243 wasm_v128_store(dp + 4, wasm_i32x4_shuffle(s1, d, 2, 6, 3, 7));
244 }
245 }
246 else
247 {
248 if (even)
249 line_dst->i32[0] = line_lsrc->i32[0];
250 else
251 line_dst->i32[0] = line_hsrc->i32[0] >> 1;
252 }
253 }
254
257 const line_buf *line_src2,
259 ui32 repeat)
260 {
261 float *dst = line_dst->f32;
262 const float *src1 = line_src1->f32, *src2 = line_src2->f32;
263
265 for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4)
266 {
272 }
273 }
274
278 {
279 float *dst = line_dst->f32;
280 const float *src = line_src->f32;
281
285 for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src+=4)
286 {
289 }
290 }
291
294 line_buf *line_hdst, ui32 width,
295 bool even)
296 {
297 if (width > 1)
298 {
299 float *src = line_src->f32;
300 float *ldst = line_ldst->f32, *hdst = line_hdst->f32;
301
302 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
303 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
304
305 //extension
306 src[-1] = src[1];
307 src[width] = src[width-2];
308 // predict
309 const float* sp = src + (even ? 1 : 0);
310 float *dph = hdst;
312 for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4)
313 { //this is doing twice the work it needs to do
314 //it can definitely be written better
315 v128_t s1 = wasm_v128_load(sp - 1);
316 v128_t s2 = wasm_v128_load(sp + 1);
320 sp += 4;
321 s1 = wasm_v128_load(sp - 1);
322 s2 = wasm_v128_load(sp + 1);
326 sp += 4;
327 d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6);
329 }
330
331 // extension
332 hdst[-1] = hdst[0];
333 hdst[H_width] = hdst[H_width-1];
334 // update
336 sp = src + (even ? 0 : 1);
337 const float* sph = hdst + (even ? 0 : 1);
338 float *dpl = ldst;
339 for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4)
340 {
345 v128_t d2 = wasm_v128_load(sp + 4);
346 v128_t d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6);
347 d = wasm_f32x4_add(d, s1);
349 }
350
351 //extension
352 ldst[-1] = ldst[0];
353 ldst[L_width] = ldst[L_width-1];
354 //predict
356 const float* spl = ldst + (even ? 1 : 0);
357 dph = hdst;
358 for (ui32 i = (H_width + 3) >> 2; i > 0; --i, spl+=4, dph+=4)
359 {
364 d = wasm_f32x4_add(d, s1);
366 }
367
368 // extension
369 hdst[-1] = hdst[0];
370 hdst[H_width] = hdst[H_width-1];
371 // update
373 sph = hdst + (even ? 0 : 1);
374 dpl = ldst;
375 for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, dpl+=4)
376 {
381 d = wasm_f32x4_add(d, s1);
383 }
384
385 //multipliers
386 float *dp = ldst;
388 for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4)
389 {
392 }
393 dp = hdst;
395 for (int i = (H_width + 3) >> 2; i > 0; --i, dp+=4)
396 {
399 }
400 }
401 else
402 {
403 if (even)
404 line_ldst->f32[0] = line_src->f32[0];
405 else
406 line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0];
407 }
408 }
409
412 line_buf *line_hsrc, ui32 width,
413 bool even)
414 {
415 if (width > 1)
416 {
417 float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32;
418 float *dst = line_dst->f32;
419
420 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
421 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
422
423 //multipliers
424 float *dp = lsrc;
426 for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4)
427 {
430 }
431 dp = hsrc;
433 for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dp+=4)
434 {
437 }
438
439 //extension
440 hsrc[-1] = hsrc[0];
441 hsrc[H_width] = hsrc[H_width-1];
442 //inverse update
444 const float *sph = hsrc + (even ? 0 : 1);
445 float *dpl = lsrc;
446 for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4)
447 {
452 d = wasm_f32x4_add(d, s1);
454 }
455
456 //extension
457 lsrc[-1] = lsrc[0];
458 lsrc[L_width] = lsrc[L_width-1];
459 //inverse predict
461 const float *spl = lsrc + (even ? 0 : -1);
462 float *dph = hsrc;
463 for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4, spl+=4)
464 {
469 d = wasm_f32x4_add(d, s1);
471 }
472
473 //extension
474 hsrc[-1] = hsrc[0];
475 hsrc[H_width] = hsrc[H_width-1];
476 //inverse update
478 sph = hsrc + (even ? 0 : 1);
479 dpl = lsrc;
480 for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4)
481 {
486 d = wasm_f32x4_add(d, s1);
488 }
489
490 //extension
491 lsrc[-1] = lsrc[0];
492 lsrc[L_width] = lsrc[L_width-1];
493 //inverse predict and combine
495 dp = dst + (even ? 0 : -1);
496 spl = lsrc + (even ? 0 : -1);
497 sph = hsrc;
498 ui32 width = L_width + (even ? 0 : 1);
499 for (ui32 i = (width + 3) >> 2; i > 0; --i, spl+=4, sph+=4, dp+=8)
500 {
505 d = wasm_f32x4_add(d, s2);
506 wasm_v128_store(dp, wasm_i32x4_shuffle(s1, d, 0, 4, 1, 5));
507 wasm_v128_store(dp + 4, wasm_i32x4_shuffle(s1, d, 2, 6, 3, 7));
508 }
509 }
510 else
511 {
512 if (even)
513 line_dst->f32[0] = line_lsrc->f32[0];
514 else
515 line_dst->f32[0] = line_hsrc->f32[0] * 0.5f;
516 }
517 }
518
519 }
520}
void wasm_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void wasm_rev_vert_wvlt_fwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void wasm_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void wasm_irrev_horz_wvlt_bwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void wasm_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void wasm_irrev_vert_wvlt_step(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, int step_num, ui32 repeat)
void wasm_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void wasm_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void wasm_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void wasm_rev_vert_wvlt_bwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
int32_t si32
Definition ojph_defs.h:55
uint32_t ui32
Definition ojph_defs.h:54