45#ifndef INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
46#define INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
/* Four array members, each dimensioned as 64 bits divided by an assumed
 * element width (8, 32, 16 and 8 bits respectively).
 * NOTE(review): the enclosing declaration is not visible in this listing;
 * presumably these are the members of the decision_t type that BFLY uses in
 * sizeof(decision_t) -- confirm against the full file. */
49 unsigned char t[64 / 8 ];
50 unsigned int w[64 / 32];
51 unsigned short s[64 / 16];
52 unsigned char c[64 / 8];
/* renormalize: operates on the byte array X using the given threshold.
 * Only part of the body is visible in this listing (the loop bodies and the
 * use of threshold are missing), so the notes below are limited to what the
 * remaining lines show. */
60static inline void renormalize(
unsigned char* X,
unsigned char threshold)
/* Start the running minimum at the first entry of X. */
65 unsigned char min = X[0];
/* First pass over all NUMSTATES entries.
 * NOTE(review): body not visible; presumably updates min -- confirm. */
67 for (i = 0; i < NUMSTATES; i++)
/* Second pass over all NUMSTATES entries.
 * NOTE(review): body not visible; presumably rebases X by min -- confirm. */
70 for (i = 0; i < NUMSTATES; i++)
/* BFLY: processes index i at step s. It derives a branch metric from
 * Branchtab and syms, forms candidate path metrics from X[i] and
 * X[i + NUMSTATES / 2], stores the selected candidates in Y[2 * i] and
 * Y[2 * i + 1], and ORs the two decision bits into d->w.
 * Several parameter lines and the assignment of m0 are not visible in this
 * listing. */
77static inline void BFLY(
int i,
83 unsigned char* Branchtab)
85 int j, decision0, decision1;
86 unsigned char metric, m0, m1, m2, m3;
91 int PRECISIONSHIFT = 2;
/* Accumulate, over the RATE symbols belonging to step s, the XOR of the
 * branch-table entry with the symbol, each term shifted right by METRICSHIFT.
 * The table is indexed as i + j * NUMSTATES / 2. metric is an unsigned char,
 * so the running sum is reduced modulo 256.
 * NOTE(review): the initial value of metric is set on a line not shown here. */
94 for (j = 0; j < RATE; j++)
95 metric += (Branchtab[i + j * NUMSTATES / 2] ^ syms[s * RATE + j]) >> METRICSHIFT;
96 metric = metric >> PRECISIONSHIFT;
/* Upper bound of the scaled metric: RATE terms of at most
 * (255 >> METRICSHIFT) each, then shifted right by PRECISIONSHIFT. */
98 unsigned char max = ((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);
/* Candidate path metrics (each truncated to unsigned char on assignment):
 *   m1 = X[i + NUMSTATES / 2] plus the complementary metric (max - metric)
 *   m2 = X[i]                 plus the complementary metric (max - metric)
 *   m3 = X[i + NUMSTATES / 2] plus metric
 * NOTE(review): m0 is assigned on a line not shown; presumably X[i] + metric. */
101 m1 = X[i + NUMSTATES / 2] + (max - metric);
102 m2 = X[i] + (max - metric);
103 m3 = X[i + NUMSTATES / 2] + metric;
/* decisionN is 1 when the first candidate of the pair is strictly greater
 * than the second. The unsigned char operands are promoted to int before the
 * subtraction, so the difference can be negative. */
105 decision0 = (
signed int)(m0 - m1) > 0;
106 decision1 = (
signed int)(m2 - m3) > 0;
/* Keep the smaller candidate of each pair as the new metric. */
108 Y[2 * i] = decision0 ? m1 : m0;
109 Y[2 * i + 1] = decision1 ? m3 : m2;
/* Record both decision bits: decision0 in the lower bit and decision1 in the
 * next one, shifted to bit position (2 * i) modulo the bit width of
 * unsigned int. The target word is i divided by half that bit width, offset
 * by s times the number of unsigned ints per decision_t. */
111 d->
w[i / (
sizeof(
unsigned int) * 8 / 2) +
112 s * (
sizeof(
decision_t) /
sizeof(
unsigned int))] |=
113 (decision0 | decision1 << 1) << ((2 * i) & (
sizeof(
unsigned int) * 8 - 1));
324#include <emmintrin.h>
326#include <pmmintrin.h>
328#include <xmmintrin.h>
/* Fragment of a kernel that uses __m128i intrinsics. The function name, the
 * remaining parameters and most of the body are not visible in this listing;
 * the comments below describe only the visible lines. */
334 unsigned int framebits,
336 unsigned char* Branchtab)
/* Main loop: (framebits + excess) / 2 iterations (integer division). Each
 * iteration declares two separate sets of temporaries below. */
339 for (i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
/* Temporaries of the first set. */
340 unsigned char a75, a81;
342 short int s20, s21, s26, s27;
343 unsigned char *a74, *a80, *b6;
344 short int *a110, *a111, *a91, *a93, *a94;
345 __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83, *a95, *a96, *a97, *a98, *a99;
347 __m128i a100, a101, a103, a104, a107, a108, a109, a76, a78, a79, a82, a84, a85,
348 a88, a89, a90, d10, d11, d12, d9, m23, m24, m25, m26, m27, m28, m29, m30, s18,
349 s19, s22, s23, s24, s25, s28, s29, t13, t14, t15, t16, t17, t18;
/* Vector constants with all sixteen bytes set to 63.
 * NOTE(review): the expressions consuming them are not visible. */
374 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
376 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
/* Access the dec buffer as an array of short int. */
387 a91 = ((
short int*)dec);
416 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
418 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
/* Branch taken when the first byte of Y exceeds 210 (the same value the
 * scalar code names RENORMALIZE_THRESHOLD). Branch body not visible. */
440 if ((((
unsigned char*)Y)[0] > 210)) {
/* Temporaries of the second set. */
461 unsigned char a188, a194;
463 short int s48, s49, s54, s55;
464 unsigned char *a187, *a193, *b15;
465 short int *a204, *a206, *a207, *a223, *a224, *b16;
466 __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210, *a211, *a212, *a215,
468 __m128i a199, a200, a218, a219;
469 __m128i a189, a191, a192, a195, a197, a198, a201, a202, a203, a213, a214, a216,
470 a217, a220, a221, a222, d17, d18, d19, d20, m39, m40, m41, m42, m43, m44, m45,
471 m46, s46, s47, s50, s51, s52, s53, s56, s57, t25, t26, t27, t28, t29, t30;
496 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
498 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
/* Access the dec buffer as an array of short int. */
509 a204 = ((
short int*)dec);
539 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
541 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
/* Same threshold test as above, this time on the first byte of X. */
563 if ((((
unsigned char*)X)[0] > 210)) {
/* Tail loop: runs once when (framebits + excess) is odd and not at all when
 * it is even. */
596 for (j = 0; j < (framebits + excess) % 2; ++j) {
/* 64 / 2 = 32 iterations.
 * NOTE(review): loop body not visible; presumably one BFLY call each. */
598 for (i = 0; i < 64 / 2; i++) {
/* (framebits + excess) rounded down to an even number, plus j. */
600 (((framebits + excess) >> 1) << 1) + j,
/* Fragment of a second kernel that uses __m128i intrinsics; its visible lines
 * match the earlier __m128i kernel fragment in this file. The function name,
 * the remaining parameters and most of the body are not visible in this
 * listing; the comments below describe only the visible lines. */
630 unsigned int framebits,
632 unsigned char* Branchtab)
/* Main loop: (framebits + excess) / 2 iterations (integer division). Each
 * iteration declares two separate sets of temporaries below. */
635 for (i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
/* Temporaries of the first set. */
636 unsigned char a75, a81;
638 short int s20, s21, s26, s27;
639 unsigned char *a74, *a80, *b6;
640 short int *a110, *a111, *a91, *a93, *a94;
641 __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83, *a95, *a96, *a97, *a98, *a99;
643 __m128i a100, a101, a103, a104, a107, a108, a109, a76, a78, a79, a82, a84, a85,
644 a88, a89, a90, d10, d11, d12, d9, m23, m24, m25, m26, m27, m28, m29, m30, s18,
645 s19, s22, s23, s24, s25, s28, s29, t13, t14, t15, t16, t17, t18;
/* Vector constants with all sixteen bytes set to 63.
 * NOTE(review): the expressions consuming them are not visible. */
670 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
672 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
/* Access the dec buffer as an array of short int. */
683 a91 = ((
short int*)dec);
712 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
714 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
/* Branch taken when the first byte of Y exceeds 210 (the same value the
 * scalar code names RENORMALIZE_THRESHOLD). Branch body not visible. */
736 if ((((
unsigned char*)Y)[0] > 210)) {
/* Temporaries of the second set. */
757 unsigned char a188, a194;
759 short int s48, s49, s54, s55;
760 unsigned char *a187, *a193, *b15;
761 short int *a204, *a206, *a207, *a223, *a224, *b16;
762 __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210, *a211, *a212, *a215,
764 __m128i a199, a200, a218, a219;
765 __m128i a189, a191, a192, a195, a197, a198, a201, a202, a203, a213, a214, a216,
766 a217, a220, a221, a222, d17, d18, d19, d20, m39, m40, m41, m42, m43, m44, m45,
767 m46, s46, s47, s50, s51, s52, s53, s56, s57, t25, t26, t27, t28, t29, t30;
792 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
794 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
/* Access the dec buffer as an array of short int. */
805 a204 = ((
short int*)dec);
835 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
837 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
/* Same threshold test as above, this time on the first byte of X. */
859 if ((((
unsigned char*)X)[0] > 210)) {
/* Tail loop: runs once when (framebits + excess) is odd and not at all when
 * it is even. */
892 for (j = 0; j < (framebits + excess) % 2; ++j) {
/* 64 / 2 = 32 iterations.
 * NOTE(review): loop body not visible; presumably one BFLY call each. */
894 for (i = 0; i < 64 / 2; i++) {
/* (framebits + excess) rounded down to an even number, plus j. */
896 (((framebits + excess) >> 1) << 1) + j,
/* Fragment of a scalar (non-intrinsic) kernel. The function name, the
 * remaining parameters and most of the body are not visible in this listing. */
924 unsigned int framebits,
926 unsigned char* Branchtab)
/* Total number of steps to process: frame bits plus the excess bits. */
928 int nbits = framebits + excess;
/* Threshold value 210, the same literal the __m128i kernels compare against.
 * NOTE(review): its use is not visible here; presumably passed to
 * renormalize -- confirm. */
930 int RENORMALIZE_THRESHOLD = 210;
/* One outer iteration per step s. */
933 for (s = 0; s < nbits; s++) {
/* NUMSTATES / 2 inner iterations per step.
 * NOTE(review): loop body not visible; presumably one BFLY call each. */
935 for (i = 0; i < NUMSTATES / 2; i++) {
/* Point Y at tmp, viewed as bytes.
 * NOTE(review): presumably part of swapping the X and Y buffers between
 * steps; the surrounding statements are not visible. */
944 Y = (
unsigned char*)tmp;