libmsnumpress
Numerical compression schemes for proteomics mass spectrometry data
Loading...
Searching...
No Matches
Functions | Variables
ms::numpress::MSNumpress Namespace Reference

Functions

static bool is_little_endian ()
 
static void encodeFixedPoint (double fixedPoint, unsigned char *result)
 
static double decodeFixedPoint (const unsigned char *data)
 
static void encodeInt (const unsigned int x, unsigned char *res, size_t *res_length)
 
static void decodeInt (const unsigned char *data, size_t *di, size_t max_di, size_t *half, unsigned int *res)
 
double optimalLinearFixedPointMass (const double *data, size_t dataSize, double mass_acc)
 
double optimalLinearFixedPoint (const double *data, size_t dataSize)
 
size_t encodeLinear (const double *data, size_t dataSize, unsigned char *result, double fixedPoint)
 
size_t decodeLinear (const unsigned char *data, const size_t dataSize, double *result)
 
void encodeLinear (const std::vector< double > &data, std::vector< unsigned char > &result, double fixedPoint)
 
void decodeLinear (const std::vector< unsigned char > &data, std::vector< double > &result)
 
size_t encodeSafe (const double *data, const size_t dataSize, unsigned char *result)
 
size_t decodeSafe (const unsigned char *data, const size_t dataSize, double *result)
 
size_t encodePic (const double *data, size_t dataSize, unsigned char *result)
 
size_t decodePic (const unsigned char *data, const size_t dataSize, double *result)
 
void encodePic (const std::vector< double > &data, std::vector< unsigned char > &result)
 
void decodePic (const std::vector< unsigned char > &data, std::vector< double > &result)
 
double optimalSlofFixedPoint (const double *data, size_t dataSize)
 
size_t encodeSlof (const double *data, size_t dataSize, unsigned char *result, double fixedPoint)
 
size_t decodeSlof (const unsigned char *data, const size_t dataSize, double *result)
 
void encodeSlof (const std::vector< double > &data, std::vector< unsigned char > &result, double fixedPoint)
 
void decodeSlof (const std::vector< unsigned char > &data, std::vector< double > &result)
 

Variables

const int ONE = 1
 
bool IS_LITTLE_ENDIAN = is_little_endian()
 

Function Documentation

◆ decodeFixedPoint()

static double ms::numpress::MSNumpress::decodeFixedPoint ( const unsigned char *  data)
static

Definition at line 62 of file MSNumpress.cpp.

64 {
65 int i;
66 double fixedPoint;
67 unsigned char *fp = (unsigned char*)&fixedPoint;
68
69 for (i=0; i<8; i++) {
70 fp[i] = data[IS_LITTLE_ENDIAN ? (7-i) : i];
71 }
72
73 return fixedPoint;
74}

References IS_LITTLE_ENDIAN.

Referenced by decodeLinear(), and decodeSlof().

Here is the caller graph for this function:

◆ decodeInt()

static void ms::numpress::MSNumpress::decodeInt ( const unsigned char *  data,
size_t *  di,
size_t  max_di,
size_t *  half,
unsigned int *  res 
)
static

Decodes an int from the half bytes in bp. Lossless reverse of encodeInt

Parameters
dataptr to the char data to decode
diposition in the char data array to start decoding (will be advanced)
max_disize of data array
halfhelper variable (do not change between multiple calls)
resresult (a 32 bit integer)
Note
the helper variable indicates whether we look at the first half byte or second half byte of the current data (thus whether to interpret the first half byte of data[*di] or the second half byte).

Definition at line 151 of file MSNumpress.cpp.

157 {
158 size_t n, i;
159 unsigned int mask, m;
160 unsigned char head;
161 unsigned char hb;
162
163 // Extract the first half byte, specifying the number of leading zero half
164 // bytes of the final integer.
165 // If half is zero, we look at the first half byte, otherwise we look at
166 // the second (lower) half byte and advance the counter to the next char.
167 if (*half == 0) {
168 head = data[*di] >> 4;
169 } else {
170 head = data[*di] & 0xf;
171 (*di)++;
172 }
173
174 *half = 1-(*half); // switch to other half byte
175 *res = 0;
176
177 if (head <= 8) {
178 n = head;
179 } else { // we have n leading ones, fill n half bytes in res with 0xf
180 n = head - 8;
181 mask = 0xf0000000;
182 for (i=0; i<n; i++) {
183 m = mask >> (4*i);
184 *res = *res | m;
185 }
186 }
187
188 if (n == 8) {
189 return;
190 }
191
192 if (*di + ((8 - n) - (1 - *half)) / 2 >= max_di) {
193 throw "[MSNumpress::decodeInt] Corrupt input data! ";
194 }
195
196 for (i=n; i<8; i++) {
197 if (*half == 0) {
198 hb = data[*di] >> 4;
199 } else {
200 hb = data[*di] & 0xf;
201 (*di)++;
202 }
203 *res = *res | ( static_cast<unsigned int>(hb) << ((i-n)*4));
204 *half = 1 - (*half);
205 }
206}

Referenced by decodeLinear(), and decodePic().

Here is the caller graph for this function:

◆ decodeLinear() [1/2]

void ms::numpress::MSNumpress::decodeLinear ( const std::vector< unsigned char > &  data,
std::vector< double > &  result 
)

Calls lower level decodeLinear while handling vector sizes appropriately

Note that this method may throw a const char* if it deems the input data to be corrupt, i.e.. that the last encoded int does not use the last byte in the data. In addition the last encoded int need to use either the last halfbyte, or the second last followed by a 0x0 halfbyte.

@data vector of bytes to be decoded

Returns
vector of resulting double (will be resized to the number of doubles)

Definition at line 451 of file MSNumpress.cpp.

454 {
455 size_t dataSize = data.size();
456 result.resize((dataSize - 8) * 2);
457 size_t decodedLength = decodeLinear(&data[0], dataSize, &result[0]);
458 result.resize(decodedLength);
459}
size_t decodeLinear(const unsigned char *data, const size_t dataSize, double *result)

References decodeLinear().

Here is the call graph for this function:

◆ decodeLinear() [2/2]

size_t ms::numpress::MSNumpress::decodeLinear ( const unsigned char *  data,
const size_t  dataSize,
double *  result 
)

Decodes data encoded by encodeLinear.

result vector guaranteed to be shorter or equal to (|data| - 8) * 2

Note that this method may throw a const char* if it deems the input data to be corrupt, i.e. that the last encoded int does not use the last byte in the data. In addition the last encoded int need to use either the last halfbyte, or the second last followed by a 0x0 halfbyte.

@data pointer to array of bytes to be decoded (need memorycont. repr.) @dataSize number of bytes from *data to decode

Returns
pointer to were resulting doubles should be stored
the number of decoded doubles, or -1 if dataSize < 4 or 4 < dataSize < 8

Definition at line 361 of file MSNumpress.cpp.

365 {
366 size_t i;
367 size_t ri = 0;
368 unsigned int init, buff;
369 int diff;
370 long long ints[3];
371 //double d;
372 size_t di;
373 size_t half;
374 long long extrapol;
375 long long y;
376 double fixedPoint;
377
378 //printf("Decoding %d bytes with fixed point %f\n", (int)dataSize, fixedPoint);
379
380 if (dataSize == 8) return 0;
381
382 if (dataSize < 8)
383 throw "[MSNumpress::decodeLinear] Corrupt input data: not enough bytes to read fixed point! ";
384
385 fixedPoint = decodeFixedPoint(data);
386
387
388 if (dataSize < 12)
389 throw "[MSNumpress::decodeLinear] Corrupt input data: not enough bytes to read first value! ";
390
391 ints[1] = 0;
392 for (i=0; i<4; i++) {
393 ints[1] = ints[1] | ((0xff & (init = data[8+i])) << (i*8));
394 }
395 result[0] = ints[1] / fixedPoint;
396
397 if (dataSize == 12) return 1;
398 if (dataSize < 16)
399 throw "[MSNumpress::decodeLinear] Corrupt input data: not enough bytes to read second value! ";
400
401 ints[2] = 0;
402 for (i=0; i<4; i++) {
403 ints[2] = ints[2] | ((0xff & (init = data[12+i])) << (i*8));
404 }
405 result[1] = ints[2] / fixedPoint;
406
407 half = 0;
408 ri = 2;
409 di = 16;
410
411 //printf(" di ri half int[0] int[1] extrapol diff\n");
412
413 while (di < dataSize) {
414 if (di == (dataSize - 1) && half == 1) {
415 if ((data[di] & 0xf) == 0x0) {
416 break;
417 }
418 }
419 //printf("%7d %7d %7d %lu %lu %ld", di, ri, half, ints[0], ints[1], extrapol);
420
421 ints[0] = ints[1];
422 ints[1] = ints[2];
423 decodeInt(data, &di, dataSize, &half, &buff);
424 diff = static_cast<int>(buff);
425
426 extrapol = ints[1] + (ints[1] - ints[0]);
427 y = extrapol + diff;
428 //printf(" %d \n", diff);
429 result[ri++] = y / fixedPoint;
430 ints[2] = y;
431 }
432
433 return ri;
434}
static double decodeFixedPoint(const unsigned char *data)
static void decodeInt(const unsigned char *data, size_t *di, size_t max_di, size_t *half, unsigned int *res)

References decodeFixedPoint(), and decodeInt().

Referenced by decodeLinear(), decodeLinearCorrupt1(), decodeLinearCorrupt2(), decodeLinearNice(), decodeLinearNiceLowFP(), decodeLinearWierd(), encodeDecodeLinear(), encodeDecodeLinear5(), and encodeDecodeLinearStraight().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ decodePic() [1/2]

void ms::numpress::MSNumpress::decodePic ( const std::vector< unsigned char > &  data,
std::vector< double > &  result 
)

Calls lower level decodePic while handling vector sizes appropriately

Note that this method may throw a const char* if it deems the input data to be corrupt, i.e. that the last encoded int does not use the last byte in the data. In addition the last encoded int need to use either the last halfbyte, or the second last followed by a 0x0 halfbyte.

@data vector of bytes to be decoded

Returns
vector of resulting double (will be resized to the number of doubles)

Definition at line 665 of file MSNumpress.cpp.

668 {
669 size_t dataSize = data.size();
670 result.resize(dataSize * 2);
671 size_t decodedLength = decodePic(&data[0], dataSize, &result[0]);
672 result.resize(decodedLength);
673}
size_t decodePic(const unsigned char *data, const size_t dataSize, double *result)

References decodePic().

Here is the call graph for this function:

◆ decodePic() [2/2]

size_t ms::numpress::MSNumpress::decodePic ( const unsigned char *  data,
const size_t  dataSize,
double *  result 
)

Decodes data encoded by encodePic

result vector guaranteed to be shorter of equal to |data| * 2

Note that this method may throw a const char* if it deems the input data to be corrupt, i.e. that the last encoded int does not use the last byte in the data. In addition the last encoded int need to use either the last halfbyte, or the second last followed by a 0x0 halfbyte.

@data pointer to array of bytes to be decoded (need memorycont. repr.) @dataSize number of bytes from *data to decode

Returns
pointer to were resulting doubles should be stored
the number of decoded doubles

Definition at line 617 of file MSNumpress.cpp.

621 {
622 size_t ri;
623 unsigned int x;
624 size_t di;
625 size_t half;
626
627 //printf("ri di half dSize count\n");
628
629 half = 0;
630 ri = 0;
631 di = 0;
632
633 while (di < dataSize) {
634 if (di == (dataSize - 1) && half == 1) {
635 if ((data[di] & 0xf) == 0x0) {
636 break;
637 }
638 }
639
640 decodeInt(&data[0], &di, dataSize, &half, &x);
641
642 //printf("%7d %7d %7d %7d %7d\n", ri, di, half, dataSize, count);
643
644 //printf("count: %d \n", count);
645 result[ri++] = static_cast<double>(x);
646 }
647
648 return ri;
649}

References decodeInt().

Referenced by decodePic(), encodeDecodePic(), encodeDecodePic5(), and testErroneousDecodePic().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ decodeSafe()

size_t ms::numpress::MSNumpress::decodeSafe ( const unsigned char *  data,
const size_t  dataSize,
double *  result 
)

Decodes data encoded by encodeSafe.

result vector is the same size as the input data.

Might throw const char* is something goes wrong during decoding.

@data pointer to array of bytes to be decoded (need memorycont. repr.) @dataSize number of bytes from *data to decode

Returns
pointer to were resulting doubles should be stored
the number of decoded bytes

Definition at line 510 of file MSNumpress.cpp.

514 {
515 size_t i, di, ri;
516 double extrapol, diff;
517 double latest[3];
518 unsigned char *fp;
519
520 if (dataSize % 8 != 0)
521 throw "[MSNumpress::decodeSafe] Corrupt input data: number of bytes needs to be multiple of 8! ";
522
523 //printf("d0 d1 extrapol diff\td2\n");
524
525 try {
526 fp = (unsigned char*)&(latest[1]);
527 for (i=0; i<8; i++) {
528 fp[i] = data[IS_LITTLE_ENDIAN ? (7-i) : i];
529 }
530 result[0] = latest[1];
531
532 if (dataSize == 8) return 1;
533
534 fp = (unsigned char*)&(latest[2]);
535 for (i=0; i<8; i++) {
536 fp[i] = data[8 + (IS_LITTLE_ENDIAN ? (7-i) : i)];
537 }
538 result[1] = latest[2];
539
540 ri = 2;
541
542 fp = (unsigned char*)&diff;
543 for (di = 16; di < dataSize; di += 8) {
544 latest[0] = latest[1];
545 latest[1] = latest[2];
546
547 for (i=0; i<8; i++) {
548 fp[i] = data[di + (IS_LITTLE_ENDIAN ? (7-i) : i)];
549 }
550
551 extrapol = latest[1] + (latest[1] - latest[0]);
552 latest[2] = extrapol + diff;
553
554 //printf("%f %f %f %f\t%f \n", latest[0], latest[1], extrapol, diff, latest[2]);
555
556 result[ri++] = latest[2];
557 }
558 } catch (...) {
559 throw "[MSNumpress::decodeSafe] Unknown error during decode! ";
560 }
561
562 return ri;
563}

References IS_LITTLE_ENDIAN.

Referenced by encodeDecodeSafe(), and encodeDecodeSafeStraight().

Here is the caller graph for this function:

◆ decodeSlof() [1/2]

void ms::numpress::MSNumpress::decodeSlof ( const std::vector< unsigned char > &  data,
std::vector< double > &  result 
)

Calls lower level decodeSlof while handling vector sizes appropriately

Note that this method may throw a const char* if it deems the input data to be corrupt.

@data vector of bytes to be decoded

Returns
vector of resulting double (will be resized to the number of doubles)

Definition at line 770 of file MSNumpress.cpp.

773 {
774 size_t dataSize = data.size();
775 result.resize((dataSize - 8) / 2);
776 size_t decodedLength = decodeSlof(&data[0], dataSize, &result[0]);
777 result.resize(decodedLength);
778}
size_t decodeSlof(const unsigned char *data, const size_t dataSize, double *result)

References decodeSlof().

Here is the call graph for this function:

◆ decodeSlof() [2/2]

size_t ms::numpress::MSNumpress::decodeSlof ( const unsigned char *  data,
const size_t  dataSize,
double *  result 
)

Decodes data encoded by encodeSlof

The return will include exactly (|data| - 8) / 2 doubles.

Note that this method may throw a const char* if it deems the input data to be corrupt.

@data pointer to array of bytes to be decoded (need memorycont. repr.) @dataSize number of bytes from *data to decode

Returns
pointer to were resulting doubles should be stored
the number of decoded doubles

Definition at line 733 of file MSNumpress.cpp.

737 {
738 size_t i, ri;
739 unsigned short x;
740 double fixedPoint;
741
742 if (dataSize < 8)
743 throw "[MSNumpress::decodeSlof] Corrupt input data: not enough bytes to read fixed point! ";
744
745 ri = 0;
746 fixedPoint = decodeFixedPoint(data);
747
748 for (i=8; i<dataSize; i+=2) {
749 x = static_cast<unsigned short>(data[i] | (data[i+1] << 8));
750 result[ri++] = exp(x / fixedPoint) - 1;
751 }
752 return ri;
753}

References decodeFixedPoint().

Referenced by decodeSlof(), encodeDecodeSlof(), and encodeDecodeSlof5().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ encodeFixedPoint()

static void ms::numpress::MSNumpress::encodeFixedPoint ( double  fixedPoint,
unsigned char *  result 
)
static

Definition at line 49 of file MSNumpress.cpp.

52 {
53 int i;
54 unsigned char *fp = (unsigned char*)&fixedPoint;
55 for (i=0; i<8; i++) {
56 result[i] = fp[IS_LITTLE_ENDIAN ? (7-i) : i];
57 }
58}

References IS_LITTLE_ENDIAN.

Referenced by encodeLinear(), and encodeSlof().

Here is the caller graph for this function:

◆ encodeInt()

static void ms::numpress::MSNumpress::encodeInt ( const unsigned int  x,
unsigned char *  res,
size_t *  res_length 
)
static

Encodes the int x as a number of halfbytes in res. res_length is incremented by the number of halfbytes, which will be 1 <= n <= 9

Definition at line 83 of file MSNumpress.cpp.

87 {
88 // get the bit pattern of a signed int x_inp
89 unsigned int m;
90 unsigned char i, l; // numbers between 0 and 9
91
92 unsigned int mask = 0xf0000000;
93 unsigned int init = x & mask;
94
95 if (init == 0) {
96 l = 8;
97 for (i=0; i<8; i++) {
98 m = mask >> (4*i);
99 if ((x & m) != 0) {
100 l = i;
101 break;
102 }
103 }
104 res[0] = l;
105 for (i=l; i<8; i++) {
106 res[1+i-l] = static_cast<unsigned char>( x >> (4*(i-l)) );
107 }
108 *res_length += 1+8-l;
109
110 } else if (init == mask) {
111 l = 7;
112 for (i=0; i<8; i++) {
113 m = mask >> (4*i);
114 if ((x & m) != m) {
115 l = i;
116 break;
117 }
118 }
119 res[0] = l + 8;
120 for (i=l; i<8; i++) {
121 res[1+i-l] = static_cast<unsigned char>( x >> (4*(i-l)) );
122 }
123 *res_length += 1+8-l;
124
125 } else {
126 res[0] = 0;
127 for (i=0; i<8; i++) {
128 res[1+i] = static_cast<unsigned char>( x >> (4*i) );
129 }
130 *res_length += 9;
131
132 }
133}

Referenced by encodeLinear(), and encodePic().

Here is the caller graph for this function:

◆ encodeLinear() [1/2]

size_t ms::numpress::MSNumpress::encodeLinear ( const double *  data,
const size_t  dataSize,
unsigned char *  result,
double  fixedPoint 
)

Encodes the doubles in data by first using a

  • lossy conversion to a 4 byte 5 decimal fixed point representation
  • storing the residuals from a linear prediction after first two values
  • encoding by encodeInt (see above)

The resulting binary is maximally 8 + dataSize * 5 bytes, but much less if the data is reasonably smooth on the first order.

This encoding is suitable for typical m/z or retention time binary arrays. On a test set, the encoding was empirically show to be accurate to at least 0.002 ppm.

@data pointer to array of double to be encoded (need memorycont. repr.) @dataSize number of doubles from *data to encode

Returns
pointer to where resulting bytes should be stored @fixedPoint the scaling factor used for getting the fixed point repr. This is stored in the binary and automatically extracted on decoding.
the number of encoded bytes

Definition at line 270 of file MSNumpress.cpp.

275 {
276 long long ints[3];
277 size_t i, ri;
278 unsigned char halfBytes[10];
279 size_t halfByteCount;
280 size_t hbi;
281 long long extrapol;
282 int diff;
283
284 //printf("Encoding %d doubles with fixed point %f\n", (int)dataSize, fixedPoint);
285 encodeFixedPoint(fixedPoint, result);
286
287
288 if (dataSize == 0) return 8;
289
290 ints[1] = static_cast<long long>(data[0] * fixedPoint + 0.5);
291 for (i=0; i<4; i++) {
292 result[8+i] = (ints[1] >> (i*8)) & 0xff;
293 }
294
295 if (dataSize == 1) return 12;
296
297 ints[2] = static_cast<long long>(data[1] * fixedPoint + 0.5);
298 for (i=0; i<4; i++) {
299 result[12+i] = (ints[2] >> (i*8)) & 0xff;
300 }
301
302 halfByteCount = 0;
303 ri = 16;
304
305 for (i=2; i<dataSize; i++) {
306 ints[0] = ints[1];
307 ints[1] = ints[2];
308 if (THROW_ON_OVERFLOW &&
309 data[i] * fixedPoint + 0.5 > LLONG_MAX ) {
310 throw "[MSNumpress::encodeLinear] Next number overflows LLONG_MAX.";
311 }
312
313 ints[2] = static_cast<long long>(data[i] * fixedPoint + 0.5);
314 extrapol = ints[1] + (ints[1] - ints[0]);
315
316 if (THROW_ON_OVERFLOW &&
317 ( ints[2] - extrapol > INT_MAX
318 || ints[2] - extrapol < INT_MIN )) {
319 throw "[MSNumpress::encodeLinear] Cannot encode a number that exceeds the bounds of [-INT_MAX, INT_MAX].";
320 }
321
322 diff = static_cast<int>(ints[2] - extrapol);
323 //printf("%lu %lu %lu, extrapol: %ld diff: %d \n", ints[0], ints[1], ints[2], extrapol, diff);
324 encodeInt(
325 static_cast<unsigned int>(diff),
326 &halfBytes[halfByteCount],
327 &halfByteCount
328 );
329 /*
330 printf("%d (%d): ", diff, (int)halfByteCount);
331 for (size_t j=0; j<halfByteCount; j++) {
332 printf("%x ", halfBytes[j] & 0xf);
333 }
334 printf("\n");
335 */
336
337
338 for (hbi=1; hbi < halfByteCount; hbi+=2) {
339 result[ri] = static_cast<unsigned char>(
340 (halfBytes[hbi-1] << 4) | (halfBytes[hbi] & 0xf)
341 );
342 //printf("%x \n", result[ri]);
343 ri++;
344 }
345 if (halfByteCount % 2 != 0) {
346 halfBytes[0] = halfBytes[halfByteCount-1];
347 halfByteCount = 1;
348 } else {
349 halfByteCount = 0;
350 }
351 }
352 if (halfByteCount == 1) {
353 result[ri] = static_cast<unsigned char>(halfBytes[0] << 4);
354 ri++;
355 }
356 return ri;
357}
#define THROW_ON_OVERFLOW
static void encodeFixedPoint(double fixedPoint, unsigned char *result)
static void encodeInt(const unsigned int x, unsigned char *res, size_t *res_length)

References encodeFixedPoint(), encodeInt(), and THROW_ON_OVERFLOW.

Referenced by decodeLinearCorrupt2(), decodeLinearNice(), decodeLinearNiceLowFP(), decodeLinearWierd(), decodeLinearWierd_int_overflow(), decodeLinearWierd_int_underflow(), decodeLinearWierd_llong_overflow(), encodeDecodeLinear(), encodeDecodeLinear5(), encodeDecodeLinearStraight(), encodeLinear(), and encodeLinear1().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ encodeLinear() [2/2]

void ms::numpress::MSNumpress::encodeLinear ( const std::vector< double > &  data,
std::vector< unsigned char > &  result,
double  fixedPoint 
)

Calls lower level encodeLinear while handling vector sizes appropriately

@data vector of doubles to be encoded

Returns
vector of resulting bytes (will be resized to the number of bytes)

Definition at line 438 of file MSNumpress.cpp.

442 {
443 size_t dataSize = data.size();
444 result.resize(dataSize * 5 + 8);
445 size_t encodedLength = encodeLinear(&data[0], dataSize, &result[0], fixedPoint);
446 result.resize(encodedLength);
447}
void encodeLinear()

References encodeLinear().

Here is the call graph for this function:

◆ encodePic() [1/2]

size_t ms::numpress::MSNumpress::encodePic ( const double *  data,
const size_t  dataSize,
unsigned char *  result 
)

Encodes ion counts by simply rounding to the nearest 4 byte integer, and compressing each integer with encodeInt.

The handleable range is therefore 0 -> 4294967294. The resulting binary is maximally dataSize * 5 bytes, but much less if the data is close to 0 on average.

@data pointer to array of double to be encoded (need memorycont. repr.) @dataSize number of doubles from *data to encode

Returns
pointer to were resulting bytes should be stored
the number of encoded bytes

Definition at line 568 of file MSNumpress.cpp.

572 {
573 size_t i, ri;
574 unsigned int x;
575 unsigned char halfBytes[10];
576 size_t halfByteCount;
577 size_t hbi;
578
579 //printf("Encoding %d doubles\n", (int)dataSize);
580
581 halfByteCount = 0;
582 ri = 0;
583
584 for (i=0; i<dataSize; i++) {
585
586 if (THROW_ON_OVERFLOW &&
587 (data[i] + 0.5 > INT_MAX || data[i] < -0.5) ){
588 throw "[MSNumpress::encodePic] Cannot use Pic to encode a number larger than INT_MAX or smaller than 0.";
589 }
590 x = static_cast<unsigned int>(data[i] + 0.5);
591 //printf("%d %d %d, extrapol: %d diff: %d \n", ints[0], ints[1], ints[2], extrapol, diff);
592 encodeInt(x, &halfBytes[halfByteCount], &halfByteCount);
593
594 for (hbi=1; hbi < halfByteCount; hbi+=2) {
595 result[ri] = static_cast<unsigned char>(
596 (halfBytes[hbi-1] << 4) | (halfBytes[hbi] & 0xf)
597 );
598 //printf("%x \n", result[ri]);
599 ri++;
600 }
601 if (halfByteCount % 2 != 0) {
602 halfBytes[0] = halfBytes[halfByteCount-1];
603 halfByteCount = 1;
604 } else {
605 halfByteCount = 0;
606 }
607 }
608 if (halfByteCount == 1) {
609 result[ri] = static_cast<unsigned char>(halfBytes[0] << 4);
610 ri++;
611 }
612 return ri;
613}

References encodeInt(), and THROW_ON_OVERFLOW.

Referenced by encodeDecodePic(), encodeDecodePic5(), and encodePic().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ encodePic() [2/2]

void ms::numpress::MSNumpress::encodePic ( const std::vector< double > &  data,
std::vector< unsigned char > &  result 
)

Calls lower level encodePic while handling vector sizes appropriately

@data vector of doubles to be encoded

Returns
vector of resulting bytes (will be resized to the number of bytes)

Definition at line 653 of file MSNumpress.cpp.

656 {
657 size_t dataSize = data.size();
658 result.resize(dataSize * 5);
659 size_t encodedLength = encodePic(&data[0], dataSize, &result[0]);
660 result.resize(encodedLength);
661}
size_t encodePic(const double *data, size_t dataSize, unsigned char *result)

References encodePic().

Here is the call graph for this function:

◆ encodeSafe()

size_t ms::numpress::MSNumpress::encodeSafe ( const double *  data,
const size_t  dataSize,
unsigned char *  result 
)

Encodes the doubles in data by storing the residuals from a linear prediction after first two values.

The resulting binary is the same size as the input data.

This encoding is suitable for typical m/z or retention time binary arrays, and is intended to be used before zlib compression to improve compression.

@data pointer to array of doubles to be encoded (need memorycont. repr.) @dataSize number of doubles from *data to encode

Returns
pointer to were resulting bytes should be stored

Definition at line 464 of file MSNumpress.cpp.

468 {
469 size_t i, j, ri = 0;
470 double latest[3];
471 double extrapol, diff;
472 const unsigned char *fp;
473
474 //printf("d0 d1 d2 extrapol diff\n");
475
476 if (dataSize == 0) return ri;
477
478 latest[1] = data[0];
479 fp = (unsigned char*)data;
480 for (i=0; i<8; i++) {
481 result[ri++] = fp[IS_LITTLE_ENDIAN ? (7-i) : i];
482 }
483
484 if (dataSize == 1) return ri;
485
486 latest[2] = data[1];
487 fp = (unsigned char*)&(data[1]);
488 for (i=0; i<8; i++) {
489 result[ri++] = fp[IS_LITTLE_ENDIAN ? (7-i) : i];
490 }
491
492 fp = (unsigned char*)&diff;
493 for (i=2; i<dataSize; i++) {
494 latest[0] = latest[1];
495 latest[1] = latest[2];
496 latest[2] = data[i];
497 extrapol = latest[1] + (latest[1] - latest[0]);
498 diff = latest[2] - extrapol;
499 //printf("%f %f %f %f %f\n", latest[0], latest[1], latest[2], extrapol, diff);
500 for (j=0; j<8; j++) {
501 result[ri++] = fp[IS_LITTLE_ENDIAN ? (7-j) : j];
502 }
503 }
504
505 return ri;
506}

References IS_LITTLE_ENDIAN.

Referenced by encodeDecodeSafe(), and encodeDecodeSafeStraight().

Here is the caller graph for this function:

◆ encodeSlof() [1/2]

size_t ms::numpress::MSNumpress::encodeSlof ( const double *  data,
const size_t  dataSize,
unsigned char *  result,
double  fixedPoint 
)

Encodes ion counts by taking the natural logarithm, and storing a fixed point representation of this. This is calculated as

unsigned short fp = log(d + 1) * fixedPoint + 0.5

the result vector is exactly |data| * 2 + 8 bytes long

@data pointer to array of double to be encoded (need memorycont. repr.) @dataSize number of doubles from *data to encode

Returns
pointer to were resulting bytes should be stored
the number of encoded bytes

Definition at line 704 of file MSNumpress.cpp.

709 {
710 size_t i, ri;
711 double temp;
712 unsigned short x;
713 encodeFixedPoint(fixedPoint, result);
714
715 ri = 8;
716 for (i=0; i<dataSize; i++) {
717 temp = log(data[i]+1) * fixedPoint;
718
719 if (THROW_ON_OVERFLOW &&
720 temp > USHRT_MAX ) {
721 throw "[MSNumpress::encodeSlof] Cannot encode a number that overflows USHRT_MAX.";
722 }
723
724 x = static_cast<unsigned short>(temp + 0.5);
725 result[ri++] = x & 0xff;
726 result[ri++] = (x >> 8) & 0xff;
727 }
728 return ri;
729}

References encodeFixedPoint(), and THROW_ON_OVERFLOW.

Referenced by encodeDecodeSlof(), encodeDecodeSlof5(), and encodeSlof().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ encodeSlof() [2/2]

void ms::numpress::MSNumpress::encodeSlof ( const std::vector< double > &  data,
std::vector< unsigned char > &  result,
double  fixedPoint 
)

Calls lower level encodeSlof while handling vector sizes appropriately

@data vector of doubles to be encoded

Returns
vector of resulting bytes (will be resized to the number of bytes)

Definition at line 757 of file MSNumpress.cpp.

761 {
762 size_t dataSize = data.size();
763 result.resize(dataSize * 2 + 8);
764 size_t encodedLength = encodeSlof(&data[0], dataSize, &result[0], fixedPoint);
765 result.resize(encodedLength);
766}
size_t encodeSlof(const double *data, size_t dataSize, unsigned char *result, double fixedPoint)

References encodeSlof().

Here is the call graph for this function:

◆ is_little_endian()

static bool ms::numpress::MSNumpress::is_little_endian ( )
static

Definition at line 40 of file MSNumpress.cpp.

40 {
41 return *((char*)&(ONE)) == 1;
42}

References ONE.

◆ optimalLinearFixedPoint()

double ms::numpress::MSNumpress::optimalLinearFixedPoint ( const double *  data,
size_t  dataSize 
)

Compute the maximal linear fixed point that prevents integer overflow.

@data pointer to array of double to be encoded (need memorycont. repr.) @dataSize number of doubles from *data to encode

Returns
the linear fixed point safe to use

Definition at line 234 of file MSNumpress.cpp.

237 {
238 /*
239 * safer impl - apparently not needed though
240 *
241 if (dataSize == 0) return 0;
242
243 double maxDouble = 0;
244 double x;
245
246 for (size_t i=0; i<dataSize; i++) {
247 x = data[i];
248 maxDouble = max(maxDouble, x);
249 }
250
251 return floor(0xFFFFFFFF / maxDouble);
252 */
253 if (dataSize == 0) return 0;
254 if (dataSize == 1) return floor(0x7FFFFFFFl / data[0]);
255 double maxDouble = max(data[0], data[1]);
256 double extrapol;
257 double diff;
258
259 for (size_t i=2; i<dataSize; i++) {
260 extrapol = data[i-1] + (data[i-1] - data[i-2]);
261 diff = data[i] - extrapol;
262 maxDouble = max(maxDouble, ceil(abs(diff)+1));
263 }
264
265 return floor(0x7FFFFFFFl / maxDouble);
266}

Referenced by decodeLinearCorrupt2(), decodeLinearNice(), decodeLinearWierd(), encodeDecodeLinear(), encodeDecodeLinear5(), encodeDecodeLinearStraight(), and optimalLinearFixedPoint().

Here is the caller graph for this function:

◆ optimalLinearFixedPointMass()

double ms::numpress::MSNumpress::optimalLinearFixedPointMass ( const double *  data,
size_t  dataSize,
double  mass_acc 
)

Compute the optimal linear fixed point with a desired m/z accuracy.

Note
If the desired accuracy cannot be reached without overflowing 64 bit integers, then a negative value is returned. You need to check for this and in that case abandon numpress or use optimalLinearFixedPoint which returns the largest safe value.

@data pointer to array of double to be encoded (need memorycont. repr.) @dataSize number of doubles from *data to encode @mass_acc desired m/z accuracy in Th

Returns
the linear fixed point that satisfies the accuracy requirement (or -1 in case of failure).

Definition at line 213 of file MSNumpress.cpp.

217 {
218 if (dataSize < 3) return 0; // we just encode the first two points as floats
219
220 // We calculate the maximal fixedPoint we need to achieve a specific mass
221 // accuracy. Note that the maximal error we will make by encoding as int is
222 // 0.5 due to rounding errors.
223 double maxFP = 0.5 / mass_acc;
224
225 // There is a maximal value for the FP given by the int length (32bit)
226 // which means we cannot choose a value higher than that. In case we cannot
227 // achieve the desired accuracy, return failure (-1).
228 double maxFP_overflow = optimalLinearFixedPoint(data, dataSize);
229 if (maxFP > maxFP_overflow) return -1;
230
231 return maxFP;
232}
void optimalLinearFixedPoint()

References optimalLinearFixedPoint().

Referenced by decodeLinearNiceLowFP(), and optimalLinearFixedPointMass().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ optimalSlofFixedPoint()

double ms::numpress::MSNumpress::optimalSlofFixedPoint ( const double *  data,
size_t  dataSize 
)

Definition at line 679 of file MSNumpress.cpp.

682 {
683 if (dataSize == 0) return 0;
684
685 double maxDouble = 1;
686 double x;
687 double fp;
688
689 for (size_t i=0; i<dataSize; i++) {
690 x = log(data[i]+1);
691 maxDouble = max(maxDouble, x);
692 }
693
694 fp = floor(0xFFFF / maxDouble);
695
696 //cout << " max val: " << maxDouble << endl;
697 //cout << "fixed point: " << fp << endl;
698
699 return fp;
700}

Referenced by encodeDecodeSlof(), encodeDecodeSlof5(), and optimalSlofFixedPoint().

Here is the caller graph for this function:

Variable Documentation

◆ IS_LITTLE_ENDIAN

bool ms::numpress::MSNumpress::IS_LITTLE_ENDIAN = is_little_endian()

Definition at line 43 of file MSNumpress.cpp.

Referenced by decodeFixedPoint(), decodeSafe(), encodeFixedPoint(), and encodeSafe().

◆ ONE

const int ms::numpress::MSNumpress::ONE = 1

Definition at line 39 of file MSNumpress.cpp.

Referenced by is_little_endian().