libmsnumpress
Numerical compression schemes for proteomics mass spectrometry data
Functions | Variables
ms::numpress::MSNumpress Namespace Reference

Functions

static bool is_little_endian ()
 
static void encodeFixedPoint (double fixedPoint, unsigned char *result)
 
static double decodeFixedPoint (const unsigned char *data)
 
static void encodeInt (const unsigned int x, unsigned char *res, size_t *res_length)
 
static void decodeInt (const unsigned char *data, size_t *di, size_t max_di, size_t *half, unsigned int *res)
 
double optimalLinearFixedPointMass (const double *data, size_t dataSize, double mass_acc)
 
double optimalLinearFixedPoint (const double *data, size_t dataSize)
 
size_t encodeLinear (const double *data, size_t dataSize, unsigned char *result, double fixedPoint)
 
size_t decodeLinear (const unsigned char *data, const size_t dataSize, double *result)
 
void encodeLinear (const std::vector< double > &data, std::vector< unsigned char > &result, double fixedPoint)
 
void decodeLinear (const std::vector< unsigned char > &data, std::vector< double > &result)
 
size_t encodeSafe (const double *data, const size_t dataSize, unsigned char *result)
 
size_t decodeSafe (const unsigned char *data, const size_t dataSize, double *result)
 
size_t encodePic (const double *data, size_t dataSize, unsigned char *result)
 
size_t decodePic (const unsigned char *data, const size_t dataSize, double *result)
 
void encodePic (const std::vector< double > &data, std::vector< unsigned char > &result)
 
void decodePic (const std::vector< unsigned char > &data, std::vector< double > &result)
 
double optimalSlofFixedPoint (const double *data, size_t dataSize)
 
size_t encodeSlof (const double *data, size_t dataSize, unsigned char *result, double fixedPoint)
 
size_t decodeSlof (const unsigned char *data, const size_t dataSize, double *result)
 
void encodeSlof (const std::vector< double > &data, std::vector< unsigned char > &result, double fixedPoint)
 
void decodeSlof (const std::vector< unsigned char > &data, std::vector< double > &result)
 

Variables

const int ONE = 1
 
bool IS_LITTLE_ENDIAN = is_little_endian()
 

Function Documentation

◆ decodeFixedPoint()

static double ms::numpress::MSNumpress::decodeFixedPoint ( const unsigned char *  data)
static

Definition at line 62 of file MSNumpress.cpp.

Referenced by decodeLinear(), and decodeSlof().

64  {
65  int i;
66  double fixedPoint;
67  unsigned char *fp = (unsigned char*)&fixedPoint;
68 
69  for (i=0; i<8; i++) {
70  fp[i] = data[IS_LITTLE_ENDIAN ? (7-i) : i];
71  }
72 
73  return fixedPoint;
74 }
Here is the caller graph for this function:

◆ decodeInt()

static void ms::numpress::MSNumpress::decodeInt ( const unsigned char *  data,
size_t *  di,
size_t  max_di,
size_t *  half,
unsigned int *  res 
)
static

Decodes an int from the half bytes in bp. Lossless reverse of encodeInt

Parameters
dataptr to the char data to decode
diposition in the char data array to start decoding (will be advanced)
max_disize of data array
halfhelper variable (do not change between multiple calls)
resresult (a 32 bit integer)
Note
the helper variable indicates whether we look at the first half byte or second half byte of the current data (thus whether to interpret the first half byte of data[*di] or the second half byte).

Definition at line 151 of file MSNumpress.cpp.

Referenced by decodeLinear(), and decodePic().

157  {
158  size_t n, i;
159  unsigned int mask, m;
160  unsigned char head;
161  unsigned char hb;
162 
163  // Extract the first half byte, specifying the number of leading zero half
164  // bytes of the final integer.
165  // If half is zero, we look at the first half byte, otherwise we look at
166  // the second (lower) half byte and advance the counter to the next char.
167  if (*half == 0) {
168  head = data[*di] >> 4;
169  } else {
170  head = data[*di] & 0xf;
171  (*di)++;
172  }
173 
174  *half = 1-(*half); // switch to other half byte
175  *res = 0;
176 
177  if (head <= 8) {
178  n = head;
179  } else { // we have n leading ones, fill n half bytes in res with 0xf
180  n = head - 8;
181  mask = 0xf0000000;
182  for (i=0; i<n; i++) {
183  m = mask >> (4*i);
184  *res = *res | m;
185  }
186  }
187 
188  if (n == 8) {
189  return;
190  }
191 
192  if (*di + ((8 - n) - (1 - *half)) / 2 >= max_di) {
193  throw "[MSNumpress::decodeInt] Corrupt input data! ";
194  }
195 
196  for (i=n; i<8; i++) {
197  if (*half == 0) {
198  hb = data[*di] >> 4;
199  } else {
200  hb = data[*di] & 0xf;
201  (*di)++;
202  }
203  *res = *res | ( static_cast<unsigned int>(hb) << ((i-n)*4));
204  *half = 1 - (*half);
205  }
206 }
Here is the caller graph for this function:

◆ decodeLinear() [1/2]

size_t ms::numpress::MSNumpress::decodeLinear ( const unsigned char *  data,
const size_t  dataSize,
double *  result 
)

Decodes data encoded by encodeLinear.

result vector guaranteed to be shorter or equal to (|data| - 8) * 2

Note that this method may throw a const char* if it deems the input data to be corrupt, i.e. that the last encoded int does not use the last byte in the data. In addition the last encoded int need to use either the last halfbyte, or the second last followed by a 0x0 halfbyte.

pointer to array of bytes to be decoded (need memorycont. repr.) number of bytes from *data to decode

Returns
pointer to were resulting doubles should be stored
the number of decoded doubles, or -1 if dataSize < 4 or 4 < dataSize < 8

Definition at line 361 of file MSNumpress.cpp.

References decodeFixedPoint(), and decodeInt().

Referenced by decodeLinear(), decodeLinearCorrupt1(), decodeLinearCorrupt2(), decodeLinearNice(), decodeLinearNiceLowFP(), decodeLinearWierd(), encodeDecodeLinear(), encodeDecodeLinear5(), and encodeDecodeLinearStraight().

365  {
366  size_t i;
367  size_t ri = 0;
368  unsigned int init, buff;
369  int diff;
370  long long ints[3];
371  //double d;
372  size_t di;
373  size_t half;
374  long long extrapol;
375  long long y;
376  double fixedPoint;
377 
378  //printf("Decoding %d bytes with fixed point %f\n", (int)dataSize, fixedPoint);
379 
380  if (dataSize == 8) return 0;
381 
382  if (dataSize < 8)
383  throw "[MSNumpress::decodeLinear] Corrupt input data: not enough bytes to read fixed point! ";
384 
385  fixedPoint = decodeFixedPoint(data);
386 
387 
388  if (dataSize < 12)
389  throw "[MSNumpress::decodeLinear] Corrupt input data: not enough bytes to read first value! ";
390 
391  ints[1] = 0;
392  for (i=0; i<4; i++) {
393  ints[1] = ints[1] | ((0xff & (init = data[8+i])) << (i*8));
394  }
395  result[0] = ints[1] / fixedPoint;
396 
397  if (dataSize == 12) return 1;
398  if (dataSize < 16)
399  throw "[MSNumpress::decodeLinear] Corrupt input data: not enough bytes to read second value! ";
400 
401  ints[2] = 0;
402  for (i=0; i<4; i++) {
403  ints[2] = ints[2] | ((0xff & (init = data[12+i])) << (i*8));
404  }
405  result[1] = ints[2] / fixedPoint;
406 
407  half = 0;
408  ri = 2;
409  di = 16;
410 
411  //printf(" di ri half int[0] int[1] extrapol diff\n");
412 
413  while (di < dataSize) {
414  if (di == (dataSize - 1) && half == 1) {
415  if ((data[di] & 0xf) == 0x0) {
416  break;
417  }
418  }
419  //printf("%7d %7d %7d %lu %lu %ld", di, ri, half, ints[0], ints[1], extrapol);
420 
421  ints[0] = ints[1];
422  ints[1] = ints[2];
423  decodeInt(data, &di, dataSize, &half, &buff);
424  diff = static_cast<int>(buff);
425 
426  extrapol = ints[1] + (ints[1] - ints[0]);
427  y = extrapol + diff;
428  //printf(" %d \n", diff);
429  result[ri++] = y / fixedPoint;
430  ints[2] = y;
431  }
432 
433  return ri;
434 }
static void decodeInt(const unsigned char *data, size_t *di, size_t max_di, size_t *half, unsigned int *res)
Definition: MSNumpress.cpp:151
static double decodeFixedPoint(const unsigned char *data)
Definition: MSNumpress.cpp:62
Here is the call graph for this function:
Here is the caller graph for this function:

◆ decodeLinear() [2/2]

void ms::numpress::MSNumpress::decodeLinear ( const std::vector< unsigned char > &  data,
std::vector< double > &  result 
)

Calls lower level decodeLinear while handling vector sizes appropriately

Note that this method may throw a const char* if it deems the input data to be corrupt, i.e.. that the last encoded int does not use the last byte in the data. In addition the last encoded int need to use either the last halfbyte, or the second last followed by a 0x0 halfbyte.

vector of bytes to be decoded

Returns
vector of resulting double (will be resized to the number of doubles)

Definition at line 451 of file MSNumpress.cpp.

References decodeLinear().

454  {
455  size_t dataSize = data.size();
456  result.resize((dataSize - 8) * 2);
457  size_t decodedLength = decodeLinear(&data[0], dataSize, &result[0]);
458  result.resize(decodedLength);
459 }
void decodeLinear(const std::vector< unsigned char > &data, std::vector< double > &result)
Definition: MSNumpress.cpp:451
Here is the call graph for this function:

◆ decodePic() [1/2]

size_t ms::numpress::MSNumpress::decodePic ( const unsigned char *  data,
const size_t  dataSize,
double *  result 
)

Decodes data encoded by encodePic

result vector guaranteed to be shorter of equal to |data| * 2

Note that this method may throw a const char* if it deems the input data to be corrupt, i.e. that the last encoded int does not use the last byte in the data. In addition the last encoded int need to use either the last halfbyte, or the second last followed by a 0x0 halfbyte.

pointer to array of bytes to be decoded (need memorycont. repr.) number of bytes from *data to decode

Returns
pointer to were resulting doubles should be stored
the number of decoded doubles

Definition at line 617 of file MSNumpress.cpp.

References decodeInt().

Referenced by decodePic(), encodeDecodePic(), encodeDecodePic5(), and testErroneousDecodePic().

621  {
622  size_t ri;
623  unsigned int x;
624  size_t di;
625  size_t half;
626 
627  //printf("ri di half dSize count\n");
628 
629  half = 0;
630  ri = 0;
631  di = 0;
632 
633  while (di < dataSize) {
634  if (di == (dataSize - 1) && half == 1) {
635  if ((data[di] & 0xf) == 0x0) {
636  break;
637  }
638  }
639 
640  decodeInt(&data[0], &di, dataSize, &half, &x);
641 
642  //printf("%7d %7d %7d %7d %7d\n", ri, di, half, dataSize, count);
643 
644  //printf("count: %d \n", count);
645  result[ri++] = static_cast<double>(x);
646  }
647 
648  return ri;
649 }
static void decodeInt(const unsigned char *data, size_t *di, size_t max_di, size_t *half, unsigned int *res)
Definition: MSNumpress.cpp:151
Here is the call graph for this function:
Here is the caller graph for this function:

◆ decodePic() [2/2]

void ms::numpress::MSNumpress::decodePic ( const std::vector< unsigned char > &  data,
std::vector< double > &  result 
)

Calls lower level decodePic while handling vector sizes appropriately

Note that this method may throw a const char* if it deems the input data to be corrupt, i.e. that the last encoded int does not use the last byte in the data. In addition the last encoded int need to use either the last halfbyte, or the second last followed by a 0x0 halfbyte.

vector of bytes to be decoded

Returns
vector of resulting double (will be resized to the number of doubles)

Definition at line 665 of file MSNumpress.cpp.

References decodePic().

668  {
669  size_t dataSize = data.size();
670  result.resize(dataSize * 2);
671  size_t decodedLength = decodePic(&data[0], dataSize, &result[0]);
672  result.resize(decodedLength);
673 }
void decodePic(const std::vector< unsigned char > &data, std::vector< double > &result)
Definition: MSNumpress.cpp:665
Here is the call graph for this function:

◆ decodeSafe()

size_t ms::numpress::MSNumpress::decodeSafe ( const unsigned char *  data,
const size_t  dataSize,
double *  result 
)

Decodes data encoded by encodeSafe.

result vector is the same size as the input data.

Might throw const char* is something goes wrong during decoding.

pointer to array of bytes to be decoded (need memorycont. repr.) number of bytes from *data to decode

Returns
pointer to were resulting doubles should be stored
the number of decoded bytes

Definition at line 510 of file MSNumpress.cpp.

Referenced by encodeDecodeSafe(), and encodeDecodeSafeStraight().

514  {
515  size_t i, di, ri;
516  double extrapol, diff;
517  double latest[3];
518  unsigned char *fp;
519 
520  if (dataSize % 8 != 0)
521  throw "[MSNumpress::decodeSafe] Corrupt input data: number of bytes needs to be multiple of 8! ";
522 
523  //printf("d0 d1 extrapol diff\td2\n");
524 
525  try {
526  fp = (unsigned char*)&(latest[1]);
527  for (i=0; i<8; i++) {
528  fp[i] = data[IS_LITTLE_ENDIAN ? (7-i) : i];
529  }
530  result[0] = latest[1];
531 
532  if (dataSize == 8) return 1;
533 
534  fp = (unsigned char*)&(latest[2]);
535  for (i=0; i<8; i++) {
536  fp[i] = data[8 + (IS_LITTLE_ENDIAN ? (7-i) : i)];
537  }
538  result[1] = latest[2];
539 
540  ri = 2;
541 
542  fp = (unsigned char*)&diff;
543  for (di = 16; di < dataSize; di += 8) {
544  latest[0] = latest[1];
545  latest[1] = latest[2];
546 
547  for (i=0; i<8; i++) {
548  fp[i] = data[di + (IS_LITTLE_ENDIAN ? (7-i) : i)];
549  }
550 
551  extrapol = latest[1] + (latest[1] - latest[0]);
552  latest[2] = extrapol + diff;
553 
554  //printf("%f %f %f %f\t%f \n", latest[0], latest[1], extrapol, diff, latest[2]);
555 
556  result[ri++] = latest[2];
557  }
558  } catch (...) {
559  throw "[MSNumpress::decodeSafe] Unknown error during decode! ";
560  }
561 
562  return ri;
563 }
Here is the caller graph for this function:

◆ decodeSlof() [1/2]

size_t ms::numpress::MSNumpress::decodeSlof ( const unsigned char *  data,
const size_t  dataSize,
double *  result 
)

Decodes data encoded by encodeSlof

The return will include exactly (|data| - 8) / 2 doubles.

Note that this method may throw a const char* if it deems the input data to be corrupt.

pointer to array of bytes to be decoded (need memorycont. repr.) number of bytes from *data to decode

Returns
pointer to were resulting doubles should be stored
the number of decoded doubles

Definition at line 733 of file MSNumpress.cpp.

References decodeFixedPoint().

Referenced by decodeSlof(), encodeDecodeSlof(), and encodeDecodeSlof5().

737  {
738  size_t i, ri;
739  unsigned short x;
740  double fixedPoint;
741 
742  if (dataSize < 8)
743  throw "[MSNumpress::decodeSlof] Corrupt input data: not enough bytes to read fixed point! ";
744 
745  ri = 0;
746  fixedPoint = decodeFixedPoint(data);
747 
748  for (i=8; i<dataSize; i+=2) {
749  x = static_cast<unsigned short>(data[i] | (data[i+1] << 8));
750  result[ri++] = exp(x / fixedPoint) - 1;
751  }
752  return ri;
753 }
static double decodeFixedPoint(const unsigned char *data)
Definition: MSNumpress.cpp:62
Here is the call graph for this function:
Here is the caller graph for this function:

◆ decodeSlof() [2/2]

void ms::numpress::MSNumpress::decodeSlof ( const std::vector< unsigned char > &  data,
std::vector< double > &  result 
)

Calls lower level decodeSlof while handling vector sizes appropriately

Note that this method may throw a const char* if it deems the input data to be corrupt.

vector of bytes to be decoded

Returns
vector of resulting double (will be resized to the number of doubles)

Definition at line 770 of file MSNumpress.cpp.

References decodeSlof().

773  {
774  size_t dataSize = data.size();
775  result.resize((dataSize - 8) / 2);
776  size_t decodedLength = decodeSlof(&data[0], dataSize, &result[0]);
777  result.resize(decodedLength);
778 }
void decodeSlof(const std::vector< unsigned char > &data, std::vector< double > &result)
Definition: MSNumpress.cpp:770
Here is the call graph for this function:

◆ encodeFixedPoint()

static void ms::numpress::MSNumpress::encodeFixedPoint ( double  fixedPoint,
unsigned char *  result 
)
static

Definition at line 49 of file MSNumpress.cpp.

Referenced by encodeLinear(), and encodeSlof().

52  {
53  int i;
54  unsigned char *fp = (unsigned char*)&fixedPoint;
55  for (i=0; i<8; i++) {
56  result[i] = fp[IS_LITTLE_ENDIAN ? (7-i) : i];
57  }
58 }
Here is the caller graph for this function:

◆ encodeInt()

static void ms::numpress::MSNumpress::encodeInt ( const unsigned int  x,
unsigned char *  res,
size_t *  res_length 
)
static

Encodes the int x as a number of halfbytes in res. res_length is incremented by the number of halfbytes, which will be 1 <= n <= 9

Definition at line 83 of file MSNumpress.cpp.

Referenced by encodeLinear(), and encodePic().

87  {
88  // get the bit pattern of a signed int x_inp
89  unsigned int m;
90  unsigned char i, l; // numbers between 0 and 9
91 
92  unsigned int mask = 0xf0000000;
93  unsigned int init = x & mask;
94 
95  if (init == 0) {
96  l = 8;
97  for (i=0; i<8; i++) {
98  m = mask >> (4*i);
99  if ((x & m) != 0) {
100  l = i;
101  break;
102  }
103  }
104  res[0] = l;
105  for (i=l; i<8; i++) {
106  res[1+i-l] = static_cast<unsigned char>( x >> (4*(i-l)) );
107  }
108  *res_length += 1+8-l;
109 
110  } else if (init == mask) {
111  l = 7;
112  for (i=0; i<8; i++) {
113  m = mask >> (4*i);
114  if ((x & m) != m) {
115  l = i;
116  break;
117  }
118  }
119  res[0] = l + 8;
120  for (i=l; i<8; i++) {
121  res[1+i-l] = static_cast<unsigned char>( x >> (4*(i-l)) );
122  }
123  *res_length += 1+8-l;
124 
125  } else {
126  res[0] = 0;
127  for (i=0; i<8; i++) {
128  res[1+i] = static_cast<unsigned char>( x >> (4*i) );
129  }
130  *res_length += 9;
131 
132  }
133 }
Here is the caller graph for this function:

◆ encodeLinear() [1/2]

size_t ms::numpress::MSNumpress::encodeLinear ( const double *  data,
const size_t  dataSize,
unsigned char *  result,
double  fixedPoint 
)

Encodes the doubles in data by first using a

  • lossy conversion to a 4 byte 5 decimal fixed point representation
  • storing the residuals from a linear prediction after first two values
  • encoding by encodeInt (see above)

The resulting binary is maximally 8 + dataSize * 5 bytes, but much less if the data is reasonably smooth on the first order.

This encoding is suitable for typical m/z or retention time binary arrays. On a test set, the encoding was empirically show to be accurate to at least 0.002 ppm.

pointer to array of double to be encoded (need memorycont. repr.) number of doubles from *data to encode

Returns
pointer to where resulting bytes should be stored the scaling factor used for getting the fixed point repr. This is stored in the binary and automatically extracted on decoding.
the number of encoded bytes

Definition at line 270 of file MSNumpress.cpp.

References encodeFixedPoint(), encodeInt(), and THROW_ON_OVERFLOW.

Referenced by decodeLinearCorrupt2(), decodeLinearNice(), decodeLinearNiceLowFP(), decodeLinearWierd(), decodeLinearWierd_int_overflow(), decodeLinearWierd_int_underflow(), decodeLinearWierd_llong_overflow(), encodeDecodeLinear(), encodeDecodeLinear5(), encodeDecodeLinearStraight(), encodeLinear(), encodeLinear(), and encodeLinear1().

275  {
276  long long ints[3];
277  size_t i, ri;
278  unsigned char halfBytes[10];
279  size_t halfByteCount;
280  size_t hbi;
281  long long extrapol;
282  int diff;
283 
284  //printf("Encoding %d doubles with fixed point %f\n", (int)dataSize, fixedPoint);
285  encodeFixedPoint(fixedPoint, result);
286 
287 
288  if (dataSize == 0) return 8;
289 
290  ints[1] = static_cast<long long>(data[0] * fixedPoint + 0.5);
291  for (i=0; i<4; i++) {
292  result[8+i] = (ints[1] >> (i*8)) & 0xff;
293  }
294 
295  if (dataSize == 1) return 12;
296 
297  ints[2] = static_cast<long long>(data[1] * fixedPoint + 0.5);
298  for (i=0; i<4; i++) {
299  result[12+i] = (ints[2] >> (i*8)) & 0xff;
300  }
301 
302  halfByteCount = 0;
303  ri = 16;
304 
305  for (i=2; i<dataSize; i++) {
306  ints[0] = ints[1];
307  ints[1] = ints[2];
308  if (THROW_ON_OVERFLOW &&
309  data[i] * fixedPoint + 0.5 > LLONG_MAX ) {
310  throw "[MSNumpress::encodeLinear] Next number overflows LLONG_MAX.";
311  }
312 
313  ints[2] = static_cast<long long>(data[i] * fixedPoint + 0.5);
314  extrapol = ints[1] + (ints[1] - ints[0]);
315 
316  if (THROW_ON_OVERFLOW &&
317  ( ints[2] - extrapol > INT_MAX
318  || ints[2] - extrapol < INT_MIN )) {
319  throw "[MSNumpress::encodeLinear] Cannot encode a number that exceeds the bounds of [-INT_MAX, INT_MAX].";
320  }
321 
322  diff = static_cast<int>(ints[2] - extrapol);
323  //printf("%lu %lu %lu, extrapol: %ld diff: %d \n", ints[0], ints[1], ints[2], extrapol, diff);
324  encodeInt(
325  static_cast<unsigned int>(diff),
326  &halfBytes[halfByteCount],
327  &halfByteCount
328  );
329  /*
330  printf("%d (%d): ", diff, (int)halfByteCount);
331  for (size_t j=0; j<halfByteCount; j++) {
332  printf("%x ", halfBytes[j] & 0xf);
333  }
334  printf("\n");
335  */
336 
337 
338  for (hbi=1; hbi < halfByteCount; hbi+=2) {
339  result[ri] = static_cast<unsigned char>(
340  (halfBytes[hbi-1] << 4) | (halfBytes[hbi] & 0xf)
341  );
342  //printf("%x \n", result[ri]);
343  ri++;
344  }
345  if (halfByteCount % 2 != 0) {
346  halfBytes[0] = halfBytes[halfByteCount-1];
347  halfByteCount = 1;
348  } else {
349  halfByteCount = 0;
350  }
351  }
352  if (halfByteCount == 1) {
353  result[ri] = static_cast<unsigned char>(halfBytes[0] << 4);
354  ri++;
355  }
356  return ri;
357 }
#define THROW_ON_OVERFLOW
Definition: MSNumpress.hpp:50
static void encodeInt(const unsigned int x, unsigned char *res, size_t *res_length)
Definition: MSNumpress.cpp:83
static void encodeFixedPoint(double fixedPoint, unsigned char *result)
Definition: MSNumpress.cpp:49
Here is the call graph for this function:
Here is the caller graph for this function:

◆ encodeLinear() [2/2]

void ms::numpress::MSNumpress::encodeLinear ( const std::vector< double > &  data,
std::vector< unsigned char > &  result,
double  fixedPoint 
)

Calls lower level encodeLinear while handling vector sizes appropriately

vector of doubles to be encoded

Returns
vector of resulting bytes (will be resized to the number of bytes)

Definition at line 438 of file MSNumpress.cpp.

References encodeLinear().

442  {
443  size_t dataSize = data.size();
444  result.resize(dataSize * 5 + 8);
445  size_t encodedLength = encodeLinear(&data[0], dataSize, &result[0], fixedPoint);
446  result.resize(encodedLength);
447 }
void encodeLinear()
Here is the call graph for this function:

◆ encodePic() [1/2]

size_t ms::numpress::MSNumpress::encodePic ( const double *  data,
const size_t  dataSize,
unsigned char *  result 
)

Encodes ion counts by simply rounding to the nearest 4 byte integer, and compressing each integer with encodeInt.

The handleable range is therefore 0 -> 4294967294. The resulting binary is maximally dataSize * 5 bytes, but much less if the data is close to 0 on average.

pointer to array of double to be encoded (need memorycont. repr.) number of doubles from *data to encode

Returns
pointer to were resulting bytes should be stored
the number of encoded bytes

Definition at line 568 of file MSNumpress.cpp.

References encodeInt(), and THROW_ON_OVERFLOW.

Referenced by encodeDecodePic(), encodeDecodePic5(), and encodePic().

572  {
573  size_t i, ri;
574  unsigned int x;
575  unsigned char halfBytes[10];
576  size_t halfByteCount;
577  size_t hbi;
578 
579  //printf("Encoding %d doubles\n", (int)dataSize);
580 
581  halfByteCount = 0;
582  ri = 0;
583 
584  for (i=0; i<dataSize; i++) {
585 
586  if (THROW_ON_OVERFLOW &&
587  (data[i] + 0.5 > INT_MAX || data[i] < -0.5) ){
588  throw "[MSNumpress::encodePic] Cannot use Pic to encode a number larger than INT_MAX or smaller than 0.";
589  }
590  x = static_cast<unsigned int>(data[i] + 0.5);
591  //printf("%d %d %d, extrapol: %d diff: %d \n", ints[0], ints[1], ints[2], extrapol, diff);
592  encodeInt(x, &halfBytes[halfByteCount], &halfByteCount);
593 
594  for (hbi=1; hbi < halfByteCount; hbi+=2) {
595  result[ri] = static_cast<unsigned char>(
596  (halfBytes[hbi-1] << 4) | (halfBytes[hbi] & 0xf)
597  );
598  //printf("%x \n", result[ri]);
599  ri++;
600  }
601  if (halfByteCount % 2 != 0) {
602  halfBytes[0] = halfBytes[halfByteCount-1];
603  halfByteCount = 1;
604  } else {
605  halfByteCount = 0;
606  }
607  }
608  if (halfByteCount == 1) {
609  result[ri] = static_cast<unsigned char>(halfBytes[0] << 4);
610  ri++;
611  }
612  return ri;
613 }
#define THROW_ON_OVERFLOW
Definition: MSNumpress.hpp:50
static void encodeInt(const unsigned int x, unsigned char *res, size_t *res_length)
Definition: MSNumpress.cpp:83
Here is the call graph for this function:
Here is the caller graph for this function:

◆ encodePic() [2/2]

void ms::numpress::MSNumpress::encodePic ( const std::vector< double > &  data,
std::vector< unsigned char > &  result 
)

Calls lower level encodePic while handling vector sizes appropriately

vector of doubles to be encoded

Returns
vector of resulting bytes (will be resized to the number of bytes)

Definition at line 653 of file MSNumpress.cpp.

References encodePic().

656  {
657  size_t dataSize = data.size();
658  result.resize(dataSize * 5);
659  size_t encodedLength = encodePic(&data[0], dataSize, &result[0]);
660  result.resize(encodedLength);
661 }
void encodePic(const std::vector< double > &data, std::vector< unsigned char > &result)
Definition: MSNumpress.cpp:653
Here is the call graph for this function:

◆ encodeSafe()

size_t ms::numpress::MSNumpress::encodeSafe ( const double *  data,
const size_t  dataSize,
unsigned char *  result 
)

Encodes the doubles in data by storing the residuals from a linear prediction after first two values.

The resulting binary is the same size as the input data.

This encoding is suitable for typical m/z or retention time binary arrays, and is intended to be used before zlib compression to improve compression.

pointer to array of doubles to be encoded (need memorycont. repr.) number of doubles from *data to encode

Returns
pointer to were resulting bytes should be stored

Definition at line 464 of file MSNumpress.cpp.

Referenced by encodeDecodeSafe(), and encodeDecodeSafeStraight().

468  {
469  size_t i, j, ri = 0;
470  double latest[3];
471  double extrapol, diff;
472  const unsigned char *fp;
473 
474  //printf("d0 d1 d2 extrapol diff\n");
475 
476  if (dataSize == 0) return ri;
477 
478  latest[1] = data[0];
479  fp = (unsigned char*)data;
480  for (i=0; i<8; i++) {
481  result[ri++] = fp[IS_LITTLE_ENDIAN ? (7-i) : i];
482  }
483 
484  if (dataSize == 1) return ri;
485 
486  latest[2] = data[1];
487  fp = (unsigned char*)&(data[1]);
488  for (i=0; i<8; i++) {
489  result[ri++] = fp[IS_LITTLE_ENDIAN ? (7-i) : i];
490  }
491 
492  fp = (unsigned char*)&diff;
493  for (i=2; i<dataSize; i++) {
494  latest[0] = latest[1];
495  latest[1] = latest[2];
496  latest[2] = data[i];
497  extrapol = latest[1] + (latest[1] - latest[0]);
498  diff = latest[2] - extrapol;
499  //printf("%f %f %f %f %f\n", latest[0], latest[1], latest[2], extrapol, diff);
500  for (j=0; j<8; j++) {
501  result[ri++] = fp[IS_LITTLE_ENDIAN ? (7-j) : j];
502  }
503  }
504 
505  return ri;
506 }
Here is the caller graph for this function:

◆ encodeSlof() [1/2]

size_t ms::numpress::MSNumpress::encodeSlof ( const double *  data,
const size_t  dataSize,
unsigned char *  result,
double  fixedPoint 
)

Encodes ion counts by taking the natural logarithm, and storing a fixed point representation of this. This is calculated as

unsigned short fp = log(d + 1) * fixedPoint + 0.5

the result vector is exactly |data| * 2 + 8 bytes long

pointer to array of double to be encoded (need memorycont. repr.) number of doubles from *data to encode

Returns
pointer to were resulting bytes should be stored
the number of encoded bytes

Definition at line 704 of file MSNumpress.cpp.

References encodeFixedPoint(), and THROW_ON_OVERFLOW.

Referenced by encodeDecodeSlof(), encodeDecodeSlof5(), and encodeSlof().

709  {
710  size_t i, ri;
711  double temp;
712  unsigned short x;
713  encodeFixedPoint(fixedPoint, result);
714 
715  ri = 8;
716  for (i=0; i<dataSize; i++) {
717  temp = log(data[i]+1) * fixedPoint;
718 
719  if (THROW_ON_OVERFLOW &&
720  temp > USHRT_MAX ) {
721  throw "[MSNumpress::encodeSlof] Cannot encode a number that overflows USHRT_MAX.";
722  }
723 
724  x = static_cast<unsigned short>(temp + 0.5);
725  result[ri++] = x & 0xff;
726  result[ri++] = (x >> 8) & 0xff;
727  }
728  return ri;
729 }
#define THROW_ON_OVERFLOW
Definition: MSNumpress.hpp:50
static void encodeFixedPoint(double fixedPoint, unsigned char *result)
Definition: MSNumpress.cpp:49
Here is the call graph for this function:
Here is the caller graph for this function:

◆ encodeSlof() [2/2]

void ms::numpress::MSNumpress::encodeSlof ( const std::vector< double > &  data,
std::vector< unsigned char > &  result,
double  fixedPoint 
)

Calls lower level encodeSlof while handling vector sizes appropriately

vector of doubles to be encoded

Returns
vector of resulting bytes (will be resized to the number of bytes)

Definition at line 757 of file MSNumpress.cpp.

References encodeSlof().

761  {
762  size_t dataSize = data.size();
763  result.resize(dataSize * 2 + 8);
764  size_t encodedLength = encodeSlof(&data[0], dataSize, &result[0], fixedPoint);
765  result.resize(encodedLength);
766 }
void encodeSlof(const std::vector< double > &data, std::vector< unsigned char > &result, double fixedPoint)
Definition: MSNumpress.cpp:757
Here is the call graph for this function:

◆ is_little_endian()

static bool ms::numpress::MSNumpress::is_little_endian ( )
static

Definition at line 40 of file MSNumpress.cpp.

40  {
41  return *((char*)&(ONE)) == 1;
42 }

◆ optimalLinearFixedPoint()

double ms::numpress::MSNumpress::optimalLinearFixedPoint ( const double *  data,
size_t  dataSize 
)

Compute the maximal linear fixed point that prevents integer overflow.

pointer to array of double to be encoded (need memorycont. repr.) number of doubles from *data to encode

Returns
the linear fixed point safe to use

Definition at line 234 of file MSNumpress.cpp.

Referenced by decodeLinearCorrupt2(), decodeLinearNice(), decodeLinearWierd(), encodeDecodeLinear(), encodeDecodeLinear5(), encodeDecodeLinearStraight(), optimalLinearFixedPoint(), and optimalLinearFixedPointMass().

237  {
238  /*
239  * safer impl - apparently not needed though
240  *
241  if (dataSize == 0) return 0;
242 
243  double maxDouble = 0;
244  double x;
245 
246  for (size_t i=0; i<dataSize; i++) {
247  x = data[i];
248  maxDouble = max(maxDouble, x);
249  }
250 
251  return floor(0xFFFFFFFF / maxDouble);
252  */
253  if (dataSize == 0) return 0;
254  if (dataSize == 1) return floor(0x7FFFFFFFl / data[0]);
255  double maxDouble = max(data[0], data[1]);
256  double extrapol;
257  double diff;
258 
259  for (size_t i=2; i<dataSize; i++) {
260  extrapol = data[i-1] + (data[i-1] - data[i-2]);
261  diff = data[i] - extrapol;
262  maxDouble = max(maxDouble, ceil(abs(diff)+1));
263  }
264 
265  return floor(0x7FFFFFFFl / maxDouble);
266 }
Here is the caller graph for this function:

◆ optimalLinearFixedPointMass()

double ms::numpress::MSNumpress::optimalLinearFixedPointMass ( const double *  data,
size_t  dataSize,
double  mass_acc 
)

Compute the optimal linear fixed point with a desired m/z accuracy.

Note
If the desired accuracy cannot be reached without overflowing 64 bit integers, then a negative value is returned. You need to check for this and in that case abandon numpress or use optimalLinearFixedPoint which returns the largest safe value.

pointer to array of double to be encoded (need memorycont. repr.) number of doubles from *data to encode desired m/z accuracy in Th

Returns
the linear fixed point that satisfies the accuracy requirement (or -1 in case of failure).

Definition at line 213 of file MSNumpress.cpp.

References optimalLinearFixedPoint().

Referenced by decodeLinearNiceLowFP(), and optimalLinearFixedPointMass().

217  {
218  if (dataSize < 3) return 0; // we just encode the first two points as floats
219 
220  // We calculate the maximal fixedPoint we need to achieve a specific mass
221  // accuracy. Note that the maximal error we will make by encoding as int is
222  // 0.5 due to rounding errors.
223  double maxFP = 0.5 / mass_acc;
224 
225  // There is a maximal value for the FP given by the int length (32bit)
226  // which means we cannot choose a value higher than that. In case we cannot
227  // achieve the desired accuracy, return failure (-1).
228  double maxFP_overflow = optimalLinearFixedPoint(data, dataSize);
229  if (maxFP > maxFP_overflow) return -1;
230 
231  return maxFP;
232 }
void optimalLinearFixedPoint()
Here is the call graph for this function:
Here is the caller graph for this function:

◆ optimalSlofFixedPoint()

double ms::numpress::MSNumpress::optimalSlofFixedPoint ( const double *  data,
size_t  dataSize 
)

Definition at line 679 of file MSNumpress.cpp.

Referenced by encodeDecodeSlof(), encodeDecodeSlof5(), and optimalSlofFixedPoint().

682  {
683  if (dataSize == 0) return 0;
684 
685  double maxDouble = 1;
686  double x;
687  double fp;
688 
689  for (size_t i=0; i<dataSize; i++) {
690  x = log(data[i]+1);
691  maxDouble = max(maxDouble, x);
692  }
693 
694  fp = floor(0xFFFF / maxDouble);
695 
696  //cout << " max val: " << maxDouble << endl;
697  //cout << "fixed point: " << fp << endl;
698 
699  return fp;
700 }
Here is the caller graph for this function:

Variable Documentation

◆ IS_LITTLE_ENDIAN

bool ms::numpress::MSNumpress::IS_LITTLE_ENDIAN = is_little_endian()

Definition at line 43 of file MSNumpress.cpp.

◆ ONE

const int ms::numpress::MSNumpress::ONE = 1

Definition at line 39 of file MSNumpress.cpp.