ProteoWizard
MSNumpress.hpp
Go to the documentation of this file.
1 /*
2  $Id: MSNumpress.hpp 5009 2013-10-03 22:33:08Z pcbrefugee $
3 
4  Author: johan.teleman@immun.lth.se
5 
6  Copyright 2013 Johan Teleman
7 
8  Licensed under the Apache License, Version 2.0 (the "License");
9  you may not use this file except in compliance with the License.
10  You may obtain a copy of the License at
11 
12  http://www.apache.org/licenses/LICENSE-2.0
13 
14  Unless required by applicable law or agreed to in writing, software
15  distributed under the License is distributed on an "AS IS" BASIS,
16  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  See the License for the specific language governing permissions and
18  limitations under the License.
19  */
20 /*
21  ==================== encodeInt ====================
22  Some of the encodings described below use an integer compression referred to simply as
23 
24  encodeInt()
25 
26  This encoding works on a 4 byte integer, by truncating initial zeros or ones.
27  If the initial (most significant) half byte is 0x0 or 0xf, the number of such
28  halfbytes starting from the most significant is stored in a halfbyte. This initial
29  count is then followed by the rest of the int's halfbytes, in little-endian order.
30  A count halfbyte c of
31 
32  0 <= c <= 8 is interpreted as an initial c 0x0 halfbytes
33  9 <= c <= 15 is interpreted as an initial (c-8) 0xf halfbytes
34 
35  Ex:
36  int c rest
37  0 => 0x8
38  -1 => 0xf 0xf
39  23 => 0x6 0x7 0x1
40  */
41 
42 #ifndef _MSNUMPRESS_HPP_
43 #define _MSNUMPRESS_HPP_
44 
45 #include <cstddef>
46 #include <vector>
47 
48 namespace ms {
49 namespace numpress {
50 
51 namespace MSNumpress {
52 
/**
 * Presumably determines the fixedPoint scaling factor that encodeLinear picks
 * automatically when its fixedPoint argument is 0 -- confirm against the implementation.
 *
 * @data pointer to an array of doubles
 * @dataSize number of doubles in *data
 * @return a double (presumably the fixed point scaling factor -- confirm)
 */
53  double optimalLinearFixedPoint(
54  const double *data,
55  size_t dataSize);
56 
57  /**
58  * Encodes the doubles in data by
59  * - a lossy conversion to a 4 byte, 5 decimal fixed point representation
60  * - storing the residuals from a linear prediction after the first two values
61  * - encoding those residuals with encodeInt (see above)
62  *
63  * The resulting binary is at most dataSize * 5 bytes, but much smaller if the
64  * data is reasonably smooth to first order.
65  *
66  * This encoding is suitable for typical m/z or retention time binary arrays.
67  * For masses above 100 m/z the encoding is accurate to at least 0.1 ppm.
68  *
69  * @data pointer to the array of doubles to be encoded (must be contiguous in memory)
70  * @dataSize number of doubles from *data to encode
71  * @result pointer to where the resulting bytes should be stored
72  * @fixedPoint the scaling factor used to obtain the fixed point representation.
73  * This is stored in the binary and automatically extracted
74  * on decoding. Automatically (and maybe slowly) determined if 0.
75  * @return the number of encoded bytes
76  */
77  size_t encodeLinear(
78  const double *data,
79  const size_t dataSize,
80  unsigned char *result,
81  double fixedPoint);
82 
83  /**
84  * Convenience overload: calls the lower-level (pointer) encodeLinear while handling vector sizes appropriately.
85  *
86  * @data vector of doubles to be encoded
87  * @result vector of resulting bytes (will be resized to the number of bytes)
    * @fixedPoint scaling factor, as described for the pointer overload of encodeLinear
    *             (presumably forwarded unchanged -- confirm)
88  */
89  void encodeLinear(
90  const std::vector<double> &data,
91  std::vector<unsigned char> &result,
92  double fixedPoint);
93 
94  /**
95  * Decodes data encoded by encodeLinear. Note that the compression
96  * discards any information < 1e-5, so data is only guaranteed
97  * to be within +- 5e-6 of the original value.
98  *
99  * Further, values > ~42000 will also be truncated because of the
100  * fixed point representation, so this scheme is strongly discouraged
101  * if values might exceed this size.
102  *
103  * The result is guaranteed to be shorter than twice the data length (in number of values).
104  *
105  * @data pointer to the array of bytes to be decoded (must be contiguous in memory)
106  * @dataSize number of bytes from *data to decode
107  * @result pointer to where the resulting doubles should be stored
108  * @return the number of decoded doubles, or -1 if dataSize < 4 or 4 < dataSize < 8
     *         (NOTE(review): the return type is size_t, so -1 would reach callers as the
     *         largest size_t value -- confirm how errors are actually reported)
109  */
110  size_t decodeLinear(
111  const unsigned char *data,
112  const size_t dataSize,
113  double *result);
114 
115  /**
116  * Convenience overload: calls the lower-level (pointer) decodeLinear while handling vector sizes appropriately.
117  *
118  * @data vector of bytes to be decoded
119  * @result vector of resulting doubles (will be resized to the number of doubles)
120  */
121  void decodeLinear(
122  const std::vector<unsigned char> &data,
123  std::vector<double> &result);
124 
125 /////////////////////////////////////////////////////////////
126 
127  /**
128  * Encodes ion counts by simply rounding to the nearest 4 byte integer,
129  * and compressing each integer with encodeInt.
130  *
131  * The representable range is therefore 0 -> 4294967294.
132  * The resulting binary is at most dataSize * 5 bytes, but much smaller if the
133  * data is close to 0 on average.
134  *
135  * @data pointer to the array of doubles to be encoded (must be contiguous in memory)
136  * @dataSize number of doubles from *data to encode
137  * @result pointer to where the resulting bytes should be stored
138  * @return the number of encoded bytes
139  */
140  size_t encodePic(
141  const double *data,
142  const size_t dataSize,
143  unsigned char *result);
144 
145  /**
146  * Convenience overload: calls the lower-level (pointer) encodePic while handling vector sizes appropriately.
147  *
148  * @data vector of doubles to be encoded
149  * @result vector of resulting bytes (will be resized to the number of bytes)
150  */
151  void encodePic(
152  const std::vector<double> &data,
153  std::vector<unsigned char> &result);
154 
155  /**
156  * Decodes data encoded by encodePic (std::vector overload).
157  *
158  * The result is guaranteed to be shorter than twice the data length (in number of values).
159  *
160  * Calls the lower-level pointer overload of decodePic while handling vector sizes
161  * appropriately; nothing is returned, the decoded values are delivered in result.
162  * @data vector of bytes to be decoded
163  * @result vector of resulting doubles (will be resized to the number of doubles)
164  */
165  void decodePic(
166  const std::vector<unsigned char> &data,
167  std::vector<double> &result);
168 
169  /**
170  * Decodes data encoded by encodePic (lower-level pointer overload).
171  *
172  * @data pointer to the array of bytes to be decoded (must be contiguous in memory)
173  * @dataSize number of bytes from *data to decode
     * @result pointer to where the resulting doubles should be stored
     * @return the number of decoded doubles
174  */
175  size_t decodePic(
176  const unsigned char *data,
177  const size_t dataSize,
178  double *result);
179 
180 /////////////////////////////////////////////////////////////
181 
182 
/**
 * NOTE(review): undocumented in this header. Presumably determines the fixedPoint
 * value that encodeSlof picks automatically when its fixedPoint argument is 0 --
 * confirm against the implementation.
 *
 * @data pointer to an array of doubles
 * @dataSize number of doubles in *data
 */
183  double optimalSlofFixedPoint(
184  const double *data,
185  size_t dataSize);
186 
187  /**
188  * Encodes ion counts by taking the natural logarithm, and storing a
189  * fixed point representation of this. This is calculated as
190  *
191  * unsigned short fp = log(d + 1) * 3000.0 + 0.5
192  *
193  * Note that this fixed point will mean any d < 0.00016667 will be
194  * stored as a zero and mapped back to a zero.
195  *
196  * The result vector is exactly twice the data length (in number of values).
197  *
198  * @data pointer to the array of doubles to be encoded (must be contiguous in memory)
199  * @dataSize number of doubles from *data to encode
200  * @result pointer to where the resulting bytes should be stored
201  * @fixedPoint automatically (and maybe slowly) determined if 0.
     *             (NOTE(review): the formula above shows a constant 3000.0 while this
     *             function takes a fixedPoint argument -- confirm which one is used)
202  * @return the number of encoded bytes
203  */
204  size_t encodeSlof(
205  const double *data,
206  const size_t dataSize,
207  unsigned char *result,
208  double fixedPoint);
209 
210  /**
211  * Convenience overload: calls the lower-level (pointer) encodeSlof while handling vector sizes appropriately.
212  *
213  * @data vector of doubles to be encoded
214  * @result vector of resulting bytes (will be resized to the number of bytes)
     * @fixedPoint as described for the pointer overload of encodeSlof
     *             (presumably forwarded unchanged -- confirm)
215  */
216  void encodeSlof(
217  const std::vector<double> &data,
218  std::vector<unsigned char> &result,
219  double fixedPoint);
220 
221  /**
222  * Decodes data encoded by encodeSlof.
223  *
224  * @data pointer to the array of bytes to be decoded (must be contiguous in memory)
225  * @dataSize number of bytes from *data to decode
226  * @result pointer to where the resulting doubles should be stored
227  * @return the number of decoded doubles
228  */
229  size_t decodeSlof(
230  const unsigned char *data,
231  const size_t dataSize,
232  double *result);
233 
234  /**
235  * Convenience overload: calls the lower-level (pointer) decodeSlof while handling vector sizes appropriately.
236  *
237  * @data vector of bytes to be decoded
238  * @result vector of resulting doubles (will be resized to the number of doubles)
239  */
240  void decodeSlof(
241  const std::vector<unsigned char> &data,
242  std::vector<double> &result);
243 
244 } // namespace MSNumpress
245 } // namespace numpress
246 } // namespace ms
247 
248 #endif // _MSNUMPRESS_HPP_
size_t encodePic(const double *data, const size_t dataSize, unsigned char *result)
Encodes ion counts by simply rounding to the nearest 4 byte integer, and compressing each integer wit...
double optimalSlofFixedPoint(const double *data, size_t dataSize)
double optimalLinearFixedPoint(const double *data, size_t dataSize)
size_t encodeLinear(const double *data, const size_t dataSize, unsigned char *result, double fixedPoint)
Encodes the doubles in data by first using a.
size_t decodeLinear(const unsigned char *data, const size_t dataSize, double *result)
Decodes data encoded by encodeLinear.
void decodePic(const std::vector< unsigned char > &data, std::vector< double > &result)
Decodes data encoded by encodePic.
size_t decodeSlof(const unsigned char *data, const size_t dataSize, double *result)
Decodes data encoded by encodeSlof.
size_t encodeSlof(const double *data, const size_t dataSize, unsigned char *result, double fixedPoint)
Encodes ion counts by taking the natural logarithm, and storing a fixed point representation of this...