[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]

multi_array_chunked_hdf5.hxx VIGRA

1 /************************************************************************/
2 /* */
3 /* Copyright 2012-2014 by Ullrich Koethe and Thorben Kroeger */
4 /* */
5 /* This file is part of the VIGRA computer vision library. */
6 /* The VIGRA Website is */
7 /* http://hci.iwr.uni-heidelberg.de/vigra/ */
8 /* Please direct questions, bug reports, and contributions to */
9 /* ullrich.koethe@iwr.uni-heidelberg.de or */
10 /* vigra@informatik.uni-hamburg.de */
11 /* */
12 /* Permission is hereby granted, free of charge, to any person */
13 /* obtaining a copy of this software and associated documentation */
14 /* files (the "Software"), to deal in the Software without */
15 /* restriction, including without limitation the rights to use, */
16 /* copy, modify, merge, publish, distribute, sublicense, and/or */
17 /* sell copies of the Software, and to permit persons to whom the */
18 /* Software is furnished to do so, subject to the following */
19 /* conditions: */
20 /* */
21 /* The above copyright notice and this permission notice shall be */
22 /* included in all copies or substantial portions of the */
23 /* Software. */
24 /* */
25 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
26 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
27 /* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
28 /* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
29 /* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
30 /* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
31 /* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
32 /* OTHER DEALINGS IN THE SOFTWARE. */
33 /* */
34 /************************************************************************/
35 
36 #ifndef VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX
37 #define VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX
38 
39 #include <queue>
40 
41 #include "multi_array_chunked.hxx"
42 #include "hdf5impex.hxx"
43 
44 // Bounds-checking macro: checks this->isInside(diff) via vigra_precondition when VIGRA_CHECK_BOUNDS is defined, and expands to nothing otherwise.
45 #ifdef VIGRA_CHECK_BOUNDS
46 #define VIGRA_ASSERT_INSIDE(diff) \
47  vigra_precondition(this->isInside(diff), "Index out of bounds")
48 #else
49 #define VIGRA_ASSERT_INSIDE(diff)
50 #endif
51 
52 namespace vigra {
53 
54 /** \addtogroup ChunkedArrayClasses
55 */
56 //@{
57 
58 /** Implement ChunkedArray as a chunked dataset in an HDF5 file.
59 
60  <b>\#include</b> <vigra/multi_array_chunked_hdf5.hxx> <br/>
61  Namespace: vigra
62 
63  This uses the native chunking and compression functionality provided by the
64  HDF5 library. Note: This file must only be included when the HDF5 headers
65  and libraries are installed on the system.
66 */
67 template <unsigned int N, class T, class Alloc = std::allocator<T> >
68 class ChunkedArrayHDF5
69 : public ChunkedArray<N, T>
70 {
71  /* REMARKS
72  Alternatives are:
73  * Back chunks by HDF5 chunks, possibly using on-the-fly compression. This
74  is in particular useful for existing HDF5 files.
75  * Back chunks by HDF5 datasets. This can be combined with compression
76  (both explicit and on-the-fly) or with memory mapping (using the
77  function H5Dget_offset() to get the offset from the beginning of the file).
78  */
79 
80  public:
81 
82  class Chunk
83  : public ChunkBase<N, T>
84  {
85  public:
86  typedef typename MultiArrayShape<N>::type shape_type;
87  typedef T value_type;
88  typedef value_type * pointer;
89  typedef value_type & reference;
90 
91  Chunk(shape_type const & shape, shape_type const & start,
92  ChunkedArrayHDF5 * array, Alloc const & alloc)
93  : ChunkBase<N, T>(detail::defaultStride(shape))
94  , shape_(shape)
95  , start_(start)
96  , array_(array)
97  , alloc_(alloc)
98  {}
99 
100  ~Chunk()
101  {
102  write();
103  }
104 
105  std::size_t size() const
106  {
107  return prod(shape_);
108  }
109 
110  void write(bool deallocate = true)
111  {
112  if(this->pointer_ != 0)
113  {
114  if(!array_->file_.isReadOnly())
115  {
116  herr_t status = array_->file_.writeBlock(array_->dataset_, start_,
117  MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
118  vigra_postcondition(status >= 0,
119  "ChunkedArrayHDF5: write to dataset failed.");
120  }
121  if(deallocate)
122  {
123  alloc_.deallocate(this->pointer_, this->size());
124  this->pointer_ = 0;
125  }
126  }
127  }
128 
129  pointer read()
130  {
131  if(this->pointer_ == 0)
132  {
133  this->pointer_ = alloc_.allocate(this->size());
134  herr_t status = array_->file_.readBlock(array_->dataset_, start_, shape_,
135  MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
136  vigra_postcondition(status >= 0,
137  "ChunkedArrayHDF5: read from dataset failed.");
138  }
139  return this->pointer_;
140  }
141 
142  shape_type shape_, start_;
143  ChunkedArrayHDF5 * array_;
144  Alloc alloc_;
145 
146  private:
147  Chunk & operator=(Chunk const &);
148  };
149 
150  typedef ChunkedArray<N, T> base_type;
152  typedef typename ChunkStorage::difference_type shape_type;
153  typedef T value_type;
154  typedef value_type * pointer;
155  typedef value_type & reference;
156 
157  /** \brief Construct with given 'shape', 'chunk_shape' and 'options',
158  using 'alloc' to manage the in-memory version of the data..
159 
160  The data are placed in 'file' at the internal path 'dataset'. Argument
161  'mode' must be one of the following:
162  <ul>
163  <li>HDF5File::New: Create new dataset, possibly deleting any existing content.
164  It is an error to request this mode when the entire
165  'file' is read-only.
166  <li>HDF5File::Replace: Same as New.
167  <li>HDF5File::ReadWrite: Open the dataset for reading and writing. Create
168  the datset if it doesn't exist. It is an error
169  to request this mode when 'file' is read-only.
170  <li>HDF5File::ReadOnly: Open the dataset for reading. It is an error to
171  request this mode when the dataset doesn't exist.
172  <li>HDF5File::Default: Resolves to ReadOnly when the dataset exists, and
173  to New otherwise.
174  </ul>
175  The supported compression algorithms are:
176  <ul>
177  <li>ZLIB_FAST: Fast compression using 'zlib' (slower than LZ4, but higher compression).
178  <li>ZLIB_BEST: Best compression using 'zlib', slow.
179  <li>ZLIB_NONE: Use 'zlib' format without compression.
180  <li>DEFAULT_COMPRESSION: Same as ZLIB_FAST.
181  </ul>
182  */
183  ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
184  HDF5File::OpenMode mode,
185  shape_type const & shape,
186  shape_type const & chunk_shape=shape_type(),
187  ChunkedArrayOptions const & options = ChunkedArrayOptions(),
188  Alloc const & alloc = Alloc())
189  : ChunkedArray<N, T>(shape, chunk_shape, options),
190  file_(file),
191  dataset_name_(dataset),
192  dataset_(),
193  compression_(options.compression_method),
194  alloc_(alloc)
195  {
196  init(mode);
197  }
198 
199  /** \brief Construct for an already existing dataset with given 'options',
200  using 'alloc' to manage the in-memory version of the data.
201 
202  The data must be located in 'file' at the internal path 'dataset'. The
203  array's shape and chunk_shape are read from the file. It is an error
204  to use this constructor when 'dataset' doesn't exist.
205 
206  Argument 'mode' must be one of the following:
207  <ul>
208  <li>HDF5File::ReadWrite: Open the dataset for reading and writing. It is an error
209  to request this mode when 'file' is read-only.
210  <li>HDF5File::ReadOnly: Open the dataset for reading (default).
211  <li>HDF5File::Default: Same as ReadOnly.
212  </ul>
213  The supported compression algorithms are:
214  <ul>
215  <li>ZLIB_FAST: Fast compression using 'zlib' (slower than LZ4, but higher compression).
216  <li>ZLIB_BEST: Best compression using 'zlib', slow.
217  <li>ZLIB_NONE: Use 'zlib' format without compression.
218  <li>DEFAULT_COMPRESSION: Same as ZLIB_FAST.
219  </ul>
220  */
221  ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
222  HDF5File::OpenMode mode = HDF5File::ReadOnly,
223  ChunkedArrayOptions const & options = ChunkedArrayOptions(),
224  Alloc const & alloc = Alloc())
225  : ChunkedArray<N, T>(shape_type(), shape_type(), options),
226  file_(file),
227  dataset_name_(dataset),
228  dataset_(),
229  compression_(options.compression_method),
230  alloc_(alloc)
231  {
232  init(mode);
233  }
234 
235  void init(HDF5File::OpenMode mode)
236  {
237  bool exists = file_.existsDataset(dataset_name_);
238 
239  if(mode == HDF5File::Replace)
240  {
241  mode = HDF5File::New;
242  }
243  else if(mode == HDF5File::Default)
244  {
245  if(exists)
246  mode = HDF5File::ReadOnly;
247  else
248  mode = HDF5File::New;
249  }
250 
251  if(mode == HDF5File::ReadOnly)
252  file_.setReadOnly();
253  else
254  vigra_precondition(!file_.isReadOnly(),
255  "ChunkedArrayHDF5(): 'mode' is incompatible with read-only file.");
256 
257  vigra_precondition(exists || !file_.isReadOnly(),
258  "ChunkedArrayHDF5(): dataset does not exist, but file is read-only.");
259 
260  if(!exists || mode == HDF5File::New)
261  {
262  // FIXME: set rdcc_nbytes to 0 (disable cache, because we don't
263  // need two caches
264  // H5Pset_chunk_cache (dapl, rdcc_nslots, rdcc_nbytes, rdcc_w0);
265  // Chunk cache size (rdcc_nbytes) should be large
266  // enough to hold all the chunks in a selection
267  // • If this is not possible, it may be best to disable chunk
268  // caching altogether (set rdcc_nbytes to 0)
269  // • rdcc_slots should be a prime number that is at
270  // least 10 to 100 times the number of chunks that can fit
271  // into rdcc_nbytes
272  // • rdcc_w0 should be set to 1 if chunks that have been
273  // fully read/written will never be read/written again
274  //
275  // the above may be WRONG in general - it may only apply if the
276  // chunk size in the file matches the chunk size in the CachedArray.
277  // Otherwise, make sure that the file cache can hold at least as many
278  // chunks as are needed for a single array chunk.
279  if(compression_ == DEFAULT_COMPRESSION)
280  compression_ = ZLIB_FAST;
281  vigra_precondition(compression_ != LZ4,
282  "ChunkedArrayHDF5(): HDF5 does not support LZ4 compression.");
283 
284  vigra_precondition(this->size() > 0,
285  "ChunkedArrayHDF5(): invalid shape.");
286  typename detail::HDF5TypeTraits<T>::value_type init(this->fill_scalar_);
287  dataset_ = file_.createDataset<N, T>(dataset_name_,
288  this->shape_,
289  init,
290  this->chunk_shape_,
291  compression_);
292  }
293  else
294  {
295  dataset_ = file_.getDatasetHandleShared(dataset_name_);
296 
297  // check shape
298  ArrayVector<hsize_t> fileShape(file_.getDatasetShape(dataset_name_));
299  typedef detail::HDF5TypeTraits<T> TypeTraits;
300  if(TypeTraits::numberOfBands() > 1)
301  {
302  vigra_precondition(fileShape.size() == N+1,
303  "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
304  vigra_precondition(fileShape[0] == TypeTraits::numberOfBands(),
305  "ChunkedArrayHDF5(file, dataset): dataset has wrong number of bands.");
306  shape_type shape(fileShape.begin()+1);
307  if(this->size() > 0)
308  {
309  vigra_precondition(shape == this->shape_,
310  "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
311  }
312  else
313  {
314  this->shape_ = shape;
315  }
316  }
317  else
318  {
319  vigra_precondition(fileShape.size() == N,
320  "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
321  shape_type shape(fileShape.begin());
322  if(this->size() > 0)
323  {
324  vigra_precondition(shape == this->shape_,
325  "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
326  }
327  else
328  {
329  this->shape_ = shape;
330  ChunkStorage(detail::computeChunkArrayShape(shape, this->bits_, this->mask_)).swap(this->handle_array_);
331  }
332  }
333  typename ChunkStorage::iterator i = this->handle_array_.begin(),
334  end = this->handle_array_.end();
335  for(; i != end; ++i)
336  {
337  i->chunk_state_.store(base_type::chunk_asleep);
338  }
339  }
340  }
341 
343  {
344  closeImpl(true);
345  }
346 
347  void close()
348  {
349  closeImpl(false);
350  }
351 
352  void closeImpl(bool force_destroy)
353  {
354  flushToDiskImpl(true, force_destroy);
355  file_.close();
356  }
357 
358  void flushToDisk()
359  {
360  flushToDiskImpl(false, false);
361  }
362 
363  void flushToDiskImpl(bool destroy, bool force_destroy)
364  {
365  if(file_.isReadOnly())
366  return;
367 
368  threading::lock_guard<threading::mutex> guard(*this->chunk_lock_);
369  typename ChunkStorage::iterator i = this->handle_array_.begin(),
370  end = this->handle_array_.end();
371  if(destroy && !force_destroy)
372  {
373  for(; i != end; ++i)
374  {
375  vigra_precondition(i->chunk_state_.load() <= 0,
376  "ChunkedArrayHDF5::close(): cannot close file because there are active chunks.");
377  }
378  i = this->handle_array_.begin();
379  }
380  for(; i != end; ++i)
381  {
382  Chunk * chunk = static_cast<Chunk*>(i->pointer_);
383  if(!chunk)
384  continue;
385  if(destroy)
386  {
387  delete chunk;
388  i->pointer_ = 0;
389  }
390  else
391  {
392  chunk->write(false);
393  }
394  }
395  file_.flushToDisk();
396  }
397 
398  virtual bool isReadOnly() const
399  {
400  return file_.isReadOnly();
401  }
402 
403  virtual pointer loadChunk(ChunkBase<N, T> ** p, shape_type const & index)
404  {
405  vigra_precondition(file_.isOpen(),
406  "ChunkedArrayHDF5::loadChunk(): file was already closed.");
407  if(*p == 0)
408  {
409  *p = new Chunk(this->chunkShape(index), index*this->chunk_shape_, this, alloc_);
410  this->overhead_bytes_ += sizeof(Chunk);
411  }
412  return static_cast<Chunk *>(*p)->read();
413  }
414 
415  virtual bool unloadChunk(ChunkBase<N, T> * chunk, bool /* destroy */)
416  {
417  if(!file_.isOpen())
418  return true;
419  static_cast<Chunk *>(chunk)->write();
420  return false;
421  }
422 
423  virtual std::string backend() const
424  {
425  return "ChunkedArrayHDF5<'" + file_.filename() + "/" + dataset_name_ + "'>";
426  }
427 
428  virtual std::size_t dataBytes(ChunkBase<N,T> * c) const
429  {
430  return c->pointer_ == 0
431  ? 0
432  : static_cast<Chunk*>(c)->size()*sizeof(T);
433  }
434 
435  virtual std::size_t overheadBytesPerChunk() const
436  {
437  return sizeof(Chunk) + sizeof(SharedChunkHandle<N, T>);
438  }
439 
440  std::string fileName() const
441  {
442  return file_.filename();
443  }
444 
445  std::string datasetName() const
446  {
447  return dataset_name_;
448  }
449 
450  HDF5File file_; // copy of the file handle passed to the constructor
451  std::string dataset_name_; // internal path of the dataset inside the file
452  HDF5HandleShared dataset_; // handle of the dataset, set by init()
453  CompressionMethod compression_; // taken from options.compression_method; init() maps DEFAULT_COMPRESSION to ZLIB_FAST when creating a dataset
454  Alloc alloc_; // allocator handed to each Chunk for its in-memory buffer
455 };
456 
457 //@}
458 
459 } // namespace vigra
460 
461 #undef VIGRA_ASSERT_INSIDE
462 
463 #endif /* VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX */
Sequential iterator for MultiArrayView.
Definition: multi_fwd.hxx:161
std::size_t dataBytes() const
Bytes of main memory occupied by the array's data.
Definition: multi_array_chunked.hxx:1677
Option object for ChunkedArray construction.
Definition: multi_array_chunked.hxx:1274
MultiArrayIndex size() const
Return the number of elements in this array.
ChunkedArrayHDF5(HDF5File const &file, std::string const &dataset, HDF5File::OpenMode mode=HDF5File::ReadOnly, ChunkedArrayOptions const &options=ChunkedArrayOptions(), Alloc const &alloc=Alloc())
Construct for an already existing dataset with given 'options', using 'alloc' to manage the in-memory...
Definition: multi_array_chunked_hdf5.hxx:221
Main MultiArray class containing the memory management.
Definition: multi_array.hxx:2422
shape_type const & shape() const
Return the shape in this array.
Interface and base class for chunked arrays.
Definition: multi_array_chunked.hxx:470
Definition: accessor.hxx:43
view_type::difference_type difference_type
Definition: multi_array.hxx:2470
shape_type const & chunkShape() const
Return the global chunk shape.
Wrapper for shared hid_t objects.
Definition: hdf5impex.hxx:410
NumericTraits< V >::Promote prod(TinyVectorBase< V, SIZE, D1, D2 > const &l)
product of the vector's elements
Definition: tinyvector.hxx:2097
Definition: multi_array_chunked_hdf5.hxx:68
ChunkedArrayHDF5(HDF5File const &file, std::string const &dataset, HDF5File::OpenMode mode, shape_type const &shape, shape_type const &chunk_shape=shape_type(), ChunkedArrayOptions const &options=ChunkedArrayOptions(), Alloc const &alloc=Alloc())
Construct with given 'shape', 'chunk_shape' and 'options', using 'alloc' to manage the in-memory vers...
Definition: multi_array_chunked_hdf5.hxx:183
iterator end()
Create the end iterator for scan-order iteration over the entire chunked array.
Definition: multi_array_chunked.hxx:2391
OpenMode
Set how a file is opened.
Definition: hdf5impex.hxx:1013
Class for fixed size vectors. This class contains an array of size SIZE of the specified VALUETYPE...
Definition: accessor.hxx:940
Base class for, and view to, vigra::MultiArray.
Definition: multi_array.hxx:652
virtual std::size_t overheadBytesPerChunk() const
Bytes of main memory needed to manage a single chunk.
Definition: multi_array_chunked_hdf5.hxx:435
Access to HDF5 files.
Definition: hdf5impex.hxx:956

© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de)
Heidelberg Collaboratory for Image Processing, University of Heidelberg, Germany

html generated using doxygen and Python
vigra 1.11.0