PartlyPrecomputedMatrix.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Partly Precomputed version of a matrix for quadratic programming
6  *
7  *
8  * \par
9  *
10  *
11  *
12  * \author T. Glasmachers, A. Demircioglu
13  * \date 2007-2014
14  *
15  *
16  * \par Copyright 1995-2015 Shark Development Team
17  *
18  * <BR><HR>
19  * This file is part of Shark.
20  * <http://image.diku.dk/shark/>
21  *
22  * Shark is free software: you can redistribute it and/or modify
23  * it under the terms of the GNU Lesser General Public License as published
24  * by the Free Software Foundation, either version 3 of the License, or
25  * (at your option) any later version.
26  *
27  * Shark is distributed in the hope that it will be useful,
28  * but WITHOUT ANY WARRANTY; without even the implied warranty of
29  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30  * GNU Lesser General Public License for more details.
31  *
32  * You should have received a copy of the GNU Lesser General Public License
33  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
34  *
35  */
36 //===========================================================================
37 
38 
39 #ifndef SHARK_LINALG_PARTLYPRECOMPUTEDMATRIX_H
40 #define SHARK_LINALG_PARTLYPRECOMPUTEDMATRIX_H
41 
42 #include <shark/Data/Dataset.h>
43 #include <shark/LinAlg/Base.h>
44 
45 #include <vector>
46 #include <cmath>
47 
48 
49 namespace shark
50 {
51 
52 ///
53 /// \brief Partly Precomputed version of a matrix for quadratic programming
54 ///
55 /// \par
56 /// The PartlyPrecomputedMatrix class computes all pairs of kernel
57 /// evaluations that fits the given cachesize in its constructor and
58 /// stores them im memory.
59 ///
60 /// \par
61 /// Use of this class may be beneficial for certain model
62 /// selection strategies, where the whole matrix does not fit into
63 /// memory, and the LRU cache will produce too much hit rates,
64 /// so that at least partially caching the kernel matrix will help.
65 /// In particular this will help the KernelSGD/Pegasos algorithm.
66 ///
67 template <class Matrix>
69 {
70 public:
71  typedef typename Matrix::QpFloatType QpFloatType;
72 
73  /// Constructor
74  /// \param[in] base matrix to be cached. it is assumed that this matrix is not precomputed,
75  /// but the (costy) computation takes place every time an entry is queried.
76  /// \param[in] cachesize size of the cache to use in bytes. the size of the cached matrix will
77  // depend on this value.
78  PartlyPrecomputedMatrix(Matrix* base, std::size_t cachesize = 0x4000000)
79  : m_cacheSize(cachesize)
80  , m_baseMatrix(base)
81  {
82  if((m_baseMatrix == NULL) || (m_baseMatrix ->size() == 0))
83  throw SHARKEXCEPTION("Cannot cache a NULL matrix!");
84 
85  // remember the original size of the matrix
87 
88  // determine how many bytes we need for a single row
89  size_t rowSizeBytes = m_originalNumberOfRows * sizeof(QpFloatType);
90 
91  // how many rows fit into our cache?
92  size_t m_nRows = (size_t) m_cacheSize / rowSizeBytes;
93  if(m_nRows < 1)
94  throw SHARKEXCEPTION("Cache size is smaller than the size of a row!");
95 
96  // if we have more space than needed, well, we do not need it.
97  if(m_nRows > m_originalNumberOfRows)
98  m_nRows = m_originalNumberOfRows ;
99 
100  // resize matrix
101  m_cachedMatrix.resize(m_nRows, m_baseMatrix ->size());
102 
103  // copy the rows
104  for(std::size_t r = 0; r < m_cachedMatrix.size1(); r++)
105  {
106  for(std::size_t j = 0; j < m_baseMatrix->size(); j++)
107  {
108  m_cachedMatrix(r, j) = (*m_baseMatrix)(r, j);
109  }
110  }
111  }
112 
113 
114 
115  /// return, if a given row is cached
116  /// \param[in] k row to check
117  /// \return is given row in cached matrix or not?
118  bool isCached(std::size_t k) const
119  {
120  if(k < m_cachedMatrix.size1())
121  return true;
122  return false;
123  }
124 
125 
126 
127  /// return a complete row of the matrix.
128  /// if the row is cached, it will be returned from there, if not, it will
129  /// be recomputed on-the-fly and not stored.
130  /// param[in] k row to compute
131  /// param[in] storage vector to store the row. must be the same size as a row!
132  void row(std::size_t k, blas::vector<QpFloatType> &storage) const
133  {
135  RANGE_CHECK(0 <= k);
136  SIZE_CHECK(storage.size() == m_cachedMatrix.size2());
137  if(isCached(k) == true)
138  {
139  for(std::size_t j = 0; j < m_cachedMatrix.size2(); j++)
140  {
141  storage[j] = m_cachedMatrix(k, j);
142  }
143  }
144  else
145  {
146  for(std::size_t j = 0; j < m_cachedMatrix.size2(); j++)
147  {
148  storage[j] = (*m_baseMatrix)(k, j);
149  }
150  }
151  }
152 
153 
154 
155  /// return a single matrix entry
156  /// param[in] i row of entry
157  /// param[in] j column entry
158  /// @return value of matrix at given position
159  QpFloatType operator()(std::size_t i, std::size_t j) const
160  {
161  return entry(i, j);
162  }
163 
164 
165 
166  /// return a single matrix entry
167  /// param[in] i row of entry
168  /// param[in] j column entry
169  /// @return value of matrix at given position
170  QpFloatType entry(std::size_t i, std::size_t j) const
171  {
172  // check if we have to compute that or not
173  if(isCached(i))
174  return m_cachedMatrix(i, j);
175 
176  // ok, need to compute that element
177  return (*m_baseMatrix)(i, j);
178  }
179 
180 
181 
182  /// return the number of cached rows
183  /// @return number of rows that are cached
184  std::size_t size() const
185  {
186  return m_cachedMatrix.size();
187  }
188 
189 
190 
191  /// return size of cached matrix in QpFloatType units
192  /// @return the capacity of the cached matrix in QpFloatType units
193  std::size_t getMaxCacheSize()
194  {
195  return m_cachedMatrix.size() * m_cachedMatrix.size2();
196  }
197 
198 
199 
200  /// return the dimension of a row in the cache (as we do not shorten our
201  /// rows, this must be the same as the dimension of a row in the original kernel matrix).
202  /// @return dimension of any cached row
203  std::size_t getCacheRowSize() const
204  {
205  return m_cachedMatrix.size2();
206  }
207 
208 
209 
210 protected:
211  /// container for precomputed values
213 
214  // maximal size of cache
215  size_t m_cacheSize;
216 
217  // original kernel matrix, will be accessed if entries outsied the cache are requested
218  Matrix* m_baseMatrix;
219 
220  // remember how big the original matrix was to prevent access errors
222 };
223 
224 }
225 #endif