VDCMA.h
Go to the documentation of this file.
1 /*!
2  * \brief Implements the VD-CMA-ES Algorithm
3  *
4  * \author Oswin Krause
5  * \date April 2014
6  *
7  * \par Copyright 1995-2015 Shark Development Team
8  *
9  * <BR><HR>
10  * This file is part of Shark.
11  * <http://image.diku.dk/shark/>
12  *
13  * Shark is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU Lesser General Public License as published
15  * by the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * Shark is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public License
24  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
25  *
26  */
27 
28 #ifndef SHARK_ALGORITHMS_DIRECT_SEARCH_VD_CMA_H
29 #define SHARK_ALGORITHMS_DIRECT_SEARCH_VD_CMA_H
30 
34 
38 
39 
40 /// \brief Implements the VD-CMA-ES Algorithm
41 ///
42 /// The VD-CMA-ES implements a restricted form of the CMA-ES where the covariance matrix is restriced to be (D+vv^T)
43 /// where D is a diagonal matrix and v a single vector. Therefore this variant is capable of large-scale optimisation
44 ///
45 /// For more reference, see the paper
46 /// Akimoto, Y., A. Auger, and N. Hansen (2014). Comparison-Based Natural Gradient Optimization in High Dimension.
47 /// To appear in Genetic and Evolutionary Computation Conference (GECCO 2014), Proceedings, ACM
48 ///
49 /// The implementation differs from the paper to be closer to the reference implementation and to have better numerical
50 /// accuracy.
51 namespace shark {
52 class VDCMA : public AbstractSingleObjectiveOptimizer<RealVector >
53 {
54 private:
55  double chi( std::size_t n ) {
56  return( std::sqrt( static_cast<double>( n ) )*(1. - 1./(4.*n) + 1./(21.*n*n)) );
57  }
58 public:
59 
60  /// \brief Default c'tor.
61  VDCMA(DefaultRngType& rng = Rng::globalRng):m_initialSigma(0.0), mpe_rng(&rng){
63  }
64 
65  /// \brief From INameable: return the class name.
66  std::string name() const
67  { return "VDCMA-ES"; }
68 
69  /// \brief Calculates lambda for the supplied dimensionality n.
70  std::size_t suggestLambda( std::size_t dimension ) {
71  return unsigned( 4. + ::floor( 3. * ::log( static_cast<double>( dimension ) ) ) ); // eq. (44)
72  }
73 
74  /// \brief Calculates mu for the supplied lambda and the recombination strategy.
75  std::size_t suggestMu( std::size_t lambda) {
76  return lambda / 2; // eq. (44)
77  }
78 
80 
81  void init( ObjectiveFunctionType& function, SearchPointType const& p) {
82  checkFeatures(function);
83  function.init();
84 
85  std::size_t lambda = suggestLambda( p.size() );
86  std::size_t mu = suggestMu( lambda );
87  double sigma = m_initialSigma;
88  if(m_initialSigma == 0)
89  sigma = 1.0/std::sqrt(double(p.size()));
90 
91  init( function,
92  p,
93  lambda,
94  mu,
95  sigma
96  );
97  }
98 
99  /// \brief Initializes the algorithm for the supplied objective function.
100  void init(
101  ObjectiveFunctionType const& function,
102  SearchPointType const& initialSearchPoint,
103  std::size_t lambda,
104  std::size_t mu,
105  double initialSigma
106  ) {
107 
108  m_numberOfVariables = function.numberOfVariables();
109  m_lambda = lambda;
110  m_mu = mu;
111  m_sigma = initialSigma;
112 
113  m_mean = blas::repeat(0.0,m_numberOfVariables);
114  m_vn.resize(m_numberOfVariables);
115  for(std::size_t i = 0; i != m_numberOfVariables;++i){
116  m_vn(i) = uni(*mpe_rng,0,1.0/m_numberOfVariables);
117  }
118  m_normv = norm_2(m_vn);
119  m_vn /= m_normv;
120 
121  m_D = blas::repeat(1.0,m_numberOfVariables);
122  m_evolutionPathC = blas::repeat(0.0,m_numberOfVariables);
123  m_evolutionPathSigma = blas::repeat(0.0,m_numberOfVariables);
124 
125  //set initial point
126  m_mean = initialSearchPoint;
127  m_best.point = initialSearchPoint;
128  m_best.value = function(initialSearchPoint);
129 
130  m_counter = 0;//first iteration
131 
132  //weighting of the mu-best individuals
133  m_weights.resize(m_mu);
134  for (std::size_t i = 0; i < m_mu; i++){
135  m_weights(i) = ::log(mu + 0.5) - ::log(1. + i);
136  }
137  m_weights /= sum(m_weights);
138 
139  // constants based on (4) and Step 3 in the algorithm
140  m_muEff = 1. / sum(sqr(m_weights)); // equal to sum(m_weights)^2 / sum(sqr(m_weights))
141  m_cSigma = 0.5/(1+std::sqrt(m_numberOfVariables/m_muEff));
142  m_dSigma = 1. + 2. * std::max(0., std::sqrt((m_muEff-1.)/(m_numberOfVariables+1)) - 1.) + m_cSigma;
143 
144  m_cC = (4. + m_muEff / m_numberOfVariables) / (m_numberOfVariables + 4. + 2 * m_muEff / m_numberOfVariables);
145  double correction = (m_numberOfVariables - 5.0)/6.0;
146  m_c1 = correction*2 / (sqr(m_numberOfVariables + 1.3) + m_muEff);
147  m_cMu = std::min(1. - m_c1, correction* 2 * (m_muEff - 2. + 1./m_muEff) / (sqr(m_numberOfVariables + 2) + m_muEff));
148  }
149 
150  /// \brief Executes one iteration of the algorithm.
151  void step(ObjectiveFunctionType const& function){
152 
153  std::vector< Individual<RealVector, double, RealVector> > offspring( m_lambda );
154 
155  PenalizingEvaluator penalizingEvaluator;
156  for( std::size_t i = 0; i < offspring.size(); i++ ) {
157  createSample(offspring[i].searchPoint(),offspring[i].chromosome());
158  }
159  penalizingEvaluator( function, offspring.begin(), offspring.end() );
160 
161  // Selection
162  std::vector< Individual<RealVector, double, RealVector> > parents( m_mu );
164  selection(offspring.begin(),offspring.end(),parents.begin(), parents.end());
165  // Strategy parameter update
166  m_counter++; // increase generation counter
167  updateStrategyParameters( parents );
168 
169  m_best.point= parents[ 0 ].searchPoint();
170  m_best.value= parents[ 0 ].unpenalizedFitness();
171  }
172 
173  /// \brief Accesses the current step size.
174  double sigma() const {
175  return m_sigma;
176  }
177 
178  /// \brief Accesses the current step size.
179  void setSigma(double sigma) {
180  m_sigma = sigma;
181  }
182 
183  /// \brief set the initial step size of the algorithm.
184  ///
185  /// Sets the initial sigma at init to a given value. If this is 0, which it is
186  /// by default, the default initialisation will be sigma= 1/sqrt(N) where N
187  /// is the number of variables to optimize.
188  ///
189  /// this method is the prefered one instead of init()
190  void setInitialSigma(double initialSigma){
191  m_initialSigma = initialSigma;
192  }
193 
194 
195  /// \brief Accesses the current population mean.
196  RealVector const& mean() const {
197  return m_mean;
198  }
199 
200  /// \brief Accesses the current weighting vector.
201  RealVector const& weights() const {
202  return m_weights;
203  }
204 
205  /// \brief Accesses the evolution path for the covariance matrix update.
206  RealVector const& evolutionPath() const {
207  return m_evolutionPathC;
208  }
209 
210  /// \brief Accesses the evolution path for the step size update.
211  RealVector const& evolutionPathSigma() const {
212  return m_evolutionPathSigma;
213  }
214 
215  ///\brief Returns the size of the parent population \f$\mu\f$.
216  std::size_t mu() const {
217  return m_mu;
218  }
219 
220  ///\brief Returns a mutabl reference to the size of the parent population \f$\mu\f$.
221  std::size_t& mu(){
222  return m_mu;
223  }
224 
225  ///\brief Returns a immutable reference to the size of the offspring population \f$\mu\f$.
226  std::size_t lambda()const{
227  return m_lambda;
228  }
229 
230  ///\brief Returns a mutable reference to the size of the offspring population \f$\mu\f$.
231  std::size_t & lambda(){
232  return m_lambda;
233  }
234 
235 private:
236  /// \brief Updates the strategy parameters based on the supplied offspring population.
237  ///
238  /// The chromosome stores the y-vector that is the step from the mean in D=1, sigma=1 space.
239  void updateStrategyParameters( std::vector<Individual<RealVector, double, RealVector> >& offspring ) {
240  RealVector m( m_numberOfVariables, 0. );
241  RealVector z( m_numberOfVariables, 0. );
242 
243  for( std::size_t j = 0; j < offspring.size(); j++ ){
244  noalias(m) += m_weights( j ) * offspring[j].searchPoint();
245  noalias(z) += m_weights( j ) * offspring[j].chromosome();
246  }
247  //compute z from y= (1+(sqrt(1+||v||^2)-1)v_n v_n^T)z
248  //therefore z= (1+(1/sqrt(1+||v||^2)-1)v_n v_n^T)y
249  double b=(1/std::sqrt(1+sqr(m_normv))-1);
250  noalias(z)+= b*inner_prod(z,m_vn)*m_vn;
251 
252  //update paths
253  noalias(m_evolutionPathSigma) = (1. - m_cSigma)*m_evolutionPathSigma + std::sqrt( m_cSigma * (2. - m_cSigma) * m_muEff ) * z;
254  // compute h_sigma
255  double hSigLHS = norm_2( m_evolutionPathSigma ) / std::sqrt(1. - pow((1 - m_cSigma), 2.*(m_counter+1)));
256  double hSigRHS = (1.4 + 2 / (m_numberOfVariables+1.)) * chi( m_numberOfVariables );
257  double hSig = 0;
258  if(hSigLHS < hSigRHS) hSig = 1.;
259  noalias(m_evolutionPathC) = (1. - m_cC ) * m_evolutionPathC + hSig * std::sqrt( m_cC * (2. - m_cC) * m_muEff ) * (m - m_mean) / m_sigma;
260 
261 
262 
263  //we split the computation of s and t in the paper in two parts
264  //we compute the first two steps and then compute the weighted mean over samples and
265  //evolution path. afterwards we compute the rest using the mean result
266  //the paper describes this as first computing S and T for all samples and compute the weighted
267  //mean of that, but the reference implementation does it the other way to prevent numerical instabilities
268  RealVector meanS(m_numberOfVariables,0.0);
269  RealVector meanT(m_numberOfVariables,0.0);
270  for(std::size_t j = 0; j != mu(); ++j){
271  computeSAndTFirst(offspring[j].chromosome(),meanS,meanT,m_cMu*m_weights(j));
272  }
273  computeSAndTFirst(m_evolutionPathC/m_D,meanS,meanT,hSig*m_c1);
274 
275  //compute the remaining mean S and T steps
276  computeSAndTSecond(meanS,meanT);
277 
278  //compute update to v and d
279  noalias(m_D) += m_D*meanS;
280  noalias(m_vn) = m_vn*m_normv+meanT/m_normv;//result is v and not vn
281  //store the new v separately as vn and its norm
282  m_normv = norm_2(m_vn);
283  m_vn /= m_normv;
284 
285  //update step length
286  m_sigma *= std::exp( (m_cSigma / m_dSigma) * (norm_2(m_evolutionPathSigma)/ chi( m_numberOfVariables ) - 1.) ); // eq. (39)
287 
288  //update mean
289  m_mean = m;
290  }
291 
292  //samples a point and stores additionally y=(x-m_mean)/(sigma*D)
293  //as this is required for calculation later
294  void createSample(RealVector& x,RealVector& y)const{
295  x.resize(m_numberOfVariables);
296  y.resize(m_numberOfVariables);
297  for(std::size_t i = 0; i != m_numberOfVariables; ++i){
298  y(i) = gauss(*mpe_rng,0,1);
299  }
300  double a = std::sqrt(1+sqr(m_normv))-1;
301  a *= inner_prod(y,m_vn);
302  noalias(y) +=a*m_vn;
303  noalias(x) = m_mean+ m_sigma*m_D*y;
304  }
305 
306  ///\brief computes the sample wise first two steps of S and T of theorem 3.6 in the paper
307  ///
308  /// S and T arguments accordingly
309  void computeSAndTFirst(RealVector const& y, RealVector& s,RealVector& t, double weight )const{
310  if(weight == 0) return;//nothing to do
311  double yvn = inner_prod(y,m_vn);
312  double normv2 = sqr(m_normv);
313  double gammav = 1+normv2;
314  //step 1
315  noalias(s) += weight*(sqr(y) - (normv2/gammav*yvn)*(y*m_vn)- 1.0);
316  //step 2
317  noalias(t) += weight*(yvn*y - 0.5*(sqr(yvn)+gammav)*m_vn);
318  }
319 
320  ///\brief computes the last three steps of S and T of theorem 3.6 in the paper
321  void computeSAndTSecond(RealVector& s,RealVector& t)const{
322  RealVector vn2 = m_vn*m_vn;
323  double normv2 = sqr(m_normv);
324  double gammav = 1+normv2;
325  //alpha of 3.5
326  double alpha = sqr(normv2)+(2*gammav - std::sqrt(gammav))/max(vn2);
327  alpha=std::sqrt(alpha);
328  alpha /= 2+normv2;
329  alpha = std::min(alpha,1.0);
330  //constants (b,A) of 3.4
331  double b=-(1-sqr(alpha))*sqr(normv2)/gammav+2*sqr(alpha);
332  RealVector A= 2.0 - (b+2*sqr(alpha))*vn2;
333  RealVector invAvn2= vn2/A;
334 
335  //step 3
336  noalias(s) -= alpha/gammav*((2+normv2)*(m_vn*t)-normv2*inner_prod(m_vn,t)*vn2);
337  //step 4
338  noalias(s) = s/A -b*inner_prod(s,invAvn2)/(1+b*inner_prod(vn2,invAvn2))*invAvn2;
339  //step 5
340  noalias(t) -= alpha*((2+normv2)*(m_vn*s)-inner_prod(s,vn2)*m_vn);
341  }
342 
343  std::size_t m_numberOfVariables; ///< Stores the dimensionality of the search space.
344  std::size_t m_mu; ///< The size of the parent population.
345  std::size_t m_lambda; ///< The size of the offspring population, needs to be larger than mu.
346 
347  double m_initialSigma;///0 by default which indicates initial sigma = 1/sqrt(N)
348  double m_sigma;
349  double m_cC;
350  double m_c1;
351  double m_cMu;
352  double m_cSigma;
353  double m_dSigma;
354  double m_muEff;
355 
356  RealVector m_mean;
357  RealVector m_weights;
358 
359  RealVector m_evolutionPathC;
360  RealVector m_evolutionPathSigma;
361 
362  ///\brief normalised vector v
363  RealVector m_vn;
364  ///\brief norm of the vector v, therefore v=m_vn*m_normv
365  double m_normv;
366 
367  RealVector m_D;
368 
369  unsigned m_counter; ///< counter for generations
370 
371  DefaultRngType* mpe_rng;
372 
373 
374 };
375 
376 }
377 
378 #endif