casacore
AutoDiff.h
Go to the documentation of this file.
1 //# AutoDiff.h: An automatic differentiating class for functions
2 //# Copyright (C) 1995,1998,1999,2001,2002
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //#
27 //# $Id$
28 
29 #ifndef SCIMATH_AUTODIFF_H
30 #define SCIMATH_AUTODIFF_H
31 
32 //# Includes
33 #include <casacore/casa/aips.h>
34 #include <casacore/scimath/Mathematics/AutoDiffRep.h>
35 #include <casacore/casa/Containers/ObjectPool.h>
36 #include <casacore/casa/OS/Mutex.h>
37 
38 namespace casacore { //# NAMESPACE CASACORE - BEGIN
39 
40 //# Forward declarations
41 template <class T> class Vector;
42 template <class T> class AutoDiff;
43 
44 // <summary>
45 // Class that computes partial derivatives by automatic differentiation.
46 // </summary>
47 //
48 // <use visibility=export>
49 //
50 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tAutoDiff.cc" demos="dAutoDiff.cc">
51 // </reviewed>
52 //
53 // <prerequisite>
54 // <li>
55 // </prerequisite>
56 //
57 // <etymology>
58 // Class that computes partial derivatives by automatic differentiation, thus
59 // AutoDiff.
60 // </etymology>
61 //
62 // <synopsis>
63 // Class that computes partial derivatives by automatic differentiation.
64 // It does this by storing the value of a function and the values of its first
65 // derivatives with respect to its independent parameters. When a mathematical
66 // operation is applied to an AutoDiff object, the derivative values of the
67 // resulting new object are computed according to chain rules
68 // of differentiation.
69 //
70 // Suppose we have a function f(x0,x1,...,xn) and its differential is
71 // <srcblock>
72 // df = (df/dx0)*dx0 + (df/dx1)*dx1 + ... + (df/dxn)*dxn
73 // </srcblock>
74 // We can build a class that has the value of the function,
75 // f(x0,x1,...,xn), and the values of the derivatives, (df/dx0), (df/dx1),
76 // ..., (df/dxn) at (x0,x1,...,xn), as class members.
77 //
78 // Now if we have another function, g(x0,x1,...,xn) and its differential is
79 // dg = (dg/dx0)*dx0 + (dg/dx1)*dx1 + ... + (dg/dxn)*dxn,
80 // since
81 // <srcblock>
82 // d(f+g) = df + dg,
83 // d(f*g) = g*df + f*dg,
84 // d(f/g) = df/g - fdg/g^2,
85 // dsin(f) = cos(f)df,
86 // ...,
87 // </srcblock>
88 // we can calculate
89 // <srcblock>
90 // d(f+g), d(f*g), ...,
91 // </srcblock> based on our information on
92 // <srcblock>
93 // df/dx0, df/dx1, ..., dg/dx0, dg/dx1, ..., dg/dxn.
94 // </srcblock>
95 // All we need to do is to define the operators and derivatives of common
96 // mathematical functions.
97 //
98 // To be able to use the class as an automatic differentiator of a function
99 // object, we need a templated function object, i.e. an object with:
100 // <ul>
101 // <li> a <src> template <class T> T operator()(const T)</src>
102 // <li> or multiple variable input like:
103 // <src> template <class T> T operator()(const Vector<T> &)</src>
104 // <li> all variables and constants used in the calculation of the function
105 // value should have been typed with T
106 // </ul>
107 // A simple example of such a function object could be:
108 // <srcblock>
109 // template <class T> f {
110 // public:
111 // T operator()(const T &x, const T &a, const T &b) {
112 // return a*b*x; }
113 // };
114 // // Instantiate the following versions:
115 // template class f<Double>;
116 // template class f<AutoDiff<Double> >;
117 // </srcblock>
118 // A call with values will produce the function value:
119 // <srcblock>
120 // cout << f(7.0, 2.0, 3.0) << endl;
121 // // will produce the value at x=7 for a=2; b=3:
122 // 42
123 // // But a call indicating that we want derivatives to a and b:
124 // cout << f(AutoDiff<Double>(7.0), AutoDiff<Double>(2.0, 2, 0),
125 // AutoDiff<Double>(3.0, 2, 1)) << endl;
126 // // will produce the value at x=7 for a=2; b=3:
127 // // and the partial derivatives wrt a and b at x=7:
128 // (42, [21, 14])
129 // // The following will calculate the derivate wrt x:
130 // cout << f(AutoDiff<Double>(7.0, 1, 0), AutoDiff<Double>(2.0),
131 // AutoDiff<Double>(3.0)) << endl;
132 // (42,[6])
133 // </srcblock>
134 // In actual practice, there are a few rules to obey for the structure of
135 // the function object if you want to use the function object and its
136 // derivatives in least squares fitting procedures in the Fitting
137 // module. The major one is to view the function object having 'fixed' and
138 // 'variable' parameters. I.e., rather than viewing the function as
139 // depending on parameters <em>a, b, x</em> (<src>f(a,b,x)</src>), the
140 // function is considered to be <src>f(x; a,b)</src>, where <em>a, b</em>
141 // are 'fixed' parameters, and <em>x</em> a variable parameter.
142 // Fixed parameters should be contained in a
143 // <linkto class=FunctionParam>FunctionParam</linkto> container object;
144 // while the variable parameter(s) are given in the function
145 // <src>operator()</src>. See <linkto class=Function>Function</linkto> class
146 // for details.
147 //
148 // A Gaussian spectral profile would in general have the center frequency,
149 // the width and the amplitude as fixed parameters, and the frequency as
150 // a variable. Given a spectrum, you would solve for the fixed parameters,
151 // given spectrum values. However, in other cases the role of the
152 // parameters could be reversed. An example could be a whole stack of
153 // observed (in the laboratory) spectra at different temperatures at
154 // one frequency. In that case the width would be the variable parameter,
155 // and the frequency one of the fixed (and to be solved for)parameters.
156 //
157 // Since the calculation of the derivatives is done with simple overloading,
158 // the calculation of second (and higher) derivatives is easy. It should be
159 // noted that higher deivatives are inefficient in the current incarnation
160 // (there is no knowledge e.g. about symmetry in the Jacobian). However,
161 // it is a very good way to get the correct answers of the derivatives. In
162 // practice actual production code will be better off with specialization
163 // of the <src>f<AutoDiff<> ></src> implementation.
164 //
165 // The <src>AutoDiff</src> class is the class the user communicates with.
166 // Alias classes (<linkto class=AutoDiffA>AutoDiffA</linkto> and
167 // <linkto class=AutoDiffA>AutoDiffX</linkto>) exists
168 // to make it possible to have different incarnations of a templated
169 // method (e.g. a generic one and a specialized one). See the
170 // <src>dAutoDiff</src> demo for an example of its use.
171 //
172 // All operators and functions are declared in <linkto file=AutoDiffMath.h>
173 // AutoDiffMath</linkto>. The output operator in
174 // <linkto file=AutoDiffIO.h>AutoDiffIO</linkto>. The actual structure of the
175 // data block used by <src>AutoDiff</src> is described in
176 // <linkto class=AutoDiffRep>AutoDiffRep</linkto>.
177 // </synopsis>
178 //
179 // <example>
180 // <srcblock>
181 // // First a simple example.
182 // // We have a function of the form f(x,y,z); and want to know the
183 // // value of the function for x=10; y=20; z=30; and for
184 // // the derivatives at those point.
185 // // Specify the values; and indicate 3 derivatives:
186 // AutoDiff<Double> x(10.0, 3, 0);
187 // AutoDiff<Double> y(20.0, 3, 1);
188 // AutoDiff<Double> z(30.0, 3, 2);
189 // // The result will be:
190 // AutoDiff<Double> result = x*y + sin(z);
191 // cout << result.value() << endl;
192 // // 199.012
193 // cout << result.derivatives() << endl;
194 // // [20, 10, 0.154251]
195 // // Note: sin(30) = -0.988; cos(30) = 0.154251;
196 // </srcblock>
197 //
198 // See for an extensive example the demo program dAutoDiff. It is
199 // based on the example given above, and shows also the use of second
200 // derivatives (which is just using <src>AutoDiff<AutoDiff<Double> ></src>
201 // as template argument).
202 // <srcblock>
203 // // The function, with fixed parameters a,b:
204 // template <class T> class f {
205 // public:
206 // T operator()(const T& x) { return a_p*a_p*a_p*b_p*b_p*x; }
207 // void set(const T& a, const T& b) { a_p = a; b_p = b; }
208 // private:
209 // T a_p;
210 // T b_p;
211 // };
212 // // Call it with different template arguments:
213 // Double a0(2), b0(3), x0(7);
214 // f<Double> f0; f0.set(a0, b0);
215 // cout << "Value: " << f0(x0) << endl;
216 //
217 // AutoDiff<Double> a1(2,2,0), b1(3,2,1), x1(7);
218 // f<AutoDiff<Double> > f1; f1.set(a1, b1);
219 // cout << "Diff a,b: " << f1(x1) << endl;
220 //
221 // AutoDiff<Double> a2(2), b2(3), x2(7,1,0);
222 // f<AutoDiff<Double> > f2; f2.set(a2, b2);
223 // cout << "Diff x: " << f2(x2) << endl;
224 //
225 // AutoDiff<AutoDiff<Double> > a3(AutoDiff<Double>(2,2,0),2,0),
226 // b3(AutoDiff<Double>(3,2,1),2,1), x3(AutoDiff<Double>(7),2);
227 // f<AutoDiff<AutoDiff<Double> > > f3; f3.set(a3, b3);
228 // cout << "Diff2 a,b: " << f3(x3) << endl;
229 //
230 // AutoDiff<AutoDiff<Double> > a4(AutoDiff<Double>(2),1),
231 // b4(AutoDiff<Double>(3),1),
232 // x4(AutoDiff<Double>(7,1,0),1,0);
233 // f<AutoDiff<AutoDiff<Double> > > f4; f4.set(a4, b4);
234 // cout << "Diff2 x: " << f4(x4) << endl;
235 //
236 // // Result will be:
237 // // Value: 504
238 // // Diff a,b: (504, [756, 336])
239 // // Diff x: (504, [72])
240 // // Diff2 a,b: ((504, [756, 336]), [(756, [756, 504]), (336, [504, 112])])
241 // // Diff2 x: ((504, [72]), [(72, [0])])
242 //
243 // // It needed the template instantiations definitions:
244 // template class f<Double>;
245 // template class f<AutoDiff<Double> >;
246 // template class f<AutoDiff<AutoDiff<Double> > >;
247 // </srcblock>
248 // </example>
249 //
250 // <motivation>
251 // The creation of the class was motivated by least-squares non-linear fit where
252 // partial derivatives of a fitted function are needed. It would be tedious
253 // to create functionals for all partial derivatives of a function.
254 // </motivation>
255 //
256 // <templating arg=T>
257 // <li> any class that has the standard mathematical and comparisons
258 // defined
259 // </templating>
260 //
261 // <todo asof="2001/06/07">
262 // <li> Nothing I know
263 // </todo>
264 
265 template <class T> class AutoDiff {
266  public:
267  //# Typedefs
268  typedef T value_type;
269  typedef value_type& reference;
270  typedef const value_type& const_reference;
271  typedef value_type* iterator;
272  typedef const value_type* const_iterator;
273 
274  //# Constructors
275  // Construct a constant with a value of zero. Zero derivatives.
276  AutoDiff();
277 
278  // Construct a constant with a value of v. Zero derivatives.
279  AutoDiff(const T &v);
280 
281  // A function f(x0,x1,...,xn,...) with a value of v. The
282  // total number of derivatives is ndiffs, the nth derivative is one, and all
283  // others are zero.
284  AutoDiff(const T &v, const uInt ndiffs, const uInt n);
285 
286  // A function f(x0,x1,...,xn,...) with a value of v. The
287  // total number of derivatives is ndiffs.
288  // All derivatives are zero.
289  AutoDiff(const T &v, const uInt ndiffs);
290 
291  // Construct one from another
292  AutoDiff(const AutoDiff<T> &other);
293 
294  // Construct a function f(x0,x1,...,xn) of a value v and a vector of
295  // derivatives derivs(0) = df/dx0, derivs(1) = df/dx1, ...
296  AutoDiff(const T &v, const Vector<T> &derivs);
297 
298  ~AutoDiff();
299 
300  // Assignment operator. Assign a constant to variable. All derivatives
301  // are zero.
302  AutoDiff<T> &operator=(const T &v);
303 
304  // Assign one to another.
305  AutoDiff<T> &operator=(const AutoDiff<T> &other);
306 
307  // Assignment operators
308  // <group>
309  void operator*=(const AutoDiff<T> &other);
310  void operator/=(const AutoDiff<T> &other);
311  void operator+=(const AutoDiff<T> &other);
312  void operator-=(const AutoDiff<T> &other);
313  void operator*=(const T other);
314  void operator/=(const T other);
315  void operator+=(const T other);
316  void operator-=(const T other);
317  // </group>
318 
319  // Returns the pointer to the structure of value and derivatives.
320  // <group>
321  AutoDiffRep<T> *theRep() { return rep_p; }
322  const AutoDiffRep<T> *theRep() const { return rep_p; }
323  // </group>
324 
325  // Returns the value of the function
326  // <group>
327  T &value() { return rep_p->val_p; }
328  const T &value() const { return rep_p->val_p; }
329  // </group>
330 
331  // Returns a vector of the derivatives of an AutoDiff
332  // <group>
333  Vector<T> derivatives() const;
334  void derivatives(Vector<T> &res) const;
335  // </group>
336 
337  // Returns a specific derivative. The second set does not check for
338  // a valid which; the first set does through Vector addressing.
339  // <group>
340  T &derivative(uInt which) { return rep_p->grad_p(which); }
341  const T &derivative(uInt which) const { return rep_p->grad_p(which); }
342  T &deriv(uInt which) { return rep_p->grad_p[which]; }
343  const T &deriv(uInt which) const { return rep_p->grad_p[which]; }
344  // </group>
345 
346  // Return total number of derivatives
347  uInt nDerivatives() const { return rep_p->nd_p; }
348 
349  // Is it a constant, i.e., with zero derivatives?
350  Bool isConstant() const { return rep_p->nd_p == 0; }
351 
352  // Indicate that we are going to use a temporary value for the last time
353  // (e.g. at the of a function returning by value). This way superfluous
354  // copying can be circumvented.
355  const AutoDiff<T> &ref() { rep_p->nocopy_p = True; return *this; }
356 
357  private:
358  //# Data
359  // Pool of data blocks
361  // Mutex for thread-safe access to theirPool.
363  // Value representation
365 
366  //# Methods
367  // Release a struct of value and derivative data
368  void release() {
369  if (!rep_p->nocopy_p) {
370  ScopedMutexLock locker(theirMutex);
371  theirPool.release(rep_p, rep_p->nd_p);
372  } else {
373  rep_p->nocopy_p = False;
374  }
375  }
376 
377 };
378 
379 
380 } //# NAMESPACE CASACORE - END
381 
382 #ifndef CASACORE_NO_AUTO_TEMPLATES
383 #include <casacore/scimath/Mathematics/AutoDiff.tcc>
384 #endif //# CASACORE_NO_AUTO_TEMPLATES
385 #endif
A 1-D Specialization of the Array class.
Definition: ArrayIO.h:45
AutoDiffRep< T > * theRep()
Returns the pointer to the structure of value and derivatives.
Definition: AutoDiff.h:321
const AutoDiff< T > & ref()
Indicate that we are going to use a temporary value for the last time (e.g.
Definition: AutoDiff.h:355
uInt nd_p
The number of derivatives.
Definition: AutoDiffRep.h:138
AutoDiffRep< T > * rep_p
Value representation.
Definition: AutoDiff.h:364
static Mutex theirMutex
Mutex for thread-safe access to theirPool.
Definition: AutoDiff.h:362
AutoDiff()
Construct a constant with a value of zero.
value_type * iterator
Definition: AutoDiff.h:271
void release(T *obj, const Key key=Key())
Release an object obtained from the pool through get for re-use.
const T & value() const
Definition: AutoDiff.h:328
uInt nDerivatives() const
Return total number of derivatives.
Definition: AutoDiff.h:347
const T & deriv(uInt which) const
Definition: AutoDiff.h:343
Bool nocopy_p
A flag indicating that value will not be used anymore (to stop superfluous copying) ...
Definition: AutoDiffRep.h:141
A parameterized stack of re-usable objects.
Definition: ObjectPool.h:101
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:39
Class that computes partial derivatives by automatic differentiation.
Definition: AutoDiff.h:42
void release()
Release a struct of value and derivative data.
Definition: AutoDiff.h:368
static ObjectPool< AutoDiffRep< T >, uInt > theirPool
Pool of data blocks.
Definition: AutoDiff.h:360
Representation of an automatic differential class data.
Definition: AutoDiffRep.h:85
value_type & reference
Definition: AutoDiff.h:269
const Bool False
Definition: aipstype.h:41
const T & derivative(uInt which) const
Definition: AutoDiff.h:341
const AutoDiffRep< T > * theRep() const
Definition: AutoDiff.h:322
Wrapper around a pthreads mutex.
Definition: Mutex.h:49
void operator/=(const AutoDiff< T > &other)
T & deriv(uInt which)
Definition: AutoDiff.h:342
Vector< T > derivatives() const
Returns a vector of the derivatives of an AutoDiff.
Exception-safe lock/unlock of a mutex.
Definition: Mutex.h:97
Bool isConstant() const
Is it a constant, i.e., with zero derivatives?
Definition: AutoDiff.h:350
void operator-=(const AutoDiff< T > &other)
const value_type * const_iterator
Definition: AutoDiff.h:272
void operator*=(const AutoDiff< T > &other)
Assignment operators.
T & value()
Returns the value of the function.
Definition: AutoDiff.h:327
const Bool True
Definition: aipstype.h:40
this file contains all the compiler specific defines
Definition: mainpage.dox:28
void operator+=(const AutoDiff< T > &other)
T & derivative(uInt which)
Returns a specific derivative.
Definition: AutoDiff.h:340
unsigned int uInt
Definition: aipstype.h:48
AutoDiff< T > & operator=(const T &v)
Assignment operator.
const value_type & const_reference
Definition: AutoDiff.h:270