OnlineRNNet.h
//===========================================================================
/*!
 *
 * \brief Offers the functions to create and to work with a
 * recurrent neural network.
 *
 *
 * \author O. Krause
 * \date 2010
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_ONLINERNNET_H
#define SHARK_MODELS_ONLINERNNET_H

#include <shark/Core/DLLSupport.h>
#include <shark/Models/AbstractModel.h>
#include <shark/Models/RecurrentStructure.h>
namespace shark{

//! \brief A recurrent neural network regression model optimized
//! for online learning.
//!
//! The OnlineRNNet can only process a single input at a time. Internally
//! it stores the last activation as well as the derivatives, which get updated
//! over the course of the sequence. Instead of feeding in the whole sequence,
//! the inputs must be given one after another. However, if the whole sequence is
//! available in advance, this implementation is not advisable, since it is a lot
//! slower than RNNet, which is targeted at whole sequences.
//!
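//! A minimal usage sketch (hypothetical names: `structure` is a RecurrentStructure
//! configured elsewhere, `sequence` a RealMatrix holding one timestep per row):
//! \code
//! OnlineRNNet net(&structure);
//! net.resetInternalState();                      // begin a new sequence
//! RealMatrix input(1, net.inputSize());
//! RealMatrix output(1, net.outputSize());
//! for(std::size_t t = 0; t != sequence.size1(); ++t){
//!     noalias(row(input, 0)) = row(sequence, t); // feed one timestep at a time
//!     net.eval(input, output);                   // row(output, 0) is the prediction for timestep t
//! }
//! \endcode
//!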
class OnlineRNNet:public AbstractModel<RealVector,RealVector>
{
public:
	//! creates a configured neural network
	SHARK_EXPORT_SYMBOL OnlineRNNet(RecurrentStructure* structure);

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "OnlineRNNet"; }

	//! \brief Feeds a timestep of a time series to the model and
	//! calculates its output.
	//!
	//! \param pattern Input pattern for the network.
	//! \param output Used to store the output of the network.
	SHARK_EXPORT_SYMBOL void eval(RealMatrix const& pattern,RealMatrix& output);
	using AbstractModel<RealVector,RealVector>::eval;

	/// obtain the input dimension
	std::size_t inputSize() const{
		return mpe_structure->inputs();
	}

	/// obtain the output dimension
	std::size_t outputSize() const{
		return mpe_structure->outputs();
	}

	//! \brief Calculates the weighted sum of gradients w.r.t. the parameters.
	//!
	//! Uses an iterative update scheme to calculate the gradient at timestep t from the gradient
	//! at timestep t-1 using forward propagation. This method requires O(n^3) memory and O(n^4) computations,
	//! where n is the number of neurons. So if the network is very large, RNNet should be used!
	//!
	//! \param pattern the pattern to evaluate
	//! \param coefficients the coefficients which are used to calculate the weighted sum
	//! \param gradient the calculated gradient
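	//!
	//! A per-timestep sketch (hypothetical names; assumes `net`, `input` and `output` as in the
	//! class example, a target matrix `target` of the same shape as `output`, and a step size `learningRate`):
	//! \code
	//! net.eval(input, output);
	//! RealMatrix coefficients = output - target;   // dE/dy of the squared error for this timestep
	//! RealVector gradient;
	//! net.weightedParameterDerivative(input, coefficients, gradient);
	//! net.setParameterVector(net.parameterVector() - learningRate * gradient);   // online gradient step
	//! \endcode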
	SHARK_EXPORT_SYMBOL void weightedParameterDerivative(RealMatrix const& pattern, RealMatrix const& coefficients, RealVector& gradient);

	//! get internal parameters of the model
	RealVector parameterVector() const{
		return mpe_structure->parameterVector();
	}
	//! set internal parameters of the model
	void setParameterVector(RealVector const& newParameters){
		mpe_structure->setParameterVector(newParameters);
	}

	//! number of parameters of the network
	std::size_t numberOfParameters() const{
		return mpe_structure->parameters();
	}

	//! \brief Resets the internal state of the network.
	//!
	//! It resets the network to zero activation and clears the derivative.
	//! This method needs to be called when a sequence ends and a new sequence is to be started.
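	//!
	//! For example (a sketch, continuing the class example above):
	//! \code
	//! net.resetInternalState();   // forget the previous sequence before feeding a new one
	//! \endcode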
	void resetInternalState(){
		m_lastActivation.clear();
		m_activation.clear();
		m_unitGradient.clear();
	}

	//! \brief This method sets the activation of the output neurons.
	//!
	//! This is useful when teacher forcing is used. When the network
	//! is trained to predict a time series and diverges from the sequence
	//! at an early stage, the resulting gradient might not be very helpful.
	//! In this case, teacher forcing can be applied to prevent divergence.
	//! However, the network might become unstable when teacher forcing is turned off,
	//! because there is no longer a force which prevents it from diverging.
	//!
	//! \param activation the desired activation of the output neurons
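	//!
	//! A teacher-forcing sketch (hypothetical names; `teacherSignal` is the desired output
	//! vector of the current timestep):
	//! \code
	//! net.eval(input, output);                 // the network's own prediction
	//! net.setOutputActivation(teacherSignal);  // clamp the output neurons to the teacher signal
	//! \endcode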
	void setOutputActivation(RealVector const& activation){
		// overwrite the activation of the output neurons (the last outputSize() units)
		std::size_t outputStart = m_activation.size() - outputSize();
		noalias(subrange(m_activation, outputStart, m_activation.size())) = activation;
	}
protected:

	//! the topology of the network.
	RecurrentStructure* mpe_structure;

	//! the activation of the network at time t (after evaluation)
	RealVector m_activation;
	//! the activation of the network at time t-1 (before evaluation)
	RealVector m_lastActivation;

	//! \brief the gradient of the hidden units with respect to every weight
	//!
	//! The gradient \f$ \frac{\delta y_k(t)}{\delta w_{ij}} \f$ is stored in this
	//! structure. Using this gradient, the derivative of the network can be calculated as
	//! \f[ \frac{\delta E(y(t))}{\delta w_{ij}}=\sum_{k=1}^n\frac{\delta E(y(t))}{\delta y_k} \frac{\delta y_k(t)}{\delta w_{ij}} \f]
	//! where \f$ y_k(t) \f$ is the activation of neuron \f$ k \f$ at timestep \f$ t \f$.
	//! The gradient needs to be updated after every timestep using the formula
	//! \f[ \frac{\delta y_k(t+1)}{\delta w_{ij}}= y'_k(t)\left[\sum_{l=1}^n w_{kl}\frac{\delta y_l(t)}{\delta w_{ij}} +\delta_{ki}y_j(t)\right] \f]
	//! so if the gradient is needed, don't forget to call weightedParameterDerivative at every timestep!
	RealMatrix m_unitGradient;
};
}

#endif //SHARK_MODELS_ONLINERNNET_H