include/shark/Models/OnlineRNNet.h
//===========================================================================
/*!
 *
 *
 * \brief Offers the functions to create and to work with a
 *        recurrent neural network.
 *
 *
 *
 * \author O. Krause
 * \date 2010
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_ONLINERNNET_H
#define SHARK_MODELS_ONLINERNNET_H

#include <shark/Core/DLLSupport.h>
#include <shark/Models/AbstractModel.h>
#include <shark/Models/RecurrentStructure.h>
namespace shark{

//! \brief A recurrent neural network regression model optimized
//!        for online learning.
//!
//! The OnlineRNNet can only process a single input at a time. Internally
//! it stores the last activation as well as the derivatives, which get updated
//! over the course of the sequence. Instead of feeding in the whole sequence,
//! the inputs must be given one after another. However, if the whole sequence is
//! available in advance, this implementation is not advisable, since it is a lot slower
//! than RNNet, which is targeted at whole sequences.
//!
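//! \par Example
//! A minimal usage sketch, added here for illustration only; it is not part of the
//! original header. The way the RecurrentStructure is configured, as well as the
//! variable 'sequence', are assumptions made for the example -- check
//! RecurrentStructure.h for the actual configuration interface.
//! \code
//! shark::RecurrentStructure structure;               // assumed to be configured elsewhere
//! shark::OnlineRNNet network(&structure);
//!
//! shark::RealMatrix input(1, network.inputSize());   // exactly one timestep per call
//! shark::RealMatrix output(1, network.outputSize());
//! // 'sequence' is a hypothetical std::vector<shark::RealVector> of inputs
//! for(std::size_t t = 0; t != sequence.size(); ++t){
//!     noalias(row(input, 0)) = sequence[t];
//!     network.eval(input, output);                   // prediction for timestep t is in row(output, 0)
//! }
//! network.resetInternalState();                      // call before starting the next sequence
//! \endcode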
class OnlineRNNet: public AbstractModel<RealVector,RealVector>
{
public:
    //! creates a configured neural network
    SHARK_EXPORT_SYMBOL OnlineRNNet(RecurrentStructure* structure);

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "OnlineRNNet"; }

    //! \brief Feeds a timestep of a time series to the model and
    //! calculates its output.
    //!
    //! \param pattern Input patterns for the network.
    //! \param output Used to store the outputs of the network.
    SHARK_EXPORT_SYMBOL void eval(RealMatrix const& pattern, RealMatrix& output);
    using AbstractModel<RealVector,RealVector>::eval;

    /// obtain the input dimension
    std::size_t inputSize() const{
        return mpe_structure->inputs();
    }

    /// obtain the output dimension
    std::size_t outputSize() const{
        return mpe_structure->outputs();
    }

    //! \brief Calculates the weighted sum of gradients w.r.t. the parameters.
    //!
    //! Uses an iterative update scheme to calculate the gradient at timestep t from the gradient
    //! at timestep t-1 using forward propagation. This method requires O(n^3) memory and O(n^4) computations,
    //! where n is the number of neurons. So if the network is very large, RNNet should be used!
    //!
    //! \param pattern      the pattern to evaluate
    //! \param coefficients the coefficients which are used to calculate the weighted sum
    //! \param gradient     the calculated gradient
    SHARK_EXPORT_SYMBOL void weightedParameterDerivative(RealMatrix const& pattern, RealMatrix const& coefficients, RealVector& gradient);
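
    // --- Illustrative note (not part of the original header) --------------------------
    // A single online gradient-descent step might look like the sketch below. The
    // learning rate 'eta', the current input 'x_t', and the target 'target_t' are
    // assumptions made for the example; the squared-error coefficients follow from
    // E = 1/2 * ||y - target||^2, so dE/dy = y - target.
    //
    //   shark::RealMatrix input(1, net.inputSize()), output(1, net.outputSize());
    //   noalias(row(input, 0)) = x_t;                 // feed the current timestep
    //   net.eval(input, output);                      // forward step, updates internal state
    //
    //   shark::RealMatrix coefficients = output;      // coefficients = dE/dy
    //   noalias(row(coefficients, 0)) -= target_t;
    //
    //   shark::RealVector gradient;
    //   net.weightedParameterDerivative(input, coefficients, gradient);
    //   net.setParameterVector(net.parameterVector() - eta * gradient);   // gradient-descent update
    // -----------------------------------------------------------------------------------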

    //! get internal parameters of the model
    RealVector parameterVector() const{
        return mpe_structure->parameterVector();
    }
    //! set internal parameters of the model
    void setParameterVector(RealVector const& newParameters){
        mpe_structure->setParameterVector(newParameters);
    }

    //! number of parameters of the network
    std::size_t numberOfParameters() const{
        return mpe_structure->parameters();
    }

    //! Resets the internal state of the network.
    //! It resets the network to zero activation and clears the derivative.
    //! This method needs to be called when a sequence ends and a new sequence is to be started.
    void resetInternalState(){
        m_lastActivation.clear();
        m_activation.clear();
        m_unitGradient.clear();
    }

    //! \brief This method sets the activation of the output neurons.
    //!
    //! This is useful when teacher forcing is used. When the network
    //! is trained to predict a time series and diverges from the sequence
    //! at an early stage, the resulting gradient might not be very helpful.
    //! In this case, teacher forcing can be applied to prevent divergence.
    //! However, the network might become unstable when teacher forcing is turned off,
    //! because there is no force which prevents it from diverging anymore.
    //!
    //! \param activation The new activation values of the output neurons.
    void setOutputActivation(RealVector const& activation){
        m_activation.resize(mpe_structure->numberOfUnits());
        subrange(m_activation, mpe_structure->numberOfUnits()-outputSize(), mpe_structure->numberOfUnits()) = activation;
    }
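
    // --- Illustrative note (not part of the original header) --------------------------
    // Teacher-forcing sketch: after evaluating one timestep, the output activation is
    // overwritten with the known target so that the next step is computed from the
    // ground truth instead of the (possibly diverged) prediction. 'net', 'input' and
    // 'output' are as in the earlier sketch; 'target_t' is a hypothetical RealVector
    // holding the desired output for this step.
    //
    //   net.eval(input, output);              // prediction for timestep t
    //   net.setOutputActivation(target_t);    // force the output units to the target
    //   // ... the next eval() call now starts from the forced activation
    // -----------------------------------------------------------------------------------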

protected:

    //! the topology of the network.
    RecurrentStructure* mpe_structure;

    //! the activation of the network at time t (after evaluation)
    RealVector m_activation;
    //! the activation of the network at time t-1 (before evaluation)
    RealVector m_lastActivation;

    //! \brief The gradient of the hidden units with respect to every weight.
    //!
    //! The gradient \f$ \frac{\delta y_k(t)}{\delta w_{ij}} \f$ is stored in this
    //! structure. Using this gradient, the derivative of the network can be calculated as
    //! \f[ \frac{\delta E(y(t))}{\delta w_{ij}}=\sum_{k=1}^n \frac{\delta E(y(t))}{\delta y_k} \frac{\delta y_k(t)}{\delta w_{ij}} \f]
    //! where \f$ y_k(t) \f$ is the activation of neuron \f$ k \f$ at timestep \f$ t \f$.
    //! The gradient needs to be updated after every timestep using the formula
    //! \f[ \frac{\delta y_k(t+1)}{\delta w_{ij}}= y'_k(t)\left[\sum_{l=1}^n w_{kl}\frac{\delta y_l(t)}{\delta w_{ij}} +\delta_{ki}y_j(t)\right]\f]
    //! so if the gradient is needed, don't forget to call weightedParameterDerivative at every timestep!
    RealMatrix m_unitGradient;
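    // Note (added for clarity, not in the original header): with n neurons the network
    // has O(n^2) weights, so this matrix stores O(n^3) partial derivatives; updating
    // each entry via the sum above costs O(n) operations per timestep, which is where
    // the O(n^3) memory / O(n^4) time figures quoted at weightedParameterDerivative come from.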
};
}

#endif //SHARK_MODELS_ONLINERNNET_H