#ifndef SHARK_MODELS_FFNET_H
#define SHARK_MODELS_FFNET_H

#include <shark/Models/AbstractModel.h>
#include <boost/serialization/vector.hpp>

template<class HiddenNeuron, class OutputNeuron>
class FFNet : public AbstractModel<RealVector,RealVector>{

	struct InternalState: public State{
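		// Holds the output of every neuron for the current batch: one row per neuron, one column per pattern.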
		RealMatrix responses;

		void resize(std::size_t neurons, std::size_t patterns){
			responses.resize(neurons,patterns);
		}
	};

public:
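	///\brief Creates an empty feed-forward network. The topology is defined later via setStructure().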
	FFNet()
	:m_numberOfNeurons(0),m_inputNeurons(0),m_outputNeurons(0){
		m_features|=HAS_FIRST_PARAMETER_DERIVATIVE;
		m_features|=HAS_FIRST_INPUT_DERIVATIVE;
	}
	std::size_t inputSize()const{ return m_inputNeurons; }
	std::size_t outputSize()const{ return m_outputNeurons; }
	std::size_t numberOfNeurons()const{ return m_numberOfNeurons; }
	std::size_t numberOfHiddenNeurons()const{ return numberOfNeurons() - inputSize() - outputSize(); }
	std::vector<RealMatrix> const& layerMatrices()const{ return m_layerMatrix; }
	RealMatrix const& layerMatrix(std::size_t layer)const{ return m_layerMatrix[layer]; }
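	///\brief Sets the weight matrix and the bias vector of a single layer.
	///
	/// After the new values are copied, the parameter vector is re-applied so that the
	/// backpropagation matrices stay consistent with the forward weights.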
	void setLayer(std::size_t layerNumber, RealMatrix const& m, RealVector const& bias){
		SIZE_CHECK(m.size1() == m_layerMatrix[layerNumber].size1());
		SIZE_CHECK(m.size2() == m_layerMatrix[layerNumber].size2());
		SIZE_CHECK(m.size1() == bias.size());
		m_layerMatrix[layerNumber] = m;
		std::size_t start = 0;
		for(std::size_t i = 0; i != layerNumber; ++i){
			start += m_layerMatrix[i].size1();
		}
		noalias(subrange(m_bias,start,start+bias.size())) = bias;
		setParameterVector(parameterVector());
	}
	std::vector<RealMatrix> const& backpropMatrices()const{ return m_backpropMatrix; }
	RealMatrix const& inputOutputShortcut()const{ return m_inputOutputShortcut; }
	HiddenNeuron const& hiddenActivationFunction()const{ return m_hiddenNeuron; }
	OutputNeuron const& outputActivationFunction()const{ return m_outputNeuron; }
	HiddenNeuron& hiddenActivationFunction(){ return m_hiddenNeuron; }
	OutputNeuron& outputActivationFunction(){ return m_outputNeuron; }
	const RealVector& bias()const{ return m_bias; }
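	///\brief Returns the part of the bias vector that belongs to the i-th layer.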
	RealVector bias(std::size_t layer)const{
		std::size_t start = 0;
		for(std::size_t i = 0; i != layer; ++i){
			start += layerMatrices()[i].size1();
		}
		return subrange(m_bias,start,start+layerMatrices()[layer].size1());
	}
	std::size_t numberOfParameters()const{
		std::size_t numParams = m_inputOutputShortcut.size1()*m_inputOutputShortcut.size2();
		numParams += bias().size();
		for(std::size_t i = 0; i != layerMatrices().size(); ++i){
			numParams += layerMatrices()[i].size1()*layerMatrices()[i].size2();
		}
		return numParams;
	}
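	///\brief Sets the parameter vector, i.e. the layer weights, the bias values and, if present, the shortcut weights.
	///
	/// Besides storing the forward weights, this rebuilds the backpropagation matrices: for every forward
	/// connection from a neuron in layer i to a neuron in layer j, the backpropagation matrix of layer i
	/// holds the transposed weight, so that deltas can be propagated backwards with plain matrix products.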
	void setParameterVector(RealVector const& newParameters){
		init(newParameters) >> matrixSet(m_layerMatrix),m_bias,toVector(m_inputOutputShortcut);

		std::size_t layeriStart = 0;
		for(std::size_t layeri = 0; layeri != m_layerMatrix.size(); ++layeri){
			std::size_t columni = 0;
			std::size_t neuronsi = inputSize();
			if(layeri > 0)
				neuronsi = m_layerMatrix[layeri-1].size1();

			std::size_t layerjStart = layeriStart + neuronsi;
			for(std::size_t layerj = layeri; layerj != m_layerMatrix.size(); ++layerj){
				std::size_t neuronsj = m_layerMatrix[layerj].size1();
				if(layerjStart-m_layerMatrix[layerj].size2() <= layeriStart){
					std::size_t weightStartj = layeriStart - (layerjStart - m_layerMatrix[layerj].size2());
					noalias(columns(m_backpropMatrix[layeri],columni,columni+neuronsj))
						= trans(columns(m_layerMatrix[layerj],weightStartj,weightStartj+neuronsi));
					columni += neuronsj;
				}
				layerjStart += neuronsj;
			}
			layeriStart += neuronsi;
		}
	}
	RealMatrix const& neuronResponses(State const& state)const{
		InternalState const& s = state.toState<InternalState>();
		return s.responses;
	}
	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new InternalState());
	}
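	///\brief Computes the response of a single layer given the input of that layer.
	///
	/// This is useful when only part of the network has to be evaluated. Note that shortcut
	/// connections are ignored, so the result is only correct for networks without them.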
	void evalLayer(std::size_t layer,RealMatrix const& patterns,RealMatrix& outputs)const{
		std::size_t numPatterns = patterns.size1();
		std::size_t numOutputs = m_layerMatrix[layer].size1();
		outputs.resize(numPatterns,numOutputs);

		noalias(outputs) = prod(patterns,trans(layerMatrix(layer)));
		if(!bias().empty()){
			noalias(outputs) += repeat(bias(layer),numPatterns);
		}
		if(layer < m_layerMatrix.size()-1) {
			noalias(outputs) = m_hiddenNeuron(outputs);
		}
		else{
			noalias(outputs) = m_outputNeuron(outputs);
		}
	}
	Data<RealVector> evalLayer(std::size_t layer, Data<RealVector> const& patterns)const{
		int batches = (int) patterns.numberOfBatches();
		Data<RealVector> result(batches);
		SHARK_PARALLEL_FOR(int i = 0; i < batches; ++i){
			evalLayer(layer,patterns.batch(i),result.batch(i));
		}
		return result;
	}
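	///\brief Propagates a whole batch of patterns through the network.
	///
	/// The responses of all neurons are stored in the state so that the derivative functions
	/// can reuse them for backpropagation.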
	void eval(RealMatrix const& patterns,RealMatrix& output, State& state)const{
		InternalState& s = state.toState<InternalState>();
		std::size_t numPatterns = patterns.size1();

		s.resize(numberOfNeurons(),numPatterns);
		s.responses.clear();
		noalias(rows(s.responses,0,m_inputNeurons)) = trans(patterns);

		std::size_t beginNeuron = m_inputNeurons;

		for(std::size_t layer = 0; layer != m_layerMatrix.size();++layer){
			const RealMatrix& weights = m_layerMatrix[layer];
			std::size_t endNeuron = beginNeuron + weights.size1();

			RealSubMatrix const input = rows(s.responses,beginNeuron - weights.size2(),beginNeuron);
			RealSubMatrix responses = rows(s.responses,beginNeuron,endNeuron);

			noalias(responses) = prod(weights,input);
			if(!bias().empty()){
				ConstRealVectorRange bias = subrange(m_bias,beginNeuron-inputSize(),endNeuron-inputSize());
				noalias(responses) += trans(repeat(bias,numPatterns));
			}
			if(layer < m_layerMatrix.size()-1) {
				noalias(responses) = m_hiddenNeuron(responses);
			}
			else{
				if(m_inputOutputShortcut.size1() != 0){
					noalias(responses) += prod(m_inputOutputShortcut,trans(patterns));
				}
				noalias(responses) = m_outputNeuron(responses);
			}
			beginNeuron = endNeuron;
		}
		SIZE_CHECK(beginNeuron == m_numberOfNeurons);

		output.resize(numPatterns,m_outputNeurons);
		noalias(output) = trans(rows(s.responses,m_numberOfNeurons-outputSize(),m_numberOfNeurons));
	}
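	///\brief Computes the derivative with respect to the parameters, weighted by one
	/// coefficient vector per pattern and output.
	///
	/// The coefficients become the error terms (deltas) of the output neurons; computeDelta()
	/// backpropagates them and computeParameterDerivative() accumulates the gradient.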
	void weightedParameterDerivative(
		BatchInputType const& patterns, RealMatrix const& coefficients,
		State const& state, RealVector& gradient
	)const{
		SIZE_CHECK(coefficients.size2() == m_outputNeurons);
		SIZE_CHECK(coefficients.size1() == patterns.size1());
		std::size_t numPatterns = patterns.size1();

		RealMatrix delta(numberOfNeurons(),numPatterns,0.0);
		RealSubMatrix outputDelta = rows(delta,delta.size1()-outputSize(),delta.size1());
		noalias(outputDelta) = trans(coefficients);

		computeDelta(delta,state,false);
		computeParameterDerivative(delta,state,gradient);
	}
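	///\brief Computes the derivative with respect to the inputs, weighted by one
	/// coefficient vector per pattern and output.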
	void weightedInputDerivative(
		BatchInputType const& patterns, RealMatrix const& coefficients,
		State const& state, BatchInputType& inputDerivative
	)const{
		SIZE_CHECK(coefficients.size2() == m_outputNeurons);
		SIZE_CHECK(coefficients.size1() == patterns.size1());
		std::size_t numPatterns = patterns.size1();

		RealMatrix delta(numberOfNeurons(),numPatterns,0.0);
		RealSubMatrix outputDelta = rows(delta,delta.size1()-outputSize(),delta.size1());
		noalias(outputDelta) = trans(coefficients);

		computeDelta(delta,state,true);
		inputDerivative.resize(numPatterns,inputSize());
		noalias(inputDerivative) = trans(rows(delta,0,inputSize()));
	}
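	///\brief Computes the parameter and the input derivative in a single backpropagation
	/// pass, reusing the same delta matrix for both results.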
	void weightedDerivatives(
		BatchInputType const& patterns, RealMatrix const& coefficients, State const& state,
		RealVector& parameterDerivative, BatchInputType& inputDerivative
	)const{
		SIZE_CHECK(coefficients.size2() == m_outputNeurons);
		SIZE_CHECK(coefficients.size1() == patterns.size1());
		std::size_t numPatterns = patterns.size1();

		RealMatrix delta(numberOfNeurons(),numPatterns,0.0);
		RealSubMatrix outputDelta = rows(delta,delta.size1()-outputSize(),delta.size1());
		noalias(outputDelta) = trans(coefficients);

		computeDelta(delta,state,true);
		inputDerivative.resize(numPatterns,inputSize());
		noalias(inputDerivative) = trans(rows(delta,0,inputSize()));

		computeParameterDerivative(delta,state,parameterDerivative);
	}
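	///\brief Computes the parameter derivative for the special case that error terms (deltas)
	/// exist for every neuron of the network, not only for the output neurons.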
	void weightedParameterDerivativeFullDelta(
		RealMatrix const& patterns, RealMatrix& delta,
		State const& state, RealVector& gradient
	)const{
		InternalState const& s = state.toState<InternalState>();
		SIZE_CHECK(delta.size1() == m_numberOfNeurons);
		SIZE_CHECK(delta.size2() == patterns.size1());
		SIZE_CHECK(s.responses.size2() == patterns.size1());

		computeDelta(delta,state,false);
		computeParameterDerivative(delta,state,gradient);
	}
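	///\brief Creates the network topology from a list of layer sizes.
	///
	/// layers.front() is the number of inputs, layers.back() the number of outputs, and every
	/// entry in between is the size of a hidden layer. The connectivity argument selects how the
	/// layers are wired (consecutive layers only, an additional input-output shortcut, or full
	/// connections between all layers), and biasNeuron switches the bias parameters on or off.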
	void setStructure(
		std::vector<size_t> const& layers,
		FFNetStructures::ConnectionType connectivity = FFNetStructures::Normal,
		bool biasNeuron = true
	){
		m_layerMatrix.resize(layers.size()-1);
		m_backpropMatrix.resize(layers.size()-1);

		m_inputNeurons = layers.front();
		m_outputNeurons = layers.back();
		m_numberOfNeurons = 0;
		for(std::size_t i = 0; i != layers.size(); ++i){
			m_numberOfNeurons += layers[i];
		}
		if(biasNeuron){
			m_bias.resize(m_numberOfNeurons - m_inputNeurons);
		}

		if(connectivity == FFNetStructures::Full){
			std::size_t numNeurons = layers[0];
			for(std::size_t i = 0; i != m_layerMatrix.size(); ++i){
				m_layerMatrix[i].resize(layers[i+1],numNeurons);
				m_backpropMatrix[i].resize(layers[i],m_numberOfNeurons-numNeurons);
				numNeurons += layers[i+1];
			}
			m_inputOutputShortcut.resize(0,0);
		}
		else{
			for(std::size_t i = 0; i != m_layerMatrix.size(); ++i){
				m_layerMatrix[i].resize(layers[i+1],layers[i]);
				m_backpropMatrix[i].resize(layers[i],layers[i+1]);
			}
			if(connectivity == FFNetStructures::InputOutputShortcut && layers.size() > 2)
				m_inputOutputShortcut.resize(m_outputNeurons,m_inputNeurons);
			else
				m_inputOutputShortcut.resize(0,0);
		}
	}
	void setStructure(
		std::size_t in, std::size_t hidden, std::size_t out,
		FFNetStructures::ConnectionType connectivity = FFNetStructures::Normal, bool bias = true
	){
		std::vector<size_t> layer(3);
		layer[0] = in; layer[1] = hidden; layer[2] = out;
		setStructure(layer, connectivity, bias);
	}
	void setStructure(
		std::size_t in, std::size_t hidden1, std::size_t hidden2, std::size_t out,
		FFNetStructures::ConnectionType connectivity = FFNetStructures::Normal, bool bias = true
	){
		std::vector<size_t> layer(4);
		layer[0] = in; layer[1] = hidden1; layer[2] = hidden2; layer[3] = out;
		setStructure(layer, connectivity, bias);
	}
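	// Typical usage (a sketch; LogisticNeuron, LinearNeuron and initRandomNormal are assumed to be
	// provided elsewhere by the library):
	//
	//   FFNet<LogisticNeuron,LinearNeuron> network;
	//   network.setStructure(2, 10, 1);   // 2 inputs, one hidden layer of 10 neurons, 1 output
	//   initRandomNormal(network, 0.1);   // draw initial weights from a normal distribution
	//
	// After that, eval() propagates batches of patterns and the weighted*Derivative() functions
	// return the backpropagated gradients for a given state.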
	void read( InArchive & archive ){
		archive>>m_inputNeurons;
		archive>>m_outputNeurons;
		archive>>m_numberOfNeurons;
		archive>>m_layerMatrix;
		archive>>m_backpropMatrix;
		archive>>m_inputOutputShortcut;
		archive>>m_bias;
	}
	void write( OutArchive & archive ) const{
		archive<<m_inputNeurons;
		archive<<m_outputNeurons;
		archive<<m_numberOfNeurons;
		archive<<m_layerMatrix;
		archive<<m_backpropMatrix;
		archive<<m_inputOutputShortcut;
		archive<<m_bias;
	}

private:
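	///\brief Backpropagates the error terms stored in delta through the network.
	///
	/// delta must already contain the error terms of the output neurons. If computeInputDelta is
	/// true, the deltas are propagated all the way down to the input neurons, which is required
	/// for the input derivative.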
	void computeDelta(
		RealMatrix& delta, State const& state, bool computeInputDelta
	)const{
		SIZE_CHECK(delta.size1() == numberOfNeurons());
		InternalState const& s = state.toState<InternalState>();

		RealSubMatrix outputDelta = rows(delta,delta.size1()-outputSize(),delta.size1());
		ConstRealSubMatrix outputResponse = rows(s.responses,delta.size1()-outputSize(),delta.size1());
		noalias(outputDelta) *= m_outputNeuron.derivative(outputResponse);

		std::size_t endNeuron = delta.size1()-outputSize();
		std::size_t layer = m_backpropMatrix.size()-1;
		std::size_t endIndex = computeInputDelta? 0: inputSize();
		while(endNeuron > endIndex){
			RealMatrix const& weights = m_backpropMatrix[layer];
			std::size_t beginNeuron = endNeuron - weights.size1();

			RealSubMatrix layerDelta = rows(delta,beginNeuron,endNeuron);
			RealSubMatrix layerDeltaInput = rows(delta,endNeuron,endNeuron+weights.size2());
			ConstRealSubMatrix layerResponse = rows(s.responses,beginNeuron,endNeuron);

			noalias(layerDelta) += prod(weights,layerDeltaInput);
			noalias(layerDelta) *= m_hiddenNeuron.derivative(layerResponse);

			endNeuron = beginNeuron;
			--layer;
		}

		if(inputOutputShortcut().size1() != 0)
			noalias(rows(delta,0,inputSize())) += prod(trans(inputOutputShortcut()),outputDelta);
	}
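	///\brief Accumulates the gradient from the backpropagated deltas and the stored responses.
	///
	/// The weight gradient of a layer is its delta block times the transposed responses of its
	/// inputs, the bias gradient of a neuron is the sum of its deltas over the batch, and the
	/// shortcut gradient (if present) is computed from the output deltas and the input responses.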
	void computeParameterDerivative(RealMatrix const& delta, State const& state, RealVector& gradient)const{
		SIZE_CHECK(delta.size1() == numberOfNeurons());
		InternalState const& s = state.toState<InternalState>();

		gradient.resize(numberOfParameters());
		std::size_t pos = 0;
		std::size_t layerStart = inputSize();
		for(std::size_t layer = 0; layer != layerMatrices().size(); ++layer){
			std::size_t layerRows = layerMatrices()[layer].size1();
			std::size_t layerColumns = layerMatrices()[layer].size2();
			std::size_t params = layerRows*layerColumns;
			noalias(to_matrix(subrange(gradient,pos,pos+params),layerRows,layerColumns)) = prod(
				rows(delta,layerStart,layerStart+layerRows),
				trans(rows(s.responses,layerStart-layerColumns,layerStart))
			);
			pos += params;
			layerStart += layerRows;
		}
		if(!bias().empty()){
			for (std::size_t neuron = m_inputNeurons; neuron < m_numberOfNeurons; neuron++){
				gradient(pos) = sum(row(delta,neuron));
				pos++;
			}
		}
		if(inputOutputShortcut().size1() != 0){
			std::size_t params = inputSize()*outputSize();
			noalias(to_matrix(subrange(gradient,pos,pos+params),outputSize(),inputSize())) = prod(
				rows(delta,delta.size1()-outputSize(),delta.size1()),
				trans(rows(s.responses,0,inputSize()))
			);
		}
	}
	std::size_t m_numberOfNeurons;
	std::size_t m_inputNeurons;
	std::size_t m_outputNeurons;

	std::vector<RealMatrix> m_layerMatrix;
	RealMatrix m_inputOutputShortcut;
	std::vector<RealMatrix> m_backpropMatrix;
	RealVector m_bias;

	HiddenNeuron m_hiddenNeuron;
	OutputNeuron m_outputNeuron;
};

#endif