32 #ifndef SHARK_OBJECTIVEFUNCTIONS_KERNELTARGETALIGNMENT_H 33 #define SHARK_OBJECTIVEFUNCTIONS_KERNELTARGETALIGNMENT_H 102 template<
class InputType = RealVector,
class LabelType =
unsigned int>
// Precondition: the `kernel` pointer must be non-null; the check carries the
// message literal below. (How SHARK_CHECK reports a failure is defined by the
// macro, which is not visible in this excerpt.)
116 SHARK_CHECK(kernel != NULL,
"[KernelTargetAlignment] kernel must not be NULL");
// Branch on whether the kernel reports support for first-order parameter
// derivatives. NOTE(review): the guarded statement is not visible in this excerpt.
123 if(mep_kernel -> hasFirstParameterDerivative())
// Body of an accessor whose signature is not visible in this excerpt:
// returns the literal "KernelTargetAlignment".
135 {
return "KernelTargetAlignment"; }
// Forwards to the kernel: the result is whatever mep_kernel->parameterVector() returns.
139 return mep_kernel -> parameterVector();
// Evaluates the objective value.
//
// Takes a RealVector `input`; the statements between the signature and the
// return are not visible in this excerpt (presumably `input` is applied as the
// kernel's parameter vector - TODO confirm).
// Returns the negated `error` field of the statistics computed by
// evaluateKernelMatrix().
150 double eval(RealVector
const& input)
const{
153 return -evaluateKernelMatrix().error;
// Fragment of the derivative evaluation (signature not visible in this excerpt).
// Kernel-matrix statistics are computed once; `results.error` is reused for
// the return value and `results` is handed to generateDerivativeWeightBlock.
181 KernelMatrixResults results = evaluateKernelMatrix();
// Size the output gradient to `parameters` entries (`parameters` is declared
// outside this excerpt).
184 derivative.resize(parameters);
// Loop over the batches preceding batch i. NOTE(review): the loop body and the
// declaration of `i` are not visible; presumably this accumulates `startX`,
// the offset of batch i's first element - confirm.
187 std::size_t startX = 0;
188 for(
int j = 0; j != i; ++j){
// Local accumulator for this value of i, zero-initialised, plus scratch
// storage for one block's derivative.
191 RealVector threadDerivative(parameters,0.0);
192 RealVector blockDerivative;
// Kernel state object used by the eval call below.
193 boost::shared_ptr<State> state = mep_kernel->
createState();
// Only blocks with j <= i are visited.
196 std::size_t startY = 0;
197 for(
int j = 0; j <= i; ++j){
// Evaluate the kernel between the inputs of batch i and batch j into blockK,
// recording intermediate results in *state.
198 mep_kernel->
eval(m_data.
batch(i).input,m_data.
batch(j).input,blockK,*state);
// The weight block is passed as an argument to a call whose head is not
// visible in this excerpt.
201 generateDerivativeWeightBlock(i,j,startX,startY,blockK,results),
// Fold the block's contribution into the local accumulator...
205 noalias(threadDerivative) += blockDerivative;
// ...and the local accumulator into the output gradient.
209 noalias(derivative) += threadDerivative;
// Same sign convention as eval(): the negated `error` statistic.
213 return -results.error;
// One entry per element (resized to m_elements where it is filled); its sum
// divided by m_elements gives m_meanY.
219 RealVector m_columnMeanY;
// Set to the number of entries returned by classSizes(labels) in the
// class-label setup code.
221 std::size_t m_numberOfClasses;
// Element count; used as loop bound, vector length and divisor throughout.
// (Where it is assigned is not visible in this excerpt.)
222 std::size_t m_elements;
// Bundle of kernel-matrix statistics. The member list is not visible in this
// excerpt; fields accessed elsewhere in this file are `k`, `KcKc`, `YcKc`,
// `meanK` and `error`.
224 struct KernelMatrixResults{
// Fragment of the label preprocessing for class labels (signature not visible
// in this excerpt). classSizes(labels) yields one count per class; the number
// of entries defines m_numberOfClasses.
235 std::vector<std::size_t> classCount =
classSizes(labels);
236 m_numberOfClasses = classCount.size();
237 RealVector classMean(m_numberOfClasses);
// NOTE(review): dm1 is 0 when there is only one class, so the divisions by
// dm1 below would divide by zero - confirm callers guarantee >= 2 classes.
238 double dm1 = m_numberOfClasses-1.0;
239 for(std::size_t i = 0; i != m_numberOfClasses; ++i){
// Count of same-class elements minus the remaining elements scaled by 1/dm1.
240 classMean(i) = classCount[i]-(m_elements-classCount[i])/dm1;
// NOTE(review): this scales the WHOLE vector. The loop's closing brace is not
// visible in this excerpt; if this statement sits inside the loop, earlier
// entries get divided repeatedly - confirm it runs exactly once, after the loop.
241 classMean /= m_elements;
// Expand the per-class value to one entry per element, indexed by the
// element's label.
244 m_columnMeanY.resize(m_elements);
245 for(std::size_t i = 0; i != m_elements; ++i){
246 m_columnMeanY(i) = classMean(labels.
element(i));
// Overall mean: average of the per-element values.
248 m_meanY=
sum(m_columnMeanY)/m_elements;
// Fragment of the label preprocessing for vector-valued labels (signature not
// visible in this excerpt). `meanLabel` holds the result of mean(labels).
252 RealVector meanLabel =
mean(labels);
253 m_columnMeanY.resize(m_elements);
// NOTE(review): the loop body that fills m_columnMeanY(i) is not visible here.
254 for(std::size_t i = 0; i != m_elements; ++i){
// Overall mean: average of the per-element values.
257 m_meanY=
sum(m_columnMeanY)/m_elements;
// Accumulates the label-weighted sum of one kernel block for class labels.
//
// labelsi / labelsj: labels of the block's row / column elements.
// block: kernel values; entry (k,l) pairs labelsi(k) with labelsj(l).
// For every (k,l): adds block(k,l) when the two labels are equal; the other
// visible update subtracts block(k,l)/dm1 (the branch keyword between the two
// updates is not visible in this excerpt - presumably `else`).
// NOTE(review): the declaration/initialisation of `result` and the return
// statement are not visible here.
261 double updateYKc(UIntVector
const& labelsi,UIntVector
const& labelsj, RealMatrix
const& block)
const{
262 std::size_t blockSize1 = labelsi.size();
263 std::size_t blockSize2 = labelsj.size();
// NOTE(review): dm1 is 0 for a single class -> division by zero below.
266 double dm1 = m_numberOfClasses-1.0;
267 for(std::size_t k = 0; k != blockSize1; ++k){
268 for(std::size_t l = 0; l != blockSize2; ++l){
269 if(labelsi(k) == labelsj(l))
270 result += block(k,l);
272 result -= block(k,l)/dm1;
// Overload of updateYKc for labels stored as matrices.
//
// labelsi / labelsj: label matrices; the number of rows (size1()) of each gives
// the block dimensions. block: kernel values for the (k,l) pairs.
// For every (k,l) adds y_kl*block(k,l) to `result`.
// NOTE(review): the computation of `y_kl`, the declaration of `result` and the
// return statement are not visible in this excerpt.
279 double updateYKc(RealMatrix
const& labelsi,RealMatrix
const& labelsj, RealMatrix
const& block)
const{
280 std::size_t blockSize1 = labelsi.size1();
281 std::size_t blockSize2 = labelsj.size1();
284 for(std::size_t k = 0; k != blockSize1; ++k){
285 for(std::size_t l = 0; l != blockSize2; ++l){
287 result += y_kl*block(k,l);
// Fills `blockY` with the label-derived target values for one block (class labels).
//
// labelsi / labelsj: labels of the block's row / column elements.
// blockY: output; entry (k,l) is written for every label pair.
// Visible here: the entry is set to -1.0/dm1 in one branch. The assignment for
// equal labels is not visible in this excerpt (presumably 1, mirroring the
// +block(k,l) update in updateYKc - TODO confirm).
293 void computeBlockY(UIntVector
const& labelsi,UIntVector
const& labelsj, RealMatrix& blockY)
const{
294 std::size_t blockSize1 = labelsi.size();
295 std::size_t blockSize2 = labelsj.size();
// NOTE(review): dm1 is 0 for a single class -> division by zero below.
296 double dm1 = m_numberOfClasses-1.0;
297 for(std::size_t k = 0; k != blockSize1; ++k){
298 for(std::size_t l = 0; l != blockSize2; ++l){
299 if( labelsi(k) == labelsj(l))
302 blockY(k,l) = -1.0/dm1;
// Overload of computeBlockY for labels stored as matrices.
//
// labelsi / labelsj: label matrices; their row counts (size1()) give the block
// dimensions. blockY: output matrix (non-const reference).
// NOTE(review): only the two loop headers are visible in this excerpt; the
// statement that writes blockY(k,l) is elided.
307 void computeBlockY(RealMatrix
const& labelsi,RealMatrix
const& labelsj, RealMatrix& blockY)
const{
308 std::size_t blockSize1 = labelsi.size1();
309 std::size_t blockSize2 = labelsj.size1();
310 for(std::size_t k = 0; k != blockSize1; ++k){
311 for(std::size_t l = 0; l != blockSize2; ++l){
// Builds the weight matrix for the block formed by batch i (rows) and batch j
// (columns).
//
// i, j: batch indices into m_data.
// start1, start2: offsets of the first element of batch i / batch j, used to
//   slice the per-element vectors m_columnMeanY and matrixStatistics.k.
// blockK: kernel values of the block (its use is in lines not visible in this
//   excerpt).
// matrixStatistics: supplies KcKc, YcKc, meanK and k.
// Returns a blockSize1 x blockSize2 RealMatrix (the return statement itself is
// not visible here).
319 RealMatrix generateDerivativeWeightBlock(
320 std::size_t i, std::size_t j,
321 std::size_t start1, std::size_t start2,
322 RealMatrix
const& blockK,
323 KernelMatrixResults
const& matrixStatistics
// Block dimensions are the element counts of the two batches.
325 std::size_t blockSize1 =
size(m_data.
batch(i));
326 std::size_t blockSize2 =
size(m_data.
batch(j));
328 double KcKc = matrixStatistics.KcKc;
329 double YcKc = matrixStatistics.YcKc;
330 double meanK = matrixStatistics.meanK;
331 RealMatrix blockW(blockSize1,blockSize2);
// Start from the label-derived block for the two batches.
// NOTE(review): statements between this call and the subtractions below are
// elided in this excerpt.
334 computeBlockY(m_data.
batch(i).label,m_data.
batch(j).label,blockW);
// Subtract the slice of (KcKc*m_columnMeanY - YcKc*k) belonging to batch j,
// repeated blockSize1 times.
341 blockW-=
repeat(
subrange(KcKc*m_columnMeanY - YcKc*matrixStatistics.k,start2,start2+blockSize2),blockSize1);
// Same correction for batch i's slice, repeated blockSize2 times and transposed.
342 blockW-=
trans(
repeat(
subrange(KcKc*m_columnMeanY - YcKc*matrixStatistics.k,start1,start1+blockSize1),blockSize2));
// Add the scalar term built from the overall means.
344 blockW+= KcKc*m_meanY-YcKc*meanK;
// Normalise by KcKc^(3/2). NOTE(review): no guard against KcKc == 0 is visible.
345 blockW /= KcKc*std::sqrt(KcKc);
// Computes the kernel-matrix statistics by evaluating the kernel block by
// block over m_data and returns them in a KernelMatrixResults.
// NOTE(review): many statements (accumulation of KcKc, YKc and k, the outer
// batch loop header, the branch selecting between the two updateYKc calls,
// the definition of n2, the return) are not visible in this excerpt.
362 KernelMatrixResults evaluateKernelMatrix()
const{
// One entry per element, zero-initialised.
371 RealVector k(m_elements,0.0);
// Loop over the batches preceding batch i. NOTE(review): body not visible;
// presumably accumulates startRow - confirm.
373 std::size_t startRow = 0;
374 for(
int j = 0; j != i; ++j){
377 std::size_t rowSize =
size(m_data.
batch(i));
// Local accumulators for this value of i.
379 double threadYKc = 0;
380 RealVector threadk(m_elements,0.0);
// Only blocks with j <= i are visited.
381 std::size_t startColumn = 0;
382 for(
int j = 0; j <= i; ++j){
383 std::size_t columnSize =
size(m_data.
batch(j));
// Kernel values between the inputs of batch i and batch j.
384 RealMatrix blockK = (*mep_kernel)(m_data.
batch(i).input,m_data.
batch(j).input);
// Two variants of the label-weighted update: weight 1 here, weight 2.0 below.
// The condition choosing between them is not visible (presumably i == j vs.
// j < i, since only half of the block pairs are visited - TODO confirm).
388 threadYKc += updateYKc(m_data.
batch(i).label,m_data.
batch(j).label,blockK);
394 threadYKc += 2.0 * updateYKc(m_data.
batch(i).label,m_data.
batch(j).label,blockK);
// Advance the column offset past batch j.
396 startColumn+=columnSize;
// Element count as a double for the arithmetic below.
405 double n = m_elements;
407 double meanK =
sum(k)/n;
// Combine the raw label/kernel sum YKc with the correction terms built from
// k, m_columnMeanY, m_meanY and meanK. `n2` is defined outside the visible
// lines (presumably n*n - confirm).
409 double YcKc = YKc-2.0*n*
inner_prod(k,m_columnMeanY)+n2*m_meanY*meanK;
412 KernelMatrixResults results;
416 results.meanK = meanK;
// NOTE(review): no guard against KcKc == 0 is visible before this division.
417 results.error = YcKc/std::sqrt(KcKc);