36 #ifndef SHARK_ALGORITHMS_QP_QPMCLINEAR_H 37 #define SHARK_ALGORITHMS_QP_QPMCLINEAR_H 54 #define CHANGE_RATE 0.2 59 #define MAXITER_MULTIPLIER 10 63 template <
class InputT>
84 const DatasetType& dataset,
87 std::size_t strategy = ACF,
88 bool shrinking =
false)
98 for (std::size_t i=0; i<
m_data.size(); i++)
116 bool verbose =
false)
126 std::size_t ell =
m_data.size();
127 RealMatrix alpha(ell,
m_classes + 1, 0.0);
131 RealVector pref(ell, 1.0);
132 double prefsum = ell;
134 std::vector<std::size_t> schedule(ell);
137 for (std::size_t i=0; i<ell; i++) schedule[i] = i;
141 std::size_t active = ell;
144 std::size_t epoch = 0;
145 std::size_t steps = 0;
148 double objective = 0.0;
149 double max_violation = 0.0;
152 const double gain_learning_rate = 1.0 / ell;
153 double average_gain = 0.0;
163 double psum = prefsum;
166 for (std::size_t i=0; i<ell; i++)
169 double num = (psum < 1e-6) ? ell - pos :
std::min((
double)(ell - pos), (ell - pos) * p / psum);
170 std::size_t n = (std::size_t)std::floor(num);
171 double prob = num - n;
173 for (std::size_t j=0; j<n; j++)
186 for (std::size_t i=0; i<active; i++)
191 for (std::size_t i=0; i<ell; i++)
197 size_t nPoints = ell;
201 for (std::size_t j=0; j<nPoints; j++)
205 const std::size_t i = schedule[j];
206 InputReferenceType x_i =
m_data[i].input;
207 const unsigned int y_i =
m_data[i].label;
209 RealMatrixRow a =
row(alpha, i);
212 RealVector wx =
prod(w,x_i);
218 max_violation =
std::max(max_violation, kkt);
232 std::swap(schedule[j], schedule[active]);
239 if (epoch == 0) average_gain += gain / (double)ell;
242 double change =
CHANGE_RATE * (gain / average_gain - 1.0);
244 prefsum += newpref - pref(i);
246 average_gain = (1.0 - gain_learning_rate) * average_gain + gain_learning_rate * gain;
262 if (prop != NULL) prop->type =
QpTimeout;
269 std::cout <<
"#" << std::flush;
281 for (std::size_t i=0; i<ell; i++) pref(i) = 1.0;
295 if (verbose) std::cout <<
"." << std::flush;
299 canstop = (active == ell);
308 for (std::size_t d=0; d<
m_dim; d++) objective -=
w(j, d) *
w(j, d);
311 for (std::size_t i=0; i<ell; i++)
313 for (std::size_t j=0; j<
m_classes; j++) objective += alpha(i, j);
319 prop->accuracy = max_violation;
320 prop->iterations = ell * epoch;
321 prop->value = objective;
322 prop->seconds = timer.
lastLap();
328 std::cout << std::endl;
329 std::cout <<
"training time (seconds): " << timer.
lastLap() << std::endl;
330 std::cout <<
"number of epochs: " << epoch << std::endl;
331 std::cout <<
"number of iterations: " << (ell * epoch) << std::endl;
332 std::cout <<
"number of non-zero steps: " << steps << std::endl;
333 std::cout <<
"dual accuracy: " << max_violation << std::endl;
334 std::cout <<
"dual objective value: " << objective << std::endl;
343 void add_scaled(RealMatrix&
w, RealVector
const& mu, InputReferenceType x)
357 virtual double calcGradient(RealVector& gradient, RealVector wx, RealMatrixRow
const& alpha,
double C,
unsigned int y) = 0;
377 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, RealMatrixRow& alpha, RealVector& mu) = 0;
707 template <
class InputT>
715 const DatasetType& dataset,
723 virtual double calcGradient(RealVector& gradient, RealVector wx, RealMatrixRow
const& alpha,
double C,
unsigned int y)
725 double violation = 0.0;
726 for (std::size_t c=0; c<wx.size(); c++)
734 const double g = 1.0 - 0.5 * (wx(y) - wx(c));
736 if (g > violation && alpha(c) < C) violation = g;
737 else if (-g > violation && alpha(c) > 0.0) violation = -g;
747 for (std::size_t c=0; c<
m_classes; c++) sum_mu += mu(c);
748 unsigned int y =
m_data[index].label;
749 RealVector step(-0.5 * mu);
750 step(y) = 0.5 * sum_mu;
755 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, RealMatrixRow& alpha, RealVector& mu)
757 const double qq = 0.5 * q;
762 for (iter=0; iter<maxiter; iter++)
769 if (c == y)
continue;
771 const double g = gradient(c);
772 const double a = alpha(c);
773 if (g > kkt && a < C) { kkt = g; idx = c; }
774 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
778 if (kkt < epsilon)
break;
781 const double a = alpha(idx);
782 const double g = gradient(idx);
784 double a_new = a + m;
799 const double dg = 0.5 * m * qq;
800 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dg;
803 gain += m * (g - dg);
817 template <
class InputT>
825 const DatasetType& dataset,
833 virtual double calcGradient(RealVector& gradient, RealVector wx, RealMatrixRow
const& alpha,
double C,
unsigned int y)
835 double violation = 0.0;
844 const double g = 1.0 + wx(c);
846 if (g > violation && alpha(c) < C) violation = g;
847 else if (-g > violation && alpha(c) > 0.0) violation = -g;
856 double mean_mu = 0.0;
857 for (std::size_t c=0; c<
m_classes; c++) mean_mu += mu(c);
858 mean_mu /= (double)m_classes;
859 RealVector step(m_classes);
860 for (std::size_t c=0; c<
m_classes; c++) step(c) = mean_mu - mu(c);
865 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, RealMatrixRow& alpha, RealVector& mu)
868 const double qq = (1.0 - ood) * q;
873 for (iter=0; iter<maxiter; iter++)
880 if (c == y)
continue;
882 const double g = gradient(c);
883 const double a = alpha(c);
884 if (g > kkt && a < C) { kkt = g; idx = c; }
885 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
889 if (kkt < epsilon)
break;
892 const double a = alpha(idx);
893 const double g = gradient(idx);
895 double a_new = a + m;
910 const double dg = m * q;
912 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += dgc;
915 gain += m * (g - 0.5 * (dg - dgc));
929 template <
class InputT>
937 const DatasetType& dataset,
945 virtual double calcGradient(RealVector& gradient, RealVector wx, RealMatrixRow
const& alpha,
double C,
unsigned int y)
947 double violation = 0.0;
950 const double g = (c == y) ? 1.0 - wx(y) : 1.0 + wx(c);
952 if (g > violation && alpha(c) < C) violation = g;
953 else if (-g > violation && alpha(c) > 0.0) violation = -g;
961 unsigned int y =
m_data[index].label;
962 double mean = -2.0 * mu(y);
963 for (std::size_t c=0; c<
m_classes; c++) mean += mu(c);
964 mean /= (double)m_classes;
965 RealVector step(m_classes);
966 for (std::size_t c=0; c<
m_classes; c++) step(c) = ((c == y) ? (mu(c) +
mean) : (mean - mu(c)));
971 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, RealMatrixRow& alpha, RealVector& mu)
974 const double qq = (1.0 - ood) * q;
979 for (iter=0; iter<maxiter; iter++)
986 const double g = gradient(c);
987 const double a = alpha(c);
988 if (g > kkt && a < C) { kkt = g; idx = c; }
989 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
993 if (kkt < epsilon)
break;
996 const double a = alpha(idx);
997 const double g = gradient(idx);
999 double a_new = a + m;
1005 else if (a_new >= C)
1014 const double dg = m * q;
1018 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dgc;
1019 gradient(idx) -= dg - 2.0 * dgc;
1023 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += (c == y) ? -dgc : dgc;
1024 gradient(idx) -= dg;
1027 gain += m * (g - 0.5 * (dg - dgc));
1041 template <
class InputT>
1049 const DatasetType& dataset,
1051 std::size_t classes)
1057 virtual double calcGradient(RealVector& gradient, RealVector wx, RealMatrixRow
const& alpha,
double C,
unsigned int y)
1059 for (std::size_t c=0; c<
m_classes; c++) gradient(c) = 0.0;
1060 const double g = 1.0 - wx(y);
1062 const double a = alpha(0);
1065 if (a == C)
return 0.0;
1070 if (a == 0.0)
return 0.0;
1078 unsigned int y =
m_data[index].label;
1083 for (
size_t c=0; c<
m_classes; c++) step(c) = (c == y) ? sy : sc;
1088 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, RealMatrixRow& alpha, RealVector& mu)
1091 const double qq = (1.0 - ood) * q;
1094 const double g = gradient(y);
1095 const double a = alpha(0);
1096 if (g > kkt && a < C) kkt = g;
1097 else if (-g > kkt && a > 0.0) kkt = -g;
1100 if (kkt < epsilon)
return 0.0;
1104 double a_new = a + m;
1110 else if (a_new >= C)
1119 return m * (g - 0.5 * m * qq);
1130 template <
class InputT>
1138 const DatasetType& dataset,
1140 std::size_t classes)
1146 virtual double calcGradient(RealVector& gradient, RealVector wx, RealMatrixRow
const& alpha,
double C,
unsigned int y)
1150 double violation = 0.0;
1151 for (std::size_t c=0; c<wx.size(); c++)
1159 const double g = 1.0 - 0.5 * (wx(y) - wx(c));
1161 if (g > violation) violation = g;
1162 else if (-g > violation && alpha(c) > 0.0) violation = -g;
1170 double kkt_up = 0.0, kkt_down = 1e100;
1179 const double g = 1.0 - 0.5 * (wx(y) - wx(c));
1181 if (g > kkt_up && alpha(c) < C) kkt_up = g;
1182 if (g < kkt_down && alpha(c) > 0.0) kkt_down = g;
1185 return std::max(0.0, kkt_up - kkt_down);
1192 unsigned int y =
m_data[index].label;
1193 double sum_mu = 0.0;
1194 for (std::size_t c=0; c<
m_classes; c++)
if (c != y) sum_mu += mu(c);
1195 RealVector step(-0.5 * mu);
1196 step(y) = 0.5 * sum_mu;
1201 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, RealMatrixRow& alpha, RealVector& mu)
1203 const double qq = 0.5 * q;
1208 for (iter=0; iter<maxiter; iter++)
1211 std::size_t idx = 0;
1212 std::size_t idx_up = 0, idx_down = 0;
1216 if (alpha(m_classes) == C)
1218 double kkt_up = -1e100, kkt_down = 1e100;
1221 if (c == y)
continue;
1223 const double g = gradient(c);
1224 const double a = alpha(c);
1225 if (g > kkt_up && a < C) { kkt_up = g; idx_up = c; }
1226 if (g < kkt_down && a > 0.0) { kkt_down = g; idx_down = c; }
1237 grad = kkt_up - kkt_down;
1246 if (c == y)
continue;
1248 const double g = gradient(c);
1249 const double a = alpha(c);
1250 if (g > kkt) { kkt = g; idx = c; }
1251 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
1253 grad = gradient(idx);
1257 if (kkt < epsilon)
return gain;
1262 const double a_up = alpha(idx_up);
1263 const double a_down = alpha(idx_down);
1264 double m = grad / qq;
1265 double a_up_new = a_up + m;
1266 double a_down_new = a_down - m;
1267 if (a_down_new <= 0.0)
1270 a_up_new = a_up + m;
1273 alpha(idx_up) = a_up_new;
1274 alpha(idx_down) = a_down_new;
1279 const double dg = 0.5 * m * qq;
1280 gradient(idx_up) -= dg;
1281 gradient(idx_down) += dg;
1282 gain += m * (grad - 2.0 * dg);
1287 const double a = alpha(idx);
1288 const double a_sum = alpha(m_classes);
1289 double m = grad / qq;
1290 double a_new = a + m;
1291 double a_sum_new = a_sum + m;
1296 a_sum_new = a_sum + m;
1298 else if (a_sum_new >= C)
1305 alpha(m_classes) = a_sum_new;
1309 const double dg = 0.5 * m * qq;
1310 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dg;
1311 gradient(idx) -= dg;
1312 gain += m * (grad - dg);
1327 template <
class InputT>
1335 const DatasetType& dataset,
1337 std::size_t classes)
1343 virtual double calcGradient(RealVector& gradient, RealVector wx, RealMatrixRow
const& alpha,
double C,
unsigned int y)
1347 double violation = 0.0;
1356 const double g = 1.0 + wx(c);
1358 if (g > violation) violation = g;
1359 else if (-g > violation && alpha(c) > 0.0) violation = -g;
1366 double kkt_up = 0.0, kkt_down = 1e100;
1375 const double g = 1.0 + wx(c);
1377 if (g > kkt_up && alpha(c) < C) kkt_up = g;
1378 if (g < kkt_down && alpha(c) > 0.0) kkt_down = g;
1381 return std::max(0.0, kkt_up - kkt_down);
1388 double mean_mu = 0.0;
1389 for (std::size_t c=0; c<
m_classes; c++) mean_mu += mu(c);
1390 mean_mu /= (double)m_classes;
1391 RealVector step(m_classes);
1392 for (
size_t c=0; c<
m_classes; c++) step(c) = mean_mu - mu(c);
1397 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, RealMatrixRow& alpha, RealVector& mu)
1400 const double qq = (1.0 - ood) * q;
1405 for (iter=0; iter<maxiter; iter++)
1408 std::size_t idx = 0;
1409 std::size_t idx_up = 0, idx_down = 0;
1413 if (alpha(m_classes) == C)
1415 double kkt_up = -1e100, kkt_down = 1e100;
1418 if (c == y)
continue;
1420 const double g = gradient(c);
1421 const double a = alpha(c);
1422 if (g > kkt_up && a < C) { kkt_up = g; idx_up = c; }
1423 if (g < kkt_down && a > 0.0) { kkt_down = g; idx_down = c; }
1434 grad = kkt_up - kkt_down;
1443 if (c == y)
continue;
1445 const double g = gradient(c);
1446 const double a = alpha(c);
1447 if (g > kkt) { kkt = g; idx = c; }
1448 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
1450 grad = gradient(idx);
1454 if (kkt < epsilon)
return gain;
1459 const double a_up = alpha(idx_up);
1460 const double a_down = alpha(idx_down);
1461 double m = grad / (2.0 * q);
1462 double a_up_new = a_up + m;
1463 double a_down_new = a_down - m;
1464 if (a_down_new <= 0.0)
1467 a_up_new = a_up + m;
1470 alpha(idx_up) = a_up_new;
1471 alpha(idx_down) = a_down_new;
1476 const double dg = m * q;
1478 gradient(idx_up) -= dg;
1479 gradient(idx_down) += dg;
1480 gain += m * (grad - (dg - dgc));
1485 const double a = alpha(idx);
1486 const double a_sum = alpha(m_classes);
1487 double m = grad / qq;
1488 double a_new = a + m;
1489 double a_sum_new = a_sum + m;
1494 a_sum_new = a_sum + m;
1496 else if (a_sum_new >= C)
1503 alpha(m_classes) = a_sum_new;
1507 const double dg = m * q;
1509 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += dgc;
1510 gradient(idx) -= dg;
1511 gain += m * (grad - 0.5 * (dg - dgc));
1526 template <
class InputT>
1534 const DatasetType& dataset,
1536 std::size_t classes)
1542 virtual double calcGradient(RealVector& gradient, RealVector wx, RealMatrixRow
const& alpha,
double C,
unsigned int y)
1546 double violation = 0.0;
1549 const double g = (c == y) ? 1.0 - wx(y) : 1.0 + wx(c);
1551 if (g > violation) violation = g;
1552 else if (-g > violation && alpha(c) > 0.0) violation = -g;
1558 double kkt_up = 0.0, kkt_down = 1e100;
1561 const double g = (c == y) ? 1.0 - wx(y) : 1.0 + wx(c);
1563 if (g > kkt_up && alpha(c) < C) kkt_up = g;
1564 if (g < kkt_down && alpha(c) > 0.0) kkt_down = g;
1566 return std::max(0.0, kkt_up - kkt_down);
1573 unsigned int y =
m_data[index].label;
1574 double mean = -2.0 * mu(y);
1575 for (std::size_t c=0; c<
m_classes; c++) mean += mu(c);
1576 mean /= (double)m_classes;
1577 RealVector step(m_classes);
1578 for (
size_t c=0; c<
m_classes; c++) step(c) = (c == y) ? (mu(c) +
mean) : (mean - mu(c));
1583 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, RealMatrixRow& alpha, RealVector& mu)
1586 const double qq = (1.0 - ood) * q;
1591 for (iter=0; iter<maxiter; iter++)
1594 std::size_t idx = 0;
1595 std::size_t idx_up = 0, idx_down = 0;
1599 if (alpha(m_classes) == C)
1601 double kkt_up = -1e100, kkt_down = 1e100;
1604 const double g = gradient(c);
1605 const double a = alpha(c);
1606 if (g > kkt_up && a < C) { kkt_up = g; idx_up = c; }
1607 if (g < kkt_down && a > 0.0) { kkt_down = g; idx_down = c; }
1618 grad = kkt_up - kkt_down;
1627 const double g = gradient(c);
1628 const double a = alpha(c);
1629 if (g > kkt) { kkt = g; idx = c; }
1630 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
1632 grad = gradient(idx);
1636 if (kkt < epsilon)
return gain;
1641 const double a_up = alpha(idx_up);
1642 const double a_down = alpha(idx_down);
1643 double m = grad / (2.0 * q);
1644 double a_up_new = a_up + m;
1645 double a_down_new = a_down - m;
1646 if (a_down_new <= 0.0)
1649 a_up_new = a_up + m;
1652 alpha(idx_up) = a_up_new;
1653 alpha(idx_down) = a_down_new;
1658 const double dg = m * q;
1662 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dgc;
1663 gradient(idx_up) -= dg - 2.0 * dgc;
1664 gradient(idx_down) += dg;
1666 else if (idx_down == y)
1668 gradient(idx_up) -= dg;
1669 gradient(idx_down) += dg - 2.0 * dgc;
1673 gradient(idx_up) -= dg;
1674 gradient(idx_down) += dg;
1676 gain += m * (grad - (dg - dgc));
1681 const double a = alpha(idx);
1682 const double a_sum = alpha(m_classes);
1683 double m = grad / qq;
1684 double a_new = a + m;
1685 double a_sum_new = a_sum + m;
1690 a_sum_new = a_sum + m;
1692 else if (a_sum_new >= C)
1699 alpha(m_classes) = a_sum_new;
1703 const double dg = m * q;
1707 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dgc;
1708 gradient(idx) -= dg - 2.0 * dgc;
1712 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += (c == y) ? -dgc : dgc;
1713 gradient(idx) -= dg;
1715 gain += m * (grad - 0.5 * (dg - dgc));
1730 template <
class InputT>
1738 const DatasetType& dataset,
1740 std::size_t classes)
1746 virtual double calcGradient(RealVector& gradient, RealVector wx, RealMatrixRow
const& alpha,
double C,
unsigned int y)
1748 double violation = 0.0;
1751 const double g = (c == y) ? (m_classes - 1.0) - wx(y) : 1.0 + wx(c);
1753 if (g > violation && alpha(c) < C) violation = g;
1754 else if (-g > violation && alpha(c) > 0.0) violation = -g;
1762 unsigned int y =
m_data[index].label;
1763 double mean = -2.0 * mu(y);
1764 for (std::size_t c=0; c<
m_classes; c++) mean += mu(c);
1765 mean /= (double)m_classes;
1766 RealVector step(m_classes);
1767 for (std::size_t c=0; c<
m_classes; c++) step(c) = ((c == y) ? (mu(c) +
mean) : (mean - mu(c)));
1772 virtual double solveSub(
double epsilon, RealVector gradient,
double q,
double C,
unsigned int y, RealMatrixRow& alpha, RealVector& mu)
1775 const double qq = (1.0 - ood) * q;
1780 for (iter=0; iter<maxiter; iter++)
1783 std::size_t idx = 0;
1787 const double g = gradient(c);
1788 const double a = alpha(c);
1789 if (g > kkt && a < C) { kkt = g; idx = c; }
1790 else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
1794 if (kkt < epsilon)
break;
1797 const double a = alpha(idx);
1798 const double g = gradient(idx);
1800 double a_new = a + m;
1806 else if (a_new >= C)
1815 const double dg = m * q;
1819 for (std::size_t c=0; c<
m_classes; c++) gradient(c) -= dgc;
1820 gradient(idx) -= dg - 2.0 * dgc;
1824 for (std::size_t c=0; c<
m_classes; c++) gradient(c) += (c == y) ? -dgc : dgc;
1825 gradient(idx) -= dg;
1828 gain += m * (g - 0.5 * (dg - dgc));