27inline void ICopy(
const T *src, T *dst,
int n) {
28 memcpy(dst, src, n *
sizeof(T));
31inline void ICopy1(
const T src[1], T dst[1]) {
35inline void ICopy2(
const T src[2], T dst[2]) {
40inline void ICopy3(
const T src[3], T dst[3]) {
46inline void ICopy4(
const T src[4], T dst[4]) {
53inline void ICopy5(
const T src[5], T dst[5]) {
61inline void ICopy6(
const T src[6], T dst[6]) {
70inline void ICopy7(
const T src[7], T dst[7]) {
80inline void ICopy8(
const T src[8], T dst[8]) {
91inline void ICopy9(
const T src[9], T dst[9]) {
103inline void ICopy10(
const T src[10], T dst[10]) {
116inline void ICopy11(
const T src[11], T dst[11]) {
130inline void ICopy12(
const T src[12], T dst[12]) {
145inline void ICopy13(
const T src[13], T dst[13]) {
161inline void ICopy14(
const T src[14], T dst[14]) {
178inline void ICopy15(
const T src[15], T dst[15]) {
196inline void ICopy16(
const T src[16], T dst[16]) {
216inline void ICopy(
const T *
const *src, T **dst,
int m,
int n) {
218 for (i = 0; i < m; i++) {
219 ICopy<T>(src[i], dst[i], n);
224template <
typename T,
typename S>
225inline void ICopy(
const T *src, S *dst,
int n) {
227 for (i = 0; i < n; ++i) {
228 dst[i] = (S)(src[i]);
232template <
typename T,
typename S>
233inline void ICopy(
const T *
const *src, S **dst,
int m,
int n) {
235 for (i = 0; i < m; i++) {
236 ICopy<T, S>(src[i], dst[i], n);
242inline void IFill(T *a,
int n, T val) {
243 for (
int i = 0; i < n; i++) {
257 a[0] = a[1] = a[2] = val;
261 a[0] = a[1] = a[2] = a[3] = val;
265 a[0] = a[1] = a[2] = a[3] = a[4] = val;
269 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = val;
273 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = val;
277 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = val;
281 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = val;
285 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = val;
289 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
294 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
299 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
304 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
305 a[11] = a[12] = a[13] = val;
309 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
310 a[11] = a[12] = a[13] = a[14] = val;
314 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
315 a[11] = a[12] = a[13] = a[14] = a[15] = val;
321 for (
int i = 0; i < n; i++) {
322 a[i] =
static_cast<T
>(0.0);
327 a[0] =
static_cast<T
>(0.0);
331 a[0] = a[1] =
static_cast<T
>(0.0);
335 a[0] = a[1] = a[2] =
static_cast<T
>(0.0);
339 a[0] = a[1] = a[2] = a[3] =
static_cast<T
>(0.0);
343 a[0] = a[1] = a[2] = a[3] = a[4] =
static_cast<T
>(0.0);
347 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] =
static_cast<T
>(0.0);
351 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] =
static_cast<T
>(0.0);
355 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] =
static_cast<T
>(0.0);
359 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] =
364 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] =
369 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
374 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
375 a[11] =
static_cast<T
>(0.0);
379 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
380 a[11] = a[12] =
static_cast<T
>(0.0);
384 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
385 a[11] = a[12] = a[13] =
static_cast<T
>(0.0);
389 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
390 a[11] = a[12] = a[13] = a[14] =
static_cast<T
>(0.0);
394 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
395 a[11] = a[12] = a[13] = a[14] = a[15] =
static_cast<T
>(0.0);
401 for (
int i = 0; i < n; i++) {
591inline void INeg1(
const T x[1], T y[1]) {
595inline void INeg2(
const T x[2], T y[2]) {
600inline void INeg3(
const T x[3], T y[3]) {
606inline void INeg4(
const T x[4], T y[4]) {
613inline void INeg5(
const T x[5], T y[5]) {
621inline void INeg6(
const T x[6], T y[6]) {
630inline void INeg7(
const T x[7], T y[7]) {
640inline void INeg8(
const T x[8], T y[8]) {
651inline void INeg9(
const T x[9], T y[9]) {
663inline void INeg10(
const T x[10], T y[10]) {
676inline void INeg11(
const T x[11], T y[11]) {
690inline void INeg12(
const T x[12], T y[12]) {
705inline void INeg13(
const T x[13], T y[13]) {
721inline void INeg14(
const T x[14], T y[14]) {
738inline void INeg15(
const T x[15], T y[15]) {
756inline void INeg16(
const T x[16], T y[16]) {
777inline void INegCol(T *A,
int c,
int m,
int n) {
779 for (
int r = 0; r < m; ++r, ref += n) {
786inline void IAdd(T *x,
int n, T k) {
787 for (
int i = 0; i < n; i++) {
793inline void IAdd(
const T *x,
const T *y,
int n, T *z) {
794 for (
int i = 0; i < n; i++) {
799inline void IAdd1(
const T x[1],
const T y[1], T z[1]) {
803inline void IAdd2(
const T x[2],
const T y[2], T z[2]) {
808inline void IAdd3(
const T x[3],
const T y[3], T z[3]) {
814inline void IAdd4(
const T x[4],
const T y[4], T z[4]) {
821inline void IAdd5(
const T x[5],
const T y[5], T z[5]) {
829inline void IAdd6(
const T x[6],
const T y[6], T z[6]) {
838inline void IAdd7(
const T x[7],
const T y[7], T z[7]) {
848inline void IAdd8(
const T x[8],
const T y[8], T z[8]) {
859inline void IAdd9(
const T x[9],
const T y[9], T z[9]) {
871inline void IAdd10(
const T x[10],
const T y[10], T z[10]) {
884inline void IAdd11(
const T x[11],
const T y[11], T z[11]) {
895 z[10] = x[10] + y[10];
898inline void IAdd12(
const T x[12],
const T y[12], T z[12]) {
909 z[10] = x[10] + y[10];
910 z[11] = x[11] + y[11];
913inline void IAdd13(
const T x[13],
const T y[13], T z[13]) {
924 z[10] = x[10] + y[10];
925 z[11] = x[11] + y[11];
926 z[12] = x[12] + y[12];
929inline void IAdd14(
const T x[14],
const T y[14], T z[14]) {
940 z[10] = x[10] + y[10];
941 z[11] = x[11] + y[11];
942 z[12] = x[12] + y[12];
943 z[13] = x[13] + y[13];
946inline void IAdd15(
const T x[15],
const T y[15], T z[15]) {
957 z[10] = x[10] + y[10];
958 z[11] = x[11] + y[11];
959 z[12] = x[12] + y[12];
960 z[13] = x[13] + y[13];
961 z[14] = x[14] + y[14];
964inline void IAdd16(
const T x[16],
const T y[16], T z[16]) {
975 z[10] = x[10] + y[10];
976 z[11] = x[11] + y[11];
977 z[12] = x[12] + y[12];
978 z[13] = x[13] + y[13];
979 z[14] = x[14] + y[14];
980 z[15] = x[15] + y[15];
983inline void IAdd20(
const T x[20],
const T y[20], T z[20]) {
994 z[10] = x[10] + y[10];
995 z[11] = x[11] + y[11];
996 z[12] = x[12] + y[12];
997 z[13] = x[13] + y[13];
998 z[14] = x[14] + y[14];
999 z[15] = x[15] + y[15];
1000 z[16] = x[16] + y[16];
1001 z[17] = x[17] + y[17];
1002 z[18] = x[18] + y[18];
1003 z[19] = x[19] + y[19];
1007template <
typename T>
1008inline void IAdd(
const T *x, T *y,
int n) {
1009 for (
int i = 0; i < n; i++) {
1013template <
typename T>
1014inline void IAdd1(
const T x[1], T y[1]) {
1017template <
typename T>
1018inline void IAdd2(
const T x[2], T y[2]) {
1022template <
typename T>
1023inline void IAdd3(
const T x[3], T y[3]) {
1028template <
typename T>
1029inline void IAdd4(
const T x[4], T y[4]) {
1035template <
typename T>
1036inline void IAdd5(
const T x[5], T y[5]) {
1043template <
typename T>
1044inline void IAdd6(
const T x[6], T y[6]) {
1052template <
typename T>
1053inline void IAdd7(
const T x[7], T y[7]) {
1062template <
typename T>
1063inline void IAdd8(
const T x[8], T y[8]) {
1073template <
typename T>
1074inline void IAdd9(
const T x[9], T y[9]) {
1085template <
typename T>
1098template <
typename T>
1112template <
typename T>
1127template <
typename T>
1143template <
typename T>
1160template <
typename T>
1178template <
typename T>
1197template <
typename T>
1222template <
typename T>
1224 for (
int i = 0; i < n; i++) {
1228template <
typename T>
1232template <
typename T>
1237template <
typename T>
1243template <
typename T>
1250template <
typename T>
1258template <
typename T>
1267template <
typename T>
1277template <
typename T>
1288template <
typename T>
1302template <
typename T>
1303inline void IAddScaled(
const T *x,
const T *y, T *z,
int n, T k) {
1304 for (
int i = 0; i < n; i++) {
1305 z[i] = x[i] + y[i] * k;
1308template <
typename T>
1310 z[0] = x[0] + y[0] * k;
1312template <
typename T>
1314 z[0] = x[0] + y[0] * k;
1315 z[1] = x[1] + y[1] * k;
1317template <
typename T>
1319 z[0] = x[0] + y[0] * k;
1320 z[1] = x[1] + y[1] * k;
1321 z[2] = x[2] + y[2] * k;
1323template <
typename T>
1325 z[0] = x[0] + y[0] * k;
1326 z[1] = x[1] + y[1] * k;
1327 z[2] = x[2] + y[2] * k;
1328 z[3] = x[3] + y[3] * k;
1330template <
typename T>
1332 z[0] = x[0] + y[0] * k;
1333 z[1] = x[1] + y[1] * k;
1334 z[2] = x[2] + y[2] * k;
1335 z[3] = x[3] + y[3] * k;
1336 z[4] = x[4] + y[4] * k;
1338template <
typename T>
1340 z[0] = x[0] + y[0] * k;
1341 z[1] = x[1] + y[1] * k;
1342 z[2] = x[2] + y[2] * k;
1343 z[3] = x[3] + y[3] * k;
1344 z[4] = x[4] + y[4] * k;
1345 z[5] = x[5] + y[5] * k;
1347template <
typename T>
1349 z[0] = x[0] + y[0] * k;
1350 z[1] = x[1] + y[1] * k;
1351 z[2] = x[2] + y[2] * k;
1352 z[3] = x[3] + y[3] * k;
1353 z[4] = x[4] + y[4] * k;
1354 z[5] = x[5] + y[5] * k;
1355 z[6] = x[6] + y[6] * k;
1357template <
typename T>
1359 z[0] = x[0] + y[0] * k;
1360 z[1] = x[1] + y[1] * k;
1361 z[2] = x[2] + y[2] * k;
1362 z[3] = x[3] + y[3] * k;
1363 z[4] = x[4] + y[4] * k;
1364 z[5] = x[5] + y[5] * k;
1365 z[6] = x[6] + y[6] * k;
1366 z[7] = x[7] + y[7] * k;
1368template <
typename T>
1370 z[0] = x[0] + y[0] * k;
1371 z[1] = x[1] + y[1] * k;
1372 z[2] = x[2] + y[2] * k;
1373 z[3] = x[3] + y[3] * k;
1374 z[4] = x[4] + y[4] * k;
1375 z[5] = x[5] + y[5] * k;
1376 z[6] = x[6] + y[6] * k;
1377 z[7] = x[7] + y[7] * k;
1378 z[8] = x[8] + y[8] * k;
1382template <
typename T>
1383inline void ISub(T *x,
int n, T k) {
1384 for (
int i = 0; i < n; i++) {
1389template <
typename T>
1390inline void ISub(
const T *x,
const T *y,
int n, T *z) {
1391 for (
int i = 0; i < n; i++) {
1395template <
typename T>
1396inline void ISub1(
const T x[1],
const T y[1], T z[1]) {
1399template <
typename T>
1400inline void ISub2(
const T x[2],
const T y[2], T z[2]) {
1404template <
typename T>
1405inline void ISub3(
const T x[3],
const T y[3], T z[3]) {
1410template <
typename T>
1411inline void ISub4(
const T x[4],
const T y[4], T z[4]) {
1417template <
typename T>
1418inline void ISub5(
const T x[5],
const T y[5], T z[5]) {
1425template <
typename T>
1426inline void ISub6(
const T x[6],
const T y[6], T z[6]) {
1434template <
typename T>
1435inline void ISub7(
const T x[7],
const T y[7], T z[7]) {
1444template <
typename T>
1445inline void ISub8(
const T x[8],
const T y[8], T z[8]) {
1455template <
typename T>
1456inline void ISub9(
const T x[9],
const T y[9], T z[9]) {
1467template <
typename T>
1468inline void ISub10(
const T x[10],
const T y[10], T z[10]) {
1480template <
typename T>
1481inline void ISub11(
const T x[11],
const T y[11], T z[11]) {
1492 z[10] = x[10] - y[10];
1494template <
typename T>
1495inline void ISub12(
const T x[12],
const T y[12], T z[12]) {
1506 z[10] = x[10] - y[10];
1507 z[11] = x[11] - y[11];
1509template <
typename T>
1510inline void ISub13(
const T x[13],
const T y[13], T z[13]) {
1521 z[10] = x[10] - y[10];
1522 z[11] = x[11] - y[11];
1523 z[12] = x[12] - y[12];
1525template <
typename T>
1526inline void ISub14(
const T x[14],
const T y[14], T z[14]) {
1537 z[10] = x[10] - y[10];
1538 z[11] = x[11] - y[11];
1539 z[12] = x[12] - y[12];
1540 z[13] = x[13] - y[13];
1542template <
typename T>
1543inline void ISub15(
const T x[15],
const T y[15], T z[15]) {
1554 z[10] = x[10] - y[10];
1555 z[11] = x[11] - y[11];
1556 z[12] = x[12] - y[12];
1557 z[13] = x[13] - y[13];
1558 z[14] = x[14] - y[14];
1560template <
typename T>
1561inline void ISub16(
const T x[16],
const T y[16], T z[16]) {
1572 z[10] = x[10] - y[10];
1573 z[11] = x[11] - y[11];
1574 z[12] = x[12] - y[12];
1575 z[13] = x[13] - y[13];
1576 z[14] = x[14] - y[14];
1577 z[15] = x[15] - y[15];
1581template <
typename T>
1582inline void ISub(
const T *x, T *y,
int n) {
1583 for (
int i = 0; i < n; i++) {
1587template <
typename T>
1588inline void ISub1(
const T x[1], T y[1]) {
1591template <
typename T>
1592inline void ISub2(
const T x[2], T y[2]) {
1596template <
typename T>
1597inline void ISub3(
const T x[3], T y[3]) {
1602template <
typename T>
1603inline void ISub4(
const T x[4], T y[4]) {
1609template <
typename T>
1610inline void ISub5(
const T x[5], T y[5]) {
1617template <
typename T>
1618inline void ISub6(
const T x[6], T y[6]) {
1626template <
typename T>
1627inline void ISub7(
const T x[7], T y[7]) {
1636template <
typename T>
1637inline void ISub8(
const T x[8], T y[8]) {
1647template <
typename T>
1648inline void ISub9(
const T x[9], T y[9]) {
1659template <
typename T>
1672template <
typename T>
1686template <
typename T>
1701template <
typename T>
1717template <
typename T>
1734template <
typename T>
1752template <
typename T>
1772template <
typename T>
1774 for (
int i = 0; i < n; i++) {
1778template <
typename T>
1782template <
typename T>
1787template <
typename T>
1793template <
typename T>
1800template <
typename T>
1808template <
typename T>
1817template <
typename T>
1827template <
typename T>
1838template <
typename T>
1852template <
typename T>
1854 for (
int i = 0; i < n; i++) {
1858template <
typename T>
1862template <
typename T>
1867template <
typename T>
1873template <
typename T>
1880template <
typename T>
1888template <
typename T>
1897template <
typename T>
1907template <
typename T>
1918template <
typename T>
1930template <
typename T>
1943template <
typename T>
1957template <
typename T>
1972template <
typename T>
1988template <
typename T>
2005template <
typename T>
2023template <
typename T>
2044template <
typename T>
2045inline void IScale(
const T *x, T *y,
int n, T sf) {
2046 for (
int i = 0; i < n; i++) {
2050template <
typename T>
2054template <
typename T>
2059template <
typename T>
2065template <
typename T>
2072template <
typename T>
2080template <
typename T>
2089template <
typename T>
2099template <
typename T>
2110template <
typename T>
2122template <
typename T>
2135template <
typename T>
2149template <
typename T>
2164template <
typename T>
2180template <
typename T>
2197template <
typename T>
2215template <
typename T>
2243template <
typename T>
2244inline T
IDot(
const T *x,
const T *y,
int n) {
2245 T acc =
static_cast<T
>(0.0);
2246 for (
int i = 0; i < n; ++i) {
2251template <
typename T>
2252inline T
IDot1(
const T x[1],
const T y[1]) {
2253 return (x[0] * y[0]);
2255template <
typename T>
2256inline T
IDot2(
const T x[2],
const T y[2]) {
2257 return (x[0] * y[0] + x[1] * y[1]);
2259template <
typename T>
2260inline T
IDot3(
const T x[3],
const T y[3]) {
2261 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2]);
2263template <
typename T>
2264inline T
IDot4(
const T x[4],
const T y[4]) {
2265 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3]);
2267template <
typename T>
2268inline T
IDot5(
const T x[5],
const T y[5]) {
2269 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4]);
2271template <
typename T>
2272inline T
IDot6(
const T x[6],
const T y[6]) {
2273 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2276template <
typename T>
2277inline T
IDot7(
const T x[7],
const T y[7]) {
2278 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2279 x[5] * y[5] + x[6] * y[6]);
2281template <
typename T>
2282inline T
IDot8(
const T x[8],
const T y[8]) {
2283 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2284 x[5] * y[5] + x[6] * y[6] + x[7] * y[7]);
2286template <
typename T>
2287inline T
IDot9(
const T x[9],
const T y[9]) {
2288 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2289 x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8]);
2291template <
typename T>
2292inline T
IDot10(
const T x[10],
const T y[10]) {
2293 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2294 x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9]);
2296template <
typename T>
2297inline T
IDot11(
const T x[11],
const T y[11]) {
2298 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2299 x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2302template <
typename T>
2303inline T
IDot12(
const T x[12],
const T y[12]) {
2304 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2305 x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2306 x[10] * y[10] + x[11] * y[11]);
2308template <
typename T>
2309inline T
IDot13(
const T x[13],
const T y[13]) {
2310 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2311 x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2312 x[10] * y[10] + x[11] * y[11] + x[12] * y[12]);
2314template <
typename T>
2315inline T
IDot14(
const T x[14],
const T y[14]) {
2316 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2317 x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2318 x[10] * y[10] + x[11] * y[11] + x[12] * y[12] + x[13] * y[13]);
2320template <
typename T>
2321inline T
IDot15(
const T x[15],
const T y[15]) {
2322 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2323 x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2324 x[10] * y[10] + x[11] * y[11] + x[12] * y[12] + x[13] * y[13] +
2327template <
typename T>
2328inline T
IDot16(
const T x[16],
const T y[16]) {
2329 return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2330 x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2331 x[10] * y[10] + x[11] * y[11] + x[12] * y[12] + x[13] * y[13] +
2332 x[14] * y[14] + x[15] * y[15]);
2336inline int ISumU(
const unsigned char *x,
int n) {
2338 for (
int i = 0; i < n; ++i) {
2339 acc +=
static_cast<int>(x[i]);
2343template <
typename T>
2345 T acc =
static_cast<T
>(0.0);
2346 for (
int i = 0; i < n; ++i) {
2351template <
typename T>
2355template <
typename T>
2357 return (x[0] + x[1]);
2359template <
typename T>
2361 return (x[0] + x[1] + x[2]);
2363template <
typename T>
2365 return (x[0] + x[1] + x[2] + x[3]);
2367template <
typename T>
2369 return (x[0] + x[1] + x[2] + x[3] + x[4]);
2371template <
typename T>
2373 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5]);
2375template <
typename T>
2377 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6]);
2379template <
typename T>
2381 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7]);
2383template <
typename T>
2385 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8]);
2387template <
typename T>
2389 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9]);
2391template <
typename T>
2393 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2396template <
typename T>
2398 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2401template <
typename T>
2403 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2404 x[10] + x[11] + x[12]);
2406template <
typename T>
2408 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2409 x[10] + x[11] + x[12] + x[13]);
2411template <
typename T>
2413 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2414 x[10] + x[11] + x[12] + x[13] + x[14]);
2416template <
typename T>
2418 return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2419 x[10] + x[11] + x[12] + x[13] + x[14] + x[15]);
2422template <
typename T>
2424 T acc =
static_cast<T
>(0.0);
2425 for (
int i = 0; i < n; ++i) {
2432inline int IMeanU(
const unsigned char *x,
int n) {
return ISumU(x, n) / n; }
2433template <
typename T>
2435 return ISum(x, n) / n;
2437template <
typename T>
2439 return ISum2(x) / 2;
2441template <
typename T>
2443 return ISum3(x) / 3;
2445template <
typename T>
2447 return ISum4(x) / 4;
2449template <
typename T>
2451 return ISum5(x) / 5;
2453template <
typename T>
2455 return ISum6(x) / 6;
2457template <
typename T>
2459 return ISum7(x) / 7;
2461template <
typename T>
2463 return ISum8(x) / 8;
2465template <
typename T>
2467 return ISum9(x) / 9;
2469template <
typename T>
2473template <
typename T>
2477template <
typename T>
2481template <
typename T>
2485template <
typename T>
2489template <
typename T>
2493template <
typename T>
2499template <
typename T>
2500inline T
ISdv(
const T *x, T mean,
int n) {
2502 return static_cast<T
>(0.0);
2504 T sdv =
static_cast<T
>(0.0);
2505 for (
int i = 0; i < n; ++i) {
2506 sdv +=
ISqr(x[i] - mean);
2514 for (
int i = 0; i < n; i++) {
2519template <
typename T>
2521 T acc =
static_cast<T
>(0.0);
2522 for (
int i = 0; i < n; ++i) {
2527template <
typename T>
2529 return (
ISqr(x[0]));
2531template <
typename T>
2535template <
typename T>
2539template <
typename T>
2543template <
typename T>
2547template <
typename T>
2552template <
typename T>
2557template <
typename T>
2562template <
typename T>
2567template <
typename T>
2572template <
typename T>
2578template <
typename T>
2584template <
typename T>
2590template <
typename T>
2596template <
typename T>
2602template <
typename T>
2615 for (
int i = 0; i < n; i++) {
2616 acc +=
ISqr(
static_cast<int>(x[i]) -
static_cast<int>(y[i]));
2620template <
typename T>
2622 T acc =
static_cast<T
>(0.0);
2623 for (
int i = 0; i < n; i++) {
2624 acc +=
ISqr(x[i] - y[i]);
2628template <
typename T>
2630 return (
ISqr(x[0] - y[0]));
2632template <
typename T>
2634 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]));
2636template <
typename T>
2638 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]));
2640template <
typename T>
2642 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2645template <
typename T>
2647 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2648 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]));
2650template <
typename T>
2652 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2653 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]));
2655template <
typename T>
2657 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2658 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2661template <
typename T>
2663 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2664 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2665 ISqr(x[6] - y[6]) +
ISqr(x[7] - y[7]));
2667template <
typename T>
2669 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2670 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2671 ISqr(x[6] - y[6]) +
ISqr(x[7] - y[7]) +
ISqr(x[8] - y[8]));
2673template <
typename T>
2675 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2676 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2677 ISqr(x[6] - y[6]) +
ISqr(x[7] - y[7]) +
ISqr(x[8] - y[8]) +
2680template <
typename T>
2682 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2683 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2684 ISqr(x[6] - y[6]) +
ISqr(x[7] - y[7]) +
ISqr(x[8] - y[8]) +
2685 ISqr(x[9] - y[9]) +
ISqr(x[10] - y[10]));
2687template <
typename T>
2689 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2690 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2691 ISqr(x[6] - y[6]) +
ISqr(x[7] - y[7]) +
ISqr(x[8] - y[8]) +
2692 ISqr(x[9] - y[9]) +
ISqr(x[10] - y[10]) +
ISqr(x[11] - y[11]));
2694template <
typename T>
2696 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2697 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2698 ISqr(x[6] - y[6]) +
ISqr(x[7] - y[7]) +
ISqr(x[8] - y[8]) +
2699 ISqr(x[9] - y[9]) +
ISqr(x[10] - y[10]) +
ISqr(x[11] - y[11]) +
2700 ISqr(x[12] - y[12]));
2702template <
typename T>
2704 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2705 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2706 ISqr(x[6] - y[6]) +
ISqr(x[7] - y[7]) +
ISqr(x[8] - y[8]) +
2707 ISqr(x[9] - y[9]) +
ISqr(x[10] - y[10]) +
ISqr(x[11] - y[11]) +
2708 ISqr(x[12] - y[12]) +
ISqr(x[13] - y[13]));
2710template <
typename T>
2712 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2713 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2714 ISqr(x[6] - y[6]) +
ISqr(x[7] - y[7]) +
ISqr(x[8] - y[8]) +
2715 ISqr(x[9] - y[9]) +
ISqr(x[10] - y[10]) +
ISqr(x[11] - y[11]) +
2716 ISqr(x[12] - y[12]) +
ISqr(x[13] - y[13]) +
ISqr(x[14] - y[14]));
2718template <
typename T>
2720 return (
ISqr(x[0] - y[0]) +
ISqr(x[1] - y[1]) +
ISqr(x[2] - y[2]) +
2721 ISqr(x[3] - y[3]) +
ISqr(x[4] - y[4]) +
ISqr(x[5] - y[5]) +
2722 ISqr(x[6] - y[6]) +
ISqr(x[7] - y[7]) +
ISqr(x[8] - y[8]) +
2723 ISqr(x[9] - y[9]) +
ISqr(x[10] - y[10]) +
ISqr(x[11] - y[11]) +
2724 ISqr(x[12] - y[12]) +
ISqr(x[13] - y[13]) +
ISqr(x[14] - y[14]) +
2725 ISqr(x[15] - y[15]));
2731inline unsigned int IHammingDiff(
const unsigned int *x,
const unsigned int *y,
2733 unsigned int distance = 0;
2734 for (
int i = 0; i < n; ++i) {
2740 const unsigned int y[2]) {
2741 unsigned int distance = 0;
2747 const unsigned int y[4]) {
2748 unsigned int distance = 0;
2756 const unsigned int y[8]) {
2757 unsigned int distance = 0;
2769 const unsigned int y[16]) {
2770 unsigned int distance = 0;
2791inline double IL2Norm(
const unsigned char *x,
int n) {
2803template <
typename T>
2807template <
typename T>
2811template <
typename T>
2815template <
typename T>
2819template <
typename T>
2823template <
typename T>
2827template <
typename T>
2831template <
typename T>
2835template <
typename T>
2839template <
typename T>
2843template <
typename T>
2847template <
typename T>
2851template <
typename T>
2855template <
typename T>
2859template <
typename T>
2863template <
typename T>
2870template <
typename T>
2875 return (absa *
ISqrt(
static_cast<T
>(1.0) +
ISqr(
IDiv(absb, absa))));
2877 return (absb ==
static_cast<T
>(0.0)
2878 ?
static_cast<T
>(0.0)
2879 : absb *
ISqrt(
static_cast<T
>(1.0) +
ISqr(
IDiv(absa, absb))));
2885template <
typename T>
2887 return IL2NormAdv<T>(x[0], x[1]);
2891template <
typename T>
2893 T infinity_norm =
IAbsSum(A, n);
2896 for (i = 1; i < m; ++i, ni += n) {
2898 if (infinity_norm < tmp) {
2899 infinity_norm = tmp;
2902 return infinity_norm;
2914 IScale(x, n, 1.0 / norm);
2922 IScale(x, n,
static_cast<float>(1.0) / norm);
2927inline void IUnitize(
const double *x,
double *y,
int n) {
2938 IScale(x, y, n, 1.0 / norm);
2946 IScale(x, y, n,
static_cast<float>(1.0) / norm);
2950template <
typename T>
2954template <
typename T>
2958template <
typename T>
2962template <
typename T>
2966template <
typename T>
2970template <
typename T>
2974template <
typename T>
2978template <
typename T>
2982template <
typename T>
2986template <
typename T>
2990template <
typename T>
2994template <
typename T>
2998template <
typename T>
3002template <
typename T>
3006template <
typename T>
3010template <
typename T>
3014template <
typename T>
3018template <
typename T>
3022template <
typename T>
3026template <
typename T>
3030template <
typename T>
3034template <
typename T>
3038template <
typename T>
3042template <
typename T>
3046template <
typename T>
3050template <
typename T>
3054template <
typename T>
3058template <
typename T>
3062template <
typename T>
3066template <
typename T>
3071template <
typename T>
3075template <
typename T>
3079template <
typename T>
3084template <
typename T>
3088template <
typename T>
3092template <
typename T>
3096template <
typename T>
3100template <
typename T>
3105template <
typename T>
3109template <
typename T>
3113template <
typename T>
3117template <
typename T>
3121template <
typename T>
3127inline void ICentroid3(
const double *a,
int n,
double centroid[3]) {
3130 for (
int i = 0; i < length; i += 3) {
3131 IAdd3(a + i, centroid);
3135inline void ICentroid3(
const float *a,
int n,
float centroid[3]) {
3138 for (
int i = 0; i < length; i += 3) {
3139 IAdd3(a + i, centroid);
3144inline void ICentroid2(
const double *a,
int n,
double centroid[2]) {
3147 for (
int i = 0; i < length; i += 2) {
3148 IAdd2(a + i, centroid);
3152inline void ICentroid2(
const float *a,
int n,
float centroid[2]) {
3155 for (
int i = 0; i < length; i += 2) {
3156 IAdd2(a + i, centroid);
3163inline void ICentroid3(
const double *a,
int n,
double centroid[3],
3164 double *distances) {
3168 for (i = 0; i < length; i += 3) {
3169 IAdd3(a + i, centroid);
3172 for (i = 0, j = 0; i < n; ++i, j += 3) {
3181 for (i = 0; i < length; i += 3) {
3182 IAdd3(a + i, centroid);
3185 for (i = 0, j = 0; i < n; ++i, j += 3) {
3191inline void ICentroid2(
const double *a,
int n,
double centroid[2],
3192 double *distances) {
3196 for (i = 0; i < length; i += 2) {
3197 IAdd2(a + i, centroid);
3200 for (i = 0, j = 0; i < n; ++i, j += 2) {
3209 for (i = 0; i < length; i += 2) {
3210 IAdd2(a + i, centroid);
3213 for (i = 0, j = 0; i < n; ++i, j += 2) {
3219template <
typename T>
3223 return (
static_cast<T
>(0.0));
3226 for (
int i = 1; i < n; i++) {
3235template <
typename T>
3239 return (
static_cast<T
>(0.0));
3242 for (
int i = 1; i < n; i++) {
3288template <
typename T>
3292 return (
static_cast<T
>(0.0));
3296 for (i = 1; i < n; i++, ni += n) {
3314 for (
int i = 1; i < n; i++)
3315 if ((t = a[i]) > b) {
3329 for (
int i = 1; i < n; i++)
3330 if ((t = a[i]) > b) {
3344 for (
int i = 1; i < n; i++)
3345 if ((t = a[i]) > b) {
3361 for (
int i = 1; i < n; i++)
3362 if ((t =
IAbs(a[i])) > b) {
3376 for (
int i = 1; i < n; i++)
3377 if ((t =
IAbs(a[i])) > b) {
3391 for (
int i = 1; i < n; i++)
3392 if ((t =
IAbs(a[i])) > b) {
3407 for (
int i = 1; i < n; i++)
3408 if ((t =
IAbs(a[i])) < b) {
3422 for (
int i = 1; i < n; i++)
3423 if ((t =
IAbs(a[i])) < b) {
3437 for (
int i = 1; i < n; i++)
3438 if ((t =
IAbs(a[i])) < b) {
3452 for (
int i = i1 + 1; i < i2; i++) {
3453 if ((t =
IAbs(a[i])) > b) {
3465 for (
int i = i1 + 1; i < i2; i++) {
3466 if ((t =
IAbs(a[i])) > b) {
3478 for (
int i = i1 + 1; i < i2; i++) {
3479 if ((t =
IAbs(a[i])) > b) {
3493 for (
int i = i1 + 1; i < i2; i++) {
3494 if ((t =
IAbs(a[i])) < b) {
3506 for (
int i = i1 + 1; i < i2; i++) {
3507 if ((t =
IAbs(a[i])) < b) {
3519 for (
int i = i1 + 1; i < i2; i++) {
3520 if ((t =
IAbs(a[i])) < b) {
3533 const double *ref = a + n * i1;
3537 for (
int i = i1 + 1; i < i2; ++i, ref += n) {
3538 if ((t =
IAbs(*ref)) > b) {
3548 const float *ref = a + i1 * n;
3551 for (
int i = i1 + 1; i < i2; ++i, ref += n) {
3552 if ((t =
IAbs(*ref)) > b) {
3561 const int *ref = a + i1 * n;
3564 for (
int i = i1; i < i2; ++i, ref += n) {
3565 if ((t =
IAbs(*ref)) > b) {
3578 const double *ref = a + n * i1;
3581 for (
int i = i1 + 1; i < i2; ++i, ref += n) {
3582 if ((t =
IAbs(*ref)) < b) {
3592 const float *ref = a + i1 * n;
3595 for (
int i = i1 + 1; i < i2; ++i, ref += n) {
3596 if ((t =
IAbs(*ref)) < b) {
3605 const int *ref = a + i1 * n;
3608 for (
int i = i1; i < i2; ++i, ref += n) {
3609 if ((t =
IAbs(*ref)) < b) {
3620template <
typename T>
3623 T largest_val, temp;
3624 largest_val =
IAbs(A[i * n + i]);
3626 for (j = i + 1; j < n; j++) {
3627 temp =
IAbs(A[j * n + i]);
3628 if (temp > largest_val) {
3637template <
typename T>
3641 *min_val = *max_val =
static_cast<T
>(0.0);
3644 *min_val = *max_val = a[0];
3645 for (
int i = 1; i < n; i++) {
3647 if (temp > *max_val) {
3650 if (temp < *min_val) {
3690template <
typename T>
3692 for (
int i = 0; i < n; ++i) {
3695 y[n] =
static_cast<T
>(1.0);
3697template <
typename T>
3700 y[1] =
static_cast<T
>(1.0);
3702template <
typename T>
3706 y[2] =
static_cast<T
>(1.0);
3708template <
typename T>
3713 y[3] =
static_cast<T
>(1.0);
3717template <
typename T>
3718inline void ICross(
const T x[3],
const T y[3], T xxy[3]) {
3719 xxy[0] = x[1] * y[2] - x[2] * y[1];
3720 xxy[1] = x[2] * y[0] - x[0] * y[2];
3721 xxy[2] = x[0] * y[1] - x[1] * y[0];
3726template <
typename T>
3728 e_x[0] =
static_cast<T
>(0.0);
3732 e_x[4] =
static_cast<T
>(0.0);
3736 e_x[8] =
static_cast<T
>(0.0);
3741template <
typename T>
3743 T x0_sqr =
ISqr(x[0]);
3744 T x1_sqr =
ISqr(x[1]);
3745 T x2_sqr =
ISqr(x[2]);
3746 e_x2[0] = -(x1_sqr + x2_sqr);
3747 e_x2[1] = e_x2[3] = x[0] * x[1];
3748 e_x2[4] = -(x2_sqr + x0_sqr);
3749 e_x2[2] = e_x2[6] = x[2] * x[0];
3750 e_x2[8] = -(x0_sqr + x1_sqr);
3751 e_x2[5] = e_x2[7] = x[1] * x[2];
3755template <
typename T>
3759 for (
int i = 0; i < n; ++i, in += n) {
3760 A[in + i] =
static_cast<T
>(1.0);
3763template <
typename T>
3765 A[0] = A[3] =
static_cast<T
>(1.0);
3766 A[1] = A[2] =
static_cast<T
>(0.0);
3768template <
typename T>
3770 A[0] = A[4] = A[8] =
static_cast<T
>(1.0);
3771 A[1] = A[2] = A[3] = A[5] = A[6] = A[7] =
static_cast<T
>(0.0);
3773template <
typename T>
3775 A[0] = A[5] = A[10] = A[15] =
static_cast<T
>(1.0);
3776 A[1] = A[2] = A[3] = A[4] = A[6] = A[7] = A[8] = A[9] = A[11] = A[12] =
3777 A[13] = A[14] =
static_cast<T
>(0.0);
3781template <
typename T>
3785 A[2] =
static_cast<T
>(0.0);
3790template <
typename T>
3795 A[3] =
static_cast<T
>(0.0);
3798 A[6] =
static_cast<T
>(0.0);
3799 A[7] =
static_cast<T
>(0.0);
3804inline double ITrace2x2(
const double A[4]) {
return (A[0] + A[3]); }
3805inline float ITrace2x2(
const float A[4]) {
return (A[0] + A[3]); }
3807inline double ITrace3x3(
const double A[9]) {
return (A[0] + A[4] + A[8]); }
3808inline float ITrace3x3(
const float A[9]) {
return (A[0] + A[4] + A[8]); }
3812 return (A[0] * A[3] - A[1] * A[2]);
3815 return (A[0] * A[3] - A[1] * A[2]);
3818 return (A[0] * A[3] - A[1] * A[2]);
3824 ICross(A, A + 3, r0_x_r1);
3825 return (
IDot3(r0_x_r1, A + 6));
3829 ICross(A, A + 3, r0_x_r1);
3830 return (
IDot3(r0_x_r1, A + 6));
3834 ICross(A, A + 3, r0_x_r1);
3835 return (
IDot3(r0_x_r1, A + 6));
3842 sd[0] = x[0] * y[1] - x[1] * y[0];
3843 sd[1] = x[0] * y[2] - x[2] * y[0];
3844 sd[2] = x[0] * y[3] - x[3] * y[0];
3845 sd[3] = x[1] * y[2] - x[2] * y[1];
3846 sd[4] = x[1] * y[3] - x[3] * y[1];
3847 sd[5] = x[2] * y[3] - x[3] * y[2];
3851 sd[0] = x[0] * y[1] - x[1] * y[0];
3852 sd[1] = x[0] * y[2] - x[2] * y[0];
3853 sd[2] = x[0] * y[3] - x[3] * y[0];
3854 sd[3] = x[1] * y[2] - x[2] * y[1];
3855 sd[4] = x[1] * y[3] - x[3] * y[1];
3856 sd[5] = x[2] * y[3] - x[3] * y[2];
3859 sd[0] = x[0] * y[1] - x[1] * y[0];
3860 sd[1] = x[0] * y[2] - x[2] * y[0];
3861 sd[2] = x[0] * y[3] - x[3] * y[0];
3862 sd[3] = x[1] * y[2] - x[2] * y[1];
3863 sd[4] = x[1] * y[3] - x[3] * y[1];
3864 sd[5] = x[2] * y[3] - x[3] * y[2];
3870 const double z[4],
double sd[4]) {
3873 sd[0] = z[1] * ssd[5] - z[2] * ssd[4] + z[3] * ssd[3];
3874 sd[1] = z[0] * ssd[5] - z[2] * ssd[2] + z[3] * ssd[1];
3875 sd[2] = z[0] * ssd[4] - z[1] * ssd[2] + z[3] * ssd[0];
3876 sd[3] = z[0] * ssd[3] - z[1] * ssd[1] + z[2] * ssd[0];
3879 const float z[4],
float sd[4]) {
3882 sd[0] = z[1] * ssd[5] - z[2] * ssd[4] + z[3] * ssd[3];
3883 sd[1] = z[0] * ssd[5] - z[2] * ssd[2] + z[3] * ssd[1];
3884 sd[2] = z[0] * ssd[4] - z[1] * ssd[2] + z[3] * ssd[0];
3885 sd[3] = z[0] * ssd[3] - z[1] * ssd[1] + z[2] * ssd[0];
3891 sd[0] = z[1] * ssd[5] - z[2] * ssd[4] + z[3] * ssd[3];
3892 sd[1] = z[0] * ssd[5] - z[2] * ssd[2] + z[3] * ssd[1];
3893 sd[2] = z[0] * ssd[4] - z[1] * ssd[2] + z[3] * ssd[0];
3894 sd[3] = z[0] * ssd[3] - z[1] * ssd[1] + z[2] * ssd[0];
3901 return -(A[12] * sd[0]) + (A[13] * sd[1]) - (A[14] * sd[2]) + (A[15] * sd[3]);
3906 return -(A[12] * sd[0]) + (A[13] * sd[1]) - (A[14] * sd[2]) + (A[15] * sd[3]);
3911 return -(A[12] * sd[0]) + (A[13] * sd[1]) - (A[14] * sd[2]) + (A[15] * sd[3]);
3918template <
typename T>
3919inline void ICross(
const T x[4],
const T y[4],
const T z[4], T xxyxz[4]) {
3921 xxyxz[0] = -xxyxz[0];
3922 xxyxz[2] = -xxyxz[2];
3928 double sf =
IRec(d);
3929 Ai[0] = sf * (A[3]);
3930 Ai[1] = sf * (-A[1]);
3931 Ai[2] = sf * (-A[2]);
3932 Ai[3] = sf * (A[0]);
3938 Ai[0] = sf * (A[3]);
3939 Ai[1] = sf * (-A[1]);
3940 Ai[2] = sf * (-A[2]);
3941 Ai[3] = sf * (A[0]);
3946 double sf =
IRec(d);
3947 Ai[0] = sf * (A[3]);
3948 Ai[1] = sf * (-A[1]);
3949 Ai[2] = sf * (-A[2]);
3950 Ai[3] = sf * (A[0]);
3990 double sd0 = A[4] * A[8] - A[5] * A[7];
3991 double sd1 = A[5] * A[6] - A[3] * A[8];
3992 double sd2 = A[3] * A[7] - A[4] * A[6];
3993 double sd3 = A[2] * A[7] - A[1] * A[8];
3994 double sd4 = A[0] * A[8] - A[2] * A[6];
3995 double sd5 = A[1] * A[6] - A[0] * A[7];
3996 double sd6 = A[1] * A[5] - A[2] * A[4];
3997 double sd7 = A[2] * A[3] - A[0] * A[5];
3998 double sd8 = A[0] * A[4] - A[1] * A[3];
3999 double d = A[0] * sd0 + A[1] * sd1 + A[2] * sd2;
4000 double d_rec =
IRec(d);
4001 Ai[0] = d_rec * sd0;
4002 Ai[1] = d_rec * sd3;
4003 Ai[2] = d_rec * sd6;
4004 Ai[3] = d_rec * sd1;
4005 Ai[4] = d_rec * sd4;
4006 Ai[5] = d_rec * sd7;
4007 Ai[6] = d_rec * sd2;
4008 Ai[7] = d_rec * sd5;
4009 Ai[8] = d_rec * sd8;
4012 float sd0 = A[4] * A[8] - A[5] * A[7];
4013 float sd1 = A[5] * A[6] - A[3] * A[8];
4014 float sd2 = A[3] * A[7] - A[4] * A[6];
4015 float sd3 = A[2] * A[7] - A[1] * A[8];
4016 float sd4 = A[0] * A[8] - A[2] * A[6];
4017 float sd5 = A[1] * A[6] - A[0] * A[7];
4018 float sd6 = A[1] * A[5] - A[2] * A[4];
4019 float sd7 = A[2] * A[3] - A[0] * A[5];
4020 float sd8 = A[0] * A[4] - A[1] * A[3];
4021 float d = A[0] * sd0 + A[1] * sd1 + A[2] * sd2;
4022 float d_rec =
IRec(d);
4023 Ai[0] = d_rec * sd0;
4024 Ai[1] = d_rec * sd3;
4025 Ai[2] = d_rec * sd6;
4026 Ai[3] = d_rec * sd1;
4027 Ai[4] = d_rec * sd4;
4028 Ai[5] = d_rec * sd7;
4029 Ai[6] = d_rec * sd2;
4030 Ai[7] = d_rec * sd5;
4031 Ai[8] = d_rec * sd8;
4035 int sd0 = A[4] * A[8] - A[5] * A[7];
4036 int sd1 = A[5] * A[6] - A[3] * A[8];
4037 int sd2 = A[3] * A[7] - A[4] * A[6];
4038 int sd3 = A[2] * A[7] - A[1] * A[8];
4039 int sd4 = A[0] * A[8] - A[2] * A[6];
4040 int sd5 = A[1] * A[6] - A[0] * A[7];
4041 int sd6 = A[1] * A[5] - A[2] * A[4];
4042 int sd7 = A[2] * A[3] - A[0] * A[5];
4043 int sd8 = A[0] * A[4] - A[1] * A[3];
4044 int d = A[0] * sd0 + A[1] * sd1 + A[2] * sd2;
4045 double d_rec =
IRec(d);
4046 Ai[0] = d_rec * sd0;
4047 Ai[1] = d_rec * sd3;
4048 Ai[2] = d_rec * sd6;
4049 Ai[3] = d_rec * sd1;
4050 Ai[4] = d_rec * sd4;
4051 Ai[5] = d_rec * sd7;
4052 Ai[6] = d_rec * sd2;
4053 Ai[7] = d_rec * sd5;
4054 Ai[8] = d_rec * sd8;
4135 double A4A8 = A[4] * A[8];
4136 double A0rec =
IRec(A[0]);
4138 Ai[1] = -
IDiv(A[1], A[0] * A[4]);
4139 Ai[2] = (
IDiv(A[1] * A[5], A4A8) -
IDiv(A[2], A[8])) * A0rec;
4142 Ai[5] = -
IDiv(A[5], A4A8);
4148 float A4A8 = A[4] * A[8];
4149 float A0rec =
IRec(A[0]);
4151 Ai[1] = -
IDiv(A[1], A[0] * A[4]);
4152 Ai[2] = (
IDiv(A[1] * A[5], A4A8) -
IDiv(A[2], A[8])) * A0rec;
4155 Ai[5] = -
IDiv(A[5], A4A8);
4161 double A4A8 =
static_cast<double>(A[4] * A[8]);
4162 double A0rec =
IRec(A[0]);
4164 Ai[1] = -
IDiv(
static_cast<double>(A[1]),
static_cast<double>(A[0] * A[4]));
4166 (
IDiv(
static_cast<double>(A[1] * A[5]), A4A8) -
IDiv(A[2], A[8])) * A0rec;
4169 Ai[5] = -
IDiv(
static_cast<double>(A[5]), A4A8);
4176inline void ISolve2x2(
const double A[4],
const double b[2],
double x[2]) {
4180 x[0] = rec * (A[3] * b[0] - A[1] * b[1]);
4181 x[1] = rec * (A[0] * b[1] - A[2] * b[0]);
4183inline void ISolve2x2(
const float A[4],
const float b[2],
float x[2]) {
4187 x[0] = rec * (A[3] * b[0] - A[1] * b[1]);
4188 x[1] = rec * (A[0] * b[1] - A[2] * b[0]);
4191inline void ISolve3x3(
const double A[9],
const double b[3],
double x[3]) {
4192 double d, rec, da0, da1, da2;
4195 da0 = b[0] * (A[4] * A[8] - A[5] * A[7]) +
4196 b[1] * (A[2] * A[7] - A[1] * A[8]) + b[2] * (A[1] * A[5] - A[2] * A[4]);
4197 da1 = b[0] * (A[5] * A[6] - A[3] * A[8]) +
4198 b[1] * (A[0] * A[8] - A[2] * A[6]) + b[2] * (A[2] * A[3] - A[0] * A[5]);
4199 da2 = b[0] * (A[3] * A[7] - A[4] * A[6]) +
4200 b[1] * (A[1] * A[6] - A[0] * A[7]) + b[2] * (A[0] * A[4] - A[1] * A[3]);
4205inline void ISolve3x3(
const float A[9],
const float b[3],
float x[3]) {
4206 float d, rec, da0, da1, da2;
4209 da0 = b[0] * (A[4] * A[8] - A[5] * A[7]) +
4210 b[1] * (A[2] * A[7] - A[1] * A[8]) + b[2] * (A[1] * A[5] - A[2] * A[4]);
4211 da1 = b[0] * (A[5] * A[6] - A[3] * A[8]) +
4212 b[1] * (A[0] * A[8] - A[2] * A[6]) + b[2] * (A[2] * A[3] - A[0] * A[5]);
4213 da2 = b[0] * (A[3] * A[7] - A[4] * A[6]) +
4214 b[1] * (A[1] * A[6] - A[0] * A[7]) + b[2] * (A[0] * A[4] - A[1] * A[3]);
4221template <
typename T>
4223 for (
int r = 0; r < n; ++r) {
4224 for (
int c = r; c < n; ++c) {
4225 ISwap(A[r * n + c], A[c * n + r]);
4230template <
typename T>
4232 for (
int r = 0; r < m; ++r) {
4233 for (
int c = 0; c < n; ++c) {
4234 At[c * m + r] = A[r * n + c];
4238template <
typename T>
4242template <
typename T>
4249template <
typename T>
4255template <
typename T>
4267template <
typename T>
4274 ISwap(A[11], A[14]);
4276template <
typename T>
4298template <
typename T>
4302 for (i = 0; i < n; ++i, ni += n) {
4309template <
typename T>
4310inline void IMultAx(
const T *A,
const T *x, T *Ax,
int m,
int n) {
4311 for (
int i = 0; i < m; i++) {
4312 Ax[i] =
IDot(A + i * n, x, n);
4318template <
typename T>
4319inline void IMultAtx(
const T *A,
const T *x, T *Atx,
int m,
int n) {
4321 const T *Ai =
nullptr;
4322 for (
int i = 0; i < n; i++) {
4325 for (
int j = 0; j < m; j++, Ai += n) {
4326 acc += (*Ai) * x[j];
4333template <
typename T>
4335 Ax[0] = A[0] * x[0] + A[1] * x[1] + A[2] * x[2];
4338template <
typename T>
4343 Ax[0] = A[0] * x0 + A[1] * x1;
4344 Ax[1] = A[2] * x0 + A[3] * x1;
4347template <
typename T>
4353 Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2;
4354 Ax[1] = A[3] * x0 + A[4] * x1 + A[5] * x2;
4357template <
typename T>
4363 Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2;
4364 Ax[1] = A[3] * x0 + A[4] * x1 + A[5] * x2;
4365 Ax[2] = A[6] * x0 + A[7] * x1 + A[8] * x2;
4368template <
typename T>
4374 Atx[0] = A[0] * x0 + A[3] * x1 + A[6] * x2;
4375 Atx[1] = A[1] * x0 + A[4] * x1 + A[7] * x2;
4376 Atx[2] = A[2] * x0 + A[5] * x1 + A[8] * x2;
4379template <
typename T>
4386 Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2 + A[3] * x3;
4387 Ax[1] = A[4] * x0 + A[5] * x1 + A[6] * x2 + A[7] * x3;
4388 Ax[2] = A[8] * x0 + A[9] * x1 + A[10] * x2 + A[11] * x3;
4391template <
typename T>
4397 Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2;
4398 Ax[1] = A[3] * x0 + A[4] * x1 + A[5] * x2;
4399 Ax[2] = A[6] * x0 + A[7] * x1 + A[8] * x2;
4400 Ax[3] = A[9] * x0 + A[10] * x1 + A[11] * x2;
4403template <
typename T>
4405 Atx[0] = A[0] * x[0] + A[4] * x[1] + A[8] * x[2];
4406 Atx[1] = A[1] * x[0] + A[5] * x[1] + A[9] * x[2];
4407 Atx[2] = A[2] * x[0] + A[6] * x[1] + A[10] * x[2];
4408 Atx[3] = A[3] * x[0] + A[7] * x[1] + A[11] * x[2];
4411template <
typename T>
4418 Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2 + A[3] * x3;
4419 Ax[1] = A[4] * x0 + A[5] * x1 + A[6] * x2 + A[7] * x3;
4420 Ax[2] = A[8] * x0 + A[9] * x1 + A[10] * x2 + A[11] * x3;
4421 Ax[3] = A[12] * x0 + A[13] * x1 + A[14] * x2 + A[15] * x3;
4424template <
typename T>
4425inline void IMultAB(
const T *A,
const T *B, T *AB,
int m,
int n,
int o) {
4428 for (
int i = 0; i < m; i++) {
4431 for (
int j = 0; j < o; j++) {
4432 acc =
static_cast<T
>(0.0);
4433 for (
int k = 0; k < n; k++) {
4434 acc += A[in + k] * B[k * o + j];
4442template <
typename T>
4444 AB[0] = A[0] * B[0];
4445 AB[1] = A[0] * B[1];
4446 AB[2] = A[0] * B[2];
4447 AB[3] = A[1] * B[0];
4448 AB[4] = A[1] * B[1];
4449 AB[5] = A[1] * B[2];
4450 AB[6] = A[2] * B[0];
4451 AB[7] = A[2] * B[1];
4452 AB[8] = A[2] * B[2];
4456template <
typename T>
4461 AB[0] = a * B[0] + b * B[2];
4462 AB[1] = a * B[1] + b * B[3];
4465 AB[2] = a * B[0] + b * B[2];
4466 AB[3] = a * B[1] + b * B[3];
4470template <
typename T>
4476 AB[0] = a * B[0] + b * B[2] + c * B[4];
4477 AB[1] = a * B[1] + b * B[3] + c * B[5];
4481 AB[2] = a * B[0] + b * B[2] + c * B[4];
4482 AB[3] = a * B[1] + b * B[3] + c * B[5];
4486template <
typename T>
4492 AB[0] = a * B[0] + b * B[3] + c * B[6];
4493 AB[1] = a * B[1] + b * B[4] + c * B[7];
4494 AB[2] = a * B[2] + b * B[5] + c * B[8];
4499 AB[3] = a * B[0] + b * B[3] + c * B[6];
4500 AB[4] = a * B[1] + b * B[4] + c * B[7];
4501 AB[5] = a * B[2] + b * B[5] + c * B[8];
4505template <
typename T>
4511 AB[0] = a * B[0] + b * B[4] + c * B[8];
4512 AB[1] = a * B[1] + b * B[5] + c * B[9];
4513 AB[2] = a * B[2] + b * B[6] + c * B[10];
4514 AB[3] = a * B[3] + b * B[7] + c * B[11];
4518 AB[4] = a * B[0] + b * B[4] + c * B[8];
4519 AB[5] = a * B[1] + b * B[5] + c * B[9];
4520 AB[6] = a * B[2] + b * B[6] + c * B[10];
4521 AB[7] = a * B[3] + b * B[7] + c * B[11];
4525template <
typename T>
4531 AB[0] = a * B[0] + b * B[3] + c * B[6];
4532 AB[1] = a * B[1] + b * B[4] + c * B[7];
4533 AB[2] = a * B[2] + b * B[5] + c * B[8];
4537 AB[3] = a * B[0] + b * B[3] + c * B[6];
4538 AB[4] = a * B[1] + b * B[4] + c * B[7];
4539 AB[5] = a * B[2] + b * B[5] + c * B[8];
4543 AB[6] = a * B[0] + b * B[3] + c * B[6];
4544 AB[7] = a * B[1] + b * B[4] + c * B[7];
4545 AB[8] = a * B[2] + b * B[5] + c * B[8];
4549template <
typename T>
4556 AB[0] = a * B[0] + b * B[4] + c * B[8] + d * B[12];
4557 AB[1] = a * B[1] + b * B[5] + c * B[9] + d * B[13];
4558 AB[2] = a * B[2] + b * B[6] + c * B[10] + d * B[14];
4559 AB[3] = a * B[3] + b * B[7] + c * B[11] + d * B[15];
4565 AB[4] = a * B[0] + b * B[4] + c * B[8] + d * B[12];
4566 AB[5] = a * B[1] + b * B[5] + c * B[9] + d * B[13];
4567 AB[6] = a * B[2] + b * B[6] + c * B[10] + d * B[14];
4568 AB[7] = a * B[3] + b * B[7] + c * B[11] + d * B[15];
4574 AB[8] = a * B[0] + b * B[4] + c * B[8] + d * B[12];
4575 AB[9] = a * B[1] + b * B[5] + c * B[9] + d * B[13];
4576 AB[10] = a * B[2] + b * B[6] + c * B[10] + d * B[14];
4577 AB[11] = a * B[3] + b * B[7] + c * B[11] + d * B[15];
4583 AB[12] = a * B[0] + b * B[4] + c * B[8] + d * B[12];
4584 AB[13] = a * B[1] + b * B[5] + c * B[9] + d * B[13];
4585 AB[14] = a * B[2] + b * B[6] + c * B[10] + d * B[14];
4586 AB[15] = a * B[3] + b * B[7] + c * B[11] + d * B[15];
4590template <
typename T>
4618template <
typename T>
4625 AB[0] = a * B[0] + b * B[3] + c * B[6];
4626 AB[1] = a * B[1] + b * B[4] + c * B[7];
4627 AB[2] = a * B[2] + b * B[5] + c * B[8];
4630 AB[3] = b * B[3] + c * B[6];
4631 AB[4] = b * B[4] + c * B[7];
4632 AB[5] = b * B[5] + c * B[8];
4640template <
typename T>
4648 AB[1] = a * B[1] + b * B[4];
4649 AB[2] = a * B[2] + b * B[5] + c * B[8];
4654 AB[4] = a * B[1] + b * B[4];
4655 AB[5] = a * B[2] + b * B[5] + c * B[8];
4660 AB[7] = a * B[1] + b * B[4];
4661 AB[8] = a * B[2] + b * B[5] + c * B[8];
4664template <
typename T>
4666 AB[0] = A[0] * B[0] + A[1] * B[4] + A[2] * B[8];
4667 AB[1] = A[0] * B[1] + A[1] * B[5] + A[2] * B[9];
4668 AB[2] = A[0] * B[2] + A[1] * B[6] + A[2] * B[10];
4669 AB[3] = A[0] * B[3] + A[1] * B[7] + A[2] * B[11];
4671 AB[4] = A[3] * B[0] + A[4] * B[4] + A[5] * B[8];
4672 AB[5] = A[3] * B[1] + A[4] * B[5] + A[5] * B[9];
4673 AB[6] = A[3] * B[2] + A[4] * B[6] + A[5] * B[10];
4674 AB[7] = A[3] * B[3] + A[4] * B[7] + A[5] * B[11];
4676 AB[8] = A[6] * B[0] + A[7] * B[4] + A[8] * B[8];
4677 AB[9] = A[6] * B[1] + A[7] * B[5] + A[8] * B[9];
4678 AB[10] = A[6] * B[2] + A[7] * B[6] + A[8] * B[10];
4679 AB[11] = A[6] * B[3] + A[7] * B[7] + A[8] * B[11];
4682template <
typename T>
4689 AB[0] = a * B[0] + b * B[3] + c * B[6] + d * B[9];
4690 AB[1] = a * B[1] + b * B[4] + c * B[7] + d * B[10];
4691 AB[2] = a * B[2] + b * B[5] + c * B[8] + d * B[11];
4697 AB[3] = a * B[0] + b * B[3] + c * B[6] + d * B[9];
4698 AB[4] = a * B[1] + b * B[4] + c * B[7] + d * B[10];
4699 AB[5] = a * B[2] + b * B[5] + c * B[8] + d * B[11];
4705 AB[6] = a * B[0] + b * B[3] + c * B[6] + d * B[9];
4706 AB[7] = a * B[1] + b * B[4] + c * B[7] + d * B[10];
4707 AB[8] = a * B[2] + b * B[5] + c * B[8] + d * B[11];
4711template <
typename T>
4717 ABt[0] = a * B[0] + b * B[1] + c * B[2];
4718 ABt[1] = a * B[3] + b * B[4] + c * B[5];
4719 ABt[2] = a * B[6] + b * B[7] + c * B[8];
4723 ABt[3] = a * B[0] + b * B[1] + c * B[2];
4724 ABt[4] = a * B[3] + b * B[4] + c * B[5];
4725 ABt[5] = a * B[6] + b * B[7] + c * B[8];
4729 ABt[6] = a * B[0] + b * B[1] + c * B[2];
4730 ABt[7] = a * B[3] + b * B[4] + c * B[5];
4731 ABt[8] = a * B[6] + b * B[7] + c * B[8];
4735template <
typename T>
4737 ABt[0] = A[0] * B[0] + A[1] * B[1] + A[2] * B[2];
4738 ABt[1] = A[0] * B[3] + A[1] * B[4] + A[2] * B[5];
4739 ABt[2] = A[3] * B[0] + A[4] * B[1] + A[5] * B[2];
4740 ABt[3] = A[3] * B[3] + A[4] * B[4] + A[5] * B[5];
4744template <
typename T>
4746 ABt[0] =
IDot4(A, B);
4747 ABt[1] =
IDot4(A, B + 4);
4748 ABt[2] =
IDot4(A, B + 8);
4750 ABt[3] =
IDot4(A + 4, B);
4751 ABt[4] =
IDot4(A + 4, B + 4);
4752 ABt[5] =
IDot4(A + 4, B + 8);
4754 ABt[6] =
IDot4(A + 8, B);
4755 ABt[7] =
IDot4(A + 8, B + 4);
4756 ABt[8] =
IDot4(A + 8, B + 8);
4758 ABt[9] =
IDot4(A + 12, B);
4759 ABt[10] =
IDot4(A + 12, B + 4);
4760 ABt[11] =
IDot4(A + 12, B + 8);
4764template <
typename T>
4765inline void IMultAtA(
const T *A, T *AtA,
int m,
int n) {
4769 for (i = 0; i < n; ++i) {
4771 for (j = 0; j < i; ++j) {
4772 AtA[ni + j] = AtA[j * n + i];
4774 for (j = i; j < n; ++j) {
4775 acc =
static_cast<T
>(0.0);
4778 for (k = 0; k < m; ++k, Ai += n, Aj += n) {
4779 acc += (*Ai) * (*Aj);
4786template <
typename T>
4788 AtA[0] = A[0] * A[0] + A[2] * A[2];
4789 AtA[1] = AtA[2] = A[0] * A[1] + A[2] * A[3];
4790 AtA[3] = A[1] * A[1] + A[3] * A[3];
4793template <
typename T>
4795 T xx =
static_cast<T
>(0.0);
4796 T xy =
static_cast<T
>(0.0);
4797 T yy =
static_cast<T
>(0.0);
4799 for (
int i = 0; i < 2 * n; i += 2) {
4807 AtA[1] = AtA[2] = xy;
4811template <
typename T>
4813 T xx =
static_cast<T
>(0.0);
4814 T xy =
static_cast<T
>(0.0);
4815 T yy =
static_cast<T
>(0.0);
4817 for (
int i = 0; i < 2 * n; i += 2) {
4825 AtA[1] = AtA[2] = xy;
4829template <
typename T>
4831 AtA[0] = A[0] * A[0] + A[3] * A[3] + A[6] * A[6];
4832 AtA[1] = AtA[3] = A[0] * A[1] + A[3] * A[4] + A[6] * A[7];
4833 AtA[2] = AtA[6] = A[0] * A[2] + A[3] * A[5] + A[6] * A[8];
4834 AtA[4] = A[1] * A[1] + A[4] * A[4] + A[7] * A[7];
4835 AtA[5] = AtA[7] = A[1] * A[2] + A[4] * A[5] + A[7] * A[8];
4836 AtA[8] = A[2] * A[2] + A[5] * A[5] + A[8] * A[8];
4839template <
typename T>
4841 T xx =
static_cast<T
>(0.0);
4842 T xy =
static_cast<T
>(0.0);
4843 T xz =
static_cast<T
>(0.0);
4844 T yy =
static_cast<T
>(0.0);
4845 T yz =
static_cast<T
>(0.0);
4846 T zz =
static_cast<T
>(0.0);
4849 for (i = 0, j = 0; i < n; i++) {
4861 AtA[1] = AtA[3] = xy;
4862 AtA[2] = AtA[6] = xz;
4864 AtA[5] = AtA[7] = yz;
4868template <
typename T>
4870 AtA[0] = A[0] * A[0] + A[4] * A[4] + A[8] * A[8] + A[12] * A[12];
4871 AtA[1] = AtA[4] = A[0] * A[1] + A[4] * A[5] + A[8] * A[9] + A[12] * A[13];
4872 AtA[2] = AtA[8] = A[0] * A[2] + A[4] * A[6] + A[8] * A[10] + A[12] * A[14];
4873 AtA[3] = AtA[12] = A[0] * A[3] + A[4] * A[7] + A[8] * A[11] + A[12] * A[15];
4874 AtA[5] = A[1] * A[1] + A[5] * A[5] + A[9] * A[9] + A[13] * A[13];
4875 AtA[6] = AtA[9] = A[1] * A[2] + A[5] * A[6] + A[9] * A[10] + A[13] * A[14];
4876 AtA[7] = AtA[13] = A[1] * A[3] + A[5] * A[7] + A[9] * A[11] + A[13] * A[15];
4877 AtA[10] = A[2] * A[2] + A[6] * A[6] + A[10] * A[10] + A[14] * A[14];
4878 AtA[11] = AtA[14] = A[2] * A[3] + A[6] * A[7] + A[10] * A[11] + A[14] * A[15];
4879 AtA[15] = A[3] * A[3] + A[7] * A[7] + A[11] * A[11] + A[15] * A[15];
4882template <
typename T>
4884 AtB[0] = A[0] * B[0] + A[2] * B[2];
4885 AtB[1] = A[0] * B[1] + A[2] * B[3];
4886 AtB[2] = A[1] * B[0] + A[3] * B[2];
4887 AtB[3] = A[1] * B[1] + A[3] * B[3];
4890template <
typename T>
4892 AtB[0] = A[0] * B[0] + A[3] * B[3] + A[6] * B[6];
4893 AtB[1] = A[0] * B[1] + A[3] * B[4] + A[6] * B[7];
4894 AtB[2] = A[0] * B[2] + A[3] * B[5] + A[6] * B[8];
4895 AtB[3] = A[1] * B[0] + A[4] * B[3] + A[7] * B[6];
4896 AtB[4] = A[1] * B[1] + A[4] * B[4] + A[7] * B[7];
4897 AtB[5] = A[1] * B[2] + A[4] * B[5] + A[7] * B[8];
4898 AtB[6] = A[2] * B[0] + A[5] * B[3] + A[8] * B[6];
4899 AtB[7] = A[2] * B[1] + A[5] * B[4] + A[8] * B[7];
4900 AtB[8] = A[2] * B[2] + A[5] * B[5] + A[8] * B[8];
4903template <
typename T>
4905 AAt[0] = A[0] * A[0] + A[1] * A[1] + A[2] * A[2];
4906 AAt[1] = AAt[2] = A[0] * A[3] + A[1] * A[4] + A[2] * A[5];
4907 AAt[3] = A[3] * A[3] + A[4] * A[4] + A[5] * A[5];
4910template <
typename T>
4912 AAt[0] = (A[0] * A[0] + A[1] * A[1] + A[2] * A[2]);
4913 AAt[1] = AAt[3] = (A[0] * A[3] + A[1] * A[4] + A[2] * A[5]);
4914 AAt[2] = AAt[6] = (A[0] * A[6] + A[1] * A[7] + A[2] * A[8]);
4915 AAt[4] = (A[3] * A[3] + A[4] * A[4] + A[5] * A[5]);
4916 AAt[5] = AAt[7] = (A[3] * A[6] + A[4] * A[7] + A[5] * A[8]);
4917 AAt[8] = (A[6] * A[6] + A[7] * A[7] + A[8] * A[8]);
4920template <
typename T>
4922 AAt[0] = A[0] * A[0];
4923 AAt[1] = AAt[4] = (A[0] * A[1]);
4924 AAt[2] = AAt[8] = (A[0] * A[2]);
4925 AAt[3] = AAt[12] = (A[0] * A[3]);
4927 AAt[5] = A[1] * A[1];
4928 AAt[6] = AAt[9] = (A[1] * A[2]);
4929 AAt[7] = AAt[13] = (A[1] * A[3]);
4931 AAt[10] = A[2] * A[2];
4932 AAt[11] = AAt[14] = (A[2] * A[3]);
4934 AAt[15] = A[3] * A[3];
4937template <
typename T>
4945template <
typename T>
4953template <
typename T>
4955 const T B[16], T AB[16]) {
4962 AB[0] = A1[0] * B[0] + A1[1] * B[4];
4963 AB[1] = A1[0] * B[1] + A1[1] * B[5];
4964 AB[4] = A1[2] * B[0] + A1[3] * B[4];
4965 AB[5] = A1[2] * B[1] + A1[3] * B[5];
4968 AB[2] = A1[0] * B[2] + A1[1] * B[6];
4969 AB[3] = A1[0] * B[3] + A1[1] * B[7];
4970 AB[6] = A1[2] * B[2] + A1[3] * B[6];
4971 AB[7] = A1[2] * B[3] + A1[3] * B[7];
4974 AB[8] = A2[0] * B[8] + A2[1] * B[12];
4975 AB[9] = A2[0] * B[9] + A2[1] * B[13];
4976 AB[12] = A2[2] * B[8] + A2[3] * B[12];
4977 AB[13] = A2[2] * B[9] + A2[3] * B[13];
4980 AB[10] = A2[0] * B[10] + A2[1] * B[14];
4981 AB[11] = A2[0] * B[11] + A2[1] * B[15];
4982 AB[14] = A2[2] * B[10] + A2[3] * B[14];
4983 AB[15] = A2[2] * B[11] + A2[3] * B[15];
4987template <
typename T>
4989 ISwap(A + r1 * n, A + r2 * n, n);
4992template <
typename T>
4995 for (
int i = 0; i < m; i++, ref += n) {
4996 ISwap(ref[c1], ref[c2]);
5001template <
typename T>
5004 int i_begin =
IMax(0, i1);
5005 int i_end =
IMin(n, i2);
5006 ISwap(A + r1 * n + i_begin, A + r2 * n + i_begin, (i_end - i_begin));
5009template <
typename T>
5012 int i_begin =
IMax(0, i1);
5013 int i_end =
IMin(m, i2);
5014 T *ref = A + i_begin * n;
5015 for (
int i = i_begin; i < i_end; i++, ref += n) {
5016 ISwap(ref[c1], ref[c2]);
5025template <
typename T>
5033 for (i = 0; i < nm1; ++i) {
5034 ISwap(A[dst], A[src]);
5035 ISwap(A[dst + 1], A[src + 1]);
5046template <
typename T>
5054 for (i = 0; i < nm1; ++i) {
5055 ISwap(A[dst], A[src]);
5056 ISwap(A[dst + 1], A[src + 1]);
5057 ISwap(A[dst + 2], A[src + 2]);
void IMultAAt3x3(const T A[9], T AAt[9])
void IScale2(T x[2], T sf)
void ITranspose(T *A, int n)
void ISolve2x2(const double A[4], const double b[2], double x[2])
T IDot15(const T x[15], const T y[15])
void IMultAB3x3And3x3(const T A[9], const T B[9], T AB[9])
void IMultAtA4x4(const T A[16], T AtA[16])
void ISignedUnitize4(T x[4])
T IDot12(const T x[12], const T y[12])
unsigned int IHammingDiff8(const unsigned int x[8], const unsigned int y[8])
void IAdd12(const T x[12], const T y[12], T z[12])
void IMultAtA(const T *A, T *AtA, int m, int n)
void ISubScaled7(const T x[7], T y[7], T k)
void ISubScaled6(const T x[6], T y[6], T k)
void ICross(const T x[3], const T y[3], T xxy[3])
int IMaxAbsIndexSubdiagonalColumn(const T *A, int i, int n)
void ISub5(const T x[5], const T y[5], T z[5])
double ITrace2x2(const double A[4])
void IMultAx1x3(const T A[3], const T x[3], T Ax[1])
void IScale10(T x[10], T sf)
T IL2Norm15(const T x[15])
void IFill(T *a, int n, T val)
void ICopy8(const T src[8], T dst[8])
void IAdd10(const T x[10], const T y[10], T z[10])
void IMultAB3x4And4x3(const T A[12], const T B[12], T AB[9])
T ISquaresumDiffU12(const T x[12], const T y[12])
void IAddScaled2(const T x[2], T y[2], T k)
void IFill12(T a[12], T val)
T ISquaresumDiffU16(const T x[16], const T y[16])
T IDot14(const T x[14], const T y[14])
T IInfinityNorm(const T *A, int m, int n)
void ISqrSkewSymmetric3x3(const T x[3], T e_x2[9])
void IScale5(T x[5], T sf)
void IAdd7(const T x[7], const T y[7], T z[7])
void IMultAB4x1And1x4(const T A[4], const T B[4], T AB[16])
int IMaxAbsIndexInterval(const double *a, int i1, int i2)
void IFill9(T a[9], T val)
void IAxiator(const T x[3], T e_x[9])
void IHomogenize(const T *x, T *y, int n)
void ICopy11(const T src[11], T dst[11])
void IHomogeneousUnitize4(T x[4])
T IMean(const T *x, int n)
void IMultAx4x4(const T A[16], const T x[4], T Ax[4])
void IMultAx4x3(const T A[12], const T x[3], T Ax[4])
void IUnitize(double *x, int n)
unsigned int IHammingDiff(const unsigned int *x, const unsigned int *y, int n)
T ISquaresum16(const T x[16])
void ICopy(const T *src, T *dst, int n)
void IFill14(T a[14], T val)
void ISubScaled5(const T x[5], T y[5], T k)
void IFill2(T a[2], T val)
void ICopy12(const T src[12], T dst[12])
void ISubScaled9(const T x[9], T y[9], T k)
void IFill7(T a[7], T val)
void ICopy9(const T src[9], T dst[9])
double IDeterminant4x4(const double A[16])
T IDot16(const T x[16], const T y[16])
void IMultAtx4x3(const T A[12], const T x[3], T Atx[4])
T ISquaresumDiffU2(const T x[2], const T y[2])
void IAddScaled7(const T x[7], T y[7], T k)
void ISignedUnitize2(T x[2])
void IMultAtA2x2(const T A[4], T AtA[4])
void IAdd8(const T x[8], const T y[8], T z[8])
void ISubdeterminants3x4(const double x[4], const double y[4], const double z[4], double sd[4])
void ICopy3(const T src[3], T dst[3])
void IUpperTriangular3x3(T a0, T a1, T a2, T a4, T a5, T a8, T A[9])
void IMultAB2x2And2x2(const T A[4], const T B[4], T AB[4])
void IMultABt2x3And2x3(const T A[6], const T B[6], T ABt[4])
void IScale14(T x[14], T sf)
T ISquaresum11(const T x[11])
void IFill8(T a[8], T val)
void IMultAtAnx3(const T *A, T AtA[9], int n)
void ICentroid3(const double *a, int n, double centroid[3])
void IHomogenize2(const T x[2], T y[3])
void IAddScaled9(const T x[9], T y[9], T k)
double ITrace3x3(const double A[9])
void IMultAx3x3(const T A[9], const T x[3], T Ax[3])
void IAdd11(const T x[11], const T y[11], T z[11])
void IScale8(T x[8], T sf)
T IL2Norm16(const T x[16])
void IFill13(T a[13], T val)
unsigned int IHammingDiff4(const unsigned int x[4], const unsigned int y[4])
void IFill5(T a[5], T val)
double IL2Norm(const unsigned char *x, int n)
void IMultAB4x4WABlockDiagonal(const T A1[4], const T A2[4], const T B[16], T AB[16])
void IMinMaxElements(const T *a, int n, T *min_val, T *max_val)
void ICopy5(const T src[5], T dst[5])
void IAddScaled3(const T x[3], T y[3], T k)
T ISquaresumDiffU11(const T x[11], const T y[11])
T ISquaresumDiffU8(const T x[8], const T y[8])
void ISub7(const T x[7], const T y[7], T z[7])
T IL2Norm12(const T x[12])
void IAddScaled8(const T x[8], T y[8], T k)
void ICopy1(const T src[1], T dst[1])
void IMultAtx3x3(const T A[9], const T x[3], T Atx[3])
int IMaxIndex(const double *a, int n)
void IScale15(T x[15], T sf)
T ISquaresum5(const T x[5])
T ISquaresumDiffU15(const T x[15], const T y[15])
void IAugmentDiagonal(T *A, int n, const T lambda)
void IInvert2x2(const double A[4], double Ai[4])
void ISwapRowsInterval(T *A, int i1, int i2, int r1, int r2, int m, int n)
T IDot13(const T x[13], const T y[13])
void IHomogeneousUnitize(T *x, int n)
void IAdd5(const T x[5], const T y[5], T z[5])
void ISub1(const T x[1], const T y[1], T z[1])
void IAdd2(const T x[2], const T y[2], T z[2])
void ISubScaled1(const T x[1], T y[1], T k)
void ISwapCols(T *A, int c1, int c2, int m, int n)
void INegCol(T *A, int c, int m, int n)
void IUpperTriangular2x2(T a0, T a1, T a3, T A[4])
void ITranspose4x4(T A[16])
void IAdd20(const T x[20], const T y[20], T z[20])
void IMultAAt4x1(const T A[4], T AAt[16])
void ISub8(const T x[8], const T y[8], T z[8])
double IDeterminant3x3(const double A[9])
T IDot10(const T x[10], const T y[10])
T ISquaresumDiffU9(const T x[9], const T y[9])
int IMaxAbsIndex(const double *a, int n)
void ISub10(const T x[10], const T y[10], T z[10])
void IFill4(T a[4], T val)
void IAdd16(const T x[16], const T y[16], T z[16])
void IScale16(T x[16], T sf)
T ISquaresumDiffU13(const T x[13], const T y[13])
float IDiv(float a, float b)
T ISquaresum12(const T x[12])
T ISquaresum3(const T x[3])
void IScale(T *x, int n, T sf)
void IMultAx3x4(const T A[12], const T x[4], T Ax[3])
void IFill3(T a[3], T val)
T ISquaresum(const T *x, int n)
double IDeterminant2x2(const double A[4])
T IDot7(const T x[7], const T y[7])
void ISubScaled2(const T x[2], T y[2], T k)
T IMaxDiagonalElement(const T *a, int n)
void ICopy14(const T src[14], T dst[14])
void ISub9(const T x[9], const T y[9], T z[9])
T IL2Norm11(const T x[11])
void IInvert3x3UpperTriangular(const double A[9], double Ai[9])
void IMultAtA3x3(const T A[9], T AtA[9])
int ISquaresumU(const unsigned char *x, int n)
T ISquaresum1(const T x[1])
void IAddScaled4(const T x[4], T y[4], T k)
unsigned int IHammingLut(unsigned int a, unsigned int b)
void IMultAAt2x3(const T A[6], T AAt[4])
void IHomogeneousUnitize3(T x[3])
T ISquaresumDiffU7(const T x[7], const T y[7])
void ICopy6(const T src[6], T dst[6])
void ICopy2(const T src[2], T dst[2])
void IAdd(T *x, int n, T k)
void IScale9(T x[9], T sf)
T ISum(const T *x, int n)
void IFill11(T a[11], T val)
void IScale13(T x[13], T sf)
T ISquaresum9(const T x[9])
T IDot(const T *x, const T *y, int n)
void IAddScaled5(const T x[5], T y[5], T k)
void ISwapColsInterval(T *A, int i1, int i2, int c1, int c2, int m, int n)
int IMaxAbsIndexIntervalColumn(const double *a, int i1, int i2, int n)
void IMultAx2x2(const T A[4], const T x[2], T Ax[2])
int ISquaresumDiffU(const unsigned char *x, const unsigned char *y, int n)
T IL2Norm14(const T x[14])
void IAdd1(const T x[1], const T y[1], T z[1])
void IMultAB3x3And3x3WBUpperTriangular(const T A[9], const T B[9], T AB[9])
void IMultAB3x1And1x3(const T A[3], const T B[3], T AB[9])
T IAbsSum(const T *x, int n)
void ISub(T *x, int n, T k)
T ISquaresumDiffU10(const T x[10], const T y[10])
void ISub16(const T x[16], const T y[16], T z[16])
void ISub13(const T x[13], const T y[13], T z[13])
void IHomogenize3(const T x[3], T y[4])
T IDot1(const T x[1], const T y[1])
T ISdv(const T *x, T mean, int n)
void IMultAx(const T *A, const T *x, T *Ax, int m, int n)
T ISquaresumDiffU6(const T x[6], const T y[6])
T IDot5(const T x[5], const T y[5])
T IDot11(const T x[11], const T y[11])
T ISquaresum4(const T x[4])
void IAdd4(const T x[4], const T y[4], T z[4])
void IScale12(T x[12], T sf)
void ISignedUnitize3(T x[3])
void IAddScaled(const T *x, T *y, int n, T k)
void IHomogenize1(const T x[1], T y[2])
void IFill10(T a[10], T val)
void IAdd9(const T x[9], const T y[9], T z[9])
T ISquaresumDiffU4(const T x[4], const T y[4])
void IFill15(T a[15], T val)
T IDot6(const T x[6], const T y[6])
void ISafeUnitize(double *x, int n)
int IMinAbsIndexInterval(const double *a, int i1, int i2)
T IDot9(const T x[9], const T y[9])
void ITranspose2x2(T A[4])
void IMultABt3x3And3x3(const T A[9], const T B[9], T ABt[9])
unsigned int IHammingDiff2(const unsigned int x[2], const unsigned int y[2])
void ISubScaled4(const T x[4], T y[4], T k)
T ISquaresumDiffU14(const T x[14], const T y[14])
void ICopy7(const T src[7], T dst[7])
void IScale3(T x[3], T sf)
T ISquaresum7(const T x[7])
void ISub14(const T x[14], const T y[14], T z[14])
void IAdd3(const T x[3], const T y[3], T z[3])
void IMultAtBC3x3And3x3And3x3(const T A[9], const T B[9], const T C[9], T AtBC[9])
void IScale1(T x[1], T sf)
void IHomogeneousUnitize9(T x[9])
void IShiftHomogeneous3(T *A, int n)
T IMinElement(const T *a, int n)
void IMultAB4x4And4x4(const T A[16], const T B[16], T AB[16])
void IMultAB3x3And3x4(const T A[9], const T B[12], T AB[12])
void ITranspose3x3(T A[9])
void IMultAB2x3And3x3(const T A[6], const T B[9], T AB[6])
void IAdd6(const T x[6], const T y[6], T z[6])
T IL2Norm10(const T x[10])
T ISquaresum10(const T x[10])
T IDot8(const T x[8], const T y[8])
void IMultAtx(const T *A, const T *x, T *Atx, int m, int n)
void IShiftHomogeneous4(T *A, int n)
T ISquaresum8(const T x[8])
void IAdd13(const T x[13], const T y[13], T z[13])
void IAdd15(const T x[15], const T y[15], T z[15])
int IMinAbsIndex(const double *a, int n)
void ISolve3x3(const double A[9], const double b[3], double x[3])
void ISub6(const T x[6], const T y[6], T z[6])
void IFill1(T a[1], T val)
T ISquaresum14(const T x[14])
void IInvert3x3(const double A[9], double Ai[9])
void IAddScaled1(const T x[1], T y[1], T k)
int ISumU(const unsigned char *x, int n)
void ISub12(const T x[12], const T y[12], T z[12])
void IScale6(T x[6], T sf)
int IMeanU(const unsigned char *x, int n)
void IMultAtAnx2(const T *A, T *AtA, int n)
void IScale7(T x[7], T sf)
void ICopy13(const T src[13], T dst[13])
void IAdd14(const T x[14], const T y[14], T z[14])
T ISquaresum15(const T x[15])
void IMultAtB2x2And2x2(const T A[4], const T B[4], T AtB[4])
void ISub11(const T x[11], const T y[11], T z[11])
void ISubdeterminants2x4(const double x[4], const double y[4], double sd[6])
void ICentroid2(const double *a, int n, double centroid[2])
void IScale4(T x[4], T sf)
void IMultABt4x4And3x4(const T A[16], const T B[12], T ABt[12])
T ISquaresum2(const T x[2])
void ISub2(const T x[2], const T y[2], T z[2])
void ISub15(const T x[15], const T y[15], T z[15])
void ISubScaled8(const T x[8], T y[8], T k)
void ICopy16(const T src[16], T dst[16])
void IMultABC3x3And3x3And3x3(const T A[9], const T B[9], const T C[9], T ABC[9])
T IDot2(const T x[2], const T y[2])
void ISub4(const T x[4], const T y[4], T z[4])
void IMultAtB3x3And3x3(const T A[9], const T B[9], T AtB[9])
void ICopy15(const T src[15], T dst[15])
void ICopy4(const T src[4], T dst[4])
void IAddScaled6(const T x[6], T y[6], T k)
T IDot3(const T x[3], const T y[3])
void IMultAB(const T *A, const T *B, T *AB, int m, int n, int o)
T ISquaresumDiffU1(const T x[1], const T y[1])
T IDot4(const T x[4], const T y[4])
void ISubScaled(const T *x, T *y, int n, T k)
void ISwapRows(T *A, int r1, int r2, int m, int n)
void IMultAB2x3And3x4(const T A[6], const T B[12], T AB[8])
T ISquaresum6(const T x[6])
T IL2Norm13(const T x[13])
T ISquaresum13(const T x[13])
void IMultAB3x3And3x3WAUpperTriangular(const T A[9], const T B[9], T AB[9])
int IMinAbsIndexIntervalColumn(const double *a, int i1, int i2, int n)
void ISubScaled3(const T x[3], T y[3], T k)
void IFill6(T a[6], T val)
unsigned int IHammingDiff16(const unsigned int x[16], const unsigned int y[16])
void ICopy10(const T src[10], T dst[10])
void IFill16(T a[16], T val)
T IMaxElement(const T *a, int n)
void ISub3(const T x[3], const T y[3], T z[3])
void IHomogeneousUnitize2(T x[2])
void IMultAB2x3And3x2(const T A[6], const T B[6], T AB[4])
void IMultAx2x3(const T A[6], const T x[3], T Ax[2])
void IScale11(T x[11], T sf)
T ISquaresumDiffU5(const T x[5], const T y[5])
T ISquaresumDiffU3(const T x[3], const T y[3])