23 #ifndef __ND_GENERAL_MATRIX_H__
24 #define __ND_GENERAL_MATRIX_H__
26 #include "ndCoreStdafx.h"
29 #include "ndGeneralVector.h"
31 #define D_LCP_MAX_VALUE ndFloat32 (1.0e15f)
// Matrix-vector product: for every i in [0, size) writes
// out[i] = ndDotProduct(size, row, v), where row points into `matrix`.
// NOTE(review): only part of this function is visible in this view; the
// declaration and update of `stride` are not shown. Presumably it advances
// by one row per iteration - confirm.
39 void ndMatrixTimeVector(ndInt32 size,
const T*
const matrix,
const T*
const v, T*
const out)
42 for (ndInt32 i = 0; i < size; ++i)
// Row currently being multiplied, addressed through the running offset.
44 const T*
const row = &matrix[stride];
45 out[i] = ndDotProduct(size, row, v);
// Product of two square matrices. matrixA, matrixB and out are all indexed
// with a row pitch of `size` elements.
// NOTE(review): the declaration of `acc` and the store into rowOut are on
// lines not visible in this view; presumably rowOut[j] receives acc - confirm.
51 void ndMatrixTimeMatrix(ndInt32 size,
const T*
const matrixA,
const T*
const matrixB, T*
const out)
53 for (ndInt32 i = 0; i < size; ++i)
55 const T*
const rowA = &matrixA[i * size];
56 T*
const rowOut = &out[i * size];
57 for (ndInt32 j = 0; j < size; ++j)
// Accumulates row i of matrixA against column j of matrixB.
60 for (ndInt32 k = 0; k < size; ++k)
62 acc += rowA[k] * matrixB[k * size + j];
// Fills `matrix` row by row from vectorA and vectorB; every visible store
// has the form row[j] = scale * vectorB[j].
// NOTE(review): `scale` and `stride` are defined on lines not visible in
// this view; presumably scale is vectorA[i], which would make the result the
// outer product of vectorA and vectorB - confirm.
70 void ndCovarianceMatrix(ndInt32 size, T*
const matrix,
const T*
const vectorA,
const T*
const vectorB)
73 for (ndInt32 i = 0; i < size; ++i)
76 T*
const row = &matrix[stride];
77 for (ndInt32 j = 0; j < size; ++j)
79 row[j] = scale * vectorB[j];
// Updates row n of `matrix` (row pitch `stride`) in place as one step of a
// Cholesky factorization. It reads rows j <= n of the matrix and the
// reciprocals previously stored in invDiagonalOut, and writes
// invDiagonalOut[n] = 1 / rowN[n] after taking the square root of the pivot.
// The first ndInt32 parameter is unnamed and therefore unused.
// NOTE(review): several lines are not visible in this view (declarations of
// `s` and `base`, the branch separating the diagonal case from the
// off-diagonal case, and the return statements). Presumably the function
// returns false when the pivot test below fails - confirm.
86 bool ndCholeskyFactorizationAddRow(ndInt32, ndInt32 stride, ndInt32 n, T*
const matrix, T*
const invDiagonalOut)
88 T*
const rowN = &matrix[stride * n];
91 for (ndInt32 j = 0; j <= n; ++j)
94 T*
const rowJ = &matrix[base];
// Partial dot product of row n and row j over the columns left of j.
95 for (ndInt32 k = 0; k < j; ++k)
97 s += rowN[k] * rowJ[k];
// Pivot candidate: diagonal entry minus the accumulated sum.
102 T diag = rowN[n] - s;
// Pivots below 1.0e-6 take a separate path (its body is not visible here).
103 if (diag < T(1.0e-6f))
108 rowN[n] = T(sqrt(diag));
109 invDiagonalOut[n] = T(1.0f) / rowN[n];
// Off-diagonal entry, scaled by the cached reciprocal of row j's diagonal.
115 rowN[j] = invDiagonalOut[j] * (rowN[j] - s);
// Factors psdMatrix in place by calling ndCholeskyFactorizationAddRow for
// rows 0..size-1; the loop stops as soon as `state` becomes false.
// NOTE(review): the declaration of `state` and the return statement are not
// visible in this view; presumably `state` starts true and is returned -
// confirm.
125 bool ndCholeskyFactorization(ndInt32 size, ndInt32 stride, T*
const psdMatrix)
// Scratch array for the reciprocal diagonals, obtained through ndAlloca.
128 T*
const invDiagonal = ndAlloca(T, size);
129 for (ndInt32 i = 0; (i < size) && state; ++i)
131 state = state && ndCholeskyFactorizationAddRow(size, stride, i, psdMatrix, invDiagonal);
// Runs ndCholeskyFactorization on a packed copy of the matrix and returns
// its result. The visible code only reads from `matrix`.
// NOTE(review): `row` is defined on a line not visible in this view;
// presumably it advances by `stride` per iteration - confirm.
137 bool ndTestPSDmatrix(ndInt32 size, ndInt32 stride, T*
const matrix)
// Scratch buffer holding size * size elements.
139 T*
const copy = ndAlloca(T, size * size);
141 for (ndInt32 i = 0; i < size; ++i)
// NOTE(review): the first memcpy argument below contains a mis-encoded
// character; presumably it was the address of copy[i * size] - confirm
// against the upstream file.
143 memcpy(©[i * size], &matrix[row], size *
sizeof (T));
// The copy is packed, so `size` is passed as its row pitch.
146 return ndCholeskyFactorization(size, size, copy);
// Repeatedly factors a scratch copy of psdMatrix and loops until
// ndCholeskyFactorization reports success. The visible loop body multiplies
// each regularizer[i] by 4 and adds it to the diagonal of psdMatrix.
// NOTE(review): the `do` keyword and any condition guarding the diagonal
// update are not visible in this view; presumably the update only runs when
// the factorization failed - confirm.
// NOTE(review): the scratch buffer and the memcpy size use ndFloat32 while
// psdMatrix is declared as T*; this only matches when T is ndFloat32 -
// confirm whether other instantiations exist.
150 void ndCholeskyApplyRegularizer (ndInt32 size, ndInt32 stride, T*
const psdMatrix, T*
const regularizer)
152 bool isPsdMatrix =
false;
153 ndFloat32*
const lowerTriangule = ndAlloca(ndFloat32, stride * stride);
// Factor a copy so psdMatrix itself is only changed by the diagonal update.
156 memcpy(lowerTriangule, psdMatrix,
sizeof(ndFloat32) * stride * stride);
157 isPsdMatrix = ndCholeskyFactorization(size, stride, lowerTriangule);
160 for (ndInt32 i = 0; i < size; ++i)
162 regularizer[i] *= ndFloat32(4.0f);
163 psdMatrix[i * stride + i] += regularizer[i];
166 }
while (!isPsdMatrix);
// Solves a linear system in two triangular sweeps using choleskyMatrix
// (row pitch `stride`): a forward pass that reads b and writes x, then a
// backward pass that updates x in place.
// NOTE(review): the declarations of `acc` and the update of `rowStart` are
// on lines not visible in this view; presumably rowStart advances by
// `stride` per row - confirm.
170 void ndSolveCholesky(ndInt32 size, ndInt32 stride,
const T*
const choleskyMatrix, T*
const x,
const T*
const b)
172 ndInt32 rowStart = 0;
// Forward substitution over the entries left of the diagonal.
173 for (ndInt32 i = 0; i < size; ++i)
176 const T*
const row = &choleskyMatrix[rowStart];
177 for (ndInt32 j = 0; j < i; ++j)
179 acc = acc + row[j] * x[j];
181 x[i] = (b[i] - acc) / row[i];
// Backward substitution; the factor is read column-wise (element [j][i]),
// i.e. as the transpose of the stored triangle.
185 for (ndInt32 i = size - 1; i >= 0; i--)
188 for (ndInt32 j = i + 1; j < size; ++j)
190 acc = acc + choleskyMatrix[stride * j + i] * x[j];
192 x[i] = (x[i] - acc) / choleskyMatrix[stride * i + i];
// Convenience overload for a packed matrix: forwards to another
// ndSolveCholesky overload, passing `size` as the second argument as well.
// NOTE(review): the call below passes four arguments, which matches neither
// this overload nor the five-argument overload visible in this view; the
// target overload is presumably declared elsewhere - confirm.
197 void ndSolveCholesky(ndInt32 size, T*
const choleskyMatrix, T*
const x)
199 ndSolveCholesky(size, size, choleskyMatrix, x);
// Solves a dense size x size system in place: row elimination with row
// swaps, followed by back substitution that leaves the solution in b.
// `matrix` (row pitch `size`) is overwritten.
// NOTE(review): the pivot bookkeeping (`m`), the body of the small-pivot
// test, the `acc` declaration and the return statements are on lines not
// visible in this view; presumably false is returned for a near-zero pivot
// and true otherwise - confirm.
203 bool ndSolveGaussian(ndInt32 size, T*
const matrix, T*
const b)
205 for (ndInt32 i = 0; i < size - 1; ++i)
207 const T*
const rowI = &matrix[i * size];
// Search column i below the diagonal for the largest magnitude entry.
// NOTE(review): the bound `size - 1` means the last row is never examined
// as a pivot candidate; looks unintentional - confirm.
209 T maxVal (ndAbs(rowI[i]));
210 for (ndInt32 j = i + 1; j < size - 1; ++j)
212 T val (ndAbs(matrix[size * j + i]));
220 if (maxVal < T(1.0e-12f))
// Exchange row m with row i, element by element.
227 T*
const rowK = &matrix[m * size];
228 T*
const rowJ = &matrix[i * size];
229 for (ndInt32 j = 0; j < size; ++j)
231 ndSwap(rowK[j], rowJ[j]);
// Eliminate column i from every row below row i, updating b alongside.
236 T den = T(1.0f) / rowI[i];
237 for (ndInt32 k = i + 1; k < size; ++k)
239 T*
const rowK = &matrix[size * k];
240 T factor(-rowK[i] * den);
241 for (ndInt32 j = i + 1; j < size; ++j)
243 rowK[j] += rowI[j] * factor;
246 b[k] += b[i] * factor;
// Back substitution on the upper triangle; b is overwritten with the result.
250 for (ndInt32 i = size - 1; i >= 0; i--)
253 T*
const rowI = &matrix[i * size];
254 for (ndInt32 j = i + 1; j < size; ++j)
256 acc = acc + rowI[j] * b[j];
258 b[i] = (b[i] - acc) / rowI[i];
// Computes values into eigenValues[0..size-1] from symmetricMatrix (row
// pitch `stride`). The input is copied into a scratch buffer first, so the
// visible code never writes to symmetricMatrix.
// NOTE(review): many lines of this function are not visible in this view.
// The visible structure looks like a Householder reduction to tridiagonal
// form followed by an iterative QL-style sweep with shifts - confirm against
// the upstream file before relying on that description.
264 void ndEigenValues(
const ndInt32 size,
const ndInt32 stride,
const T*
const symmetricMatrix, T*
const eigenValues)
266 T*
const offDiag = ndAlloca(T, size);
267 T*
const matrix = ndAlloca(T, size * stride);
// Work on a private copy of the input.
269 memcpy(matrix, symmetricMatrix,
sizeof(T) * size * stride);
// First phase: process rows from the last one down to row 1.
270 for (ndInt32 i = size - 1; i > 0; i--)
273 T*
const rowI = &matrix[i * stride];
// Sum of magnitudes of the entries left of the diagonal in row i.
278 for (ndInt32 k = 0; k < i; ++k)
280 scale += ndAbs(rowI[k]);
// A zero sum means there is nothing to eliminate in this row.
283 if (scale == T(0.0f))
285 offDiag[i] = rowI[i - 1];
289 for (ndInt32 k = 0; k < i; ++k)
292 h += rowI[k] * rowI[k];
// g takes the sign opposite to f.
296 T g((f >= T(0.0f) ? -T(sqrt(h)) : T(sqrt(h))));
297 offDiag[i] = scale * g;
302 for (ndInt32 j = 0; j < i; ++j)
305 const T*
const rowJ = &matrix[j * stride];
// Only the lower triangle is read: row j up to the diagonal, then
// column j below it.
306 for (ndInt32 k = 0; k <= j; ++k)
308 g += rowJ[k] * rowI[k];
310 for (ndInt32 k = j + 1; k < i; ++k)
312 g += matrix[k * stride + j] * rowI[k];
315 f += offDiag[j] * rowI[j];
// Update of the leading i x i lower triangle.
319 for (ndInt32 j = 0; j < i; ++j)
322 T g1(offDiag[j] - hh * f1);
324 T*
const rowJ = &matrix[j * stride];
325 for (ndInt32 k = 0; k <= j; ++k)
327 rowJ[k] -= (f1 * offDiag[k] + g1 * rowI[k]);
334 offDiag[i] = rowI[i - 1];
// Copy the diagonal into eigenValues and shift offDiag down by one slot.
// NOTE(review): the update of `index` is not visible; presumably it
// advances by `stride` per iteration - confirm.
339 ndInt32 index = stride;
340 eigenValues[0] = matrix[0];
341 for (ndInt32 i = 1; i < size; ++i)
343 eigenValues[i] = matrix[index + i];
344 offDiag[i - 1] = offDiag[i];
// Second phase: iterate on each index i in turn.
348 for (ndInt32 i = 0; i < size; ++i)
// Look for an off-diagonal entry that is negligible relative to its two
// neighbouring diagonal entries (relative threshold 1.e-6).
354 for (j = i; j < size - 1; ++j)
356 T dd(ndAbs(eigenValues[j]) + ndAbs(eigenValues[j + 1]));
357 if (ndAbs(offDiag[j]) <= (T(1.e-6f) * dd))
372 T g((eigenValues[i + 1] - eigenValues[i]) / (T(2.0f) * offDiag[i]));
373 T r(ndPythag(g, T(1.0f)));
374 g = eigenValues[j] - eigenValues[i] + offDiag[i] / (g + ndSign(r, g));
380 for (k = j - 1; k >= i; k--)
388 eigenValues[k + 1] -= p;
389 offDiag[j] = T(0.0f);
394 g = eigenValues[k + 1] - p;
395 d = (eigenValues[k] - g) * s + T(2.0f) * c * b;
397 eigenValues[k + 1] = g + p;
401 if (r == T(0.0f) && k >= i)
407 offDiag[j] = T(0.0f);
// Calls ndEigenValues on the given matrix, tracks the smallest and largest
// returned value, and forms condition = |largest| / |smallest|.
// NOTE(review): the return statement is not visible in this view; presumably
// `condition` is returned - confirm.
// NOTE(review): the parameter is named choleskyMatrix but is passed to
// ndEigenValues in the slot that function names symmetricMatrix - confirm
// which kind of matrix callers supply.
414 T ndConditionNumber(
const ndInt32 size,
const ndInt32 stride,
const T*
const choleskyMatrix)
416 T*
const eigenValues = ndAlloca(T, size);
417 ndEigenValues(size, stride, choleskyMatrix, eigenValues);
// Sentinels chosen so that any value replaces them on the first compare.
419 T minVal = T(1.0e20f);
420 T maxVal = T(-1.0e20f);
421 for (ndInt32 i = 0; i < size; ++i)
423 minVal = ndMin(minVal, eigenValues[i]);
424 maxVal = ndMax(maxVal, eigenValues[i]);
// No guard against minVal being zero is visible here.
426 T condition = T(ndAbs(maxVal) / ndAbs(minVal));
// Iterative solver over a packed size x size matrix with per-variable bounds
// low/high. Each sweep computes a Gauss-Seidel value f for row j and moves
// x[j] toward it by the relaxation factor `sor`. Iteration stops after
// maxIterCount sweeps or once `tolerance` is no longer above tol2.
// NOTE(review): the declarations of `stride` and `base`, the clamping of f
// against low/high, the writes to `clipped`, and the update of `tolerance`
// are on lines not visible in this view - confirm their behaviour upstream.
447 void ndGaussSeidelLcpSor(
const ndInt32 size,
const T*
const matrix, T*
const x,
const T*
const b,
const T*
const low,
const T*
const high, T tol2, ndInt32 maxIterCount, ndInt16*
const clipped, T sor)
449 const T*
const me = matrix;
450 T*
const invDiag1 = ndAlloca(T, size);
// Initial guess: zero clamped into [low, high]; also cache 1 / diagonal.
453 for (ndInt32 i = 0; i < size; ++i)
455 x[i] = ndClamp(T(0.0f), low[i], high[i]);
456 invDiag1[i] = T(1.0f) / me[stride + i];
// Start above the threshold so the first sweep always runs.
460 T tolerance(tol2 * 2.0f);
461 const T*
const invDiag = invDiag1;
465 for (ndInt32 i = 0; (i < maxIterCount) && (tolerance > tol2); ++i)
472 for (ndInt32 j = 0; j < size; ++j)
474 const T*
const row = &me[base];
// r is the residual of row j; adding row[j] * x[j] back removes the
// diagonal term before dividing by the diagonal.
475 T r(b[j] - ndDotProduct(size, row, x));
476 T f((r + row[j] * x[j]) * invDiag[j]);
// Relaxed update toward f.
491 x[j] = x[j] + (f - x[j]) * sor;
// Variant of the iterative solver in which the bounds of row j are scaled by
// another solution entry: l = low[j] * u[index[j]], h = high[j] * u[index[j]].
// Two iteration loops are visible: the first runs up to max(8, size) sweeps
// without relaxation, the second up to maxIterCount sweeps with the `sor`
// relaxation factor. Both stop once `tolerance` is no longer above tol2.
// NOTE(review): many lines are not visible in this view, including the
// initialisation of u (and of the extra slot u[size]), the clamping of f,
// the update of `tolerance`, the declaration of `base`, and the body of the
// final loop, which presumably copies u back into x - confirm.
499 void ndGaussSeidelLcpSor(
const ndInt32 size,
const ndInt32 stride,
const T*
const matrix, T*
const x,
const T*
const b,
const ndInt32*
const normalIndex,
const T*
const low,
const T*
const high, T tol2, ndInt32 maxIterCount, T sor)
501 const T*
const me = matrix;
502 T*
const invDiag1 = ndAlloca(T, size);
// One extra element beyond `size`; rows without a normalIndex refer to it.
503 T*
const u = ndAlloca(T, size + 1);
504 ndInt32*
const index = ndAlloca(ndInt32, size);
507 ndInt32 rowStart = 0;
508 for (ndInt32 j = 0; j < size; ++j)
// A nonzero normalIndex is a relative offset from j; zero maps to slot size.
511 index[j] = normalIndex[j] ? j + normalIndex[j] : size;
// Clamp the starting values into their scaled bounds and cache 1 / diagonal.
514 for (ndInt32 j = 0; j < size; ++j)
516 const T val = u[index[j]];
517 const T l = low[j] * val;
518 const T h = high[j] * val;
519 u[j] = ndClamp(u[j], l, h);
520 invDiag1[j] = T(1.0f) / me[rowStart + j];
// Start above the threshold so the first sweep always runs.
524 T tolerance(tol2 * 2.0f);
525 const T*
const invDiag = invDiag1;
// First loop: plain sweeps, at least 8 allowed regardless of size.
526 const ndInt32 maxCount = ndMax(8, size);
527 for (ndInt32 i = 0; (i < maxCount) && (tolerance > tol2); ++i)
531 for (ndInt32 j = 0; j < size; ++j)
533 const T*
const row = &me[base];
534 T r(b[j] - ndDotProduct(size, row, u));
535 T f((r + row[j] * u[j]) * invDiag[j]);
537 const T val = u[index[j]];
538 const T l = low[j] * val;
539 const T h = high[j] * val;
// Second loop: same sweep with the relaxation factor applied to f.
560 for (ndInt32 i = 0; (i < maxIterCount) && (tolerance > tol2); ++i)
567 for (ndInt32 j = 0; j < size; ++j)
569 const T*
const row = &me[base];
570 T r(b[j] - ndDotProduct(size, row, u));
571 T f((r + row[j] * u[j]) * invDiag[j]);
572 f = u[j] + (f - u[j]) * sor;
574 const T val = u[index[j]];
575 const T l = low[j] * val;
576 const T h = high[j] * val;
594 for (ndInt32 j = 0; j < size; ++j)
// Convenience wrapper around ndGaussSeidelLcpSor: allocates the `clipped`
// scratch array and calls the solver with a tolerance of 1.0e-3 and an
// iteration limit of size * size. `sor` defaults to 1.2.
// The contents of `clipped` are not used by the visible code after the call.
617 void ndGaussSeidelLCP(
const ndInt32 size,
const T*
const matrix, T*
const x,
const T*
const b,
const T*
const low,
const T*
const high, T sor = T(1.2f))
619 ndInt16*
const clipped = ndAlloca(ndInt16, size);
620 ndGaussSeidelLcpSor(size, matrix, x, b, low, high, T(1.0e-3f), size * size, clipped, sor);
// Exchanges entries i and j of the problem data: rows i and j of
// choleskyMatrix, columns i and j of `matrix`, and elements i and j of low,
// high and permute. Both matrices use a row pitch of `size`.
// NOTE(review): several lines are not visible in this view, including the
// use of row pointers A and B, the declaration and update of `stride`, and
// any swaps applied to x and r; presumably rows of `matrix` and the x / r
// entries are swapped as well - confirm.
624 void ndPermuteRows(ndInt32 size, ndInt32 i, ndInt32 j, T*
const matrix, T*
const choleskyMatrix, T*
const x, T*
const r, T*
const low, T*
const high, ndInt16*
const permute)
628 T*
const A = &matrix[size * i];
629 T*
const B = &matrix[size * j];
630 T*
const invA = &choleskyMatrix[size * i];
631 T*
const invB = &choleskyMatrix[size * j];
// Row exchange, element by element.
632 for (ndInt32 k = 0; k < size; ++k)
635 ndSwap(invA[k], invB[k]);
// Column exchange: swap entries i and j at the current row offset.
639 for (ndInt32 k = 0; k < size; ++k)
641 ndSwap(matrix[stride + i], matrix[stride + j]);
647 ndSwap(low[i], low[j]);
648 ndSwap(high[i], high[j]);
649 ndSwap(permute[i], permute[j]);
// Builds delta_x for index n: the first n entries are set to the negated
// entries of row n of `matrix`, then solved in place with ndSolveCholesky
// using the leading n x n part of choleskyMatrix (row pitch `size`).
// Finally delta_x[n] is set to 1.
654 void ndCalculateDelta_x(ndInt32 size, ndInt32 n,
const T*
const matrix,
const T*
const choleskyMatrix, T*
const delta_x)
656 const T*
const row = &matrix[size * n];
657 for (ndInt32 i = 0; i < n; ++i)
659 delta_x[i] = -row[i];
// delta_x serves as both right-hand side and output of the solve.
661 ndSolveCholesky(size, n, choleskyMatrix, delta_x, delta_x);
662 delta_x[n] = T(1.0f);
// For every i in [n, size) sets delta_r[i] to the dot product of the first
// n + 1 entries of a row of `matrix` with delta_x.
// NOTE(review): the update of `stride` is on a line not visible in this
// view; presumably it advances by `size` per iteration so that row i is
// used - confirm.
667 void ndCalculateDelta_r(ndInt32 size, ndInt32 n,
const T*
const matrix,
const T*
const delta_x, T*
const delta_r)
// Offset of row n, the first row processed.
669 ndInt32 stride = n * size;
670 const ndInt32 size1 = n + 1;
671 for (ndInt32 i = n; i < size; ++i)
673 delta_r[i] = ndDotProduct(size1, &matrix[stride], delta_x);
// Applies a sequence of reflections to choleskyMatrix (row pitch `size`)
// for rows `row` through `colum`, using `tmp` and `reflection` as scratch.
// For each row i it builds a reflection vector from the entries to the right
// of the diagonal and applies it to rows i..size-1. Afterwards every
// diagonal entry from `row` onward is raised to at least 1.0e-6.
// NOTE(review): several lines are not visible in this view (declarations of
// mag2 and acc, the stores into tmp, and the closing of the branch that
// tests mag2) - confirm the exact nesting upstream.
679 void ndHouseholderReflection(ndInt32 size, ndInt32 row, ndInt32 colum, T*
const choleskyMatrix, T*
const tmp, T*
const reflection)
681 ndAssert(row <= colum);
684 for (ndInt32 i = row; i <= colum; ++i)
686 T*
const rowI = &choleskyMatrix[size * i];
// Squared length of the entries right of the diagonal, copied as we go.
688 for (ndInt32 j = i + 1; j <= colum; ++j)
690 mag2 += rowI[j] * rowI[j];
691 reflection[j] = rowI[j];
// Skip the reflection when those entries are already negligible.
693 if (mag2 > T(1.0e-14f))
695 reflection[i] = rowI[i] + ndSign(rowI[i]) * T(sqrt(mag2 + rowI[i] * rowI[i]));
697 const T vMag2(mag2 + reflection[i] * reflection[i]);
698 const T den = T(2.0f) / vMag2;
// Projection of each affected row onto the reflection vector.
699 for (ndInt32 j = i; j < size; ++j)
702 T*
const rowJ = &choleskyMatrix[size * j];
703 for (ndInt32 k = i; k <= colum; ++k) {
704 acc += rowJ[k] * reflection[k];
// Subtract the scaled reflection vector from the rows below row i.
709 for (ndInt32 j = i + 1; j < size; ++j)
712 T*
const rowJ = &choleskyMatrix[size * j];
713 const T a = tmp[j] * den;
714 for (ndInt32 k = i; k <= colum; ++k)
716 rowJ[k] -= a * reflection[k];
719 rowI[i] -= tmp[i] * reflection[i] * den;
// Negate column i from row i downward when the diagonal came out negative.
722 if (rowI[i] < T(0.0f))
724 for (ndInt32 k = i; k < size; ++k)
726 choleskyMatrix[size * k + i] = -choleskyMatrix[size * k + i];
// Floor on the diagonal entries.
731 for (ndInt32 i = row; i < size; ++i)
733 choleskyMatrix[size * i + i] = ndMax(choleskyMatrix[size * i + i], T(1.0e-6f));
// Brings choleskyMatrix up to date after a change affecting rows `row`
// through `colum`. Two integer cost estimates are compared: when the
// reflection estimate is lower, ndHouseholderReflection is called;
// the other visible path copies psdMatrix over choleskyMatrix and factors
// it again.
// NOTE(review): the `else` keyword separating the two paths is not visible
// in this view - confirm.
// NOTE(review): the trailing lines use d-prefixed names (dAlloca,
// dCholeskyFactorization, dInt32, dAssert, dAbs) unlike the rest of the
// file; presumably they sit inside a disabled debug check that compares the
// updated factor against a fresh factorization - confirm.
739 void ndCholeskyUpdate(ndInt32 size, ndInt32 row, ndInt32 colum, T*
const choleskyMatrix, T*
const tmp, T*
const reflexion,
const T*
const psdMatrix)
741 const ndInt32 n0 = colum - row;
742 const ndInt32 n1 = n0 + 1;
// Operation-count estimates for the two strategies.
743 const ndInt32 choleskyCost = size * size * size / 3;
744 const ndInt32 householdCost = n0 * (n0 + 1) / 2 + n1 * (n1 + 1) * (2 * (2 * n1 + 1) - 3 + 3 * (size - colum - 1)) / 6 - 1;
746 if (householdCost < choleskyCost)
748 ndHouseholderReflection(size, row, colum, choleskyMatrix, tmp, reflexion);
// Full refactorization from the source matrix.
752 memcpy (choleskyMatrix, psdMatrix,
sizeof (T) * size * size);
753 ndCholeskyFactorization(size, choleskyMatrix);
758 T*
const psdMatrixCopy = dAlloca(T, size * size);
759 memcpy(psdMatrixCopy, psdMatrix,
sizeof(T) * size * size);
760 dCholeskyFactorization(size, psdMatrixCopy);
762 for (dInt32 i = 0; i < size; ++i)
764 for (dInt32 j = 0; j < size; ++j)
766 T err = psdMatrixCopy[i*size + j] - choleskyMatrix[i*size + j];
767 dAssert(dAbs(err) < T(1.0e-4f));
// Pivoting solver for a bounded problem on symmetricMatrixPSD (packed,
// row pitch `size`). It reorders the problem with ndPermuteRows, keeps a
// factor in lowerTriangularMatrix that is refreshed with ndCholeskyUpdate,
// and steps the working solution x0 and residual r0 along delta_x / delta_r.
// The visible code permutes symmetricMatrixPSD, b, low and high in place.
// NOTE(review): a large part of this function is not visible in this view
// (loop headers, branch bodies, declarations of min, alpha, loop, clamp_x,
// and the final write-back). The comments below describe only what the
// visible lines do - confirm the overall control flow upstream.
790 void ndSolveDantzigLcpLow(ndInt32 size, T*
const symmetricMatrixPSD, T*
const x, T*
const b, T*
const low, T*
const high)
// Scratch arrays, all obtained through ndAlloca.
792 T*
const x0 = ndAlloca(T, size);
793 T*
const r0 = ndAlloca(T, size);
794 T*
const tmp0 = ndAlloca(T, size);
795 T*
const tmp1 = ndAlloca(T, size);
796 T*
const delta_r = ndAlloca(T, size);
797 T*
const delta_x = ndAlloca(T, size);
798 T*
const lowerTriangularMatrix = ndAlloca(T, size * size);
799 ndInt16*
const permute = ndAlloca(ndInt16, size);
// Identity permutation; x temporarily holds max(b[i]^2, 1) as a sort key.
801 for (ndInt32 i = 0; i < size; ++i)
803 permute[i] = ndInt16(i);
805 x[i] = ndMax (b[i] * b[i], T (1.0f));
808 for (ndInt32 n = size - 1, i = size - 1; i >= 0; i--)
812 ndPermuteRows(size, n, i, symmetricMatrixPSD, lowerTriangularMatrix, x, b, low, high, permute);
// Ordering pass over the trailing entries whose key exceeds 1.
817 for (ndInt32 i = size - 1; (i >= 0) && (x[i] > T(1.0f)) ; i--)
820 for (ndInt32 j = i - 1; (j >= 0) && (x[j] > T(1.0f)); j--)
829 ndPermuteRows(size, i, min, symmetricMatrixPSD, lowerTriangularMatrix, x, b, low, high, permute);
// Drop trailing entries whose key is at least 16 from the initial guess.
// NOTE(review): no lower bound on initialGuessCount is visible in this
// loop; if every key is >= 16 the index would go below zero - confirm.
833 ndInt32 initialGuessCount = size;
834 while (x[initialGuessCount - 1] >= T(16.0f))
836 initialGuessCount --;
839 memcpy(lowerTriangularMatrix, symmetricMatrixPSD,
sizeof(T) * size * size);
// NOTE(review): two factorization calls appear here; presumably one is the
// debug and the other the release branch of a conditional - confirm.
841 bool valid = ndCholeskyFactorization(size, lowerTriangularMatrix);
844 ndCholeskyFactorization(size, lowerTriangularMatrix);
// Initial guess: solve the leading block, shrinking it while some entry
// would leave its bounds.
846 for (ndInt32 j = 0; (j != -1) && initialGuessCount;)
848 ndSolveCholesky(size, initialGuessCount, lowerTriangularMatrix, x0, b);
853 for (ndInt32 i = initialGuessCount - 1; i >= 0; i--)
855 T x1 = alpha * x0[i];
860 alpha = low[i] / x0[i];
862 else if (x1 > high[i])
866 alpha = high[i] / x0[i];
874 ndPermuteRows(size, j, initialGuessCount, symmetricMatrixPSD, lowerTriangularMatrix, x0, b, low, high, permute);
875 ndCholeskyUpdate(size, j, initialGuessCount, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
// Case where the initial guess covers every variable.
879 if (initialGuessCount == size)
881 for (ndInt32 i = 0; i < size; ++i)
883 ndInt32 j = permute[i];
// State of the main pivoting loop.
890 ndInt32 clampedIndex = size;
891 ndInt32 index = initialGuessCount;
892 ndInt32 count = size - initialGuessCount;
893 ndInt32 stride = index * size;
895 for (ndInt32 i = 0; i < size; ++i)
898 delta_x[i] = T(0.0f);
899 delta_r[i] = T(0.0f);
// Residuals of the rows that are not part of the initial guess.
902 for (ndInt32 i = index; i < size; ++i)
904 r0[i] = ndDotProduct(size, &symmetricMatrixPSD[stride], x0) - b[i];
917 ndInt32 swapIndex = -1;
919 if (ndAbs(r0[index]) > T(1.0e-12f))
921 ndCalculateDelta_x(size, index, symmetricMatrixPSD, lowerTriangularMatrix, delta_x);
922 ndCalculateDelta_r(size, index, symmetricMatrixPSD, delta_x, delta_r);
924 ndAssert(delta_r[index] != T(0.0f));
925 ndAssert(ndAbs(delta_x[index]) == T(1.0f));
// Avoid dividing by zero when asserts are compiled out.
926 delta_r[index] = (delta_r[index] == T(0.0f)) ? T(1.0e-12f) : delta_r[index];
// Step length that would bring the residual at `index` to zero.
928 T scale = -r0[index] / delta_r[index];
929 ndAssert(ndAbs(scale) >= T(0.0f));
// Shorten the step if an active variable would cross one of its bounds.
931 for (ndInt32 i = 0; i <= index; ++i)
933 T x1 = x0[i] + scale * delta_x[i];
938 scale = (high[i] - x0[i]) / delta_x[i];
940 else if (x1 < low[i])
944 scale = (low[i] - x0[i]) / delta_x[i];
947 ndAssert(ndAbs(scale) >= T(0.0f));
// Shorten the step if a clamped variable's residual would change sign.
949 for (ndInt32 i = clampedIndex; (i < size) && (scale > T(1.0e-12f)); ++i)
951 T r1 = r0[i] + scale * delta_r[i];
952 if ((r1 * r0[i]) < T(0.0f))
954 ndAssert(ndAbs(delta_r[i]) > T(0.0f));
955 T s1 = -r0[i] / delta_r[i];
956 ndAssert(ndAbs(s1) >= T(0.0f));
957 ndAssert(ndAbs(s1) <= ndAbs(scale));
958 if (ndAbs(s1) < ndAbs(scale))
// Take the step on both the solution and the residual.
966 if (ndAbs(scale) > T(1.0e-12f))
968 for (ndInt32 i = 0; i < size; ++i)
970 x0[i] += scale * delta_x[i];
971 r0[i] += scale * delta_r[i];
979 delta_r[index] = T(0.0f);
// The remaining branches reorder rows according to which entry limited
// the step (swapIndex), then refresh the factor.
984 else if (swapIndex == index)
989 ndPermuteRows(size, index, clampedIndex, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
990 ndCholeskyUpdate(size, index, clampedIndex, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
991 loop = count ? true :
false;
993 else if (swapIndex > index)
996 r0[swapIndex] = T(0.0f);
997 ndAssert(swapIndex < size);
998 ndAssert(clampedIndex <= size);
999 if (swapIndex < clampedIndex)
1003 ndPermuteRows(size, clampedIndex, swapIndex, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1004 ndCholeskyUpdate(size, swapIndex, clampedIndex, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
1005 ndAssert(clampedIndex >= index);
1010 ndAssert(clampedIndex < size);
1011 ndPermuteRows(size, clampedIndex, swapIndex, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1012 ndCholeskyUpdate(size, clampedIndex, swapIndex, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
1014 ndAssert(clampedIndex <= size);
1015 ndAssert(clampedIndex >= index);
1020 ndAssert(index > 0);
1021 x0[swapIndex] = clamp_x;
1022 delta_x[index] = T(0.0f);
1024 ndAssert(swapIndex < index);
1025 ndPermuteRows(size, swapIndex, index - 1, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1026 ndPermuteRows(size, index - 1, index, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1027 ndPermuteRows(size, clampedIndex - 1, index, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1028 ndCholeskyUpdate (size, swapIndex, clampedIndex - 1, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
// Final pass indexed through the permutation (body not visible here).
1037 for (ndInt32 i = 0; i < size; ++i)
1039 ndInt32 j = permute[i];
// Splits the variables into an unbounded group (low <= -D_LCP_MAX_VALUE and
// high >= D_LCP_MAX_VALUE) and a bounded group, moving bounded rows toward
// the end with row/column swaps. The unbounded part is handled with
// ndSolveCholesky; a reduced problem (a11, c, l, h) is then built for the
// bounded part and passed to ndSolveDantzigLCP.
// NOTE(review): much of this function is not visible in this view, and some
// visible calls do not match signatures visible elsewhere in this file: the
// three-argument ndCholeskyFactorizationAddRow call, the four-argument
// ndSolveCholesky calls, and the use of ndSolveDantzigLCP's result although
// the visible definition returns void. Presumably this function is disabled
// or targets overloads declared elsewhere - confirm.
1099 bool ndSolvePartitionDantzigLCP(ndInt32 size, T*
const symmetricMatrixPSD , T*
const x, T*
const b, T*
const low, T*
const high)
1101 ndInt16*
const permute = ndAlloca(ndInt16, size);
1103 for (ndInt32 i = 0; i < size; ++i)
1106 permute[i] = ndInt16(i);
// Partition pass: unbounded rows are factored in place, the others are
// exchanged with the last row of the still-unclassified range.
1109 ndInt32 unboundedSize = size;
1110 for (ndInt32 i = 0; i < unboundedSize; ++i)
1112 if ((low[i] <= T(-D_LCP_MAX_VALUE)) && (high[i] >= T(D_LCP_MAX_VALUE)))
1114 ndCholeskyFactorizationAddRow(size, i, symmetricMatrixPSD );
1118 ndInt32 j = unboundedSize - 1;
1121 T*
const A = &symmetricMatrixPSD [size * i];
1122 T*
const B = &symmetricMatrixPSD [size * j];
1123 for (ndInt32 k = 0; k < size; ++k)
1129 for (ndInt32 k = 0; k < size; ++k)
1131 ndSwap(symmetricMatrixPSD [stride + i], symmetricMatrixPSD [stride + j]);
1136 ndSwap(low[i], low[j]);
1137 ndSwap(high[i], high[j]);
1138 ndSwap(permute[i], permute[j]);
1147 if (unboundedSize > 0)
// Solve the unbounded block, then remove its contribution from the
// right-hand side of the bounded rows.
1149 ndSolveCholesky(size, unboundedSize, symmetricMatrixPSD , x);
1150 ndInt32 base = unboundedSize * size;
1151 for (ndInt32 i = unboundedSize; i < size; ++i)
1153 b[i] -= ndDotProduct(unboundedSize, &symmetricMatrixPSD[base], x);
// Storage for the reduced problem over the bounded variables.
1157 const ndInt32 boundedSize = size - unboundedSize;
1158 T*
const l = ndAlloca(T, boundedSize);
1159 T*
const h = ndAlloca(T, boundedSize);
1160 T*
const c = ndAlloca(T, boundedSize);
1161 T*
const u = ndAlloca(T, boundedSize);
1162 T*
const a11 = ndAlloca(T, boundedSize * boundedSize);
1163 T*
const a10 = ndAlloca(T, boundedSize * unboundedSize);
1165 for (ndInt32 i = 0; i < boundedSize; ++i)
1167 T*
const g = &a10[i * unboundedSize];
1168 const T*
const row = &symmetricMatrixPSD [(unboundedSize + i) * size];
1169 for (ndInt32 j = 0; j < unboundedSize; ++j)
1173 ndSolveCholesky(size, unboundedSize, symmetricMatrixPSD, g);
// Diagonal entry of a11, then the entries below it in column i.
1175 T*
const arow = &a11[i * boundedSize];
1176 const T*
const row2 = &symmetricMatrixPSD[(unboundedSize + i) * size];
1177 arow[i] = row2[unboundedSize + i] + ndDotProduct(unboundedSize, g, row2);
1178 for (ndInt32 j = i + 1; j < boundedSize; ++j)
1180 const T*
const row1 = &symmetricMatrixPSD [(unboundedSize + j) * size];
1181 T elem = row1[unboundedSize + i] + ndDotProduct(unboundedSize, g, row1);
1183 a11[j * boundedSize + i] = elem;
// Right-hand side and bounds of the reduced problem.
1186 c[i] = b[i + unboundedSize];
1187 l[i] = low[i + unboundedSize];
1188 h[i] = high[i + unboundedSize];
1191 if (ndSolveDantzigLCP(boundedSize, a11, u, c, l, h))
1193 for (ndInt32 i = 0; i < boundedSize; ++i)
1196 x[unboundedSize + i] = s;
1197 const T*
const g = &a10[i * unboundedSize];
1198 for (ndInt32 j = 0; j < unboundedSize; ++j)
1208 for (ndInt32 i = 0; i < size; ++i)
// Path that solves the whole problem directly.
1212 ret = ndSolveDantzigLCP(size, symmetricMatrixPSD, x, b, low, high);
1215 for (ndInt32 i = 0; i < size; ++i)
// Final pass indexed through the permutation (body not visible here).
1219 for (ndInt32 i = 0; i < size; ++i)
1221 ndInt32 j = permute[i];
// Hybrid solve: runs ndGaussSeidelLcpSor for a limited number of sweeps,
// measures the residual r = b - A * x, and depending on the number of
// clipped variables and the squared error either calls ndSolveDantzigLcpLow
// on the residual or runs ndGaussSeidelLcpSor again for up to 20 sweeps.
// NOTE(review): several lines are not visible in this view (declarations of
// err2 and stride, any early return, the bodies of the loops around the
// ndSolveDantzigLcpLow call, and the `else` separating the two final
// paths) - confirm the exact control flow upstream.
1229 void ndSolveDantzigLCP(ndInt32 size, T*
const symmetricMatrixPSD, T*
const x, T*
const b, T*
const low, T*
const high)
1231 T tol2 = T(0.25f * 0.25f);
// Sweep budget for the first iterative pass, kept within [12, 20].
1232 ndInt32 passes = ndClamp(size, 12, 20);
1233 T*
const r = ndAlloca(T, size);
1234 ndInt16*
const clipped = ndAlloca(ndInt16, size);
1237 ndGaussSeidelLcpSor(size, symmetricMatrixPSD, x, b, low, high, tol2, passes, clipped, T(1.3f));
// Residual per row; rows flagged in `clipped` are excluded from err2.
1241 ndInt32 clippeCount = 0;
1242 for (ndInt32 i = 0; i < size; ++i)
1244 const T*
const row = &symmetricMatrixPSD[stride];
1245 r[i] = b[i] - ndDotProduct(size, row, x);
1246 clippeCount += clipped[i];
1247 err2 += clipped[i] ? T(0.0f) : r[i] * r[i];
// NOTE(review): (clippeCount < 32) is implied by (clippeCount < 16), so the
// test reduces to clippeCount < 16 and err2 < 16 - confirm the intended
// thresholds.
1254 if ((clippeCount < 16) && ((clippeCount < 32) && (err2 < T(16.0f))))
1257 T*
const x0 = ndAlloca(T, size);
1258 for (ndInt32 i = 0; i < size; ++i)
// The pivoting solver is given the residual r as its right-hand side.
1263 ndSolveDantzigLcpLow(size, symmetricMatrixPSD, x0, r, low, high);
1264 for (ndInt32 i = 0; i < size; ++i)
1272 ndGaussSeidelLcpSor(size, symmetricMatrixPSD, x, b, low, high, tol2, 20, clipped, T(1.3f));