23 #ifndef __D_GENERAL_MATRIX_H__
24 #define __D_GENERAL_MATRIX_H__
26 #include "dCoreStdafx.h"
28 #include "dGeneralVector.h"
30 #define D_LCP_MAX_VALUE dFloat32 (1.0e10f)
// Declares SetBuffers, which receives four T* buffers named r0, z0, p0 and q0.
// NOTE(review): the enclosing class header and this function's definition are not
// visible in this excerpt; presumably the pointers end up in m_r0/m_z0/m_p0/m_q0,
// which the solver fragments below read — confirm against the definition.
41 void SetBuffers(T*
const r0, T*
const z0, T*
const p0, T*
const q0);
// Declares Solve: takes the problem size, a tolerance, an in/out vector x and a
// read-only vector b, and returns a value of type T.
// NOTE(review): judging by the visible body fragment, the return value is whatever
// SolveInternal returns — confirm against the full definition.
42 T Solve(dInt32 size, T tolerance, T*
const x,
const T*
const b);
// Pure virtual hook: implementations write a result into `out` computed from the
// read-only vector `v`. By its name this is the matrix-vector product used by the
// solver — NOTE(review): the exact contract is defined by the derived class.
45 virtual void MatrixTimeVector(T*
const out,
const T*
const v)
const = 0;
// Pure virtual hook: implementations write a result into `out` computed from the
// read-only vector `v`. By its name this applies the inverse preconditioner to v —
// NOTE(review): the exact contract is defined by the derived class.
46 virtual void InversePrecoditionerTimeVector(T*
const out,
const T*
const v)
const = 0;
// Declares the const member SolveInternal with the same parameter list as Solve
// (size, tolerance, in/out x, read-only b); returns a value of type T.
49 T SolveInternal(dInt32 size, T tolerance, T*
const x,
const T*
const b)
const;
// Fragment (enclosing definition header not visible in this excerpt):
// passes nullptr for all four buffers to SetBuffers.
63 SetBuffers(
nullptr,
nullptr,
nullptr,
nullptr);
// Fragment (enclosing definition header not visible in this excerpt):
// forwards r0, z0, p0 and q0 to SetBuffers unchanged.
69 SetBuffers(r0, z0, p0, q0);
// Fragment of a definition whose header is not visible in this excerpt
// (presumably Solve — confirm). One visible statement returns SolveInternal directly.
// NOTE(review): the condition guarding this early return is not visible here.
91 return SolveInternal(size, tolerance, x, b);
// Otherwise four work arrays of `size` elements each are obtained from dAlloca ...
95 T*
const r0 = dAlloca(T, size);
96 T*
const z0 = dAlloca(T, size);
97 T*
const p0 = dAlloca(T, size);
98 T*
const q0 = dAlloca(T, size);
// ... handed to SetBuffers, used for one SolveInternal call ...
99 SetBuffers(r0, z0, p0, q0);
100 T error = SolveInternal(size, tolerance, x, b);
// ... and SetBuffers is then called again with nullptr for every buffer.
101 SetBuffers(
nullptr,
nullptr,
nullptr,
nullptr);
// Fragment of a definition whose header is not visible in this excerpt
// (presumably SolveInternal — confirm).
// Setup: m_z0 receives MatrixTimeVector applied to x; m_r0 is produced by dSub from
// b and m_z0 (assumes dSub(size, out, a, b) computes a - b — TODO confirm);
// m_p0 receives the preconditioner hook applied to m_r0.
109 MatrixTimeVector(m_z0, x);
110 dSub(size, m_r0, b, m_z0);
111 InversePrecoditionerTimeVector(m_p0, m_r0);
114 T num = dDotProduct(size, m_r0, m_p0);
// Main loop: at most `size` passes, and only while error2 is still above tolerance.
// NOTE(review): the declarations of error2, alpha, beta and iter are not visible here.
116 for (dInt32 j = 0; (j < size) && (error2 > tolerance); j++)
118 MatrixTimeVector(m_z0, m_p0);
119 T den = dDotProduct(size, m_p0, m_z0);
// Debug check that the denominator is non-zero.
121 dAssert(fabs(den) > T(0.0f));
// x and m_r0 are updated through dMulAdd with +alpha and -alpha respectively
// (assumes dMulAdd(size, out, a, b, s) computes a + b * s — TODO confirm).
124 dMulAdd(size, x, x, m_p0, alpha);
127 dMulAdd(size, m_r0, m_r0, m_z0, -alpha);
// NOTE(review): these two statements recompute m_r0 from x and b; whether they run
// on every pass or sit behind a condition/preprocessor guard is not visible here.
130 MatrixTimeVector(m_z0, x);
131 dSub(size, m_r0, b, m_z0);
134 InversePrecoditionerTimeVector(m_q0, m_r0);
136 T num1 = dDotProduct(size, m_r0, m_q0);
138 dMulAdd(size, m_p0, m_q0, m_p0, beta);
// NOTE(review): num is recomputed with the same dot product that produced num1.
139 num = dDotProduct(size, m_r0, m_q0);
// error2 is raised to the largest squared component of m_r0.
145 for (dInt32 i = 0; i < size; i++)
147 error2 = dMax(error2, m_r0[i] * m_r0[i]);
// Debug check that the iteration counter did not exceed size.
151 dAssert(iter <= size);
// Writes into out[i], for i in [0, size), the dDotProduct over `size` elements of
// one matrix row with v.
// NOTE(review): the declaration and per-row update of `stride` are not visible in
// this excerpt; presumably it advances by size per row (dense row-major) — confirm.
162 void dMatrixTimeVector(dInt32 size,
const T*
const matrix,
const T*
const v, T*
const out)
165 for (dInt32 i = 0; i < size; i++)
167 const T*
const row = &matrix[stride];
168 out[i] = dDotProduct(size, row, v);
// Multiplies two size x size matrices stored with a row length of `size`
// (element (r, c) is at index r * size + c, as the indexing below shows).
// For each output row i and column j, acc sums rowA[k] * matrixB[k * size + j].
// NOTE(review): the initialisation of acc and the store into rowOut are not visible
// in this excerpt.
174 void dMatrixTimeMatrix(dInt32 size,
const T*
const matrixA,
const T*
const matrixB, T*
const out)
176 for (dInt32 i = 0; i < size; i++)
178 const T*
const rowA = &matrixA[i * size];
179 T*
const rowOut = &out[i * size];
180 for (dInt32 j = 0; j < size; j++)
183 for (dInt32 k = 0; k < size; k++)
185 acc += rowA[k] * matrixB[k * size + j];
// Fills `matrix` row by row with products of vector components:
// row i, column j receives vectorA[i] * vectorA[j].
// NOTE(review): vectorB is never read in the visible lines — the inner statement
// uses vectorA for both factors. Confirm whether vectorB[j] was intended.
// NOTE(review): the declaration and update of `stride` are not visible here.
193 void dCovarianceMatrix(dInt32 size, T*
const matrix,
const T*
const vectorA,
const T*
const vectorB)
196 for (dInt32 i = 0; i < size; i++)
198 T*
const row = &matrix[stride];
199 T scale (vectorA[i]);
200 for (dInt32 j = 0; j < size; j++)
202 row[j] = scale * vectorA[j];
// Processes row n of an in-place Cholesky-style factorization.
//   stride         - distance in elements between consecutive rows of `matrix`.
//   n              - index of the row being factored; rows above it are read.
//   invDiagonalOut - receives 1 / rowN[n]; entries [0, n) are read as the
//                    reciprocal diagonals of earlier rows.
// Returns bool. NOTE(review): the return statements are not visible in this excerpt;
// presumably false when the pivot test below fails — confirm.
// `size` is not used in the visible lines.
209 bool dCholeskyFactorizationAddRow(dInt32 size, dInt32 stride, dInt32 n, T*
const matrix, T*
const invDiagonalOut)
211 T*
const rowN = &matrix[stride * n];
214 for (dInt32 j = 0; j <= n; j++)
217 T*
const rowJ = &matrix[base];
// s accumulates the dot product of the first j entries of rows n and j.
218 for (dInt32 k = 0; k < j; k++)
220 s += rowN[k] * rowJ[k];
// Diagonal case: the remaining pivot must be at least 1.0e-6 before its square root
// is stored.
225 T diag = rowN[n] - s;
226 if (diag < T(1.0e-6f))
231 rowN[n] = T(sqrt(diag));
232 invDiagonalOut[n] = T(1.0f) / rowN[n];
// Off-diagonal case: scale the reduced entry by the stored reciprocal diagonal of row j.
238 rowN[j] = invDiagonalOut[j] * (rowN[j] - s);
// Factors psdMatrix in place by calling dCholeskyFactorizationAddRow for rows
// 0 .. size-1, stopping at the first row for which it reports failure.
// Scratch storage for the reciprocal diagonals comes from dAlloca.
// NOTE(review): the declaration of `state` and the return statement are not visible
// in this excerpt; presumably `state` is returned — confirm.
248 bool dCholeskyFactorization(dInt32 size, dInt32 stride, T*
const psdMatrix)
251 T*
const invDiagonal = dAlloca(T, size);
252 for (dInt32 i = 0; (i < size) && state; i++)
254 state = state && dCholeskyFactorizationAddRow(size, stride, i, psdMatrix, invDiagonal);
// Copies `size` elements from each row of `matrix` into a size * size dAlloca buffer
// and returns the result of dCholeskyFactorization on that copy, so the caller's
// matrix is not written by the visible statements.
// NOTE(review): the first memcpy argument reads "©[i * size]", which looks like an
// encoding corruption of "&copy[i * size]" — confirm against the original file.
// NOTE(review): the declaration and update of `row` are not visible in this excerpt.
260 bool dTestPSDmatrix(dInt32 size, dInt32 stride, T*
const matrix)
262 T*
const copy = dAlloca(T, size * size);
264 for (dInt32 i = 0; i < size; i++)
266 memcpy(©[i * size], &matrix[row], size *
sizeof (T));
269 return dCholeskyFactorization(size, size, copy);
// Repeats until a trial Cholesky factorization of a copy of psdMatrix succeeds.
// The visible loop body multiplies each regularizer[i] by 4 and adds it to the
// matching diagonal entry of psdMatrix.
// NOTE(review): the condition that guards the diagonal update (presumably
// `!isPsdMatrix`) and the opening `do` are not visible in this excerpt.
// NOTE(review): the scratch buffer and memcpy byte count use dFloat32 while the
// parameters are T*; if T differs from dFloat32 the sizes do not match — confirm.
273 void dCholeskyApplyRegularizer (dInt32 size, dInt32 stride, T*
const psdMatrix, T*
const regularizer)
275 bool isPsdMatrix =
false;
276 dFloat32*
const lowerTriangule = dAlloca(dFloat32, stride * stride);
// Factor a scratch copy so psdMatrix itself is only changed by the diagonal update.
279 memcpy(lowerTriangule, psdMatrix,
sizeof(dFloat32) * stride * stride);
280 isPsdMatrix = dCholeskyFactorization(size, stride, lowerTriangule);
283 for (dInt32 i = 0; i < size; i++)
285 regularizer[i] *= dFloat32(4.0f);
286 psdMatrix[i * stride + i] += regularizer[i];
289 }
while (!isPsdMatrix);
// Solves a linear system in two triangular sweeps using the factor stored in
// choleskyMatrix (rows `stride` elements apart), writing the solution to x.
// NOTE(review): the declarations/resets of `acc` and `rowStart` are not visible in
// this excerpt; presumably acc starts at zero for every row — confirm.
293 void dSolveCholesky(dInt32 size, dInt32 stride,
const T*
const choleskyMatrix, T*
const x,
const T*
const b)
// Forward sweep: x[i] = (b[i] - sum_{j<i} row[j] * x[j]) / row[i].
296 for (dInt32 i = 0; i < size; i++)
299 const T*
const row = &choleskyMatrix[rowStart];
300 for (dInt32 j = 0; j < i; j++)
302 acc = acc + row[j] * x[j];
304 x[i] = (b[i] - acc) / row[i];
// Backward sweep: reads the factor column-wise (element [j][i]), i.e. uses its
// transpose, and overwrites x in place from the last entry to the first.
308 for (dInt32 i = size - 1; i >= 0; i--)
311 for (dInt32 j = i + 1; j < size; j++)
313 acc = acc + choleskyMatrix[stride * j + i] * x[j];
315 x[i] = (x[i] - acc) / choleskyMatrix[stride * i + i];
// Convenience form for a densely packed factor: forwards with stride == size.
// NOTE(review): the call passes four arguments, but the only other dSolveCholesky
// visible in this excerpt takes five (size, stride, choleskyMatrix, x, b) —
// confirm which overload this is meant to reach.
320 void dSolveCholesky(dInt32 size, T*
const choleskyMatrix, T*
const x)
322 dSolveCholesky(size, size, choleskyMatrix, x);
// Solves a dense size x size system in place by elimination with row pivoting
// followed by back substitution; `matrix` is overwritten and `b` ends up holding
// the solution. Returns bool.
// NOTE(review): the return statements, the declaration of `m`, the pivot-update
// statements and the declaration of `acc` are not visible in this excerpt.
326 bool dSolveGaussian(dInt32 size, T*
const matrix, T*
const b)
328 for (dInt32 i = 0; i < size - 1; i++)
330 const T*
const rowI = &matrix[i * size];
// Scan column i below the diagonal for the entry with the largest magnitude.
// NOTE(review): the bound `j < size - 1` leaves the last row out of the scan —
// confirm whether `j < size` was intended.
332 T maxVal (dAbs(rowI[i]));
333 for (dInt32 j = i + 1; j < size - 1; j++)
335 T val (dAbs(matrix[size * j + i]));
// A pivot magnitude below 1.0e-12 is treated as a special case (action not visible).
343 if (maxVal < T(1.0e-12f))
// Swap the whole pivot row m with row i.
// NOTE(review): a matching swap of b[m] and b[i] is not visible here — confirm.
350 T*
const rowK = &matrix[m * size];
351 T*
const rowJ = &matrix[i * size];
352 for (dInt32 j = 0; j < size; j++)
354 dSwap(rowK[j], rowJ[j]);
// Eliminate column i from every row below, applying the same factor to b.
359 T den = T(1.0f) / rowI[i];
360 for (dInt32 k = i + 1; k < size; k++)
362 T*
const rowK = &matrix[size * k];
363 T factor(-rowK[i] * den);
364 for (dInt32 j = i + 1; j < size; j++)
366 rowK[j] += rowI[j] * factor;
369 b[k] += b[i] * factor;
// Back substitution over the upper-triangular result, from the last row upward.
373 for (dInt32 i = size - 1; i >= 0; i--)
376 T*
const rowI = &matrix[i * size];
377 for (dInt32 j = i + 1; j < size; j++)
379 acc = acc + rowI[j] * b[j];
381 b[i] = (b[i] - acc) / rowI[i];
// Writes `size` values derived from symmetricMatrix into eigenValues. The input is
// copied into a dAlloca scratch buffer first, so the visible statements never write
// to symmetricMatrix.
// NOTE(review): the structure (row-by-row reduction that fills offDiag, followed by
// an iteration on eigenValues/offDiag with rotations c, s) appears to follow the
// classic Householder tridiagonalisation + implicit QL scheme — confirm against the
// full source; many interior statements are not visible in this excerpt.
387 void dEigenValues(
const dInt32 size,
const dInt32 stride,
const T*
const symmetricMatrix, T*
const eigenValues)
389 T*
const offDiag = dAlloca(T, size);
390 T*
const matrix = dAlloca(T, size * stride);
392 memcpy(matrix, symmetricMatrix,
sizeof(T) * size * stride);
// Phase 1: process rows from the last down to row 1.
393 for (dInt32 i = size - 1; i > 0; i--)
396 T*
const rowI = &matrix[i * stride];
// scale is the sum of absolute values of the first i entries of row i.
401 for (dInt32 k = 0; k < i; k++)
403 scale += dAbs(rowI[k]);
// An all-zero row prefix needs no reflection: keep the sub-diagonal entry as is.
406 if (scale == T(0.0f))
408 offDiag[i] = rowI[i - 1];
412 for (dInt32 k = 0; k < i; k++)
415 h += rowI[k] * rowI[k];
// g takes the sign opposite to f.
419 T g((f >= T(0.0f) ? -T(sqrt(h)) : T(sqrt(h))));
420 offDiag[i] = scale * g;
// The two inner loops together read column/row j of the symmetric matrix using only
// entries at or below the diagonal.
425 for (dInt32 j = 0; j < i; j++)
428 const T*
const rowJ = &matrix[j * stride];
429 for (dInt32 k = 0; k <= j; k++)
431 g += rowJ[k] * rowI[k];
433 for (dInt32 k = j + 1; k < i; k++)
435 g += matrix[k * stride + j] * rowI[k];
438 f += offDiag[j] * rowI[j];
// Update of the lower triangle of the leading i x i block.
442 for (dInt32 j = 0; j < i; j++)
445 T g1(offDiag[j] - hh * f1);
447 T*
const rowJ = &matrix[j * stride];
448 for (dInt32 k = 0; k <= j; k++)
450 rowJ[k] -= (f1 * offDiag[k] + g1 * rowI[k]);
457 offDiag[i] = rowI[i - 1];
// Copy the diagonal into eigenValues and shift offDiag down by one slot.
// NOTE(review): the per-iteration update of `index` is not visible here.
462 dInt32 index = stride;
463 eigenValues[0] = matrix[0];
464 for (dInt32 i = 1; i < size; i++)
466 eigenValues[i] = matrix[index + i];
467 offDiag[i - 1] = offDiag[i];
// Phase 2: iterate on each index i.
471 for (dInt32 i = 0; i < size; i++)
// Find the first j >= i whose off-diagonal entry is negligible relative to its
// neighbouring diagonal entries (relative threshold 1e-6).
477 for (j = i; j < size - 1; j++)
479 T dd(dAbs(eigenValues[j]) + dAbs(eigenValues[j + 1]));
480 if (dAbs(offDiag[j]) <= (T(1.e-6f) * dd))
// Shift computation.
495 T g((eigenValues[i + 1] - eigenValues[i]) / (T(2.0f) * offDiag[i]));
496 T r(dPythag(g, T(1.0f)));
497 g = eigenValues[j] - eigenValues[i] + offDiag[i] / (g + dSign(r, g));
// Sweep from j - 1 back down to i.
503 for (k = j - 1; k >= i; k--)
511 eigenValues[k + 1] -= p;
512 offDiag[j] = T(0.0f);
517 g = eigenValues[k + 1] - p;
518 d = (eigenValues[k] - g) * s + T(2.0f) * c * b;
520 eigenValues[k + 1] = g + p;
// NOTE(review): the action taken when r == 0 with k >= i is not visible here.
524 if (r == T(0.0f) && k >= i)
530 offDiag[j] = T(0.0f);
// Computes |largest eigenvalue| / |smallest eigenvalue| of the given matrix, where
// "largest" and "smallest" are the signed max and min of the values produced by
// dEigenValues.
// NOTE(review): the parameter is named choleskyMatrix but is passed to dEigenValues
// as its symmetricMatrix argument — confirm what callers supply.
// NOTE(review): no guard against minVal == 0 is visible, and the return statement is
// not visible in this excerpt (presumably returns `condition`).
537 T dConditionNumber(
const dInt32 size,
const dInt32 stride,
const T*
const choleskyMatrix)
539 T*
const eigenValues = dAlloca(T, size);
540 dEigenValues(size, stride, choleskyMatrix, eigenValues);
// Seed the running min/max with values that any eigenvalue in (-1e20, 1e20) replaces.
542 T minVal = T(1.0e20f);
543 T maxVal = T(-1.0e20f);
544 for (dInt32 i = 0; i < size; i++)
546 minVal = dMin(minVal, eigenValues[i]);
547 maxVal = dMax(maxVal, eigenValues[i]);
549 T condition = T(dAbs(maxVal) / dAbs(minVal));
// Iterative row-by-row relaxation for a box-constrained system: x is updated in
// place, one component at a time, with over-relaxation factor `sor`.
//   tol2         - threshold compared against `tolerance` to stop the outer loop.
//   maxIterCount - upper bound on outer passes.
//   clipped      - dInt16 per-row output; NOTE(review): the statements that write it
//                  are not visible in this excerpt.
// NOTE(review): the clamping of the new value against low/high inside the sweep, the
// tolerance update, and the declarations of `stride`/`base` are not visible here.
571 void dGaussSeidelLcpSor(
const dInt32 size,
const T*
const matrix, T*
const x,
const T*
const b,
const T*
const low,
const T*
const high, T tol2, dInt32 maxIterCount, dInt16*
const clipped, T sor)
573 const T*
const me = matrix;
574 T*
const invDiag1 = dAlloca(T, size);
// Start from zero clamped into [low[i], high[i]] and cache reciprocal diagonals.
577 for (dInt32 i = 0; i < size; i++)
579 x[i] = dClamp(T(0.0f), low[i], high[i]);
580 invDiag1[i] = T(1.0f) / me[stride + i];
// Twice tol2, so the loop test `tolerance > tol2` passes on entry for positive tol2.
584 T tolerance(tol2 * 2.0f);
585 const T*
const invDiag = invDiag1;
589 for (dInt32 i = 0; (i < maxIterCount) && (tolerance > tol2); i++)
596 for (dInt32 j = 0; j < size; j++)
598 const T*
const row = &me[base];
// r is the residual of row j; f is the value of x[j] that would zero that residual
// with all other components held fixed.
599 T r(b[j] - dDotProduct(size, row, x));
600 T f((r + row[j] * x[j]) * invDiag[j]);
// Relaxed update: move x[j] toward f by the factor sor.
615 x[j] = x[j] + (f - x[j]) * sor;
// Variant of the relaxation solver in which the bounds of row j are scaled by the
// current value of another unknown: l = low[j] * u[index[j]], h = high[j] * u[index[j]].
//   normalIndex - per-row relative offset; zero selects the extra slot u[size].
// Works on a scratch vector u of size + 1 elements obtained from dAlloca.
// NOTE(review): the initialisation of u (including u[size]), the clamping and
// tolerance updates inside the sweeps, the declarations of `rowStart`/`base`, and
// the final copy loop body are not visible in this excerpt.
623 void dGaussSeidelLcpSor(
const dInt32 size,
const dInt32 stride,
const T*
const matrix, T*
const x,
const T*
const b,
const dInt32*
const normalIndex,
const T*
const low,
const T*
const high, T tol2, dInt32 maxIterCount, T sor)
625 const T*
const me = matrix;
626 T*
const invDiag1 = dAlloca(T, size);
627 T*
const u = dAlloca(T, size + 1);
628 dInt32*
const index = dAlloca(dInt32, size);
// Resolve each row's reference index: j + normalIndex[j], or `size` when the offset is 0.
632 for (dInt32 j = 0; j < size; j++)
635 index[j] = normalIndex[j] ? j + normalIndex[j] : size;
// Clamp the starting values into their scaled bounds and cache reciprocal diagonals.
638 for (dInt32 j = 0; j < size; j++)
640 const T val = u[index[j]];
641 const T l = low[j] * val;
642 const T h = high[j] * val;
643 u[j] = dClamp(u[j], l, h);
644 invDiag1[j] = T(1.0f) / me[rowStart + j];
// Twice tol2, so the loop tests `tolerance > tol2` pass on entry for positive tol2.
648 T tolerance(tol2 * 2.0f);
649 const T*
const invDiag = invDiag1;
// First stage: up to max(8, size) passes; no `sor` factor appears in its visible lines.
650 const dInt32 maxCount = dMax(8, size);
651 for (dInt32 i = 0; (i < maxCount) && (tolerance > tol2); i++)
655 for (dInt32 j = 0; j < size; j++)
657 const T*
const row = &me[base];
658 T r(b[j] - dDotProduct(size, row, u));
659 T f((r + row[j] * u[j]) * invDiag[j]);
661 const T val = u[index[j]];
662 const T l = low[j] * val;
663 const T h = high[j] * val;
// Second stage: up to maxIterCount passes with the candidate relaxed by `sor`.
684 for (dInt32 i = 0; (i < maxIterCount) && (tolerance > tol2); i++)
691 for (dInt32 j = 0; j < size; j++)
693 const T*
const row = &me[base];
694 T r(b[j] - dDotProduct(size, row, u));
695 T f((r + row[j] * u[j]) * invDiag[j]);
696 f = u[j] + (f - u[j]) * sor;
698 const T val = u[index[j]];
699 const T l = low[j] * val;
700 const T h = high[j] * val;
// NOTE(review): presumably copies u back into x — loop body not visible; confirm.
718 for (dInt32 j = 0; j < size; j++)
// Convenience wrapper around the `clipped` variant of dGaussSeidelLcpSor with fixed
// settings: tolerance argument 1.0e-3, at most size * size passes, and a scratch
// dInt16 `clipped` array obtained from dAlloca. `sor` defaults to 1.2.
741 void dGaussSeidelLCP(
const dInt32 size,
const T*
const matrix, T*
const x,
const T*
const b,
const T*
const low,
const T*
const high, T sor = T(1.2f))
743 dInt16*
const clipped = dAlloca(dInt16, size);
744 dGaussSeidelLcpSor(size, matrix, x, b, low, high, T(1.0e-3f), size * size, clipped, sor);
// Exchanges indices i and j consistently across the problem data: rows and columns
// of `matrix`, rows of `choleskyMatrix`, and the matching entries of low, high and
// permute.
// NOTE(review): the swaps of the matrix rows A/B, of x and r, the `stride` update,
// and any guard for i == j are not visible in this excerpt.
748 void dPermuteRows(dInt32 size, dInt32 i, dInt32 j, T*
const matrix, T*
const choleskyMatrix, T*
const x, T*
const r, T*
const low, T*
const high, dInt16*
const permute)
752 T*
const A = &matrix[size * i];
753 T*
const B = &matrix[size * j];
754 T*
const invA = &choleskyMatrix[size * i];
755 T*
const invB = &choleskyMatrix[size * j];
// Row exchange.
756 for (dInt32 k = 0; k < size; k++)
759 dSwap(invA[k], invB[k]);
// Column exchange in `matrix`: entries i and j of each row trade places.
763 for (dInt32 k = 0; k < size; k++)
765 dSwap(matrix[stride + i], matrix[stride + j]);
771 dSwap(low[i], low[j]);
772 dSwap(high[i], high[j]);
773 dSwap(permute[i], permute[j]);
// Builds delta_x for index n: entries [0, n) start as the negated first n entries of
// row n of `matrix`, are then solved in place against the leading n x n part of
// choleskyMatrix (row stride `size`), and entry n is set to 1.
// delta_x is passed as both the output and the right-hand side of dSolveCholesky.
778 void dCalculateDelta_x(dInt32 size, dInt32 n,
const T*
const matrix,
const T*
const choleskyMatrix, T*
const delta_x)
780 const T*
const row = &matrix[size * n];
781 for (dInt32 i = 0; i < n; i++)
783 delta_x[i] = -row[i];
785 dSolveCholesky(size, n, choleskyMatrix, delta_x, delta_x);
786 delta_x[n] = T(1.0f);
// For each row i in [n, size), sets delta_r[i] to the dDotProduct of the first n + 1
// entries of a matrix row with delta_x. Entries of delta_r below n are not written
// by the visible statements.
// NOTE(review): the per-iteration update of `stride` is not visible in this excerpt;
// presumably it advances by size so row i is used — confirm.
791 void dCalculateDelta_r(dInt32 size, dInt32 n,
const T*
const matrix,
const T*
const delta_x, T*
const delta_r)
793 dInt32 stride = n * size;
794 const dInt32 size1 = n + 1;
795 for (dInt32 i = n; i < size; i++)
797 delta_r[i] = dDotProduct(size1, &matrix[stride], delta_x);
// Modifies choleskyMatrix (size x size, row length `size`) in place for rows
// row..colum by applying one reflection per row, then clamps the diagonal from `row`
// onward to at least 1.0e-6. `tmp` and `reflection` are caller-provided scratch.
// NOTE(review): by its name and structure this restores triangular form after rows
// were permuted — confirm against the full source. The declarations of mag2 and
// acc, and the store into tmp[j], are not visible in this excerpt.
803 void dHouseholderReflection(dInt32 size, dInt32 row, dInt32 colum, T*
const choleskyMatrix, T*
const tmp, T*
const reflection)
805 dAssert(row <= colum);
808 for (dInt32 i = row; i <= colum; i++)
810 T*
const rowI = &choleskyMatrix[size * i];
// Collect the entries to the right of the diagonal and their squared magnitude.
812 for (dInt32 j = i + 1; j <= colum; j++)
814 mag2 += rowI[j] * rowI[j];
815 reflection[j] = rowI[j];
// Only reflect when there is something non-negligible to eliminate.
817 if (mag2 > T(1.0e-14f))
819 reflection[i] = rowI[i] + dSign(rowI[i]) * T(sqrt(mag2 + rowI[i] * rowI[i]));
821 const T vMag2(mag2 + reflection[i] * reflection[i]);
822 const T den = T(2.0f) / vMag2;
// Project rows i..size-1 onto the reflection vector (columns i..colum).
823 for (dInt32 j = i; j < size; j++)
826 T*
const rowJ = &choleskyMatrix[size * j];
827 for (dInt32 k = i; k <= colum; k++) {
828 acc += rowJ[k] * reflection[k];
// Apply the reflection to the rows below i ...
833 for (dInt32 j = i + 1; j < size; j++)
836 T*
const rowJ = &choleskyMatrix[size * j];
837 const T a = tmp[j] * den;
838 for (dInt32 k = i; k <= colum; k++)
840 rowJ[k] -= a * reflection[k];
// ... and to the diagonal entry of row i itself.
843 rowI[i] -= tmp[i] * reflection[i] * den;
// Keep the diagonal non-negative by flipping the sign of column i from row i down.
846 if (rowI[i] < T(0.0f))
848 for (dInt32 k = i; k < size; k++)
850 choleskyMatrix[size * k + i] = -choleskyMatrix[size * k + i];
855 for (dInt32 i = row; i < size; i++)
857 choleskyMatrix[size * i + i] = dMax(choleskyMatrix[size * i + i], T(1.0e-6f));
// Brings choleskyMatrix up to date after a change affecting rows row..colum, picking
// the cheaper of two strategies by an operation-count estimate:
//   - dHouseholderReflection on the affected range, or
//   - copying psdMatrix and factoring it again from scratch.
// NOTE(review): dCholeskyFactorization is called here with two arguments, while the
// definition visible in this excerpt takes (size, stride, matrix) — confirm which
// overload is intended.
// NOTE(review): the trailing comparison against a freshly factored copy looks like a
// debug-only consistency check; its guard is not visible here.
863 void dCholeskyUpdate(dInt32 size, dInt32 row, dInt32 colum, T*
const choleskyMatrix, T*
const tmp, T*
const reflexion,
const T*
const psdMatrix)
865 const dInt32 n0 = colum - row;
866 const dInt32 n1 = n0 + 1;
// Cost model: size^3 / 3 for a full factorization versus a polynomial in n0, n1 and
// the number of trailing rows for the reflection path.
867 const dInt32 choleskyCost = size * size * size / 3;
868 const dInt32 householdCost = n0 * (n0 + 1) / 2 + n1 * (n1 + 1) * (2 * (2 * n1 + 1) - 3 + 3 * (size - colum - 1)) / 6 - 1;
870 if (householdCost < choleskyCost)
872 dHouseholderReflection(size, row, colum, choleskyMatrix, tmp, reflexion);
876 memcpy (choleskyMatrix, psdMatrix,
sizeof (T) * size * size);
877 dCholeskyFactorization(size, choleskyMatrix);
// Consistency check: every entry must match a fresh factorization within 1.0e-4.
882 T*
const psdMatrixCopy = dAlloca(T, size * size);
883 memcpy(psdMatrixCopy, psdMatrix,
sizeof(T) * size * size);
884 dCholeskyFactorization(size, psdMatrixCopy);
886 for (dInt32 i = 0; i < size; i++)
888 for (dInt32 j = 0; j < size; j++)
890 T err = psdMatrixCopy[i*size + j] - choleskyMatrix[i*size + j];
891 dAssert(dAbs(err) < T(1.0e-4f));
// Pivoting solver for a box-constrained system with a symmetric matrix. Rows are
// permuted in place (symmetricMatrixPSD, b, low, high are all modified) and a
// Cholesky factor of the active block is kept up to date through dCholeskyUpdate.
// `permute` records the row order so results can be mapped back at the end.
// NOTE(review): by its name this is a Dantzig-style LCP pivoting method — confirm.
// Many interior statements (loop headers, clamp bookkeeping, declarations of loop,
// clamp_x, alpha, min, and the final write-back bodies) are not visible in this
// excerpt, so the comments below describe only what the visible lines do.
// NOTE(review): dCholeskyFactorization is called with two arguments here, while the
// definition visible in this excerpt takes three — confirm the intended overload.
914 void dSolveDantzigLcpLow(dInt32 size, T*
const symmetricMatrixPSD, T*
const x, T*
const b, T*
const low, T*
const high)
// Work arrays obtained from dAlloca: five size-element vectors plus tmp0/tmp1
// scratch, one size x size matrix for the factor, and the permutation record.
916 T*
const x0 = dAlloca(T, size);
917 T*
const r0 = dAlloca(T, size);
918 T*
const tmp0 = dAlloca(T, size);
919 T*
const tmp1 = dAlloca(T, size);
920 T*
const delta_r = dAlloca(T, size);
921 T*
const delta_x = dAlloca(T, size);
922 T*
const lowerTriangularMatrix = dAlloca(T, size * size);
923 dInt16*
const permute = dAlloca(dInt16, size);
// Identity permutation; x is temporarily used as a per-row sort key max(b[i]^2, 1).
925 for (dInt32 i = 0; i < size; i++)
927 permute[i] = dInt16(i);
929 x[i] = dMax (b[i] * b[i], T (1.0f));
// Reordering passes driven by that key. NOTE(review): the selection conditions are
// not visible here.
932 for (dInt32 n = size - 1, i = size - 1; i >= 0; i--)
936 dPermuteRows(size, n, i, symmetricMatrixPSD, lowerTriangularMatrix, x, b, low, high, permute);
941 for (dInt32 i = size - 1; (i >= 0) && (x[i] > T(1.0f)) ; i--)
944 for (dInt32 j = i - 1; (j >= 0) && (x[j] > T(1.0f)); j--)
953 dPermuteRows(size, i, min, symmetricMatrixPSD, lowerTriangularMatrix, x, b, low, high, permute);
// Trailing rows whose key is at least 16 are left out of the initial guess.
// NOTE(review): no lower bound on initialGuessCount is visible in this loop test.
957 dInt32 initialGuessCount = size;
958 while (x[initialGuessCount - 1] >= T(16.0f))
960 initialGuessCount --;
// Factor a copy of the (permuted) matrix.
963 memcpy(lowerTriangularMatrix, symmetricMatrixPSD,
sizeof(T) * size * size);
965 bool valid = dCholeskyFactorization(size, lowerTriangularMatrix);
968 dCholeskyFactorization(size, lowerTriangularMatrix);
// Initial guess: solve the leading initialGuessCount block; whenever a component
// leaves [low, high], shrink the block and repeat.
970 for (dInt32 j = 0; (j != -1) && initialGuessCount;)
972 dSolveCholesky(size, initialGuessCount, lowerTriangularMatrix, x0, b);
977 for (dInt32 i = initialGuessCount - 1; i >= 0; i--)
979 T x1 = alpha * x0[i];
984 alpha = low[i] / x0[i];
986 else if (x1 > high[i])
990 alpha = high[i] / x0[i];
998 dPermuteRows(size, j, initialGuessCount, symmetricMatrixPSD, lowerTriangularMatrix, x0, b, low, high, permute);
999 dCholeskyUpdate(size, j, initialGuessCount, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
// If nothing was excluded, the unconstrained solve is handled here directly.
1003 if (initialGuessCount == size)
1005 for (dInt32 i = 0; i < size; i++)
1007 dInt32 j = permute[i];
// Main pivoting state: `index` is the row currently being driven, rows at or beyond
// clampedIndex are treated as clamped, `count` is the number of rows still to process.
1014 dInt32 clampedIndex = size;
1015 dInt32 index = initialGuessCount;
1016 dInt32 count = size - initialGuessCount;
1017 dInt32 stride = index * size;
1019 for (dInt32 i = 0; i < size; i++)
1022 delta_x[i] = T(0.0f);
1023 delta_r[i] = T(0.0f);
// Residuals for the rows not covered by the initial guess.
1026 for (dInt32 i = index; i < size; i++)
1028 r0[i] = dDotProduct(size, &symmetricMatrixPSD[stride], x0) - b[i];
1041 dInt32 swapIndex = -1;
// Only take a step when the driven row's residual is not already negligible.
1043 if (dAbs(r0[index]) > T(1.0e-12f))
1045 dCalculateDelta_x(size, index, symmetricMatrixPSD, lowerTriangularMatrix, delta_x);
1046 dCalculateDelta_r(size, index, symmetricMatrixPSD, delta_x, delta_r);
1048 dAssert(delta_r[index] != T(0.0f));
1049 dAssert(dAbs(delta_x[index]) == T(1.0f));
// Guard the division below against an exactly zero denominator.
1050 delta_r[index] = (delta_r[index] == T(0.0f)) ? T(1.0e-12f) : delta_r[index];
// Full step length that would zero the driven residual.
1052 T scale = -r0[index] / delta_r[index];
1053 dAssert(dAbs(scale) >= T(0.0f));
// Shorten the step if any active unknown would cross one of its bounds.
1055 for (dInt32 i = 0; i <= index; i++)
1057 T x1 = x0[i] + scale * delta_x[i];
1062 scale = (high[i] - x0[i]) / delta_x[i];
1064 else if (x1 < low[i])
1068 scale = (low[i] - x0[i]) / delta_x[i];
1071 dAssert(dAbs(scale) >= T(0.0f));
// Shorten the step further if a clamped row's residual would change sign.
1073 for (dInt32 i = clampedIndex; (i < size) && (scale > T(1.0e-12f)); i++)
1075 T r1 = r0[i] + scale * delta_r[i];
1076 if ((r1 * r0[i]) < T(0.0f))
1078 dAssert(dAbs(delta_r[i]) > T(0.0f));
1079 T s1 = -r0[i] / delta_r[i];
1080 dAssert(dAbs(s1) >= T(0.0f));
1081 dAssert(dAbs(s1) <= dAbs(scale));
1082 if (dAbs(s1) < dAbs(scale))
// Apply the (possibly shortened) step to both the unknowns and the residuals.
1090 if (dAbs(scale) > T(1.0e-12f))
1092 for (dInt32 i = 0; i < size; i++)
1094 x0[i] += scale * delta_x[i];
1095 r0[i] += scale * delta_r[i];
// Case 1: no bound was hit — the driven row is satisfied.
1100 if (swapIndex == -1)
1102 r0[index] = T(0.0f);
1103 delta_r[index] = T(0.0f);
// Case 2: the driven unknown itself hit a bound — pin it and move it to the clamped set.
1108 else if (swapIndex == index)
1112 x0[index] = clamp_x;
1113 dPermuteRows(size, index, clampedIndex, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1114 dCholeskyUpdate(size, index, clampedIndex, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
1115 loop = count ? true :
false;
// Case 3: a row beyond the driven one changed sign — move it across clampedIndex.
1117 else if (swapIndex > index)
1120 r0[swapIndex] = T(0.0f);
1121 dAssert(swapIndex < size);
1122 dAssert(clampedIndex <= size);
1123 if (swapIndex < clampedIndex)
1127 dPermuteRows(size, clampedIndex, swapIndex, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1128 dCholeskyUpdate(size, swapIndex, clampedIndex, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
1129 dAssert(clampedIndex >= index);
1134 dAssert(clampedIndex < size);
1135 dPermuteRows(size, clampedIndex, swapIndex, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1136 dCholeskyUpdate(size, clampedIndex, swapIndex, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
1138 dAssert(clampedIndex <= size);
1139 dAssert(clampedIndex >= index);
// Case 4: an already-active unknown hit a bound — pin it and rotate it out of the
// active block through three row exchanges before refreshing the factor.
1145 x0[swapIndex] = clamp_x;
1146 delta_x[index] = T(0.0f);
1148 dAssert(swapIndex < index);
1149 dPermuteRows(size, swapIndex, index - 1, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1150 dPermuteRows(size, index - 1, index, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1151 dPermuteRows(size, clampedIndex - 1, index, symmetricMatrixPSD, lowerTriangularMatrix, x0, r0, low, high, permute);
1152 dCholeskyUpdate (size, swapIndex, clampedIndex - 1, lowerTriangularMatrix, tmp0, tmp1, symmetricMatrixPSD);
// Map results back to the caller's original row order via `permute`.
1161 for (dInt32 i = 0; i < size; i++)
1163 dInt32 j = permute[i];
// Splits the unknowns into those with effectively no bounds (low <= -D_LCP_MAX_VALUE
// and high >= D_LCP_MAX_VALUE) and the rest. Unbounded rows are moved to the front
// and factored, then a reduced system over the bounded rows is assembled and passed
// to dSolveDantzigLCP. The matrix, b, low and high are permuted in place; `permute`
// is used to restore the caller's ordering at the end. Returns bool.
// NOTE(review): several calls do not match the signatures visible in this excerpt:
// dCholeskyFactorizationAddRow is called with 3 arguments (definition takes 5),
// dSolveCholesky with 4 (visible overloads take 5 or 3), and dSolveDantzigLCP is
// used as a value although its visible definition returns void — confirm against
// the full source. Many interior statements are not visible here.
1223 bool dSolvePartitionDantzigLCP(dInt32 size, T*
const symmetricMatrixPSD , T*
const x, T*
const b, T*
const low, T*
const high)
1225 dInt16*
const permute = dAlloca(dInt16, size);
1227 for (dInt32 i = 0; i < size; i++)
1230 permute[i] = dInt16(i);
// Partition pass: unboundedSize shrinks as bounded rows are swapped to the back.
1233 dInt32 unboundedSize = size;
1234 for (dInt32 i = 0; i < unboundedSize; i++)
// An unbounded row stays in place and is added to the factorization.
1236 if ((low[i] <= T(-D_LCP_MAX_VALUE)) && (high[i] >= T(D_LCP_MAX_VALUE)))
1238 dCholeskyFactorizationAddRow(size, i, symmetricMatrixPSD );
// A bounded row is exchanged with the last candidate: rows, columns and side data.
1242 dInt32 j = unboundedSize - 1;
1245 T*
const A = &symmetricMatrixPSD [size * i];
1246 T*
const B = &symmetricMatrixPSD [size * j];
1247 for (dInt32 k = 0; k < size; k++)
1253 for (dInt32 k = 0; k < size; k++)
1255 dSwap(symmetricMatrixPSD [stride + i], symmetricMatrixPSD [stride + j]);
1260 dSwap(low[i], low[j]);
1261 dSwap(high[i], high[j]);
1262 dSwap(permute[i], permute[j]);
1271 if (unboundedSize > 0)
// Solve the unbounded block, then fold its solution into the right-hand side of the
// bounded rows.
1273 dSolveCholesky(size, unboundedSize, symmetricMatrixPSD , x);
1274 dInt32 base = unboundedSize * size;
1275 for (dInt32 i = unboundedSize; i < size; i++)
1277 b[i] -= dDotProduct(unboundedSize, &symmetricMatrixPSD[base], x);
// Scratch for the reduced problem: bounds (l, h), right-hand side (c), solution (u),
// the reduced matrix a11 and the coupling rows a10.
1281 const dInt32 boundedSize = size - unboundedSize;
1282 T*
const l = dAlloca(T, boundedSize);
1283 T*
const h = dAlloca(T, boundedSize);
1284 T*
const c = dAlloca(T, boundedSize);
1285 T*
const u = dAlloca(T, boundedSize);
1286 T*
const a11 = dAlloca(T, boundedSize * boundedSize);
1287 T*
const a10 = dAlloca(T, boundedSize * unboundedSize);
1289 for (dInt32 i = 0; i < boundedSize; i++)
// g is row i of a10: filled from the coupling part of the bounded row, then solved
// against the unbounded factor.
1291 T*
const g = &a10[i * unboundedSize];
1292 const T*
const row = &symmetricMatrixPSD [(unboundedSize + i) * size];
1293 for (dInt32 j = 0; j < unboundedSize; j++)
1297 dSolveCholesky(size, unboundedSize, symmetricMatrixPSD, g);
// Reduced matrix entry = original entry + dot(g, coupling row); the visible store
// fills the lower triangle a11[j][i] for j > i.
1299 T*
const arow = &a11[i * boundedSize];
1300 const T*
const row2 = &symmetricMatrixPSD[(unboundedSize + i) * size];
1301 arow[i] = row2[unboundedSize + i] + dDotProduct(unboundedSize, g, row2);
1302 for (dInt32 j = i + 1; j < boundedSize; j++)
1304 const T*
const row1 = &symmetricMatrixPSD [(unboundedSize + j) * size];
1305 T elem = row1[unboundedSize + i] + dDotProduct(unboundedSize, g, row1);
1307 a11[j * boundedSize + i] = elem;
1310 c[i] = b[i + unboundedSize];
1311 l[i] = low[i + unboundedSize];
1312 h[i] = high[i + unboundedSize];
// Solve the reduced problem and propagate its solution back to the unbounded part.
1315 if (dSolveDantzigLCP(boundedSize, a11, u, c, l, h))
1317 for (dInt32 i = 0; i < boundedSize; i++)
1320 x[unboundedSize + i] = s;
1321 const T*
const g = &a10[i * unboundedSize];
1322 for (dInt32 j = 0; j < unboundedSize; j++)
// No unbounded rows: hand the whole problem to dSolveDantzigLCP.
1332 for (dInt32 i = 0; i < size; i++)
1336 ret = dSolveDantzigLCP(size, symmetricMatrixPSD, x, b, low, high);
// Undo the permutation.
1339 for (dInt32 i = 0; i < size; i++)
1343 for (dInt32 i = 0; i < size; i++)
1345 dInt32 j = permute[i];
// Hybrid driver: runs the iterative dGaussSeidelLcpSor solver first, measures how
// many rows were clipped and the squared residual of the unclipped rows, and then
// either refines with dSolveDantzigLcpLow or runs another dGaussSeidelLcpSor pass.
// NOTE(review): the declarations of err2 and stride, the loop bodies around the
// dSolveDantzigLcpLow call, and the else branch structure are not visible in this
// excerpt; the definition may also continue past the last visible line.
1353 void dSolveDantzigLCP(dInt32 size, T*
const symmetricMatrixPSD, T*
const x, T*
const b, T*
const low, T*
const high)
// Loose tolerance (0.25^2) and a pass budget of size clamped to [12, 20].
1355 T tol2 = T(0.25f * 0.25f);
1356 dInt32 passes = dClamp(size, 12, 20);
1357 T*
const r = dAlloca(T, size);
1358 dInt16*
const clipped = dAlloca(dInt16, size);
1361 dGaussSeidelLcpSor(size, symmetricMatrixPSD, x, b, low, high, tol2, passes, clipped, T(1.3f));
// Residual per row; clipped rows are counted but excluded from the error sum.
1365 dInt32 clippeCount = 0;
1366 for (dInt32 i = 0; i < size; i++)
1368 const T*
const row = &symmetricMatrixPSD[stride];
1369 r[i] = b[i] - dDotProduct(size, row, x);
1370 clippeCount += clipped[i];
1371 err2 += clipped[i] ? T(0.0f) : r[i] * r[i];
// NOTE(review): `clippeCount < 32` is implied by `clippeCount < 16`, so the test
// reduces to (clippeCount < 16) && (err2 < 16) — confirm whether `||` was intended.
1378 if ((clippeCount < 16) && ((clippeCount < 32) && (err2 < T(16.0f))))
// Refinement path: dSolveDantzigLcpLow is given the residual r as right-hand side
// and a separate x0 buffer.
1381 T*
const x0 = dAlloca(T, size);
1382 for (dInt32 i = 0; i < size; i++)
1387 dSolveDantzigLcpLow(size, symmetricMatrixPSD, x0, r, low, high);
1388 for (dInt32 i = 0; i < size; i++)
// Fallback path: another relaxation run with a fixed budget of 20 passes.
1396 dGaussSeidelLcpSor(size, symmetricMatrixPSD, x, b, low, high, tol2, 20, clipped, T(1.3f));