22 #ifndef __D_VECTOR_ARM_NEON_H__
23 #define __D_VECTOR_ARM_NEON_H__
31 #ifdef D_NEWTON_USE_DOUBLE
32 #define dVector dBigVector
36 #if DG_ARCH >= DG_ARCH_NEON_64
// Aliases used when DG_ARCH >= DG_ARCH_NEON_64: horizontal reductions and
// directed rounding are mapped straight onto single intrinsics.
#define vec_minv vminvq_u32
// Fixed: the across-vector maximum intrinsic is vmaxvq_u32 (leading 'v' was missing).
#define vec_maxv vmaxvq_u32
#define vec_hadd4 vaddvq_f32
// Fixed: vrndq_f32 rounds toward zero; a floor needs vrndmq_f32,
// which rounds toward minus infinity.
#define vec_floor vrndmq_f32
48 #define D_INLINE_FORCEINLINE(type) static inline type
50 D_INLINE_FORCEINLINE(dInt32) vec_minv(uint32x4_t v)
52 uint32x2_t tmp = vpmin_u32(vget_low_u32(v), vget_high_u32(v));
53 tmp = vpmin_u32(tmp, tmp);
57 D_INLINE_FORCEINLINE(dInt32) vec_maxv(uint32x4_t v)
59 uint32x2_t tmp = vpmax_u32(vget_low_u32(v), vget_high_u32(v));
60 tmp = vpmax_u32(tmp, tmp);
65 D_INLINE_FORCEINLINE(
float)vec_hadd4(float32x4_t v)
67 float32x4_t tmp = vaddq_f32(v, vrev64q_f32(v));
68 tmp = vaddq_f32(tmp, vcombine_f32(vget_high_f32(tmp), vget_low_f32(tmp)));
77 D_INLINE_FORCEINLINE(
float) vec_hadd3(float32x4_t v)
79 float32x2_t temp = vpadd_f32(vget_low_f32(v), vget_low_f32(v));
80 temp = vadd_f32(temp, vget_high_f32(v));
81 return vget_lane_f32(temp, 0);
// Generic vec_* names mapped onto NEON intrinsics.

// Arithmetic on four float lanes.
#define vec_mul vmulq_f32
#define vec_add vaddq_f32
#define vec_sub vsubq_f32
#define vec_max vmaxq_f32
#define vec_min vminq_f32
#define vec_splat vdupq_n_f32
// NOTE(review): vdivq_f32 is an AArch64-only intrinsic -- confirm 32-bit builds never expand vec_div.
#define vec_div vdivq_f32
// Reciprocal estimate (low precision; refine with vec_recp).
#define vec_rcp vrecpeq_f32
#define vec_store vst1q_f32
#define vec_load vld1q_f32
#define vec_abs vabsq_f32
#define vec_cvt vcvtq_s32_f32
// NOTE(review): vrsqrtsq_f32 is the two-operand reciprocal-square-root refinement step,
// not a square root -- confirm what callers of vec_sqrt expect.
#define vec_sqrt vrsqrtsq_f32
// Two-operand reciprocal refinement step (pairs with vec_rcp).
#define vec_recp vrecpsq_f32
// Fixed: the intrinsic is vrsqrteq_f32 (leading 'v' was missing).
#define vec_rsqrt vrsqrteq_f32

// Lane-wise comparisons; each yields an all-ones / all-zeros uint32x4_t mask.
// Fixed: vec_cmpne used to alias the equality compare. NEON has no float
// "not equal" intrinsic, so the equality mask is inverted instead.
#define vec_cmpne(a, b) vmvnq_u32(vceqq_f32((a), (b)))
#define vec_cmpgt vcgtq_f32
#define vec_cmpge vcgeq_f32
#define vec_cmpeq vceqq_f32
#define vec_cmplt vcltq_f32
#define vec_cmple vcleq_f32

// Bitwise operations on the uint32x4_t view.
#define vec_xor veorq_u32
#define vec_or vorrq_u32
#define vec_and vandq_u32
#define vec_not vmvnq_u32
// vbicq_u32(a, b) computes a & ~b.
#define vec_andnot vbicq_u32
112 #if defined __ARM_FEATURE_FMA
114 #define vec_madd vfmaq_f32
115 #define vec_msub vmlsq_f32
117 #define vec_madd vmlaq_f32
118 #define vec_msub vmlsq_f32
121 static inline float32x4_t vec_set(
const float w,
const float z,
const float y,
const float x)
123 float ptr[] = { x, y, z, w };
124 return vec_load(ptr);
129 DG_MSC_VECTOR_ALIGMENT
136 D_INLINE
dVector(
const float32x4_t type)
140 D_INLINE
dVector(
const uint32x4_t type)
146 : m_type(vmovq_n_f32(val))
156 D_INLINE
dVector (
const dFloat32*
const ptr)
157 :m_x(ptr[0]), m_y(ptr[1]), m_z(ptr[2]), m_w (dFloat32 (0.0f))
159 dAssert (dgCheckVector ((*
this)));
162 #ifndef D_NEWTON_USE_DOUBLE
163 D_INLINE
dVector(
const dFloat64*
const ptr)
164 :m_x(dFloat32(ptr[0]))
165 ,m_y(dFloat32(ptr[1]))
166 ,m_z(dFloat32(ptr[2]))
173 D_INLINE
dVector (dFloat32 x, dFloat32 y, dFloat32 z, dFloat32 w)
174 :m_x(x), m_y(y), m_z(z), m_w(w)
176 dAssert (dgCheckVector ((*
this)));
179 D_INLINE
dVector (dInt32 ix, dInt32 iy, dInt32 iz, dInt32 iw)
180 :m_x(*((dFloat32*)&ix)), m_y(*((dFloat32*)&iy)), m_z(*((dFloat32*)&iz)), m_w(*((dFloat32*)&iw))
184 #ifndef D_NEWTON_USE_DOUBLE
186 :m_x(dFloat32 (((dFloat64*)©)[0]))
187 ,m_y(dFloat32 (((dFloat64*)©)[1]))
188 ,m_z(dFloat32 (((dFloat64*)©)[2]))
189 ,m_w(dFloat32 (((dFloat64*)©)[3]))
191 dAssert (dgCheckVector ((*
this)));
195 D_INLINE dFloat32 GetScalar ()
const
200 D_INLINE
void Store (dFloat32*
const dst)
const
202 vec_store(dst, m_type);
205 D_INLINE
dVector BroadcastX ()
const
210 D_INLINE
dVector BroadcastY ()
const
215 D_INLINE
dVector BroadcastZ ()
const
220 D_INLINE
dVector BroadcastW ()
const
226 D_INLINE dFloat32& operator[] (dInt32 i)
233 D_INLINE
const dFloat32& operator[] (dInt32 i)
const
242 return vec_add(m_type, A.m_type);
247 return vec_sub(m_type, A.m_type);
252 return vec_mul(m_type, A.m_type);
257 m_type = vec_add(m_type, A.m_type);
263 m_type = vec_sub(m_type, A.m_type);
269 m_type = vec_mul(m_type, A.m_type);
275 D_INLINE
dVector AddHorizontal ()
const
277 return vec_hadd3(m_type);
280 D_INLINE
dVector Scale3 (dFloat32 scale)
const
282 return dVector (m_x * scale, m_y * scale, m_z * scale, m_w);
285 D_INLINE
dVector Scale (dFloat32 scale)
const
287 return vec_mul(vmovq_n_f32(scale), m_type);
293 return dVector (m_x * A.m_x, m_y * A.m_y, m_z * A.m_z, A.m_w);
306 return dVector (m_y * B.m_z - m_z * B.m_y,
307 m_z * B.m_x - m_x * B.m_z,
308 m_x * B.m_y - m_y * B.m_x, m_w);
313 dFloat32 cofactor[3][3];
314 dFloat32 array[4][4];
317 for (dInt32 i = 0; i < 4; i ++) {
321 array[3][i] = dFloat32 (1.0f);
325 dFloat32 sign = dFloat32 (-1.0f);
326 for (dInt32 i = 0; i < 4; i ++)
328 for (dInt32 j = 0; j < 3; j ++)
331 for (dInt32 k = 0; k < 4; k ++)
335 cofactor[j][k0] = array[j][k];
340 dFloat32 x = cofactor[0][0] * (cofactor[1][1] * cofactor[2][2] - cofactor[1][2] * cofactor[2][1]);
341 dFloat32 y = cofactor[0][1] * (cofactor[1][2] * cofactor[2][0] - cofactor[1][0] * cofactor[2][2]);
342 dFloat32 z = cofactor[0][2] * (cofactor[1][0] * cofactor[2][1] - cofactor[1][1] * cofactor[2][0]);
343 dFloat32 det = x + y + z;
345 normal[i] = sign * det;
346 sign *= dFloat32 (-1.0f);
352 D_INLINE
dVector GetInt ()
const
354 return vcvtq_u32_f32(m_type);
357 D_INLINE
dVector GetFloat ()
const
359 return vcvtq_f32_u32(m_type);
362 D_INLINE
dVector TestZero()
const
364 return m_negOne & (*
this == m_zero);
367 D_INLINE
dVector Floor ()
const
369 #if DG_ARCH >= DG_ARCH_NEON_64
371 return vec_floor(m_type);
373 return dVector (dgFloor (m_x), dgFloor (m_y), dgFloor (m_z), dgFloor (m_w));
380 auto tmp = vec_mul(m_type, A.m_type);
381 return vec_hadd4(tmp);
386 D_INLINE
dVector InvMagSqrt()
const
388 return dVector(dgRsqrt(DotProduct(*this).m_x));
391 D_INLINE
dVector Reciproc()
const
393 float32x4_t reciprocal = vrecpeq_f32(m_type);
394 reciprocal = vrecpsq_f32(m_type, reciprocal) * reciprocal;
403 return vec_madd(A.m_type, B.m_type, m_type);
409 return vec_msub(A.m_type, B.m_type, m_type);
412 D_INLINE
dVector InvSqrt()
const
414 float32x4_t sqrt_reciprocal = vrsqrteq_f32(m_type);
415 return vrsqrtsq_f32(m_type * sqrt_reciprocal, sqrt_reciprocal) * sqrt_reciprocal;
420 float32x4_t sqrt_reciprocal = vrsqrteq_f32(m_type);
421 float32x4_t tmp = vrsqrtsq_f32(m_type * sqrt_reciprocal, sqrt_reciprocal) * sqrt_reciprocal;
422 return vec_mul(m_type, tmp);
425 D_INLINE
dVector Normalize ()
const
427 dAssert (m_w == dFloat32 (0.0f));
429 return me * InvMagSqrt();
434 return vec_abs(m_type);
437 dFloat32 GetMax ()
const
439 return dMax(dMax(m_x, m_y), dMax(m_z, m_w));
444 return vec_max(m_type, data.m_type);
449 return vec_min(m_type, data.m_type);
455 return vec_cmpeq(m_typeInt, data.m_typeInt);
460 return vec_cmpne(m_typeInt, data.m_typeInt);
465 return vec_cmpgt(m_typeInt, data.m_typeInt);
470 return vec_cmplt(m_typeInt, data.m_typeInt);
475 return vec_cmpge(m_typeInt, data.m_typeInt);
480 return vec_cmple(m_typeInt, data.m_typeInt);
486 return vec_and(m_typeInt, data.m_typeInt);
491 return vec_or(m_typeInt, data.m_typeInt);
496 return vec_xor(m_typeInt, data.m_typeInt);
501 return vec_andnot(m_typeInt, data.m_typeInt);
504 D_INLINE dInt32 GetSignMask()
const
506 const dInt32*
const a = (dInt32*)&m_x;
507 return (((a[0] & 0x80000000) ? 1 : 0) | ((a[1] & 0x80000000) ? 2 : 0) | ((a[2] & 0x80000000) ? 4 : 0) | ((a[3] & 0x80000000) ? 8 : 0));
510 D_INLINE
dVector ShiftTripleRight ()
const
512 return dVector (m_z, m_x, m_y, m_w);
515 D_INLINE
dVector ShiftTripleLeft ()
const
517 return dVector (m_y, m_z, m_x, m_w);
520 D_INLINE
dVector ShiftRightLogical (dInt32 bits)
const
522 return dVector (dInt32 (dUnsigned32 (m_ix) >> bits), dInt32 (dUnsigned32 (m_iy) >> bits), dInt32 (dUnsigned32 (m_iz) >> bits), dInt32 (dUnsigned32 (m_iw) >> bits));
527 float32x4x2_t vtrn1 = vzipq_f32(src0.m_type, src2.m_type);
528 float32x4x2_t vtrn2 = vzipq_f32(src1.m_type, src3.m_type);
529 float32x4x2_t res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
530 float32x4x2_t res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
531 dst0.m_type = res1.val[0];
532 dst1.m_type = res1.val[1];
533 dst2.m_type = res2.val[0];
534 dst3.m_type = res2.val[1];
538 DG_CLASS_ALLOCATOR(allocator)
542 uint32x4_t m_typeInt;
571 } DG_GCC_VECTOR_ALIGMENT;
575 DG_MSC_VECTOR_ALIGMENT
584 :m_x(val), m_y(val), m_z(val), m_w(val)
589 : m_x(v.m_x), m_y(v.m_y), m_z(v.m_z), m_w(v.m_w)
593 #ifndef D_NEWTON_USE_DOUBLE
595 : m_x(v.m_x), m_y(v.m_y), m_z(v.m_z), m_w(v.m_w)
599 D_INLINE
dBigVector(
const dFloat32*
const ptr)
600 : m_x(ptr[0]), m_y(ptr[1]), m_z(ptr[2]), m_w(dFloat32(0.0f))
602 dAssert(dgCheckVector((*
this)));
606 D_INLINE
dBigVector(
const dFloat64*
const ptr)
607 :m_x(ptr[0]), m_y(ptr[1]), m_z(ptr[2]), m_w(dFloat32(0.0f))
609 dAssert(dgCheckVector((*
this)));
612 D_INLINE
dBigVector(dFloat64 x, dFloat64 y, dFloat64 z, dFloat64 w)
613 : m_x(x), m_y(y), m_z(z), m_w(w)
615 dAssert(dgCheckVector((*
this)));
618 D_INLINE
dBigVector(dInt32 ix, dInt32 iy, dInt32 iz, dInt32 iw)
619 : m_ix(ix), m_iy(iy), m_iz(iz), m_iw(iw)
623 D_INLINE
dBigVector(dInt64 ix, dInt64 iy, dInt64 iz, dInt64 iw)
624 : m_ix(ix), m_iy(iy), m_iz(iz), m_iw(iw)
628 D_INLINE dFloat64 GetScalar()
const
633 D_INLINE
void Store(dFloat64*
const dst)
const
662 D_INLINE dFloat64& operator[] (dInt32 i)
669 D_INLINE
const dFloat64& operator[] (dInt32 i)
const
678 return dBigVector(m_x + A.m_x, m_y + A.m_y, m_z + A.m_z, m_w + A.m_w);
683 return dBigVector(m_x - A.m_x, m_y - A.m_y, m_z - A.m_z, m_w - A.m_w);
688 return dBigVector(m_x * A.m_x, m_y * A.m_y, m_z * A.m_z, m_w * A.m_w);
693 return (*
this =
dBigVector(m_x + A.m_x, m_y + A.m_y, m_z + A.m_z, m_w + A.m_w));
698 return (*
this =
dBigVector(m_x - A.m_x, m_y - A.m_y, m_z - A.m_z, m_w - A.m_w));
703 return (*
this =
dBigVector(m_x * A.m_x, m_y * A.m_y, m_z * A.m_z, m_w * A.m_w));
711 D_INLINE
dBigVector Scale3(dFloat64 scale)
const
713 return dBigVector(m_x * scale, m_y * scale, m_z * scale, m_w);
716 D_INLINE
dBigVector Scale(dFloat64 scale)
const
718 return dBigVector(m_x * scale, m_y * scale, m_z * scale, m_w * scale);
724 return dBigVector(m_x * A.m_x, m_y * A.m_y, m_z * A.m_z, A.m_w);
730 return dBigVector(m_y * B.m_z - m_z * B.m_y, m_z * B.m_x - m_x * B.m_z, m_x * B.m_y - m_y * B.m_x, m_w);
735 dFloat64 cofactor[3][3];
736 dFloat64 array[4][4];
739 for (dInt32 i = 0; i < 4; i++) {
743 array[3][i] = dFloat32(1.0f);
747 dFloat64 sign = dFloat64(-1.0f);
748 for (dInt32 i = 0; i < 4; i++) {
750 for (dInt32 j = 0; j < 3; j++) {
752 for (dInt32 k = 0; k < 4; k++) {
754 cofactor[j][k0] = array[j][k];
759 dFloat64 x = cofactor[0][0] * (cofactor[1][1] * cofactor[2][2] - cofactor[1][2] * cofactor[2][1]);
760 dFloat64 y = cofactor[0][1] * (cofactor[1][2] * cofactor[2][0] - cofactor[1][0] * cofactor[2][2]);
761 dFloat64 z = cofactor[0][2] * (cofactor[1][0] * cofactor[2][1] - cofactor[1][1] * cofactor[2][0]);
762 dFloat64 det = x + y + z;
764 normal[i] = sign * det;
765 sign *= dFloat64(-1.0f);
773 return dBigVector(dInt64(floor(m_x)), dInt64(floor(m_y)), dInt64(floor(m_z)), dInt64(floor(m_w)));
778 const dInt64*
const a = (dInt64*)&m_x;
779 return dBigVector((a[0] == 0) ? dFloat64(-1.0f) : dFloat64(1.0f),
780 (a[1] == 0) ? dFloat64(-1.0f) : dFloat64(1.0f),
781 (a[2] == 0) ? dFloat64(-1.0f) : dFloat64(1.0f),
782 (a[3] == 0) ? dFloat64(-1.0f) : dFloat64(1.0f));
788 return dBigVector(floor(m_x), floor(m_y), floor(m_z), floor(m_w));
793 return dBigVector(m_x * A.m_x + m_y * A.m_y + m_z * A.m_z + m_w * A.m_w);
798 return dBigVector(dFloat64(1.0f) / m_x, dFloat64(1.0f) / m_y, dFloat64(1.0f) / m_z, dFloat64(1.0f) / m_w);
803 return dBigVector(sqrt(m_x), sqrt(m_y), sqrt(m_z), sqrt(m_w));
808 return dBigVector(dFloat64(1.0f) / sqrt(m_x), dFloat64(1.0f) / sqrt(m_y), dFloat64(1.0f) / sqrt(m_z), dFloat64(1.0f) / sqrt(m_w));
813 return dBigVector(dFloat64(1.0f) / sqrt(DotProduct(*this).m_x));
818 dAssert(m_w == dFloat64(0.0f));
821 return *
this * InvMagSqrt();
826 return dBigVector((m_x > dFloat64(0.0f)) ? m_x : -m_x,
827 (m_y > dFloat64(0.0f)) ? m_y : -m_y,
828 (m_z > dFloat64(0.0f)) ? m_z : -m_z,
829 (m_w > dFloat64(0.0f)) ? m_w : -m_w);
832 dFloat64 GetMax()
const
834 return dMax(dMax(m_x, m_y), dMax(m_z, m_w));
839 return dBigVector((m_x > data.m_x) ? m_x : data.m_x,
840 (m_y > data.m_y) ? m_y : data.m_y,
841 (m_z > data.m_z) ? m_z : data.m_z,
842 (m_w > data.m_w) ? m_w : data.m_w);
847 return dBigVector((m_x < data.m_x) ? m_x : data.m_x,
848 (m_y < data.m_y) ? m_y : data.m_y,
849 (m_z < data.m_z) ? m_z : data.m_z,
850 (m_w < data.m_w) ? m_w : data.m_w);
856 return dBigVector((m_x == data.m_x) ? dInt64(-1) : dInt64(0),
857 (m_y == data.m_y) ? dInt64(-1) : dInt64(0),
858 (m_z == data.m_z) ? dInt64(-1) : dInt64(0),
859 (m_w == data.m_w) ? dInt64(-1) : dInt64(0));
864 return dBigVector((m_x > data.m_x) ? dInt64(-1) : dInt64(0),
865 (m_y > data.m_y) ? dInt64(-1) : dInt64(0),
866 (m_z > data.m_z) ? dInt64(-1) : dInt64(0),
867 (m_w > data.m_w) ? dInt64(-1) : dInt64(0));
872 return dBigVector((m_x < data.m_x) ? dInt64(-1) : dInt64(0),
873 (m_y < data.m_y) ? dInt64(-1) : dInt64(0),
874 (m_z < data.m_z) ? dInt64(-1) : dInt64(0),
875 (m_w < data.m_w) ? dInt64(-1) : dInt64(0));
880 return dBigVector((m_x >= data.m_x) ? dInt64(-1) : dInt64(0),
881 (m_y >= data.m_y) ? dInt64(-1) : dInt64(0),
882 (m_z >= data.m_z) ? dInt64(-1) : dInt64(0),
883 (m_w >= data.m_w) ? dInt64(-1) : dInt64(0));
888 return dBigVector((m_x <= data.m_x) ? dInt64(-1) : dInt64(0),
889 (m_y <= data.m_y) ? dInt64(-1) : dInt64(0),
890 (m_z <= data.m_z) ? dInt64(-1) : dInt64(0),
891 (m_w <= data.m_w) ? dInt64(-1) : dInt64(0));
898 const dInt64*
const a = (dInt64*)&m_x;
899 const dInt64*
const b = (dInt64*)&data.m_x;
900 return dBigVector(a[0] & b[0], a[1] & b[1], a[2] & b[2], a[3] & b[3]);
905 const dInt64*
const a = (dInt64*)&m_x;
906 const dInt64*
const b = (dInt64*)&data.m_x;
907 return dBigVector(a[0] | b[0], a[1] | b[1], a[2] | b[2], a[3] | b[3]);
912 const dInt64*
const a = (dInt64*)&m_x;
913 const dInt64*
const b = (dInt64*)&data.m_x;
914 return dBigVector(a[0] ^ b[0], a[1] ^ b[1], a[2] ^ b[2], a[3] ^ b[3]);
919 const dInt64*
const a = (dInt64*)&m_x;
920 const dInt64*
const b = (dInt64*)&data.m_x;
921 return dBigVector(a[0] & ~b[0], a[1] & ~b[1], a[2] & ~b[2], a[3] & ~b[3]);
924 D_INLINE dInt32 GetSignMask()
const
926 const dInt64*
const a = (dInt64*)&m_x;
927 return (((a[0] >> 63) ? 1 : 0) | ((a[1] >> 63) ? 2 : 0) | ((a[2] >> 63) ? 4 : 0) | ((a[3] >> 63) ? 8 : 0));
940 D_INLINE
dBigVector ShiftRightLogical(dInt32 bits)
const
942 return dBigVector(dInt64(dUnsigned64(m_ix) >> bits), dInt64(dUnsigned64(m_iy) >> bits), dInt64(dUnsigned64(m_iz) >> bits), dInt64(dUnsigned64(m_iw) >> bits));
952 dst0 =
dBigVector(tmp0.m_x, tmp1.m_x, tmp2.m_x, tmp3.m_x);
953 dst1 =
dBigVector(tmp0.m_y, tmp1.m_y, tmp2.m_y, tmp3.m_y);
954 dst2 =
dBigVector(tmp0.m_z, tmp1.m_z, tmp2.m_z, tmp3.m_z);
955 dst3 =
dBigVector(tmp0.m_w, tmp1.m_w, tmp2.m_w, tmp3.m_w);
958 DG_CLASS_ALLOCATOR(allocator)
962 #if DG_ARCH >= DG_ARCH_NEON_64
1002 } DG_GCC_VECTOR_ALIGMENT;
1005 DG_MSC_VECTOR_ALIGMENT
1015 for (dInt32 i = 0; i < 6; i++) {
1022 for (dInt32 i = 0; i < 3; i++) {
1024 m_d[i + 3] = high[i];
1030 for (dInt32 i = 0; i < 6; i++) {
1035 D_INLINE dFloat64& operator[] (dInt32 i)
1042 D_INLINE
const dFloat64& operator[] (dInt32 i)
const
1052 for (dInt32 i = 0; i < 6; i++) {
1053 tmp[i] = m_d[i] + A.m_d[i];
1061 for (dInt32 i = 0; i < 6; i++) {
1062 tmp[i] = m_d[i] * A.m_d[i];
1069 dFloat64 ret = dFloat64(0.0f);
1070 for (dInt32 i = 0; i < 6; i++) {
1071 ret += m_d[i] * v.m_d[i];
1079 for (dInt32 i = 0; i < 6; i++) {
1080 tmp[i] = m_d[i] * s;
1087 } DG_GCC_VECTOR_ALIGMENT;
1097 #ifdef D_NEWTON_USE_DOUBLE
1098 #define dVector dBigVector
1102 D_MSV_NEWTON_ALIGN_16
1110 D_INLINE
dVector(dFloat32 val)
1111 :m_type(vmovq_n_f32(val))
1120 D_INLINE
dVector(
const float32x4_t type)
1125 D_INLINE
dVector(
const dFloat32*
const ptr)
1127 :m_type(vld1q_f32 (ptr))
1129 dAssert(dgCheckVector((*
this)));
1132 #ifndef D_NEWTON_USE_DOUBLE
1133 D_INLINE
dVector(
const dFloat64*
const ptr)
1134 :m_x(dFloat32(ptr[0]))
1135 ,m_y(dFloat32(ptr[1]))
1136 ,m_z(dFloat32(ptr[2]))
1137 ,m_w(dFloat32(ptr[3]))
1142 D_INLINE
dVector(dFloat32 x, dFloat32 y, dFloat32 z, dFloat32 w)
1143 :m_x(x), m_y(y), m_z(z), m_w(w)
1145 dAssert(dgCheckVector((*
this)));
1148 D_INLINE
dVector(dInt32 ix, dInt32 iy, dInt32 iz, dInt32 iw)
1149 : m_x(*((dFloat32*)&ix)), m_y(*((dFloat32*)&iy)), m_z(*((dFloat32*)&iz)), m_w(*((dFloat32*)&iw))
1153 #ifndef D_NEWTON_USE_DOUBLE
1155 :m_x(dFloat32(((dFloat64*)©)[0]))
1156 ,m_y(dFloat32(((dFloat64*)©)[1]))
1157 ,m_z(dFloat32(((dFloat64*)©)[2]))
1158 ,m_w(dFloat32(((dFloat64*)©)[3]))
1160 dAssert(dgCheckVector((*
this)));
1164 D_INLINE dFloat32 GetScalar()
const
1169 D_INLINE
void Store(dFloat32*
const dst)
const
1171 vst1q_f32(dst, m_type);
1174 D_INLINE
dVector BroadcastX()
const
1179 D_INLINE
dVector BroadcastY()
const
1184 D_INLINE
dVector BroadcastZ()
const
1189 D_INLINE
dVector BroadcastW()
const
1195 D_INLINE dFloat32& operator[] (dInt32 i)
1202 D_INLINE
const dFloat32& operator[] (dInt32 i)
const
1211 return vaddq_f32(m_type, A.m_type);
1216 return vsubq_f32(m_type, A.m_type);
1221 return vmulq_f32(m_type, A.m_type);
1226 return (*
this = vsubq_f32(m_type, A.m_type));
1231 return (*
this = vsubq_f32(m_type, A.m_type));
1236 return (*
this = vmulq_f32(m_type, A.m_type));
1243 return vmlaq_f32(m_type, A.m_type, B.m_type);
1249 return vmlsq_f32(m_type, A.m_type, B.m_type);
1252 D_INLINE
dVector AddHorizontal()
const
1254 return dVector(m_x + m_y + m_z + m_w);
1260 D_INLINE
dVector Scale(dFloat32 scale)
const
1262 return dVector(m_x * scale, m_y * scale, m_z * scale, m_w * scale);
1268 return dVector(m_y * B.m_z - m_z * B.m_y,
1269 m_z * B.m_x - m_x * B.m_z,
1270 m_x * B.m_y - m_y * B.m_x, m_w);
1275 dFloat32 cofactor[3][3];
1276 dFloat32 array[4][4];
1279 for (dInt32 i = 0; i < 4; i++)
1281 array[0][i] = me[i];
1284 array[3][i] = dFloat32(1.0f);
1288 dFloat32 sign = dFloat32(-1.0f);
1289 for (dInt32 i = 0; i < 4; i++)
1291 for (dInt32 j = 0; j < 3; j++)
1294 for (dInt32 k = 0; k < 4; k++)
1298 cofactor[j][k0] = array[j][k];
1303 dFloat32 x = cofactor[0][0] * (cofactor[1][1] * cofactor[2][2] - cofactor[1][2] * cofactor[2][1]);
1304 dFloat32 y = cofactor[0][1] * (cofactor[1][2] * cofactor[2][0] - cofactor[1][0] * cofactor[2][2]);
1305 dFloat32 z = cofactor[0][2] * (cofactor[1][0] * cofactor[2][1] - cofactor[1][1] * cofactor[2][0]);
1306 dFloat32 det = x + y + z;
1308 normal[i] = sign * det;
1309 sign *= dFloat32(-1.0f);
1315 D_INLINE
dVector GetInt()
const
1317 return dVector(dInt32(dgFloor(m_x)), dInt32(dgFloor(m_y)), dInt32(dgFloor(m_z)), dInt32(dgFloor(m_w)));
1320 D_INLINE
dVector TestZero()
const
1322 const dInt32*
const a = (dInt32*)&m_x;
1323 return dVector((a[0] == 0) ? dFloat32(-1.0f) : dFloat32(1.0f),
1324 (a[1] == 0) ? dFloat32(-1.0f) : dFloat32(1.0f),
1325 (a[2] == 0) ? dFloat32(-1.0f) : dFloat32(1.0f),
1326 (a[3] == 0) ? dFloat32(-1.0f) : dFloat32(1.0f));
1329 D_INLINE
dVector Floor()
const
1331 return dVector(dgFloor(m_x), dgFloor(m_y), dgFloor(m_z), dgFloor(m_w));
1336 return dVector(m_x * A.m_x + m_y * A.m_y + m_z * A.m_z + m_w * A.m_w);
1339 D_INLINE
dVector Reciproc()
const
1341 return dVector(dFloat32(1.0f) / m_x, dFloat32(1.0f) / m_y, dFloat32(1.0f) / m_z, dFloat32(1.0f) / m_w);
1346 return dVector(dgSqrt(m_x), dgSqrt(m_y), dgSqrt(m_z), dgSqrt(m_w));
1349 D_INLINE
dVector InvSqrt()
const
1351 return dVector(dgRsqrt(m_x), dgRsqrt(m_y), dgRsqrt(m_z), dgRsqrt(m_w));
1354 D_INLINE
dVector InvMagSqrt()
const
1356 return dVector(dgRsqrt(DotProduct(*this).m_x));
1359 D_INLINE
dVector Normalize()
const
1361 dAssert(m_w == dFloat32(0.0f));
1363 return me * InvMagSqrt();
1368 return vabsq_f32(m_type);
1371 dFloat32 GetMax()
const
1373 return dMax(dMax(m_x, m_y), dMax(m_z, m_w));
1378 return vmaxq_f32(m_type, data.m_type);
1383 return vminq_f32(m_type, data.m_type);
1389 return vceqq_f32(m_typeInt, data.m_typeInt);
1394 return vcgtq_f32(m_typeInt, data.m_typeInt);
1399 return vcltq_f32(m_typeInt, data.m_typeInt);
1404 return vcgeq_f32(m_typeInt, data.m_typeInt);
1409 return vcleq_f32(m_typeInt, data.m_typeInt);
1415 return vandq_u32(m_typeInt, data.m_typeInt);
1420 return vorrq_u32(m_typeInt, data.m_typeInt);
1425 return veorq_u32(m_typeInt, data.m_typeInt);
1430 return vbicq_u32(m_typeInt, data.m_typeInt);
1436 return (*
this) ^ (mask & (data ^ (*this)));
1439 D_INLINE dInt32 GetSignMask()
const
1441 const dInt32*
const a = (dInt32*)&m_x;
1442 return (((a[0] & 0x80000000) ? 1 : 0) | ((a[1] & 0x80000000) ? 2 : 0) | ((a[2] & 0x80000000) ? 4 : 0) | ((a[3] & 0x80000000) ? 8 : 0));
1445 D_INLINE
dVector ShiftRight()
const
1447 return dVector(m_w, m_x, m_y, m_z);
1450 D_INLINE
dVector ShiftTripleRight()
const
1452 return dVector(m_z, m_x, m_y, m_w);
1455 D_INLINE
dVector ShiftTripleLeft()
const
1457 return dVector(m_y, m_z, m_x, m_w);
1460 D_INLINE
dVector ShiftRightLogical(dInt32 bits)
const
1462 return dVector(dInt32(dUnsigned32(m_ix) >> bits), dInt32(dUnsigned32(m_iy) >> bits), dInt32(dUnsigned32(m_iz) >> bits), dInt32(dUnsigned32(m_iw) >> bits));
1467 float32x4x2_t vtrn1 = vzipq_f32(src0.m_type, src2.m_type);
1468 float32x4x2_t vtrn2 = vzipq_f32(src1.m_type, src3.m_type);
1469 float32x4x2_t res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
1470 float32x4x2_t res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
1471 dst0.m_type = res1.val[0];
1472 dst1.m_type = res1.val[1];
1473 dst2.m_type = res2.val[0];
1474 dst3.m_type = res2.val[1];
1477 DG_CLASS_ALLOCATOR(allocator)
1484 uint32x4_t m_typeInt;
1515 } D_GCC_NEWTON_ALIGN_32 ;
1519 D_MSV_NEWTON_ALIGN_32
1528 :m_x(val), m_y(val), m_z(val), m_w(val)
1533 : m_x(v.m_x), m_y(v.m_y), m_z(v.m_z), m_w(v.m_w)
1537 #ifndef D_NEWTON_USE_DOUBLE
1539 : m_x(v.m_x), m_y(v.m_y), m_z(v.m_z), m_w(v.m_w)
1543 D_INLINE
dBigVector(
const dFloat32*
const ptr)
1544 : m_x(ptr[0]), m_y(ptr[1]), m_z(ptr[2]), m_w(dFloat32(0.0f))
1546 dAssert(dgCheckVector((*
this)));
1550 D_INLINE
dBigVector(
const dFloat64*
const ptr)
1551 :m_x(ptr[0]), m_y(ptr[1]), m_z(ptr[2]), m_w(ptr[3])
1553 dAssert(dgCheckVector((*
this)));
1556 D_INLINE
dBigVector(dFloat64 x, dFloat64 y, dFloat64 z, dFloat64 w)
1557 : m_x(x), m_y(y), m_z(z), m_w(w)
1559 dAssert(dgCheckVector((*
this)));
1562 D_INLINE
dBigVector(dInt32 ix, dInt32 iy, dInt32 iz, dInt32 iw)
1563 : m_ix(ix), m_iy(iy), m_iz(iz), m_iw(iw)
1567 D_INLINE
dBigVector(dInt64 ix, dInt64 iy, dInt64 iz, dInt64 iw)
1568 : m_ix(ix), m_iy(iy), m_iz(iz), m_iw(iw)
1572 D_INLINE dFloat64 GetScalar()
const
1577 D_INLINE
void Store(dFloat64*
const dst)
const
1605 D_INLINE dFloat64& operator[] (dInt32 i)
1612 D_INLINE
const dFloat64& operator[] (dInt32 i)
const
1621 return dBigVector(m_x + A.m_x, m_y + A.m_y, m_z + A.m_z, m_w + A.m_w);
1626 return dBigVector(m_x - A.m_x, m_y - A.m_y, m_z - A.m_z, m_w - A.m_w);
1631 return dBigVector(m_x * A.m_x, m_y * A.m_y, m_z * A.m_z, m_w * A.m_w);
1636 return (*
this =
dBigVector(m_x + A.m_x, m_y + A.m_y, m_z + A.m_z, m_w + A.m_w));
1641 return (*
this =
dBigVector(m_x - A.m_x, m_y - A.m_y, m_z - A.m_z, m_w - A.m_w));
1646 return (*
this =
dBigVector(m_x * A.m_x, m_y * A.m_y, m_z * A.m_z, m_w * A.m_w));
1651 return *
this + A * B;
1656 return *
this - A * B;
1665 D_INLINE
dBigVector Scale(dFloat64 scale)
const
1667 return dBigVector(m_x * scale, m_y * scale, m_z * scale, m_w * scale);
1673 return dBigVector(m_y * B.m_z - m_z * B.m_y, m_z * B.m_x - m_x * B.m_z, m_x * B.m_y - m_y * B.m_x, m_w);
1678 dFloat64 cofactor[3][3];
1679 dFloat64 array[4][4];
1682 for (dInt32 i = 0; i < 4; i++) {
1683 array[0][i] = me[i];
1686 array[3][i] = dFloat32(1.0f);
1690 dFloat64 sign = dFloat64(-1.0f);
1691 for (dInt32 i = 0; i < 4; i++)
1693 for (dInt32 j = 0; j < 3; j++)
1696 for (dInt32 k = 0; k < 4; k++)
1700 cofactor[j][k0] = array[j][k];
1705 dFloat64 x = cofactor[0][0] * (cofactor[1][1] * cofactor[2][2] - cofactor[1][2] * cofactor[2][1]);
1706 dFloat64 y = cofactor[0][1] * (cofactor[1][2] * cofactor[2][0] - cofactor[1][0] * cofactor[2][2]);
1707 dFloat64 z = cofactor[0][2] * (cofactor[1][0] * cofactor[2][1] - cofactor[1][1] * cofactor[2][0]);
1708 dFloat64 det = x + y + z;
1710 normal[i] = sign * det;
1711 sign *= dFloat64(-1.0f);
1719 return dBigVector(dInt64(floor(m_x)), dInt64(floor(m_y)), dInt64(floor(m_z)), dInt64(floor(m_w)));
1724 const dInt64*
const a = (dInt64*)&m_x;
1725 return dBigVector((a[0] == 0) ? dFloat64(-1.0f) : dFloat64(1.0f),
1726 (a[1] == 0) ? dFloat64(-1.0f) : dFloat64(1.0f),
1727 (a[2] == 0) ? dFloat64(-1.0f) : dFloat64(1.0f),
1728 (a[3] == 0) ? dFloat64(-1.0f) : dFloat64(1.0f));
1734 return dBigVector(floor(m_x), floor(m_y), floor(m_z), floor(m_w));
1739 return dBigVector(m_x * A.m_x + m_y * A.m_y + m_z * A.m_z + m_w * A.m_w);
1744 return dBigVector(dFloat64(1.0f) / m_x, dFloat64(1.0f) / m_y, dFloat64(1.0f) / m_z, dFloat64(1.0f) / m_w);
1749 return dBigVector(sqrt(m_x), sqrt(m_y), sqrt(m_z), sqrt(m_w));
1754 return dBigVector(dFloat64(1.0f) / sqrt(m_x), dFloat64(1.0f) / sqrt(m_y), dFloat64(1.0f) / sqrt(m_z), dFloat64(1.0f) / sqrt(m_w));
1759 return dBigVector(dFloat64(1.0f) / sqrt(DotProduct(*this).m_x));
1764 dAssert(m_w == dFloat64(0.0f));
1767 return *
this * InvMagSqrt();
1772 return dBigVector((m_x > dFloat64(0.0f)) ? m_x : -m_x,
1773 (m_y > dFloat64(0.0f)) ? m_y : -m_y,
1774 (m_z > dFloat64(0.0f)) ? m_z : -m_z,
1775 (m_w > dFloat64(0.0f)) ? m_w : -m_w);
1778 dFloat64 GetMax()
const
1780 return dMax(dMax(m_x, m_y), dMax(m_z, m_w));
1785 return dBigVector((m_x > data.m_x) ? m_x : data.m_x,
1786 (m_y > data.m_y) ? m_y : data.m_y,
1787 (m_z > data.m_z) ? m_z : data.m_z,
1788 (m_w > data.m_w) ? m_w : data.m_w);
1793 return dBigVector((m_x < data.m_x) ? m_x : data.m_x,
1794 (m_y < data.m_y) ? m_y : data.m_y,
1795 (m_z < data.m_z) ? m_z : data.m_z,
1796 (m_w < data.m_w) ? m_w : data.m_w);
1802 return dBigVector((m_x == data.m_x) ? dInt64(-1) : dInt64(0),
1803 (m_y == data.m_y) ? dInt64(-1) : dInt64(0),
1804 (m_z == data.m_z) ? dInt64(-1) : dInt64(0),
1805 (m_w == data.m_w) ? dInt64(-1) : dInt64(0));
1810 return dBigVector((m_x > data.m_x) ? dInt64(-1) : dInt64(0),
1811 (m_y > data.m_y) ? dInt64(-1) : dInt64(0),
1812 (m_z > data.m_z) ? dInt64(-1) : dInt64(0),
1813 (m_w > data.m_w) ? dInt64(-1) : dInt64(0));
1818 return dBigVector((m_x < data.m_x) ? dInt64(-1) : dInt64(0),
1819 (m_y < data.m_y) ? dInt64(-1) : dInt64(0),
1820 (m_z < data.m_z) ? dInt64(-1) : dInt64(0),
1821 (m_w < data.m_w) ? dInt64(-1) : dInt64(0));
1826 return dBigVector((m_x >= data.m_x) ? dInt64(-1) : dInt64(0),
1827 (m_y >= data.m_y) ? dInt64(-1) : dInt64(0),
1828 (m_z >= data.m_z) ? dInt64(-1) : dInt64(0),
1829 (m_w >= data.m_w) ? dInt64(-1) : dInt64(0));
1834 return dBigVector((m_x <= data.m_x) ? dInt64(-1) : dInt64(0),
1835 (m_y <= data.m_y) ? dInt64(-1) : dInt64(0),
1836 (m_z <= data.m_z) ? dInt64(-1) : dInt64(0),
1837 (m_w <= data.m_w) ? dInt64(-1) : dInt64(0));
1844 const dInt64*
const a = (dInt64*)&m_x;
1845 const dInt64*
const b = (dInt64*)&data.m_x;
1846 return dBigVector(a[0] & b[0], a[1] & b[1], a[2] & b[2], a[3] & b[3]);
1851 const dInt64*
const a = (dInt64*)&m_x;
1852 const dInt64*
const b = (dInt64*)&data.m_x;
1853 return dBigVector(a[0] | b[0], a[1] | b[1], a[2] | b[2], a[3] | b[3]);
1858 const dInt64*
const a = (dInt64*)&m_x;
1859 const dInt64*
const b = (dInt64*)&data.m_x;
1860 return dBigVector(a[0] ^ b[0], a[1] ^ b[1], a[2] ^ b[2], a[3] ^ b[3]);
1865 const dInt64*
const a = (dInt64*)&m_x;
1866 const dInt64*
const b = (dInt64*)&data.m_x;
1867 return dBigVector(a[0] & ~b[0], a[1] & ~b[1], a[2] & ~b[2], a[3] & ~b[3]);
1873 return (*
this) ^ (mask & (data ^ (*this)));
1876 D_INLINE dInt32 GetSignMask()
const
1878 const dInt64*
const a = (dInt64*)&m_x;
1879 return (((a[0] >> 63) ? 1 : 0) | ((a[1] >> 63) ? 2 : 0) | ((a[2] >> 63) ? 4 : 0) | ((a[3] >> 63) ? 8 : 0));
1882 D_INLINE
dVector ShiftRight()
const
1897 D_INLINE
dBigVector ShiftRightLogical(dInt32 bits)
const
1899 return dBigVector(dInt64(dUnsigned64(m_ix) >> bits), dInt64(dUnsigned64(m_iy) >> bits), dInt64(dUnsigned64(m_iz) >> bits), dInt64(dUnsigned64(m_iw) >> bits));
1909 dst0 =
dBigVector(tmp0.m_x, tmp1.m_x, tmp2.m_x, tmp3.m_x);
1910 dst1 =
dBigVector(tmp0.m_y, tmp1.m_y, tmp2.m_y, tmp3.m_y);
1911 dst2 =
dBigVector(tmp0.m_z, tmp1.m_z, tmp2.m_z, tmp3.m_z);
1912 dst3 =
dBigVector(tmp0.m_w, tmp1.m_w, tmp2.m_w, tmp3.m_w);
1915 DG_CLASS_ALLOCATOR(allocator)
1950 } D_GCC_NEWTON_ALIGN_32 ;
1952 D_MSV_NEWTON_ALIGN_32
1962 for (dInt32 i = 0; i < 6; i++)
1970 for (dInt32 i = 0; i < 3; i++)
1973 m_d[i + 3] = high[i];
1979 for (dInt32 i = 0; i < 6; i++)
1985 D_INLINE dFloat64& operator[] (dInt32 i)
1992 D_INLINE
const dFloat64& operator[] (dInt32 i)
const
2002 for (dInt32 i = 0; i < 6; i++)
2004 tmp[i] = m_d[i] + A.m_d[i];
2012 for (dInt32 i = 0; i < 6; i++)
2014 tmp[i] = m_d[i] * A.m_d[i];
2021 dFloat64 ret = dFloat64(0.0f);
2022 for (dInt32 i = 0; i < 6; i++)
2024 ret += m_d[i] * v.m_d[i];
2032 for (dInt32 i = 0; i < 6; i++)
2034 tmp[i] = m_d[i] * s;
2041 } D_GCC_NEWTON_ALIGN_32 ;