22 #ifndef __ND_VECTOR_ARM_NEON_H__
23 #define __ND_VECTOR_ARM_NEON_H__
27 #ifndef D_SCALAR_VECTOR_CLASS
29 #ifdef D_NEWTON_USE_DOUBLE
30 #define ndVector ndBigVector
43 D_OPERATOR_NEW_AND_DELETE
50 :m_type(vmovq_n_f32(val))
59 inline ndVector(
const float32x4_t type)
// Builds the vector by loading four consecutive ndFloat32 values starting at ptr
// (vld1q_f32 reads exactly four floats, so ptr must reference at least four).
64 inline ndVector(
const ndFloat32*
const ptr)
65 :m_type(vld1q_f32 (ptr))
// The loaded value is passed through ndCheckVector under ndAssert;
// presumably a debug-only sanity check - confirm against ndCheckVector's definition.
67 ndAssert(ndCheckVector((*
this)));
// Gather constructor: lane k (x, y, z, w for k = 0..3) is read from
// baseAddr[index[k]]. index must therefore reference at least four entries,
// each of which is a valid offset into baseAddr.
70 inline ndVector(
const ndFloat32*
const baseAddr,
const ndInt32*
const index)
71 :m_x(baseAddr[index[0]])
72 ,m_y(baseAddr[index[1]])
73 ,m_z(baseAddr[index[2]])
74 ,m_w(baseAddr[index[3]])
79 #ifndef D_NEWTON_USE_DOUBLE
// Builds the vector from four ndFloat64 values at ptr[0..3], narrowing each
// one to ndFloat32 (precision beyond single-float range is lost).
80 inline ndVector(
const ndFloat64*
const ptr)
81 :m_x(ndFloat32(ptr[0]))
82 ,m_y(ndFloat32(ptr[1]))
83 ,m_z(ndFloat32(ptr[2]))
84 ,m_w(ndFloat32(ptr[3]))
// Builds the vector from four explicit float components, stored lane by lane.
89 inline ndVector(ndFloat32 x, ndFloat32 y, ndFloat32 z, ndFloat32 w)
90 :m_x(x), m_y(y), m_z(z), m_w(w)
// Result is passed through ndCheckVector under ndAssert;
// presumably a debug-only sanity check - confirm against ndCheckVector's definition.
92 ndAssert(ndCheckVector((*
this)));
// Builds the vector from four ndInt32 values written to the integer lane
// members m_ix..m_iw. NOTE(review): these presumably alias the float lanes
// through a union that is not visible in this excerpt - confirm.
// Unlike the float constructors, no ndCheckVector assertion is visible here.
95 inline ndVector(ndInt32 ix, ndInt32 iy, ndInt32 iz, ndInt32 iw)
96 :m_ix(ix), m_iy(iy), m_iz(iz), m_iw(iw)
100 #ifndef D_NEWTON_USE_DOUBLE
// Narrows the source object's four ndFloat64 lanes to ndFloat32. The lanes are
// read through a raw ndFloat64* view of the object's address (&copy), so this
// relies on the source storing its four lanes contiguously from its first byte.
102 :m_x(ndFloat32(((ndFloat64*)&copy)[0]))
103 ,m_y(ndFloat32(((ndFloat64*)&copy)[1]))
104 ,m_z(ndFloat32(((ndFloat64*)&copy)[2]))
105 ,m_w(ndFloat32(((ndFloat64*)&copy)[3]))
107 ndAssert(ndCheckVector((*
this)));
111 inline ndFloat32 GetScalar()
const
// Writes the four float lanes to dst with vst1q_f32.
// dst must have room for four ndFloat32 values; the vector itself is not modified.
116 inline void Store(ndFloat32*
const dst)
const
118 vst1q_f32(dst, m_type);
141 inline ndFloat32& operator[] (ndInt32 i)
148 inline const ndFloat32& operator[] (ndInt32 i)
const
157 return vaddq_f32(m_type, A.m_type);
162 return vsubq_f32(m_type, A.m_type);
167 return vmulq_f32(m_type, A.m_type);
172 return (*
this = vaddq_f32(m_type, A.m_type));
177 return (*
this = vsubq_f32(m_type, A.m_type));
182 return (*
this = vmulq_f32(m_type, A.m_type));
188 return vmlaq_f32(m_type, A.m_type, B.m_type);
194 return vmlsq_f32(m_type, A.m_type, B.m_type);
// Adds the four lanes together (x + y + z + w, evaluated left to right) and
// hands the scalar sum to the single-value ndVector constructor.
// NOTE(review): that constructor appears to broadcast the value to every lane
// (vmovq_n_f32 initializer earlier in the class) - confirm.
197 inline ndVector AddHorizontal()
const
199 return ndVector(m_x + m_y + m_z + m_w);
// Returns a new vector with every lane, w included, multiplied by scale.
// Computed per component through the four-float constructor rather than a NEON intrinsic.
202 inline ndVector Scale(ndFloat32 scale)
const
204 return ndVector(m_x * scale, m_y * scale, m_z * scale, m_w * scale);
211 m_y * B.m_z - m_z * B.m_y,
212 m_z * B.m_x - m_x * B.m_z,
213 m_x * B.m_y - m_y * B.m_x, m_w);
218 ndFloat32 cofactor[3][3];
219 ndFloat32 array[4][4];
222 for (ndInt32 i = 0; i < 4; ++i)
227 array[3][i] = ndFloat32(1.0f);
231 ndFloat32 sign = ndFloat32(-1.0f);
232 for (ndInt32 i = 0; i < 4; ++i)
234 for (ndInt32 j = 0; j < 3; ++j)
237 for (ndInt32 k = 0; k < 4; ++k)
241 cofactor[j][k0] = array[j][k];
246 ndFloat32 x = cofactor[0][0] * (cofactor[1][1] * cofactor[2][2] - cofactor[1][2] * cofactor[2][1]);
247 ndFloat32 y = cofactor[0][1] * (cofactor[1][2] * cofactor[2][0] - cofactor[1][0] * cofactor[2][2]);
248 ndFloat32 z = cofactor[0][2] * (cofactor[1][0] * cofactor[2][1] - cofactor[1][1] * cofactor[2][0]);
249 ndFloat32 det = x + y + z;
251 normal[i] = sign * det;
252 sign *= ndFloat32(-1.0f);
260 return ndVector(ndInt32(ndFloor(m_x)), ndInt32(ndFloor(m_y)), ndInt32(ndFloor(m_z)), ndInt32(ndFloor(m_w)));
// Views the four float lanes as raw ndInt32 bit patterns. A lane whose bits are
// all zero yields -1.0f; any other bit pattern yields 0.0f. Because the test is
// on bits, a negative zero (sign bit set) is treated as non-zero.
// NOTE(review): the ndFloat64 counterpart later in this file returns 1.0 (not 0.0)
// for non-zero lanes - confirm which convention callers expect.
265 const ndInt32*
const a = (ndInt32*)&m_x;
267 (a[0] == 0) ? ndFloat32(-1.0f) : ndFloat32(0.0f),
268 (a[1] == 0) ? ndFloat32(-1.0f) : ndFloat32(0.0f),
269 (a[2] == 0) ? ndFloat32(-1.0f) : ndFloat32(0.0f),
270 (a[3] == 0) ? ndFloat32(-1.0f) : ndFloat32(0.0f));
275 return ndVector(ndFloor(m_x), ndFloor(m_y), ndFloor(m_z), ndFloor(m_w));
281 return (*
this * A).AddHorizontal();
286 return ndVector(ndFloat32(1.0f) / m_x, ndFloat32(1.0f) / m_y, ndFloat32(1.0f) / m_z, ndFloat32(1.0f) / m_w);
291 return ndVector(ndSqrt(m_x), ndSqrt(m_y), ndSqrt(m_z), ndSqrt(m_w));
296 return ndVector(ndRsqrt(m_x), ndRsqrt(m_y), ndRsqrt(m_z), ndRsqrt(m_w));
301 return ndVector(ndRsqrt(DotProduct(*this).m_x));
307 return me * InvMagSqrt();
312 return vabsq_f32(m_type);
317 return ndVector(ndMax(ndMax(m_x, m_y), ndMax(m_z, m_w)));
322 return vmaxq_f32(m_type, data.m_type);
327 return vminq_f32(m_type, data.m_type);
// Lane-wise float comparisons (==, >, <, >=, <= in that order). The vc*q_f32
// intrinsics take float32x4_t operands, so the float view m_type is passed
// rather than the uint32x4_t view m_typeInt (which only compiled under lax
// vector conversions). Each result lane is all ones when the comparison holds
// and all zeros otherwise.
333 return vceqq_f32(m_type, data.m_type);
338 return vcgtq_f32(m_type, data.m_type);
343 return vcltq_f32(m_type, data.m_type);
348 return vcgeq_f32(m_type, data.m_type);
353 return vcleq_f32(m_type, data.m_type);
359 return vandq_u32(m_typeInt, data.m_typeInt);
364 return vorrq_u32(m_typeInt, data.m_typeInt);
369 return veorq_u32(m_typeInt, data.m_typeInt);
374 return vbicq_u32(m_typeInt, data.m_typeInt);
380 return (*
this) ^ (mask & (data ^ (*this)));
// Packs the sign bit of each lane into the low four bits of the result:
// bit 0 <- a[0], bit 1 <- a[1], bit 2 <- a[2], bit 3 <- a[3], where a is the
// lane storage starting at m_x viewed as ndInt32 (assumes m_x..m_w are
// contiguous, so a[0..3] correspond to x, y, z, w).
383 inline ndInt32 GetSignMask()
const
385 const ndInt32*
const a = (ndInt32*)&m_x;
// 0x80000000 isolates the IEEE-754 sign bit of a 32-bit float.
386 return (((a[0] & 0x80000000) ? 1 : 0) | ((a[1] & 0x80000000) ? 2 : 0) | ((a[2] & 0x80000000) ? 4 : 0) | ((a[3] & 0x80000000) ? 8 : 0));
391 return ndVector(m_w, m_x, m_y, m_z);
// Rotates the x, y, z lanes one position to the right: (x, y, z, w) -> (z, x, y, w).
// The w lane is carried over unchanged.
394 inline ndVector ShiftTripleRight()
const
396 return ndVector(m_z, m_x, m_y, m_w);
// Rotates the x, y, z lanes one position to the left: (x, y, z, w) -> (y, z, x, w).
// The w lane is carried over unchanged.
399 inline ndVector ShiftTripleLeft()
const
401 return ndVector(m_y, m_z, m_x, m_w);
// Logical (zero-filling) right shift of each integer lane by bits. Every lane is
// cast to ndUnsigned32 before shifting so no sign extension occurs, then cast
// back to ndInt32 for the integer constructor.
// bits is not range-checked; a shift count that is negative or not smaller than
// the lane width is undefined behaviour in C++.
404 inline ndVector ShiftRightLogical(ndInt32 bits)
const
406 return ndVector(ndInt32(ndUnsigned32(m_ix) >> bits), ndInt32(ndUnsigned32(m_iy) >> bits), ndInt32(ndUnsigned32(m_iz) >> bits), ndInt32(ndUnsigned32(m_iw) >> bits));
// 4x4 transpose done with two rounds of vzipq_f32 interleaves.
// Round 1 interleaves rows 0/2 and rows 1/3:
//   vtrn1.val[0] = (s0.x, s2.x, s0.y, s2.y), vtrn1.val[1] = (s0.z, s2.z, s0.w, s2.w)
//   vtrn2.val[0] = (s1.x, s3.x, s1.y, s3.y), vtrn2.val[1] = (s1.z, s3.z, s1.w, s3.w)
411 float32x4x2_t vtrn1 = vzipq_f32(src0.m_type, src2.m_type);
412 float32x4x2_t vtrn2 = vzipq_f32(src1.m_type, src3.m_type);
// Round 2 interleaves those halves, leaving one source component per output:
//   res1.val[0] = all x, res1.val[1] = all y, res2.val[0] = all z, res2.val[1] = all w
//   (each ordered src0, src1, src2, src3).
413 float32x4x2_t res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
414 float32x4x2_t res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
415 dst0.m_type = res1.val[0];
416 dst1.m_type = res1.val[1];
417 dst2.m_type = res2.val[0];
418 dst3.m_type = res2.val[1];
426 uint32x4_t m_typeInt;
449 D_CORE_API
static ndVector m_negOne;
454 D_CORE_API
static ndVector m_xyzwMask;
455 D_CORE_API
static ndVector m_epsilon;
456 D_CORE_API
static ndVector m_signMask;
457 D_CORE_API
static ndVector m_triplexMask;
458 } D_GCC_NEWTON_ALIGN_16;
461 D_MSV_NEWTON_ALIGN_32
465 D_OPERATOR_NEW_AND_DELETE
472 :m_x(val), m_y(val), m_z(val), m_w(val)
477 : m_x(v.m_x), m_y(v.m_y), m_z(v.m_z), m_w(v.m_w)
481 #ifndef D_NEWTON_USE_DOUBLE
483 : m_x(v.m_x), m_y(v.m_y), m_z(v.m_z), m_w(v.m_w)
488 : m_x(ptr[0]), m_y(ptr[1]), m_z(ptr[2]), m_w(ndFloat32(0.0f))
490 ndAssert(ndCheckVector((*
this)));
495 :m_x(ptr[0]), m_y(ptr[1]), m_z(ptr[2]), m_w(ptr[3])
497 ndAssert(ndCheckVector((*
this)));
// Builds the double-precision vector from four explicit ndFloat64 components.
500 inline ndBigVector(ndFloat64 x, ndFloat64 y, ndFloat64 z, ndFloat64 w)
501 : m_x(x), m_y(y), m_z(z), m_w(w)
// Result is passed through ndCheckVector under ndAssert;
// presumably a debug-only sanity check - confirm against ndCheckVector's definition.
503 ndAssert(ndCheckVector((*
this)));
// Builds the vector from four ndInt32 values written to the integer lane
// members m_ix..m_iw. NOTE(review): the ndInt64 overload initializes the same
// members, so these are presumably 64-bit lanes and each argument is widened - confirm.
506 inline ndBigVector(ndInt32 ix, ndInt32 iy, ndInt32 iz, ndInt32 iw)
507 : m_ix(ix), m_iy(iy), m_iz(iz), m_iw(iw)
// Builds the vector from four ndInt64 values written to the integer lane
// members m_ix..m_iw. This overload is what the bitwise and comparison
// fragments in this class use to return raw 64-bit masks.
511 inline ndBigVector(ndInt64 ix, ndInt64 iy, ndInt64 iz, ndInt64 iw)
512 : m_ix(ix), m_iy(iy), m_iz(iz), m_iw(iw)
516 inline ndFloat64 GetScalar()
const
521 inline void Store(ndFloat64*
const dst)
const
549 inline ndFloat64& operator[] (ndInt32 i)
556 inline const ndFloat64& operator[] (ndInt32 i)
const
565 return ndBigVector(m_x + A.m_x, m_y + A.m_y, m_z + A.m_z, m_w + A.m_w);
570 return ndBigVector(m_x - A.m_x, m_y - A.m_y, m_z - A.m_z, m_w - A.m_w);
575 return ndBigVector(m_x * A.m_x, m_y * A.m_y, m_z * A.m_z, m_w * A.m_w);
580 return (*
this =
ndBigVector(m_x + A.m_x, m_y + A.m_y, m_z + A.m_z, m_w + A.m_w));
585 return (*
this =
ndBigVector(m_x - A.m_x, m_y - A.m_y, m_z - A.m_z, m_w - A.m_w));
590 return (*
this =
ndBigVector(m_x * A.m_x, m_y * A.m_y, m_z * A.m_z, m_w * A.m_w));
595 return *
this + A * B;
600 return *
this - A * B;
611 return ndBigVector(m_x * scale, m_y * scale, m_z * scale, m_w * scale);
617 return ndBigVector(m_y * B.m_z - m_z * B.m_y, m_z * B.m_x - m_x * B.m_z, m_x * B.m_y - m_y * B.m_x, m_w);
622 ndFloat64 cofactor[3][3];
623 ndFloat64 array[4][4];
626 for (ndInt32 i = 0; i < 4; ++i) {
// Fill row 3 of the ndFloat64 working matrix with ones; the literal is typed as
// ndFloat64 to match the array's element type (value is unchanged).
630 array[3][i] = ndFloat64(1.0f);
634 ndFloat64 sign = ndFloat64(-1.0f);
635 for (ndInt32 i = 0; i < 4; ++i)
637 for (ndInt32 j = 0; j < 3; ++j)
640 for (ndInt32 k = 0; k < 4; ++k)
644 cofactor[j][k0] = array[j][k];
649 ndFloat64 x = cofactor[0][0] * (cofactor[1][1] * cofactor[2][2] - cofactor[1][2] * cofactor[2][1]);
650 ndFloat64 y = cofactor[0][1] * (cofactor[1][2] * cofactor[2][0] - cofactor[1][0] * cofactor[2][2]);
651 ndFloat64 z = cofactor[0][2] * (cofactor[1][0] * cofactor[2][1] - cofactor[1][1] * cofactor[2][0]);
652 ndFloat64 det = x + y + z;
654 normal[i] = sign * det;
655 sign *= ndFloat64(-1.0f);
663 return ndBigVector(ndInt64(floor(m_x)), ndInt64(floor(m_y)), ndInt64(floor(m_z)), ndInt64(floor(m_w)));
668 const ndInt64*
const a = (ndInt64*)&m_x;
669 return ndBigVector((a[0] == 0) ? ndFloat64(-1.0f) : ndFloat64(1.0f),
670 (a[1] == 0) ? ndFloat64(-1.0f) : ndFloat64(1.0f),
671 (a[2] == 0) ? ndFloat64(-1.0f) : ndFloat64(1.0f),
672 (a[3] == 0) ? ndFloat64(-1.0f) : ndFloat64(1.0f));
678 return ndBigVector(floor(m_x), floor(m_y), floor(m_z), floor(m_w));
684 return (*
this * A).AddHorizontal();
689 return ndBigVector(ndFloat64(1.0f) / m_x, ndFloat64(1.0f) / m_y, ndFloat64(1.0f) / m_z, ndFloat64(1.0f) / m_w);
694 return ndBigVector(sqrt(m_x), sqrt(m_y), sqrt(m_z), sqrt(m_w));
699 return ndBigVector(ndFloat64(1.0f) / sqrt(m_x), ndFloat64(1.0f) / sqrt(m_y), ndFloat64(1.0f) / sqrt(m_z), ndFloat64(1.0f) / sqrt(m_w));
704 return ndBigVector(ndFloat64(1.0f) / sqrt(DotProduct(*this).m_x));
709 return *
this * InvMagSqrt();
715 (m_x > ndFloat64(0.0f)) ? m_x : -m_x,
716 (m_y > ndFloat64(0.0f)) ? m_y : -m_y,
717 (m_z > ndFloat64(0.0f)) ? m_z : -m_z,
718 (m_w > ndFloat64(0.0f)) ? m_w : -m_w);
723 return ndBigVector(ndMax(ndMax(m_x, m_y), ndMax(m_z, m_w)));
729 (m_x > data.m_x) ? m_x : data.m_x,
730 (m_y > data.m_y) ? m_y : data.m_y,
731 (m_z > data.m_z) ? m_z : data.m_z,
732 (m_w > data.m_w) ? m_w : data.m_w);
738 (m_x < data.m_x) ? m_x : data.m_x,
739 (m_y < data.m_y) ? m_y : data.m_y,
740 (m_z < data.m_z) ? m_z : data.m_z,
741 (m_w < data.m_w) ? m_w : data.m_w);
748 (m_x == data.m_x) ? ndInt64(-1) : ndInt64(0),
749 (m_y == data.m_y) ? ndInt64(-1) : ndInt64(0),
750 (m_z == data.m_z) ? ndInt64(-1) : ndInt64(0),
751 (m_w == data.m_w) ? ndInt64(-1) : ndInt64(0));
757 (m_x > data.m_x) ? ndInt64(-1) : ndInt64(0),
758 (m_y > data.m_y) ? ndInt64(-1) : ndInt64(0),
759 (m_z > data.m_z) ? ndInt64(-1) : ndInt64(0),
760 (m_w > data.m_w) ? ndInt64(-1) : ndInt64(0));
766 (m_x < data.m_x) ? ndInt64(-1) : ndInt64(0),
767 (m_y < data.m_y) ? ndInt64(-1) : ndInt64(0),
768 (m_z < data.m_z) ? ndInt64(-1) : ndInt64(0),
769 (m_w < data.m_w) ? ndInt64(-1) : ndInt64(0));
775 (m_x >= data.m_x) ? ndInt64(-1) : ndInt64(0),
776 (m_y >= data.m_y) ? ndInt64(-1) : ndInt64(0),
777 (m_z >= data.m_z) ? ndInt64(-1) : ndInt64(0),
778 (m_w >= data.m_w) ? ndInt64(-1) : ndInt64(0));
784 (m_x <= data.m_x) ? ndInt64(-1) : ndInt64(0),
785 (m_y <= data.m_y) ? ndInt64(-1) : ndInt64(0),
786 (m_z <= data.m_z) ? ndInt64(-1) : ndInt64(0),
787 (m_w <= data.m_w) ? ndInt64(-1) : ndInt64(0));
793 const ndInt64*
const a = (ndInt64*)&m_x;
794 const ndInt64*
const b = (ndInt64*)&data.m_x;
795 return ndBigVector(a[0] & b[0], a[1] & b[1], a[2] & b[2], a[3] & b[3]);
800 const ndInt64*
const a = (ndInt64*)&m_x;
801 const ndInt64*
const b = (ndInt64*)&data.m_x;
802 return ndBigVector(a[0] | b[0], a[1] | b[1], a[2] | b[2], a[3] | b[3]);
807 const ndInt64*
const a = (ndInt64*)&m_x;
808 const ndInt64*
const b = (ndInt64*)&data.m_x;
809 return ndBigVector(a[0] ^ b[0], a[1] ^ b[1], a[2] ^ b[2], a[3] ^ b[3]);
814 const ndInt64*
const a = (ndInt64*)&m_x;
815 const ndInt64*
const b = (ndInt64*)&data.m_x;
816 return ndBigVector(a[0] & ~b[0], a[1] & ~b[1], a[2] & ~b[2], a[3] & ~b[3]);
822 return (*
this) ^ (mask & (data ^ (*this)));
// Packs the sign bit of each 64-bit lane into the low four bits of the result:
// bit 0 <- a[0], bit 1 <- a[1], bit 2 <- a[2], bit 3 <- a[3], where a is the
// lane storage starting at m_x viewed as ndInt64 (assumes m_x..m_w are contiguous).
825 inline ndInt32 GetSignMask()
const
827 const ndInt64*
const a = (ndInt64*)&m_x;
// Shifting by 63 leaves a non-zero value exactly when the top (sign) bit is set;
// only zero/non-zero is tested, so the fill behaviour of the signed shift does not matter.
828 return (((a[0] >> 63) ? 1 : 0) | ((a[1] >> 63) ? 2 : 0) | ((a[2] >> 63) ? 4 : 0) | ((a[3] >> 63) ? 8 : 0));
// Logical (zero-filling) right shift of each integer lane by bits. Every lane is
// cast to ndUnsigned64 before shifting so no sign extension occurs, then cast
// back to ndInt64 for the integer constructor.
// bits is not range-checked; a shift count that is negative or not smaller than
// the lane width is undefined behaviour in C++.
846 inline ndBigVector ShiftRightLogical(ndInt32 bits)
const
848 return ndBigVector(ndInt64(ndUnsigned64(m_ix) >> bits), ndInt64(ndUnsigned64(m_iy) >> bits), ndInt64(ndUnsigned64(m_iz) >> bits), ndInt64(ndUnsigned64(m_iw) >> bits));
// Writes the transpose of the four temporaries: dst0 collects the x components of
// tmp0..tmp3, dst1 the y components, dst2 the z components and dst3 the w components.
// NOTE(review): tmp0..tmp3 are defined outside this excerpt - presumably copies of
// the sources so that outputs may alias inputs; confirm.
858 dst0 =
ndBigVector(tmp0.m_x, tmp1.m_x, tmp2.m_x, tmp3.m_x);
859 dst1 =
ndBigVector(tmp0.m_y, tmp1.m_y, tmp2.m_y, tmp3.m_y);
860 dst2 =
ndBigVector(tmp0.m_z, tmp1.m_z, tmp2.m_z, tmp3.m_z);
861 dst3 =
ndBigVector(tmp0.m_w, tmp1.m_w, tmp2.m_w, tmp3.m_w);
898 } D_GCC_NEWTON_ALIGN_32;