// Newton Dynamics 4.00 -- ndVectorSimd.h
/* Copyright (c) <2003-2022> <Julio Jerez, Newton Game Dynamics>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
21 
22 #ifndef __ND_VECTOR_X86_SIMD_H__
23 #define __ND_VECTOR_X86_SIMD_H__
24 
25 #ifndef D_SCALAR_VECTOR_CLASS
26 
27 #ifdef D_NEWTON_USE_DOUBLE
28  #define ndVector ndBigVector
29 #else
30 
31 class ndBigVector;
32 // *****************************************************************************************
33 //
34 // 4 x 1 single precision SSE vector class declaration
35 //
36 // *****************************************************************************************
37 D_MSV_NEWTON_ALIGN_16
38 class ndVector
39 {
40  #define PERMUTE_MASK(w, z, y, x) _MM_SHUFFLE (w, z, y, x)
41  public:
42  D_OPERATOR_NEW_AND_DELETE
43 
44  inline ndVector()
45  {
46  }
47 
48  inline ndVector(const __m128i type)
49  :m_typeInt (type)
50  {
51  }
52 
53  inline ndVector(const __m128 type)
54  : m_type(type)
55  {
56  }
57 
58  inline ndVector (const ndFloat32 a)
59  :m_x(a)
60  ,m_y(a)
61  ,m_z(a)
62  ,m_w(a)
63  {
64  }
65 
66  inline ndVector (const ndFloat32* const ptr)
67  :m_type(_mm_loadu_ps (ptr))
68  {
69  }
70 
71  // emulate gather instruction
72  inline ndVector(const ndFloat32* const baseAddr, const ndInt32* const index)
73  :m_x(baseAddr[index[0]])
74  ,m_y(baseAddr[index[1]])
75  ,m_z(baseAddr[index[2]])
76  ,m_w(baseAddr[index[3]])
77  {
78  }
79 
80 #ifndef D_NEWTON_USE_DOUBLE
81  inline ndVector(const ndFloat64* const ptr)
82  :m_x(ndFloat32(ptr[0]))
83  ,m_y(ndFloat32(ptr[1]))
84  ,m_z(ndFloat32(ptr[2]))
85  ,m_w(ndFloat32(ptr[3]))
86  {
87  }
88 #endif
89 
90  inline ndVector (const ndVector& copy)
91  :m_type(copy.m_type)
92  {
93  }
94 
95  inline ndVector (const ndBigVector& copy)
96  :m_type(_mm_shuffle_ps (_mm_cvtpd_ps (((__m128d*)&copy)[0]), _mm_cvtpd_ps (((__m128d*)&copy)[1]), PERMUTE_MASK(1, 0, 1, 0)))
97  {
98  ndAssert (ndCheckVector ((*this)));
99  }
100 
101  inline ndVector (ndFloat32 x, ndFloat32 y, ndFloat32 z, ndFloat32 w)
102  :m_x(x)
103  ,m_y(y)
104  ,m_z(z)
105  ,m_w(w)
106  {
107  }
108 
109  inline ndVector (ndInt32 ix, ndInt32 iy, ndInt32 iz, ndInt32 iw)
110  :m_ix(ix)
111  ,m_iy(iy)
112  ,m_iz(iz)
113  ,m_iw(iw)
114  {
115  }
116 
117  inline ndFloat32 GetX() const
118  {
119  return m_x;
120  }
121 
122  inline ndFloat32 GetY() const
123  {
124  return m_y;
125  }
126 
127  inline ndFloat32 GetZ() const
128  {
129  return m_z;
130  }
131 
132  inline ndFloat32 GetW() const
133  {
134  return m_w;
135  }
136 
137  inline void SetX(ndFloat32 x)
138  {
139  m_x = x;
140  }
141 
142  inline void SetY(ndFloat32 x)
143  {
144  m_y = x;
145  }
146 
147  inline void SetZ(ndFloat32 x)
148  {
149  m_z = x;
150  }
151 
152  inline void SetW(ndFloat32 x)
153  {
154  m_w = x;
155  }
156 
157  //return the x component
158  inline ndFloat32 GetScalar () const
159  {
160  return _mm_cvtss_f32 (m_type);
161  }
162 
163  inline void Store (ndFloat32* const dst) const
164  {
165  _mm_storeu_ps(dst, m_type);
166  }
167 
168  inline ndVector BroadcastX () const
169  {
170  return _mm_shuffle_ps (m_type, m_type, PERMUTE_MASK(0, 0, 0, 0));
171  }
172 
173  inline ndVector BroadcastY () const
174  {
175  return _mm_shuffle_ps (m_type, m_type, PERMUTE_MASK(1, 1, 1, 1));
176  }
177 
178  inline ndVector BroadcastZ () const
179  {
180  return _mm_shuffle_ps (m_type, m_type, PERMUTE_MASK(2, 2, 2, 2));
181  }
182 
183  inline ndVector BroadcastW () const
184  {
185  return _mm_shuffle_ps (m_type, m_type, PERMUTE_MASK(3, 3, 3, 3));
186  }
187 
188  inline ndVector Scale (ndFloat32 s) const
189  {
190  return _mm_mul_ps (m_type, _mm_set_ps1(s));
191  }
192 
193  inline ndFloat32& operator[] (ndInt32 i)
194  {
195  ndAssert (i < 4);
196  ndAssert (i >= 0);
197  return m_f[i];
198  }
199 
200  inline const ndFloat32& operator[] (ndInt32 i) const
201  {
202  ndAssert (i < 4);
203  ndAssert (i >= 0);
204  return m_f[i];
205  }
206 
207  inline ndVector operator+ (const ndVector& A) const
208  {
209  return _mm_add_ps (m_type, A.m_type);
210  }
211 
212  inline ndVector operator- (const ndVector& A) const
213  {
214  return _mm_sub_ps (m_type, A.m_type);
215  }
216 
217  inline ndVector operator* (const ndVector& A) const
218  {
219  return _mm_mul_ps(m_type, A.m_type);
220  }
221 
222  inline ndVector& operator+= (const ndVector& A)
223  {
224  return (*this = _mm_add_ps (m_type, A.m_type));
225  }
226 
227  inline ndVector& operator-= (const ndVector& A)
228  {
229  return (*this = _mm_sub_ps (m_type, A.m_type));
230  }
231 
232  inline ndVector& operator*= (const ndVector& A)
233  {
234  return (*this = _mm_mul_ps(m_type, A.m_type));
235  }
236 
237  // return 4d dot product
238  inline ndVector DotProduct(const ndVector& A) const
239  {
240  //const ndVector tmp(_mm_mul_ps(m_type, A.m_type));
241  //return tmp.AddHorizontal();
242  return (*this * A).AddHorizontal();
243  }
244 
245  // return 3d cross product
246  inline ndVector CrossProduct (const ndVector& B) const
247  {
248  __m128 tmp0 = _mm_shuffle_ps( m_type, m_type, _MM_SHUFFLE(3, 0, 2, 1));
249  __m128 tmp1 = _mm_shuffle_ps(B.m_type, B.m_type, _MM_SHUFFLE(3, 1, 0, 2));
250  __m128 tmp2 = _mm_mul_ps(tmp0, B.m_type);
251  __m128 tmp3 = _mm_mul_ps(tmp0, tmp1);
252  __m128 tmp4 = _mm_shuffle_ps(tmp2, tmp2, _MM_SHUFFLE(3, 0, 2, 1));
253  return _mm_sub_ps(tmp3, tmp4);
254  }
255 
256  // return 4d cross product
257  inline ndVector CrossProduct (const ndVector& A, const ndVector& B) const
258  {
259  ndFloat32 array[4][4];
260  ndFloat32 cofactor[3][3];
261 
262  const ndVector& me = *this;
263  for (ndInt32 i = 0; i < 4; ++i)
264  {
265  array[0][i] = me[i];
266  array[1][i] = A[i];
267  array[2][i] = B[i];
268  array[3][i] = ndFloat32 (1.0f);
269  }
270 
271  ndVector normal;
272  ndFloat32 sign = ndFloat32 (-1.0f);
273  for (ndInt32 i = 0; i < 4; ++i)
274  {
275  for (ndInt32 j = 0; j < 3; ++j)
276  {
277  ndInt32 k0 = 0;
278  for (ndInt32 k = 0; k < 4; ++k)
279  {
280  if (k != i)
281  {
282  cofactor[j][k0] = array[j][k];
283  k0 ++;
284  }
285  }
286  }
287  ndFloat32 x = cofactor[0][0] * (cofactor[1][1] * cofactor[2][2] - cofactor[1][2] * cofactor[2][1]);
288  ndFloat32 y = cofactor[0][1] * (cofactor[1][2] * cofactor[2][0] - cofactor[1][0] * cofactor[2][2]);
289  ndFloat32 z = cofactor[0][2] * (cofactor[1][0] * cofactor[2][1] - cofactor[1][1] * cofactor[2][0]);
290  ndFloat32 det = x + y + z;
291 
292  normal[i] = sign * det;
293  sign *= ndFloat32 (-1.0f);
294  }
295 
296  return normal;
297  }
298 
299  inline ndVector Reciproc () const
300  {
301  return _mm_div_ps (m_one.m_type, m_type);
302  }
303 
304  inline ndVector MulAdd(const ndVector& A, const ndVector& B) const
305  {
306  return _mm_add_ps(m_type, _mm_mul_ps(A.m_type, B.m_type));
307  }
308 
309  inline ndVector MulSub(const ndVector& A, const ndVector& B) const
310  {
311  return _mm_sub_ps(m_type, _mm_mul_ps(A.m_type, B.m_type));
312  }
313 
314  inline ndVector AddHorizontal () const
315  {
316  #ifdef D_USE_SSE3
317  __m128 tmp (_mm_hadd_ps (m_type, m_type));
318  return _mm_hadd_ps (tmp, tmp);
319  #else
320  __m128 tmp (_mm_add_ps (m_type, _mm_shuffle_ps(m_type, m_type, PERMUTE_MASK(1, 0, 3, 2))));
321  return _mm_add_ps(tmp, _mm_shuffle_ps(tmp, tmp, PERMUTE_MASK(2, 3, 0, 1)));
322  #endif
323  }
324 
325  inline ndVector Abs () const
326  {
327  return _mm_and_ps (m_type, m_signMask.m_type);
328  }
329 
330  inline ndVector GetMax() const
331  {
332  __m128 tmp(_mm_max_ps(m_type, _mm_shuffle_ps(m_type, m_type, PERMUTE_MASK(1, 0, 3, 2))));
333  return _mm_max_ps(tmp, _mm_shuffle_ps(tmp, tmp, PERMUTE_MASK(2, 3, 0, 1)));
334  }
335 
336  inline ndVector GetMax (const ndVector& data) const
337  {
338  return _mm_max_ps (m_type, data.m_type);
339  }
340 
341  inline ndVector GetMin (const ndVector& data) const
342  {
343  return _mm_min_ps (m_type, data.m_type);
344  }
345 
346  inline ndVector GetInt () const
347  {
348  return ndVector(_mm_cvtps_epi32(Floor().m_type));
349  }
350 
351  inline ndVector TestZero() const
352  {
353  //return ndVector (_mm_cmpeq_epi32 (m_typeInt, m_zero.m_typeInt)) & m_negOne;
354  return m_negOne & (*this == m_zero);
355  }
356 
357  inline ndVector Floor () const
358  {
359  ndVector truncated (_mm_cvtepi32_ps (_mm_cvttps_epi32 (m_type)));
360  ndVector ret (truncated - (ndVector::m_one & (*this < truncated)));
361  ndAssert (ret.m_f[0] == ndFloor(m_f[0]));
362  ndAssert (ret.m_f[1] == ndFloor(m_f[1]));
363  ndAssert (ret.m_f[2] == ndFloor(m_f[2]));
364  ndAssert (ret.m_f[3] == ndFloor(m_f[3]));
365  return ret;
366  }
367 
368  inline ndVector Sqrt () const
369  {
370  return _mm_sqrt_ps(m_type);
371  }
372 
373  inline ndVector InvSqrt () const
374  {
375  ndVector tmp0 (_mm_rsqrt_ps(m_type));
376  return m_half * tmp0 * (m_three - *this * tmp0 * tmp0);
377  }
378 
379  inline ndVector InvMagSqrt () const
380  {
381  return DotProduct(*this).InvSqrt();
382  }
383 
384  inline ndVector Normalize () const
385  {
386  return Scale(ndFloat32(1.0f) / ndSqrt(DotProduct(*this).GetScalar()));
387  }
388 
389  // relational operators
390  inline ndVector operator> (const ndVector& data) const
391  {
392  return _mm_cmpgt_ps (m_type, data.m_type);
393  }
394 
395  inline ndVector operator== (const ndVector& data) const
396  {
397  return _mm_cmpeq_ps (m_type, data.m_type);
398  }
399 
400  inline ndVector operator< (const ndVector& data) const
401  {
402  return _mm_cmplt_ps (m_type, data.m_type);
403  }
404 
405  inline ndVector operator>= (const ndVector& data) const
406  {
407  return _mm_cmpge_ps (m_type, data.m_type);
408  }
409 
410  inline ndVector operator<= (const ndVector& data) const
411  {
412  return _mm_cmple_ps (m_type, data.m_type);
413  }
414 
415  // logical operations
416  inline ndVector operator& (const ndVector& data) const
417  {
418  return _mm_and_ps (m_type, data.m_type);
419  }
420 
421  inline ndVector operator| (const ndVector& data) const
422  {
423  return _mm_or_ps (m_type, data.m_type);
424  }
425 
426  inline ndVector operator^ (const ndVector& data) const
427  {
428  return _mm_xor_ps (m_type, data.m_type);
429  }
430 
431  inline ndVector AndNot(const ndVector& data) const
432  {
433  return _mm_andnot_ps(data.m_type, m_type);
434  }
435 
436  inline ndVector Select(const ndVector& data, const ndVector& mask) const
437  {
438  // (((b ^ a) & mask)^a)
439  //return _mm_or_ps (_mm_and_ps (mask.m_type, data.m_type), _mm_andnot_ps(mask.m_type, m_type));
440  return _mm_xor_ps(m_type, _mm_and_ps (mask.m_type, _mm_xor_ps(m_type, data.m_type)));
441  }
442 
443  inline ndInt32 GetSignMask() const
444  {
445  return _mm_movemask_ps(m_type);
446  }
447 
448  inline ndVector ShiftRight() const
449  {
450  return _mm_shuffle_ps(m_type, m_type, PERMUTE_MASK(2, 1, 0, 3));
451  }
452 
453  inline ndVector ShiftTripleRight () const
454  {
455  return _mm_shuffle_ps(m_type, m_type, PERMUTE_MASK(3, 1, 0, 2));
456  }
457 
458  inline ndVector ShiftTripleLeft () const
459  {
460  return _mm_shuffle_ps (m_type, m_type, PERMUTE_MASK(3, 0, 2, 1));
461  }
462 
463  inline ndVector ShiftRightLogical (ndInt32 bits) const
464  {
465  return ndVector (_mm_srli_epi32(m_typeInt, bits));
466  }
467 
468  inline static void Transpose4x4 (ndVector& dst0, ndVector& dst1, ndVector& dst2, ndVector& dst3, const ndVector& src0, const ndVector& src1, const ndVector& src2, const ndVector& src3)
469  {
470  __m128 tmp0 (_mm_unpacklo_ps (src0.m_type, src1.m_type));
471  __m128 tmp1 (_mm_unpacklo_ps (src2.m_type, src3.m_type));
472  __m128 tmp2 (_mm_unpackhi_ps (src0.m_type, src1.m_type));
473  __m128 tmp3 (_mm_unpackhi_ps (src2.m_type, src3.m_type));
474 
475  dst0 = ndVector (_mm_movelh_ps (tmp0, tmp1));
476  dst1 = ndVector (_mm_movehl_ps (tmp1, tmp0));
477  dst2 = ndVector (_mm_movelh_ps (tmp2, tmp3));
478  dst3 = ndVector (_mm_movehl_ps (tmp3, tmp2));
479  }
480 
481 #ifdef _DEBUG
482  //inline void Trace(char* const name) const
483  inline void Trace(char* const) const
484  {
485  ndAssert(0);
486  //dTrace(("%s %f %f %f %f\n", name, m_x, m_y, m_z, m_w));
487  }
488 #else
489  inline void Trace(char* const) const {}
490 #endif
491 
492  union
493  {
494  ndFloat32 m_f[4];
495  ndInt32 m_i[4];
496  __m128 m_type;
497  __m128i m_typeInt;
498  struct
499  {
500  ndFloat32 m_x;
501  ndFloat32 m_y;
502  ndFloat32 m_z;
503  ndFloat32 m_w;
504  };
505  struct
506  {
507  ndInt32 m_ix;
508  ndInt32 m_iy;
509  ndInt32 m_iz;
510  ndInt32 m_iw;
511  };
512  };
513 
514  D_CORE_API static ndVector m_zero;
515  D_CORE_API static ndVector m_one;
516  D_CORE_API static ndVector m_wOne;
517  D_CORE_API static ndVector m_two;
518  D_CORE_API static ndVector m_half;
519  D_CORE_API static ndVector m_three;
520  D_CORE_API static ndVector m_negOne;
521  D_CORE_API static ndVector m_xMask;
522  D_CORE_API static ndVector m_yMask;
523  D_CORE_API static ndVector m_zMask;
524  D_CORE_API static ndVector m_wMask;
525  D_CORE_API static ndVector m_xyzwMask;
526  D_CORE_API static ndVector m_epsilon;
527  D_CORE_API static ndVector m_signMask;
528  D_CORE_API static ndVector m_triplexMask;
529 } D_GCC_NEWTON_ALIGN_16 ;
530 #endif
531 
532 
533 // *****************************************************************************************
534 //
535 // 4 x 1 double precision SSE2 vector class declaration
536 //
537 // *****************************************************************************************
538 D_MSV_NEWTON_ALIGN_32
539 class ndBigVector
540 {
541  #define PERMUT_MASK_DOUBLE(y, x) _MM_SHUFFLE2 (y, x)
542 
543  public:
544  D_OPERATOR_NEW_AND_DELETE
545 
546  inline ndBigVector()
547  {
548  }
549 
550  inline ndBigVector(const ndBigVector& copy)
551  :m_typeLow(copy.m_typeLow)
552  ,m_typeHigh(copy.m_typeHigh)
553  {
554  }
555 
556  inline ndBigVector(const __m128d typeLow, const __m128d typeHigh)
557  :m_typeLow(typeLow)
558  ,m_typeHigh(typeHigh)
559  {
560  }
561 
562  inline ndBigVector(const __m128i typeLow, const __m128i typeHigh)
563  :m_typeIntLow(typeLow)
564  ,m_typeIntHigh(typeHigh)
565  {
566  }
567 
568  inline ndBigVector(const ndFloat64 a)
569  :m_typeLow(_mm_set1_pd(a))
570  ,m_typeHigh(_mm_set1_pd(a))
571  {
572  }
573 
574  inline ndBigVector(const ndFloat64* const baseAddr, const ndInt64* const index)
575  :m_x(baseAddr[index[0]])
576  ,m_y(baseAddr[index[1]])
577  ,m_z(baseAddr[index[2]])
578  ,m_w(baseAddr[index[3]])
579  {
580  }
581 
582 #ifdef D_NEWTON_USE_DOUBLE
583  inline ndBigVector (const ndFloat32* const ptr)
584  :m_typeLow(_mm_loadu_pd(ptr))
585  ,m_typeHigh(_mm_loadu_pd(&ptr[2]))
586  {
587  }
588 #else
589 
590  inline ndBigVector(const ndVector& v)
591  :m_typeLow(_mm_cvtps_pd (v.m_type))
592  ,m_typeHigh(_mm_cvtps_pd (_mm_shuffle_ps (v.m_type, v.m_type, PERMUTE_MASK(3, 2, 3, 2))))
593  {
594  ndAssert(ndCheckVector((*this)));
595  }
596 
597  inline ndBigVector(const ndFloat64* const ptr)
598  :m_typeLow(_mm_loadu_pd(ptr))
599  ,m_typeHigh(_mm_loadu_pd(&ptr[2]))
600  {
601  }
602 #endif
603 
604  inline ndBigVector(ndFloat64 x, ndFloat64 y, ndFloat64 z, ndFloat64 w)
605  :m_typeLow(_mm_set_pd(y, x))
606  ,m_typeHigh(_mm_set_pd(w, z))
607  {
608  }
609 
610  inline ndBigVector(ndInt32 ix, ndInt32 iy, ndInt32 iz, ndInt32 iw)
611  :m_ix(ndInt64(ix)), m_iy(ndInt64(iy)), m_iz(ndInt64(iz)), m_iw(ndInt64(iw))
612  {
613  }
614 
615  inline ndBigVector(ndInt64 ix, ndInt64 iy, ndInt64 iz, ndInt64 iw)
616  :m_ix(ix), m_iy(iy), m_iz(iz), m_iw(iw)
617  {
618  }
619 
620  inline ndFloat64 GetX() const
621  {
622  return m_x;
623  }
624 
625  inline ndFloat64 GetY() const
626  {
627  return m_y;
628  }
629 
630  inline ndFloat64 GetZ() const
631  {
632  return m_z;
633  }
634 
635  inline ndFloat64 GetW() const
636  {
637  return m_w;
638  }
639 
640  inline void SetX(ndFloat64 x)
641  {
642  m_x = x;
643  }
644 
645  inline void SetY(ndFloat64 x)
646  {
647  m_y = x;
648  }
649 
650  inline void SetZ(ndFloat64 x)
651  {
652  m_z = x;
653  }
654 
655  inline void SetW(ndFloat64 x)
656  {
657  m_w = x;
658  }
659 
660  inline ndFloat64 GetScalar() const
661  {
662  //return m_x;
663  return _mm_cvtsd_f64(m_typeLow);
664  }
665 
666  inline ndFloat64& operator[] (ndInt32 i)
667  {
668  ndAssert(i < 4);
669  ndAssert(i >= 0);
670  return m_f[i];
671  }
672 
673  inline const ndFloat64& operator[] (ndInt32 i) const
674  {
675  ndAssert(i < 4);
676  ndAssert(i >= 0);
677  return m_f[i];
678  }
679 
680  inline ndBigVector operator+ (const ndBigVector& A) const
681  {
682  return ndBigVector(_mm_add_pd(m_typeLow, A.m_typeLow), _mm_add_pd(m_typeHigh, A.m_typeHigh));
683  }
684 
685  inline ndBigVector operator- (const ndBigVector& A) const
686  {
687  return ndBigVector(_mm_sub_pd(m_typeLow, A.m_typeLow), _mm_sub_pd(m_typeHigh, A.m_typeHigh));
688  }
689 
690  inline ndBigVector operator* (const ndBigVector& A) const
691  {
692  return ndBigVector(_mm_mul_pd(m_typeLow, A.m_typeLow), _mm_mul_pd(m_typeHigh, A.m_typeHigh));
693  }
694 
695  inline ndBigVector& operator+= (const ndBigVector& A)
696  {
697  m_typeLow = _mm_add_pd(m_typeLow, A.m_typeLow);
698  m_typeHigh = _mm_add_pd(m_typeHigh, A.m_typeHigh);
699  return *this;
700  }
701 
702  inline ndBigVector& operator-= (const ndBigVector& A)
703  {
704  m_typeLow = _mm_sub_pd(m_typeLow, A.m_typeLow);
705  m_typeHigh = _mm_sub_pd(m_typeHigh, A.m_typeHigh);
706  return *this;
707  }
708 
709  inline ndBigVector& operator*= (const ndBigVector& A)
710  {
711  m_typeLow = _mm_mul_pd(m_typeLow, A.m_typeLow);
712  m_typeHigh = _mm_mul_pd(m_typeHigh, A.m_typeHigh);
713  return *this;
714  }
715 
716  inline ndBigVector MulAdd(const ndBigVector& A, const ndBigVector& B) const
717  {
718  return *this + A * B;
719  }
720 
721  inline ndBigVector MulSub(const ndBigVector& A, const ndBigVector& B) const
722  {
723  return *this - A * B;
724  }
725 
726  inline ndBigVector AddHorizontal() const
727  {
728  __m128d tmp0(_mm_add_pd(m_typeHigh, m_typeLow));
729  #ifdef D_USE_SSE3
730  __m128d tmp1(_mm_hadd_pd(tmp0, tmp0));
731  #else
732  __m128d tmp1(_mm_add_pd(tmp0, _mm_shuffle_pd(tmp0, tmp0, PERMUT_MASK_DOUBLE(0, 1))));
733  #endif
734  return ndBigVector(tmp1, tmp1);
735  }
736 
737  inline ndBigVector BroadcastX() const
738  {
739  return ndBigVector(m_x);
740  }
741 
742  inline ndBigVector BroadcastY() const
743  {
744  return ndBigVector(m_y);
745  }
746 
747  inline ndBigVector BroadcastZ() const
748  {
749  return ndBigVector(m_z);
750  }
751 
752  inline ndBigVector BroadcastW() const
753  {
754  return ndBigVector(m_w);
755  }
756 
757  inline ndBigVector Scale(ndFloat64 s) const
758  {
759  __m128d tmp0(_mm_set1_pd(s));
760  return ndBigVector(_mm_mul_pd(m_typeLow, tmp0), _mm_mul_pd(m_typeHigh, tmp0));
761  }
762 
763  inline ndBigVector Abs() const
764  {
765  return ndBigVector(_mm_and_pd(m_typeLow, m_signMask.m_typeLow), _mm_and_pd(m_typeHigh, m_signMask.m_typeLow));
766  }
767 
768  inline ndBigVector Reciproc() const
769  {
770  return ndBigVector(_mm_div_pd(m_one.m_typeLow, m_typeLow), _mm_div_pd(m_one.m_typeHigh, m_typeHigh));
771  }
772 
773  inline ndBigVector Sqrt() const
774  {
775  return ndBigVector(_mm_sqrt_pd(m_typeLow), _mm_sqrt_pd(m_typeHigh));
776  }
777 
778  inline ndBigVector InvSqrt() const
779  {
780  return Sqrt().Reciproc();
781  }
782 
783  inline ndBigVector InvMagSqrt() const
784  {
785  return DotProduct(*this).InvSqrt();
786  }
787 
788  inline ndBigVector Normalize() const
789  {
790  ndFloat64 mag2 = DotProduct(*this).GetScalar();
791  return Scale(ndFloat64 (1.0f) / sqrt (mag2));
792  }
793 
794  inline ndBigVector GetMax() const
795  {
796  __m128d tmp(_mm_max_pd(m_typeLow, m_typeHigh));
797  tmp = _mm_max_pd(tmp, _mm_shuffle_pd(tmp, tmp, PERMUT_MASK_DOUBLE(0, 1)));
798  return ndBigVector(tmp, tmp);
799  }
800 
801  inline ndBigVector GetMax(const ndBigVector& data) const
802  {
803  return ndBigVector(_mm_max_pd(m_typeLow, data.m_typeLow), _mm_max_pd(m_typeHigh, data.m_typeHigh));
804  }
805 
806  inline ndBigVector GetMin(const ndBigVector& data) const
807  {
808  return ndBigVector(_mm_min_pd(m_typeLow, data.m_typeLow), _mm_min_pd(m_typeHigh, data.m_typeHigh));
809  }
810 
811  inline ndBigVector GetInt() const
812  {
813  ndBigVector temp(Floor());
814  ndInt64 x = _mm_cvtsd_si32(temp.m_typeLow);
815  ndInt64 y = _mm_cvtsd_si32(_mm_shuffle_pd(temp.m_typeLow, temp.m_typeLow, PERMUT_MASK_DOUBLE(1, 1)));
816  ndInt64 z = _mm_cvtsd_si32(temp.m_typeHigh);
817  ndInt64 w = _mm_cvtsd_si32(_mm_shuffle_pd(temp.m_typeHigh, temp.m_typeHigh, PERMUT_MASK_DOUBLE(1, 1)));
818  return ndBigVector(_mm_set_epi64x(y, x), _mm_set_epi64x(w, z));
819  }
820 
821  // relational operators
822  inline ndBigVector operator> (const ndBigVector& data) const
823  {
824  return ndBigVector(_mm_cmpgt_pd(m_typeLow, data.m_typeLow), _mm_cmpgt_pd(m_typeHigh, data.m_typeHigh));
825  }
826 
827  inline ndBigVector operator== (const ndBigVector& data) const
828  {
829  return ndBigVector(_mm_cmpeq_pd(m_typeLow, data.m_typeLow), _mm_cmpeq_pd(m_typeHigh, data.m_typeHigh));
830  }
831 
832  inline ndBigVector operator< (const ndBigVector& data) const
833  {
834  return ndBigVector(_mm_cmplt_pd(m_typeLow, data.m_typeLow), _mm_cmplt_pd(m_typeHigh, data.m_typeHigh));
835  }
836 
837  inline ndBigVector operator>= (const ndBigVector& data) const
838  {
839  return ndBigVector(_mm_cmpge_pd(m_typeLow, data.m_typeLow), _mm_cmpge_pd(m_typeHigh, data.m_typeHigh));
840  }
841 
842  inline ndBigVector operator<= (const ndBigVector& data) const
843  {
844  return ndBigVector(_mm_cmple_pd(m_typeLow, data.m_typeLow), _mm_cmple_pd(m_typeHigh, data.m_typeHigh));
845  }
846 
847  // logical operations
848  inline ndBigVector operator& (const ndBigVector& data) const
849  {
850  return ndBigVector(_mm_and_pd(m_typeLow, data.m_typeLow), _mm_and_pd(m_typeHigh, data.m_typeHigh));
851  }
852 
853  inline ndBigVector operator| (const ndBigVector& data) const
854  {
855  return ndBigVector(_mm_or_pd(m_typeLow, data.m_typeLow), _mm_or_pd(m_typeHigh, data.m_typeHigh));
856  }
857 
858  inline ndBigVector operator^ (const ndBigVector& data) const
859  {
860  return ndBigVector(_mm_xor_pd(m_typeLow, data.m_typeLow), _mm_xor_pd(m_typeHigh, data.m_typeHigh));
861  }
862 
863  inline ndBigVector AndNot(const ndBigVector& data) const
864  {
865  return ndBigVector(_mm_andnot_pd(data.m_typeLow, m_typeLow), _mm_andnot_pd(data.m_typeHigh, m_typeHigh));
866  }
867 
868  inline ndBigVector Select(const ndBigVector& data, const ndBigVector& mask) const
869  {
870  // (((b ^ a) & mask)^a)
871  return ndBigVector(_mm_xor_pd(m_typeLow, _mm_and_pd(mask.m_typeLow, _mm_xor_pd(m_typeLow, data.m_typeLow))),
872  _mm_xor_pd(m_typeHigh, _mm_and_pd(mask.m_typeHigh, _mm_xor_pd(m_typeHigh, data.m_typeHigh))));
873  }
874 
875  inline ndBigVector ShiftRight() const
876  {
877  //return ndBigVector (m_w, m_x, m_y, m_z);
878  return ndBigVector(_mm_shuffle_pd(m_typeHigh, m_typeLow, PERMUT_MASK_DOUBLE(0, 1)), _mm_shuffle_pd(m_typeLow, m_typeHigh, PERMUT_MASK_DOUBLE(0, 1)));
879  }
880 
881  inline ndBigVector ShiftTripleRight() const
882  {
883  return ndBigVector(_mm_shuffle_pd(m_typeHigh, m_typeLow, PERMUT_MASK_DOUBLE(0, 0)), _mm_shuffle_pd(m_typeLow, m_typeHigh, PERMUT_MASK_DOUBLE(1, 1)));
884  }
885 
886  inline ndBigVector ShiftTripleLeft() const
887  {
888  return ndBigVector(_mm_shuffle_pd(m_typeLow, m_typeHigh, PERMUT_MASK_DOUBLE(0, 1)), _mm_shuffle_pd(m_typeLow, m_typeHigh, PERMUT_MASK_DOUBLE(1, 0)));
889  }
890 
891  inline ndBigVector ShiftRightLogical(ndInt32 bits) const
892  {
893  return ndBigVector(_mm_srli_epi64(m_typeIntLow, bits), _mm_srli_epi64(m_typeIntHigh, bits));
894  }
895 
896  inline ndInt32 GetSignMask() const
897  {
898  return _mm_movemask_pd(m_typeLow) | (_mm_movemask_pd(m_typeHigh) << 2);
899  }
900 
901  inline ndBigVector Floor() const
902  {
903  return ndBigVector(floor(m_x), floor(m_y), floor(m_z), floor(m_w));
904  }
905 
906  inline ndBigVector TestZero() const
907  {
908  return m_negOne & (*this == m_zero);
909  }
910 
911  inline static void Transpose4x4(ndBigVector& dst0, ndBigVector& dst1, ndBigVector& dst2, ndBigVector& dst3,
912  const ndBigVector& src0, const ndBigVector& src1, const ndBigVector& src2, const ndBigVector& src3)
913  {
914  ndBigVector tmp0(src0);
915  ndBigVector tmp1(src1);
916  ndBigVector tmp2(src2);
917  ndBigVector tmp3(src3);
918 
919  dst0 = ndBigVector(tmp0.m_x, tmp1.m_x, tmp2.m_x, tmp3.m_x);
920  dst1 = ndBigVector(tmp0.m_y, tmp1.m_y, tmp2.m_y, tmp3.m_y);
921  dst2 = ndBigVector(tmp0.m_z, tmp1.m_z, tmp2.m_z, tmp3.m_z);
922  dst3 = ndBigVector(tmp0.m_w, tmp1.m_w, tmp2.m_w, tmp3.m_w);
923  }
924 
925  // return dot 4d dot product
926  inline ndBigVector DotProduct(const ndBigVector &A) const
927  {
928  //const ndBigVector tmp(_mm_mul_pd(m_typeLow, A.m_typeLow), _mm_mul_pd(m_typeHigh, A.m_typeHigh));
929  //return tmp.AddHorizontal();
930  return (*this * A).AddHorizontal();
931  }
932 
933  // return 3d cross product
934  inline ndBigVector CrossProduct(const ndBigVector& B) const
935  {
936  return ndBigVector(m_y * B.m_z - m_z * B.m_y, m_z * B.m_x - m_x * B.m_z, m_x * B.m_y - m_y * B.m_x, m_w);
937  }
938 
939  // return 4d cross product
940  inline ndBigVector CrossProduct(const ndBigVector& A, const ndBigVector& B) const
941  {
942  ndFloat64 cofactor[3][3];
943  ndFloat64 array[4][4];
944 
945  const ndBigVector& me = *this;
946  for (ndInt32 i = 0; i < 4; ++i)
947  {
948  array[0][i] = me[i];
949  array[1][i] = A[i];
950  array[2][i] = B[i];
951  array[3][i] = ndFloat64(1.0f);
952  }
953 
954  ndBigVector normal;
955  ndFloat64 sign = ndFloat64(-1.0f);
956  for (ndInt32 i = 0; i < 4; ++i)
957  {
958  for (ndInt32 j = 0; j < 3; ++j)
959  {
960  ndInt32 k0 = 0;
961  for (ndInt32 k = 0; k < 4; ++k)
962  {
963  if (k != i)
964  {
965  cofactor[j][k0] = array[j][k];
966  k0++;
967  }
968  }
969  }
970  ndFloat64 x = cofactor[0][0] * (cofactor[1][1] * cofactor[2][2] - cofactor[1][2] * cofactor[2][1]);
971  ndFloat64 y = cofactor[0][1] * (cofactor[1][2] * cofactor[2][0] - cofactor[1][0] * cofactor[2][2]);
972  ndFloat64 z = cofactor[0][2] * (cofactor[1][0] * cofactor[2][1] - cofactor[1][1] * cofactor[2][0]);
973  ndFloat64 det = x + y + z;
974 
975  normal[i] = sign * det;
976  sign *= ndFloat64(-1.0f);
977  }
978 
979  return normal;
980  }
981 
982  union
983  {
984  ndFloat64 m_f[4];
985  ndInt64 m_i[4];
986  struct
987  {
988  __m128d m_typeLow;
989  __m128d m_typeHigh;
990  };
991  struct
992  {
993  __m128i m_typeIntLow;
994  __m128i m_typeIntHigh;
995  };
996  struct
997  {
998  ndFloat64 m_x;
999  ndFloat64 m_y;
1000  ndFloat64 m_z;
1001  ndFloat64 m_w;
1002  };
1003  struct
1004  {
1005  ndInt64 m_ix;
1006  ndInt64 m_iy;
1007  ndInt64 m_iz;
1008  ndInt64 m_iw;
1009  };
1010  };
1011 
1012  D_CORE_API static ndBigVector m_zero;
1013  D_CORE_API static ndBigVector m_one;
1014  D_CORE_API static ndBigVector m_wOne;
1015  D_CORE_API static ndBigVector m_two;
1016  D_CORE_API static ndBigVector m_half;
1017  D_CORE_API static ndBigVector m_three;
1018  D_CORE_API static ndBigVector m_negOne;
1019  D_CORE_API static ndBigVector m_xMask;
1020  D_CORE_API static ndBigVector m_yMask;
1021  D_CORE_API static ndBigVector m_zMask;
1022  D_CORE_API static ndBigVector m_wMask;
1023  D_CORE_API static ndBigVector m_xyzwMask;
1024  D_CORE_API static ndBigVector m_epsilon;
1025  D_CORE_API static ndBigVector m_signMask;
1026  D_CORE_API static ndBigVector m_triplexMask;
1027 } D_GCC_NEWTON_ALIGN_32;
1028 
1029 #endif
1030 #endif
// Cross-reference (from documentation extraction): the ARM NEON counterparts
// of ndVector and ndBigVector are declared in ndVectorArmNeon.h.