16#if defined(JPH_USE_AVX)
17 mValue = _mm256_cvtps_pd(inRHS.mValue);
18#elif defined(JPH_USE_SSE)
19 mValue.mLow = _mm_cvtps_pd(inRHS.mValue);
20 mValue.mHigh = _mm_cvtps_pd(_mm_shuffle_ps(inRHS.mValue, inRHS.mValue, _MM_SHUFFLE(2, 2, 2, 2)));
21#elif defined(JPH_USE_NEON)
22 mValue.val[0] = vcvt_f64_f32(vget_low_f32(inRHS.mValue));
23 mValue.val[1] = vcvt_high_f64_f32(inRHS.mValue);
25 mF64[0] = (double)inRHS.GetX();
26 mF64[1] = (double)inRHS.GetY();
27 mF64[2] = (double)inRHS.GetZ();
28 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
41#if defined(JPH_USE_AVX)
42 mValue = _mm256_set_pd(inZ, inZ, inY, inX);
43#elif defined(JPH_USE_SSE)
44 mValue.mLow = _mm_set_pd(inY, inX);
45 mValue.mHigh = _mm_set1_pd(inZ);
46#elif defined(JPH_USE_NEON)
48 mValue.val[1] = vdupq_n_f64(inZ);
53 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
61#if defined(JPH_USE_AVX)
62 Type x = _mm256_castpd128_pd256(_mm_load_sd(&inV.
x));
63 Type y = _mm256_castpd128_pd256(_mm_load_sd(&inV.
y));
64 Type z = _mm256_broadcast_sd(&inV.
z);
65 Type xy = _mm256_unpacklo_pd(x, y);
66 mValue = _mm256_blend_pd(xy, z, 0b1100);
67#elif defined(JPH_USE_SSE)
68 mValue.mLow = _mm_loadu_pd(&inV.
x);
69 mValue.mHigh = _mm_set1_pd(inV.
z);
70#elif defined(JPH_USE_NEON)
71 mValue.val[0] = vld1q_f64(&inV.
x);
72 mValue.val[1] = vdupq_n_f64(inV.
z);
77 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
85#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
94#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
95 #if defined(JPH_USE_AVX)
96 return _mm256_shuffle_pd(inValue, inValue, 2);
97 #elif defined(JPH_USE_SSE)
99 value.mLow = inValue.mLow;
100 value.mHigh = _mm_shuffle_pd(inValue.mHigh, inValue.mHigh, 0);
102 #elif defined(JPH_USE_NEON)
104 value.val[0] = inValue.val[0];
105 value.val[1] = vdupq_laneq_f64(inValue.val[1], 0);
109 value.mData[0] = inValue.mData[0];
110 value.mData[1] = inValue.mData[1];
111 value.mData[2] = inValue.mData[2];
112 value.mData[3] = inValue.mData[2];
122#if defined(JPH_USE_AVX)
123 return _mm256_setzero_pd();
124#elif defined(JPH_USE_SSE)
125 __m128d zero = _mm_setzero_pd();
126 return DVec3({ zero, zero });
127#elif defined(JPH_USE_NEON)
128 float64x2_t zero = vdupq_n_f64(0.0);
129 return DVec3({ zero, zero });
131 return DVec3(0, 0, 0);
137#if defined(JPH_USE_AVX)
138 return _mm256_set1_pd(inV);
139#elif defined(JPH_USE_SSE)
140 __m128d value = _mm_set1_pd(inV);
141 return DVec3({ value, value });
142#elif defined(JPH_USE_NEON)
143 float64x2_t value = vdupq_n_f64(inV);
144 return DVec3({ value, value });
146 return DVec3(inV, inV, inV);
157 return sReplicate(numeric_limits<double>::quiet_NaN());
162#if defined(JPH_USE_AVX)
163 Type v = _mm256_loadu_pd(&inV.
x);
164#elif defined(JPH_USE_SSE)
166 v.mLow = _mm_loadu_pd(&inV.
x);
167 v.mHigh = _mm_set1_pd(inV.
z);
168#elif defined(JPH_USE_NEON)
169 Type v = vld1q_f64_x2(&inV.
x);
171 Type v = { inV.
x, inV.
y, inV.
z };
185#if defined(JPH_USE_AVX)
186 return _mm256_cvtpd_ps(mValue);
187#elif defined(JPH_USE_SSE)
188 __m128 low = _mm_cvtpd_ps(mValue.mLow);
189 __m128 high = _mm_cvtpd_ps(mValue.mHigh);
190 return _mm_shuffle_ps(low, high, _MM_SHUFFLE(1, 0, 1, 0));
191#elif defined(JPH_USE_NEON)
192 return vcvt_high_f32_f64(vcvtx_f32_f64(mValue.val[0]), mValue.val[1]);
194 return Vec3((
float)GetX(), (
float)GetY(), (
float)GetZ());
200#if defined(JPH_USE_AVX)
202#elif defined(JPH_USE_SSE)
204#elif defined(JPH_USE_NEON)
215#if defined(JPH_USE_AVX)
217#elif defined(JPH_USE_SSE)
219#elif defined(JPH_USE_NEON)
230 return sMax(
sMin(inV, inMax), inMin);
235#if defined(JPH_USE_AVX)
236 return _mm256_cmp_pd(inV1.
mValue, inV2.
mValue, _CMP_EQ_OQ);
237#elif defined(JPH_USE_SSE)
239#elif defined(JPH_USE_NEON)
240 return DVec3({ vreinterpretq_f64_u64(vceqq_f64(inV1.
mValue.val[0], inV2.
mValue.val[0])), vreinterpretq_f64_u64(vceqq_f64(inV1.
mValue.val[1], inV2.
mValue.val[1])) });
250#if defined(JPH_USE_AVX)
251 return _mm256_cmp_pd(inV1.
mValue, inV2.
mValue, _CMP_LT_OQ);
252#elif defined(JPH_USE_SSE)
254#elif defined(JPH_USE_NEON)
255 return DVec3({ vreinterpretq_f64_u64(vcltq_f64(inV1.
mValue.val[0], inV2.
mValue.val[0])), vreinterpretq_f64_u64(vcltq_f64(inV1.
mValue.val[1], inV2.
mValue.val[1])) });
265#if defined(JPH_USE_AVX)
266 return _mm256_cmp_pd(inV1.
mValue, inV2.
mValue, _CMP_LE_OQ);
267#elif defined(JPH_USE_SSE)
269#elif defined(JPH_USE_NEON)
270 return DVec3({ vreinterpretq_f64_u64(vcleq_f64(inV1.
mValue.val[0], inV2.
mValue.val[0])), vreinterpretq_f64_u64(vcleq_f64(inV1.
mValue.val[1], inV2.
mValue.val[1])) });
280#if defined(JPH_USE_AVX)
281 return _mm256_cmp_pd(inV1.
mValue, inV2.
mValue, _CMP_GT_OQ);
282#elif defined(JPH_USE_SSE)
284#elif defined(JPH_USE_NEON)
285 return DVec3({ vreinterpretq_f64_u64(vcgtq_f64(inV1.
mValue.val[0], inV2.
mValue.val[0])), vreinterpretq_f64_u64(vcgtq_f64(inV1.
mValue.val[1], inV2.
mValue.val[1])) });
295#if defined(JPH_USE_AVX)
296 return _mm256_cmp_pd(inV1.
mValue, inV2.
mValue, _CMP_GE_OQ);
297#elif defined(JPH_USE_SSE)
299#elif defined(JPH_USE_NEON)
300 return DVec3({ vreinterpretq_f64_u64(vcgeq_f64(inV1.
mValue.val[0], inV2.
mValue.val[0])), vreinterpretq_f64_u64(vcgeq_f64(inV1.
mValue.val[1], inV2.
mValue.val[1])) });
310#if defined(JPH_USE_AVX)
316#elif defined(JPH_USE_NEON)
319 return inMul1 * inMul2 + inAdd;
325#if defined(JPH_USE_AVX)
327#elif defined(JPH_USE_SSE4_1)
330#elif defined(JPH_USE_NEON)
331 Type v = { vbslq_f64(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_f64(inControl.
mValue.val[0]), 63)), inSet.
mValue.val[0], inNotSet.
mValue.val[0]),
332 vbslq_f64(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_f64(inControl.
mValue.val[1]), 63)), inSet.
mValue.val[1], inNotSet.
mValue.val[1]) };
336 for (
int i = 0; i < 3; i++)
338#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
347#if defined(JPH_USE_AVX)
349#elif defined(JPH_USE_SSE)
351#elif defined(JPH_USE_NEON)
352 return DVec3({ vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(inV1.
mValue.val[0]), vreinterpretq_u64_f64(inV2.
mValue.val[0]))),
353 vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(inV1.
mValue.val[1]), vreinterpretq_u64_f64(inV2.
mValue.val[1]))) });
363#if defined(JPH_USE_AVX)
365#elif defined(JPH_USE_SSE)
367#elif defined(JPH_USE_NEON)
368 return DVec3({ vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(inV1.
mValue.val[0]), vreinterpretq_u64_f64(inV2.
mValue.val[0]))),
369 vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(inV1.
mValue.val[1]), vreinterpretq_u64_f64(inV2.
mValue.val[1]))) });
379#if defined(JPH_USE_AVX)
381#elif defined(JPH_USE_SSE)
383#elif defined(JPH_USE_NEON)
384 return DVec3({ vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(inV1.
mValue.val[0]), vreinterpretq_u64_f64(inV2.
mValue.val[0]))),
385 vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(inV1.
mValue.val[1]), vreinterpretq_u64_f64(inV2.
mValue.val[1]))) });
395#if defined(JPH_USE_AVX)
396 return _mm256_movemask_pd(
mValue) & 0x7;
397#elif defined(JPH_USE_SSE)
398 return (_mm_movemask_pd(
mValue.mLow) + (_mm_movemask_pd(
mValue.mHigh) << 2)) & 0x7;
421 return (inV2 - *
this).LengthSq() <= inMaxDistSq;
431#if defined(JPH_USE_AVX)
433#elif defined(JPH_USE_SSE)
435#elif defined(JPH_USE_NEON)
444#if defined(JPH_USE_AVX)
445 return _mm256_mul_pd(
mValue, _mm256_set1_pd(inV2));
446#elif defined(JPH_USE_SSE)
447 __m128d v = _mm_set1_pd(inV2);
449#elif defined(JPH_USE_NEON)
450 return DVec3({ vmulq_n_f64(
mValue.val[0], inV2), vmulq_n_f64(
mValue.val[1], inV2) });
458#if defined(JPH_USE_AVX)
459 return _mm256_mul_pd(_mm256_set1_pd(inV1), inV2.
mValue);
460#elif defined(JPH_USE_SSE)
461 __m128d v = _mm_set1_pd(inV1);
462 return DVec3({ _mm_mul_pd(v, inV2.
mValue.mLow), _mm_mul_pd(v, inV2.
mValue.mHigh) });
463#elif defined(JPH_USE_NEON)
464 return DVec3({ vmulq_n_f64(inV2.
mValue.val[0], inV1), vmulq_n_f64(inV2.
mValue.val[1], inV1) });
472#if defined(JPH_USE_AVX)
473 return _mm256_div_pd(
mValue, _mm256_set1_pd(inV2));
474#elif defined(JPH_USE_SSE)
475 __m128d v = _mm_set1_pd(inV2);
477#elif defined(JPH_USE_NEON)
478 float64x2_t v = vdupq_n_f64(inV2);
487#if defined(JPH_USE_AVX)
489#elif defined(JPH_USE_SSE)
490 __m128d v = _mm_set1_pd(inV2);
493#elif defined(JPH_USE_NEON)
497 for (
int i = 0; i < 3; ++i)
499 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
508#if defined(JPH_USE_AVX)
510#elif defined(JPH_USE_SSE)
513#elif defined(JPH_USE_NEON)
517 for (
int i = 0; i < 3; ++i)
519 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
528#if defined(JPH_USE_AVX)
530#elif defined(JPH_USE_SSE)
531 __m128d v = _mm_set1_pd(inV2);
534#elif defined(JPH_USE_NEON)
535 float64x2_t v = vdupq_n_f64(inV2);
539 for (
int i = 0; i < 3; ++i)
541 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
550#if defined(JPH_USE_AVX)
551 return _mm256_add_pd(
mValue, _mm256_cvtps_pd(inV2.
mValue));
552#elif defined(JPH_USE_SSE)
553 return DVec3({ _mm_add_pd(
mValue.mLow, _mm_cvtps_pd(inV2.
mValue)), _mm_add_pd(
mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.
mValue, inV2.
mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
554#elif defined(JPH_USE_NEON)
555 return DVec3({ vaddq_f64(
mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.
mValue))), vaddq_f64(
mValue.val[1], vcvt_high_f64_f32(inV2.
mValue)) });
563#if defined(JPH_USE_AVX)
565#elif defined(JPH_USE_SSE)
567#elif defined(JPH_USE_NEON)
576#if defined(JPH_USE_AVX)
578#elif defined(JPH_USE_SSE)
580 mValue.mHigh = _mm_add_pd(
mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.
mValue, inV2.
mValue, _MM_SHUFFLE(2, 2, 2, 2))));
581#elif defined(JPH_USE_NEON)
585 for (
int i = 0; i < 3; ++i)
587 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
596#if defined(JPH_USE_AVX)
598#elif defined(JPH_USE_SSE)
601#elif defined(JPH_USE_NEON)
605 for (
int i = 0; i < 3; ++i)
607 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
616#if defined(JPH_USE_AVX)
617 return _mm256_sub_pd(_mm256_setzero_pd(),
mValue);
618#elif defined(JPH_USE_SSE)
619 __m128d zero = _mm_setzero_pd();
621#elif defined(JPH_USE_NEON)
622 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
623 float64x2_t zero = vdupq_n_f64(0);
624 return DVec3({ vsubq_f64(zero,
mValue.val[0]), vsubq_f64(zero,
mValue.val[1]) });
629 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
639#if defined(JPH_USE_AVX)
640 return _mm256_sub_pd(
mValue, _mm256_cvtps_pd(inV2.
mValue));
641#elif defined(JPH_USE_SSE)
642 return DVec3({ _mm_sub_pd(
mValue.mLow, _mm_cvtps_pd(inV2.
mValue)), _mm_sub_pd(
mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.
mValue, inV2.
mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
643#elif defined(JPH_USE_NEON)
644 return DVec3({ vsubq_f64(
mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.
mValue))), vsubq_f64(
mValue.val[1], vcvt_high_f64_f32(inV2.
mValue)) });
652#if defined(JPH_USE_AVX)
654#elif defined(JPH_USE_SSE)
656#elif defined(JPH_USE_NEON)
665#if defined(JPH_USE_AVX)
667#elif defined(JPH_USE_SSE)
669 mValue.mHigh = _mm_sub_pd(
mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.
mValue, inV2.
mValue, _MM_SHUFFLE(2, 2, 2, 2))));
670#elif defined(JPH_USE_NEON)
674 for (
int i = 0; i < 3; ++i)
676 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
685#if defined(JPH_USE_AVX)
687#elif defined(JPH_USE_SSE)
690#elif defined(JPH_USE_NEON)
694 for (
int i = 0; i < 3; ++i)
696 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
706#if defined(JPH_USE_AVX)
708#elif defined(JPH_USE_SSE)
710#elif defined(JPH_USE_NEON)
719#if defined(JPH_USE_AVX512)
721#elif defined(JPH_USE_AVX)
722 return _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(),
mValue),
mValue);
723#elif defined(JPH_USE_SSE)
724 __m128d zero = _mm_setzero_pd();
726#elif defined(JPH_USE_NEON)
740#if defined(JPH_USE_AVX2)
741 __m256d t1 = _mm256_permute4x64_pd(inV2.
mValue, _MM_SHUFFLE(0, 0, 2, 1));
742 t1 = _mm256_mul_pd(t1,
mValue);
743 __m256d t2 = _mm256_permute4x64_pd(
mValue, _MM_SHUFFLE(0, 0, 2, 1));
744 t2 = _mm256_mul_pd(t2, inV2.
mValue);
745 __m256d t3 = _mm256_sub_pd(t1, t2);
746 return _mm256_permute4x64_pd(t3, _MM_SHUFFLE(0, 0, 2, 1));
756#if defined(JPH_USE_AVX)
758 __m128d xy = _mm256_castpd256_pd128(mul);
759 __m128d yx = _mm_shuffle_pd(xy, xy, 1);
760 __m128d sum = _mm_add_pd(xy, yx);
761 __m128d zw = _mm256_extractf128_pd(mul, 1);
762 sum = _mm_add_pd(sum, zw);
763 return _mm_cvtsd_f64(sum);
764#elif defined(JPH_USE_SSE)
766 __m128d yx = _mm_shuffle_pd(xy, xy, 1);
767 __m128d sum = _mm_add_pd(xy, yx);
768 __m128d z = _mm_mul_sd(
mValue.mHigh, inV2.
mValue.mHigh);
769 sum = _mm_add_pd(sum, z);
770 return _mm_cvtsd_f64(sum);
771#elif defined(JPH_USE_NEON)
772 float64x2_t mul_low = vmulq_f64(
mValue.val[0], inV2.
mValue.val[0]);
773 float64x2_t mul_high = vmulq_f64(
mValue.val[1], inV2.
mValue.val[1]);
774 return vaddvq_f64(mul_low) + vgetq_lane_f64(mul_high, 0);
777 for (
int i = 0; i < 3; i++)
790#if defined(JPH_USE_AVX)
791 return _mm256_sqrt_pd(
mValue);
792#elif defined(JPH_USE_SSE)
794#elif defined(JPH_USE_NEON)
803 return sqrt(
Dot(*
this));
813 return abs(
LengthSq() - 1.0) <= inTolerance;
818#if defined(JPH_USE_AVX512)
819 return (_mm256_fpclass_pd_mask(
mValue, 0b10000001) & 0x7) != 0;
820#elif defined(JPH_USE_AVX)
821 return (_mm256_movemask_pd(_mm256_cmp_pd(
mValue,
mValue, _CMP_UNORD_Q)) & 0x7) != 0;
822#elif defined(JPH_USE_SSE)
823 return ((_mm_movemask_pd(_mm_cmpunord_pd(
mValue.mLow,
mValue.mLow)) + (_mm_movemask_pd(_mm_cmpunord_pd(
mValue.mHigh,
mValue.mHigh)) << 2)) & 0x7) != 0;
825 return isnan(
mF64[0]) || isnan(
mF64[1]) || isnan(
mF64[2]);
831#if defined(JPH_USE_AVX512)
832 return _mm256_fixupimm_pd(
mValue,
mValue, _mm256_set1_epi32(0xA9A90A00), 0);
833#elif defined(JPH_USE_AVX)
834 __m256d minus_one = _mm256_set1_pd(-1.0);
835 __m256d one = _mm256_set1_pd(1.0);
836 return _mm256_or_pd(_mm256_and_pd(
mValue, minus_one), one);
837#elif defined(JPH_USE_SSE)
838 __m128d minus_one = _mm_set1_pd(-1.0);
839 __m128d one = _mm_set1_pd(1.0);
840 return DVec3({ _mm_or_pd(_mm_and_pd(
mValue.mLow, minus_one), one), _mm_or_pd(_mm_and_pd(
mValue.mHigh, minus_one), one) });
841#elif defined(JPH_USE_NEON)
842 uint64x2_t minus_one = vreinterpretq_u64_f64(vdupq_n_f64(-1.0f));
843 uint64x2_t one = vreinterpretq_u64_f64(vdupq_n_f64(1.0f));
844 return DVec3({ vreinterpretq_f64_u64(vorrq_u64(vandq_u64(vreinterpretq_u64_f64(
mValue.val[0]), minus_one), one)),
845 vreinterpretq_f64_u64(vorrq_u64(vandq_u64(vreinterpretq_u64_f64(
mValue.val[1]), minus_one), one)) });
847 return DVec3(std::signbit(
mF64[0])? -1.0 : 1.0,
848 std::signbit(
mF64[1])? -1.0 : 1.0,
849 std::signbit(
mF64[2])? -1.0 : 1.0);
856 constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;
858#if defined(JPH_USE_AVX)
859 return _mm256_and_pd(
mValue, _mm256_castsi256_pd(_mm256_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss))));
860#elif defined(JPH_USE_SSE)
861 __m128d mask = _mm_castsi128_pd(_mm_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss)));
863#elif defined(JPH_USE_NEON)
864 uint64x2_t mask = vdupq_n_u64(~cDoubleToFloatMantissaLoss);
865 return DVec3({ vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(
mValue.val[0]), mask)),
866 vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(
mValue.val[1]), mask)) });
872 return DVec3(x, y, z);
879 constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;
881#if defined(JPH_USE_AVX512)
882 __m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
883 __mmask8 is_zero = _mm256_testn_epi64_mask(_mm256_castpd_si256(
mValue), mantissa_loss);
884 __m256d value_or_mantissa_loss = _mm256_or_pd(
mValue, _mm256_castsi256_pd(mantissa_loss));
885 return _mm256_mask_blend_pd(is_zero, value_or_mantissa_loss,
mValue);
886#elif defined(JPH_USE_AVX)
887 __m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
888 __m256d value_and_mantissa_loss = _mm256_and_pd(
mValue, _mm256_castsi256_pd(mantissa_loss));
889 __m256d is_zero = _mm256_cmp_pd(value_and_mantissa_loss, _mm256_setzero_pd(), _CMP_EQ_OQ);
890 __m256d value_or_mantissa_loss = _mm256_or_pd(
mValue, _mm256_castsi256_pd(mantissa_loss));
891 return _mm256_blendv_pd(value_or_mantissa_loss,
mValue, is_zero);
892#elif defined(JPH_USE_SSE4_1)
893 __m128i mantissa_loss = _mm_set1_epi64x(cDoubleToFloatMantissaLoss);
894 __m128d zero = _mm_setzero_pd();
895 __m128d value_and_mantissa_loss_low = _mm_and_pd(
mValue.mLow, _mm_castsi128_pd(mantissa_loss));
896 __m128d is_zero_low = _mm_cmpeq_pd(value_and_mantissa_loss_low, zero);
897 __m128d value_or_mantissa_loss_low = _mm_or_pd(
mValue.mLow, _mm_castsi128_pd(mantissa_loss));
898 __m128d value_and_mantissa_loss_high = _mm_and_pd(
mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
899 __m128d is_zero_high = _mm_cmpeq_pd(value_and_mantissa_loss_high, zero);
900 __m128d value_or_mantissa_loss_high = _mm_or_pd(
mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
901 return DVec3({ _mm_blendv_pd(value_or_mantissa_loss_low,
mValue.mLow, is_zero_low), _mm_blendv_pd(value_or_mantissa_loss_high,
mValue.mHigh, is_zero_high) });
902#elif defined(JPH_USE_NEON)
903 uint64x2_t mantissa_loss = vdupq_n_u64(cDoubleToFloatMantissaLoss);
904 float64x2_t zero = vdupq_n_f64(0.0);
905 float64x2_t value_and_mantissa_loss_low = vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(
mValue.val[0]), mantissa_loss));
906 uint64x2_t is_zero_low = vceqq_f64(value_and_mantissa_loss_low, zero);
907 float64x2_t value_or_mantissa_loss_low = vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(
mValue.val[0]), mantissa_loss));
908 float64x2_t value_and_mantissa_loss_high = vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(
mValue.val[1]), mantissa_loss));
909 float64x2_t value_low = vbslq_f64(is_zero_low,
mValue.val[0], value_or_mantissa_loss_low);
910 uint64x2_t is_zero_high = vceqq_f64(value_and_mantissa_loss_high, zero);
911 float64x2_t value_or_mantissa_loss_high = vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(
mValue.val[1]), mantissa_loss));
912 float64x2_t value_high = vbslq_f64(is_zero_high,
mValue.val[1], value_or_mantissa_loss_high);
913 return DVec3({ value_low, value_high });
919 double x =
BitCast<double>((ux & cDoubleToFloatMantissaLoss) == 0? ux : (ux | cDoubleToFloatMantissaLoss));
920 double y =
BitCast<double>((uy & cDoubleToFloatMantissaLoss) == 0? uy : (uy | cDoubleToFloatMantissaLoss));
921 double z =
BitCast<double>((uz & cDoubleToFloatMantissaLoss) == 0? uz : (uz | cDoubleToFloatMantissaLoss));
923 return DVec3(x, y, z);
std::uint64_t uint64
Definition Core.h:485
#define JPH_NAMESPACE_END
Definition Core.h:414
#define JPH_NAMESPACE_BEGIN
Definition Core.h:408
DVec3 operator*(double inV1, DVec3Arg inV2)
Definition DVec3.inl:456
#define JPH_MAKE_HASHABLE(type,...)
Definition HashCombine.h:223
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
JPH_INLINE To BitCast(const From &inValue)
Definition Math.h:192
static JPH_INLINE DVec3 sLess(DVec3Arg inV1, DVec3Arg inV2)
Less than (component wise)
Definition DVec3.inl:248
double mF64[4]
Definition DVec3.h:283
static JPH_INLINE DVec3 sMax(DVec3Arg inV1, DVec3Arg inV2)
Return the maximum of each of the components.
Definition DVec3.inl:213
JPH_INLINE bool TestAnyTrue() const
Test if any of the components are true (true is when highest bit of component is set)
Definition DVec3.inl:404
JPH_INLINE Vec3 ToVec3RoundDown() const
Convert to float vector 3 rounding down.
Definition DVec3.inl:927
static JPH_INLINE DVec3 sClamp(DVec3Arg inV, DVec3Arg inMin, DVec3Arg inMax)
Clamp a vector between min and max (component wise)
Definition DVec3.inl:228
static JPH_INLINE DVec3 sMin(DVec3Arg inV1, DVec3Arg inV2)
Return the minimum value of each of the components.
Definition DVec3.inl:198
JPH_INLINE int GetTrues() const
Store if X is true in bit 0, Y in bit 1, Z in bit 2 and W in bit 3 (true is when highest bit of component is set)
Definition DVec3.inl:393
static JPH_INLINE DVec3 sAnd(DVec3Arg inV1, DVec3Arg inV2)
Logical and (component wise)
Definition DVec3.inl:377
JPH_INLINE DVec3 & operator*=(double inV2)
Multiply vector with double.
Definition DVec3.inl:485
JPH_INLINE DVec3 Abs() const
Return the absolute value of each of the components.
Definition DVec3.inl:717
static JPH_INLINE DVec3 sFusedMultiplyAdd(DVec3Arg inMul1, DVec3Arg inMul2, DVec3Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition DVec3.inl:308
static JPH_INLINE Type sFixW(TypeArg inValue)
Internal helper function that ensures that the Z component is replicated to the W component to prevent divisions by zero.
Definition DVec3.inl:92
JPH_INLINE DVec3 Sqrt() const
Component wise square root.
Definition DVec3.inl:788
JPH_INLINE DVec3 GetSign() const
Get vector that contains the sign of each element (returns 1 if positive, -1 if negative)
Definition DVec3.inl:829
Type mValue
Definition DVec3.h:282
static JPH_INLINE DVec3 sXor(DVec3Arg inV1, DVec3Arg inV2)
Logical xor (component wise)
Definition DVec3.inl:361
static JPH_INLINE DVec3 sOne()
Vector with all ones.
Definition DVec3.inl:150
static JPH_INLINE DVec3 sGreaterOrEqual(DVec3Arg inV1, DVec3Arg inV2)
Greater than or equal (component wise)
Definition DVec3.inl:293
JPH_INLINE DVec3 operator+(Vec3Arg inV2) const
Add two vectors (component wise)
Definition DVec3.inl:548
JPH_INLINE bool IsClose(DVec3Arg inV2, double inMaxDistSq=1.0e-24) const
Test if two vectors are close.
Definition DVec3.inl:419
JPH_INLINE bool IsNormalized(double inTolerance=1.0e-12) const
Test if vector is normalized.
Definition DVec3.inl:811
static JPH_INLINE DVec3 sSelect(DVec3Arg inNotSet, DVec3Arg inSet, DVec3Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit of inControl = 1.
Definition DVec3.inl:323
const Type & TypeArg
Definition DVec3.h:30
static JPH_INLINE DVec3 sNaN()
Vector with all NaN's.
Definition DVec3.inl:155
friend JPH_INLINE DVec3 operator*(double inV1, DVec3Arg inV2)
Multiply vector with double.
Definition DVec3.inl:456
static JPH_INLINE DVec3 sGreater(DVec3Arg inV1, DVec3Arg inV2)
Greater than (component wise)
Definition DVec3.inl:278
JPH_INLINE void StoreDouble3(Double3 *outV) const
Store 3 doubles to memory.
Definition DVec3.inl:176
static JPH_INLINE DVec3 sOr(DVec3Arg inV1, DVec3Arg inV2)
Logical or (component wise)
Definition DVec3.inl:345
static JPH_INLINE DVec3 sZero()
Vector with all zeros.
Definition DVec3.inl:120
JPH_INLINE bool TestAllTrue() const
Test if all components are true (true is when highest bit of component is set)
Definition DVec3.inl:409
JPH_INLINE double Length() const
Length of vector.
Definition DVec3.inl:801
JPH_INLINE DVec3 operator-() const
Negate.
Definition DVec3.inl:614
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition DVec3.inl:816
JPH_INLINE Vec3 ToVec3RoundUp() const
Convert to float vector 3 rounding up.
Definition DVec3.inl:934
static const double cTrue
Representations of true and false for boolean operations.
Definition DVec3.h:277
DVec3()=default
Constructor.
JPH_INLINE void CheckW() const
Internal helper function that checks that W is equal to Z, so e.g. dividing by it should not generate a division by zero.
Definition DVec3.inl:83
JPH_INLINE double LengthSq() const
Squared length of vector.
Definition DVec3.inl:783
JPH_INLINE DVec3 Normalized() const
Normalize vector.
Definition DVec3.inl:806
JPH_INLINE DVec3 operator/(double inV2) const
Divide vector by double.
Definition DVec3.inl:470
JPH_INLINE double Dot(DVec3Arg inV2) const
Dot product.
Definition DVec3.inl:754
static JPH_INLINE DVec3 sReplicate(double inV)
Replicate inV across all components.
Definition DVec3.inl:135
static JPH_INLINE DVec3 sLessOrEqual(DVec3Arg inV1, DVec3Arg inV2)
Less than or equal (component wise)
Definition DVec3.inl:263
JPH_INLINE DVec3 PrepareRoundToInf() const
Prepare to convert to float vector 3 rounding towards positive/negative inf (returns DVec3 that can be converted to a Vec3 to get the rounding)
Definition DVec3.inl:876
JPH_INLINE DVec3 & operator+=(Vec3Arg inV2)
Add two vectors (component wise)
Definition DVec3.inl:574
static JPH_INLINE DVec3 sLoadDouble3Unsafe(const Double3 &inV)
Load 3 doubles from memory (reads 64 bits extra which it doesn't use)
Definition DVec3.inl:160
JPH_INLINE DVec3 & operator/=(double inV2)
Divide vector by double.
Definition DVec3.inl:526
JPH_INLINE DVec3 Cross(DVec3Arg inV2) const
Cross product.
Definition DVec3.inl:738
JPH_INLINE DVec3 & operator-=(Vec3Arg inV2)
Subtract two vectors (component wise)
Definition DVec3.inl:663
JPH_INLINE DVec3 PrepareRoundToZero() const
Prepare to convert to float vector 3 rounding towards zero (returns DVec3 that can be converted to a Vec3 to get the rounding)
Definition DVec3.inl:853
JPH_INLINE DVec3 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition DVec3.inl:733
static JPH_INLINE DVec3 sEquals(DVec3Arg inV1, DVec3Arg inV2)
Equals (component wise)
Definition DVec3.inl:233
struct { double mData[4];} Type
Definition DVec3.h:29
JPH_INLINE bool IsNearZero(double inMaxDistSq=1.0e-24) const
Test if vector is near zero.
Definition DVec3.inl:424
JPH_INLINE bool operator==(DVec3Arg inV2) const
Comparison.
Definition DVec3.inl:414
static const double cFalse
Definition DVec3.h:278
Class that holds 3 doubles. Used as a storage class. Convert to DVec3 for calculations.
Definition Double3.h:13
double z
Definition Double3.h:40
double y
Definition Double3.h:39
double x
Definition Double3.h:38
Type mValue
Definition Vec3.h:289
float mF32[4]
Definition Vec3.h:290