#if defined(JPH_USE_AVX)
	mValue = _mm256_cvtps_pd(inRHS.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_cvtps_pd(inRHS.mValue);
	mValue.mHigh = _mm_cvtps_pd(_mm_shuffle_ps(inRHS.mValue, inRHS.mValue, _MM_SHUFFLE(2, 2, 2, 2)));
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vcvt_f64_f32(vget_low_f32(inRHS.mValue));
	mValue.val[1] = vcvt_high_f64_f32(inRHS.mValue);
#elif defined(JPH_USE_RVV)
	const vfloat32m1_t src = __riscv_vle32_v_f32m1(inRHS.mF32, 3);
	const vfloat64m2_t widened = __riscv_vfwcvt_f_f_v_f64m2(src, 3);
	__riscv_vse64_v_f64m2(mF64, widened, 3);
#else
	mF64[0] = (double)inRHS.GetX();
	mF64[1] = (double)inRHS.GetY();
	mF64[2] = (double)inRHS.GetZ();
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
#if defined(JPH_USE_AVX)
	mValue = _mm256_set_pd(inZ, inZ, inY, inX);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_set_pd(inY, inX);
	mValue.mHigh = _mm_set1_pd(inZ);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vcombine_f64(vcreate_f64(BitCast<uint64>(inX)), vcreate_f64(BitCast<uint64>(inY)));
	mValue.val[1] = vdupq_n_f64(inZ);
#elif defined(JPH_USE_RVV)
	vfloat64m2_t v = __riscv_vfmv_v_f_f64m2(inZ, 4);
	v = __riscv_vfslide1up_vf_f64m2(v, inY, 4);
	v = __riscv_vfslide1up_vf_f64m2(v, inX, 4);
	__riscv_vse64_v_f64m2(mF64, v, 4);
#else
	mF64[0] = inX;
	mF64[1] = inY;
	mF64[2] = inZ;
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
#if defined(JPH_USE_AVX)
	Type x = _mm256_castpd128_pd256(_mm_load_sd(&inV.x));
	Type y = _mm256_castpd128_pd256(_mm_load_sd(&inV.y));
	Type z = _mm256_broadcast_sd(&inV.z);
	Type xy = _mm256_unpacklo_pd(x, y);
	mValue = _mm256_blend_pd(xy, z, 0b1100);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_loadu_pd(&inV.x);
	mValue.mHigh = _mm_set1_pd(inV.z);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vld1q_f64(&inV.x);
	mValue.val[1] = vdupq_n_f64(inV.z);
#elif defined(JPH_USE_RVV)
	vfloat64m2_t v = __riscv_vle64_v_f64m2(&inV.x, 3);
	__riscv_vse64_v_f64m2(mF64, v, 3);
#else
	mF64[0] = inV.x;
	mF64[1] = inV.y;
	mF64[2] = inV.z;
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
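// sFixW: with floating point exceptions enabled, the otherwise unused W lane must hold a valid
// value. Replicating Z into W (the invariant that CheckW() asserts) ensures that full-width
// register operations such as a divide cannot trap on garbage in the fourth lane.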
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	#if defined(JPH_USE_AVX)
		return _mm256_shuffle_pd(inValue, inValue, 2);
	#elif defined(JPH_USE_SSE)
		Type value;
		value.mLow = inValue.mLow;
		value.mHigh = _mm_shuffle_pd(inValue.mHigh, inValue.mHigh, 0);
		return value;
	#elif defined(JPH_USE_NEON)
		Type value;
		value.val[0] = inValue.val[0];
		value.val[1] = vdupq_laneq_f64(inValue.val[1], 0);
		return value;
	#elif defined(JPH_USE_RVV)
		Type value;
		const vfloat64m2_t buffer = __riscv_vle64_v_f64m2(inValue.mData, 3);
		__riscv_vse64_v_f64m2(value.mData, buffer, 3);
		value.mData[3] = value.mData[2];
		return value;
	#else
		Type value;
		value.mData[0] = inValue.mData[0];
		value.mData[1] = inValue.mData[1];
		value.mData[2] = inValue.mData[2];
		value.mData[3] = inValue.mData[2];
		return value;
	#endif
#else
	return inValue;
#endif
#if defined(JPH_USE_AVX)
	return _mm256_setzero_pd();
#elif defined(JPH_USE_SSE)
	__m128d zero = _mm_setzero_pd();
	return DVec3({ zero, zero });
#elif defined(JPH_USE_NEON)
	float64x2_t zero = vdupq_n_f64(0.0);
	return DVec3({ zero, zero });
#elif defined(JPH_USE_RVV)
	DVec3 vec;
	const vfloat64m2_t v = __riscv_vfmv_v_f_f64m2(0.0, 3);
	__riscv_vse64_v_f64m2(vec.mF64, v, 3);
	return vec;
#else
	return DVec3(0, 0, 0);
#endif
#if defined(JPH_USE_AVX)
	return _mm256_set1_pd(inV);
#elif defined(JPH_USE_SSE)
	__m128d value = _mm_set1_pd(inV);
	return DVec3({ value, value });
#elif defined(JPH_USE_NEON)
	float64x2_t value = vdupq_n_f64(inV);
	return DVec3({ value, value });
#elif defined(JPH_USE_RVV)
	DVec3 vec;
	const vfloat64m2_t v = __riscv_vfmv_v_f_f64m2(inV, 3);
	__riscv_vse64_v_f64m2(vec.mF64, v, 3);
	return vec;
#else
	return DVec3(inV, inV, inV);
#endif
	return sReplicate(numeric_limits<double>::quiet_NaN());
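// sLoadDouble3Unsafe: 'unsafe' because the wide loads below read 64 bits past the end of the
// Double3; the caller must guarantee that this extra memory is readable. The bogus fourth lane
// is then patched by sFixW.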
#if defined(JPH_USE_AVX)
	Type v = _mm256_loadu_pd(&inV.x);
#elif defined(JPH_USE_SSE)
	Type v;
	v.mLow = _mm_loadu_pd(&inV.x);
	v.mHigh = _mm_set1_pd(inV.z);
#elif defined(JPH_USE_NEON)
	Type v = vld1q_f64_x2(&inV.x);
#elif defined(JPH_USE_RVV)
	Type v;
	const vfloat64m2_t vec = __riscv_vle64_v_f64m2(&inV.x, 3);
	__riscv_vse64_v_f64m2(v.mData, vec, 3);
#else
	Type v = { inV.x, inV.y, inV.z };
#endif
	return sFixW(v);
#if defined(JPH_USE_AVX)
	return _mm256_cvtpd_ps(mValue);
#elif defined(JPH_USE_SSE)
	__m128 low = _mm_cvtpd_ps(mValue.mLow);
	__m128 high = _mm_cvtpd_ps(mValue.mHigh);
	return _mm_shuffle_ps(low, high, _MM_SHUFFLE(1, 0, 1, 0));
#elif defined(JPH_USE_NEON)
	return vcvt_high_f32_f64(vcvtx_f32_f64(mValue.val[0]), mValue.val[1]);
#elif defined(JPH_USE_RVV)
	Vec3 v;
	const vfloat64m2_t src = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat32m1_t narrowed = __riscv_vfncvt_f_f_w_f32m1(src, 3);
	__riscv_vse32_v_f32m1(v.mF32, narrowed, 3);
	return v;
#else
	return Vec3((float)GetX(), (float)GetY(), (float)GetZ());
#endif
#if defined(JPH_USE_AVX)
	return _mm256_min_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_min_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_min_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vminq_f64(inV1.mValue.val[0], inV2.mValue.val[0]), vminq_f64(inV1.mValue.val[1], inV2.mValue.val[1]) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t min = __riscv_vfmin_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(res.mF64, min, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_max_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_max_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_max_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmaxq_f64(inV1.mValue.val[0], inV2.mValue.val[0]), vmaxq_f64(inV1.mValue.val[1], inV2.mValue.val[1]) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t max = __riscv_vfmax_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(res.mF64, max, 3);
	return res;
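// sClamp composes sMin and sMax; since sMax is applied last, inMin wins when the bounds cross,
// so callers are expected to pass inMin <= inMax per component.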
	return sMax(sMin(inV, inMax), inMin);
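// All comparisons below return a component-wise mask rather than a bool: each lane holds
// cTrue or cFalse (on SSE/NEON these are the all-ones/all-zeros patterns the compare
// instructions produce). Consumers such as GetTrues(), TestAnyTrue() and sSelect() only
// look at the highest (sign) bit of each lane.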
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_EQ_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmpeq_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpeq_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vceqq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vceqq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vbool32_t mask = __riscv_vmfeq_vv_f64m2_b32(v1, v2, 3);
	const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
	const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
	__riscv_vse64_v_f64m2(res.mF64, merged, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_LT_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmplt_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmplt_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vcltq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcltq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vbool32_t mask = __riscv_vmflt_vv_f64m2_b32(v1, v2, 3);
	const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
	const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
	__riscv_vse64_v_f64m2(res.mF64, merged, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_LE_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmple_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmple_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vcleq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcleq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vbool32_t mask = __riscv_vmfle_vv_f64m2_b32(v1, v2, 3);
	const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
	const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
	__riscv_vse64_v_f64m2(res.mF64, merged, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_GT_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmpgt_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpgt_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vcgtq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcgtq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vbool32_t mask = __riscv_vmfgt_vv_f64m2_b32(v1, v2, 3);
	const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
	const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
	__riscv_vse64_v_f64m2(res.mF64, merged, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_GE_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmpge_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpge_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vcgeq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcgeq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vbool32_t mask = __riscv_vmfge_vv_f64m2_b32(v1, v2, 3);
	const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
	const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
	__riscv_vse64_v_f64m2(res.mF64, merged, 3);
	return res;
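// Note on sFusedMultiplyAdd: the RVV vfmacc below computes inAdd + inMul1 * inMul2 with a single
// rounding step, while the scalar fallback rounds the multiply and the add separately, so the two
// can differ in the last ulp.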
#if defined(JPH_USE_AVX)
	#ifdef JPH_USE_FMADD
		return _mm256_fmadd_pd(inMul1.mValue, inMul2.mValue, inAdd.mValue);
	#else
		return _mm256_add_pd(_mm256_mul_pd(inMul1.mValue, inMul2.mValue), inAdd.mValue);
	#endif
#elif defined(JPH_USE_NEON)
	return DVec3({ vmlaq_f64(inAdd.mValue.val[0], inMul1.mValue.val[0], inMul2.mValue.val[0]), vmlaq_f64(inAdd.mValue.val[1], inMul1.mValue.val[1], inMul2.mValue.val[1]) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inMul1.mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inMul2.mF64, 3);
	const vfloat64m2_t rvv_add = __riscv_vle64_v_f64m2(inAdd.mF64, 3);
	const vfloat64m2_t fmadd = __riscv_vfmacc_vv_f64m2(rvv_add, v1, v2, 3);
	__riscv_vse64_v_f64m2(res.mF64, fmadd, 3);
	return res;
#else
	return inMul1 * inMul2 + inAdd;
#endif
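// sSelect is driven purely by the sign (highest) bit of inControl: the NEON path broadcasts that
// bit across all 64 bits of each lane with an arithmetic shift right by 63 so vbslq can blend whole
// lanes, and the RVV path tests the bit directly to build a predicate mask.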
#if defined(JPH_USE_AVX)
	return sFixW(_mm256_blendv_pd(inNotSet.mValue, inSet.mValue, inControl.mValue));
#elif defined(JPH_USE_SSE4_1)
	Type v = { _mm_blendv_pd(inNotSet.mValue.mLow, inSet.mValue.mLow, inControl.mValue.mLow), _mm_blendv_pd(inNotSet.mValue.mHigh, inSet.mValue.mHigh, inControl.mValue.mHigh) };
	return sFixW(v);
#elif defined(JPH_USE_NEON)
	Type v = { vbslq_f64(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_f64(inControl.mValue.val[0]), 63)), inSet.mValue.val[0], inNotSet.mValue.val[0]),
			   vbslq_f64(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_f64(inControl.mValue.val[1]), 63)), inSet.mValue.val[1], inNotSet.mValue.val[1]) };
	return sFixW(v);
#elif defined(JPH_USE_RVV)
	DVec3 masked;
	const vfloat64m2_t control_double = __riscv_vle64_v_f64m2(inControl.mF64, 3);
	const vfloat64m2_t not_set = __riscv_vle64_v_f64m2(inNotSet.mF64, 3);
	const vfloat64m2_t set = __riscv_vle64_v_f64m2(inSet.mF64, 3);
	const vuint64m2_t control = __riscv_vreinterpret_v_f64m2_u64m2(control_double);
	const uint64 sign_bit_mask = 0x8000000000000000u;
	const vuint64m2_t r = __riscv_vand_vx_u64m2(control, sign_bit_mask, 3);
	const vbool32_t rvv_mask = __riscv_vmsne_vx_u64m2_b32(r, 0x0, 3);
	const vfloat64m2_t merged = __riscv_vmerge_vvm_f64m2(not_set, set, rvv_mask, 3);
	__riscv_vse64_v_f64m2(masked.mF64, merged, 3);
	return masked;
#else
	DVec3 result;
	for (int i = 0; i < 3; i++)
		result.mF64[i] = (BitCast<uint64>(inControl.mF64[i]) & 0x8000000000000000u) != 0? inSet.mF64[i] : inNotSet.mF64[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		result.mF64[3] = result.mF64[2];
	#endif
	return result;
#endif
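// The bitwise helpers below operate on the raw representation of the doubles: x86 has or/xor/and
// instructions that act directly on FP registers, while NEON and RVV reinterpret each lane as a
// 64-bit integer, apply the integer operation and reinterpret back.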
#if defined(JPH_USE_AVX)
	return _mm256_or_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_or_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_or_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(inV1.mValue.val[0]), vreinterpretq_u64_f64(inV2.mValue.val[0]))),
				   vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(inV1.mValue.val[1]), vreinterpretq_u64_f64(inV2.mValue.val[1]))) });
#elif defined(JPH_USE_RVV)
	DVec3 or_result;
	const vuint64m2_t v1 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV1.mF64), 3);
	const vuint64m2_t v2 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV2.mF64), 3);
	const vuint64m2_t res = __riscv_vor_vv_u64m2(v1, v2, 3);
	__riscv_vse64_v_u64m2(reinterpret_cast<uint64 *>(or_result.mF64), res, 3);
	return or_result;
#if defined(JPH_USE_AVX)
	return _mm256_xor_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_xor_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_xor_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(inV1.mValue.val[0]), vreinterpretq_u64_f64(inV2.mValue.val[0]))),
				   vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(inV1.mValue.val[1]), vreinterpretq_u64_f64(inV2.mValue.val[1]))) });
#elif defined(JPH_USE_RVV)
	DVec3 xor_result;
	const vuint64m2_t v1 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV1.mF64), 3);
	const vuint64m2_t v2 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV2.mF64), 3);
	const vuint64m2_t res = __riscv_vxor_vv_u64m2(v1, v2, 3);
	__riscv_vse64_v_u64m2(reinterpret_cast<uint64 *>(xor_result.mF64), res, 3);
	return xor_result;
#if defined(JPH_USE_AVX)
	return _mm256_and_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_and_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_and_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(inV1.mValue.val[0]), vreinterpretq_u64_f64(inV2.mValue.val[0]))),
				   vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(inV1.mValue.val[1]), vreinterpretq_u64_f64(inV2.mValue.val[1]))) });
#elif defined(JPH_USE_RVV)
	DVec3 and_result;
	const vuint64m2_t v1 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV1.mF64), 3);
	const vuint64m2_t v2 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV2.mF64), 3);
	const vuint64m2_t res = __riscv_vand_vv_u64m2(v1, v2, 3);
	__riscv_vse64_v_u64m2(reinterpret_cast<uint64 *>(and_result.mF64), res, 3);
	return and_result;
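// movemask packs the sign bit of every double into one integer bit; masking with 0x7 keeps only
// the X, Y and Z bits and discards the W lane.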
#if defined(JPH_USE_AVX)
	return _mm256_movemask_pd(mValue) & 0x7;
#elif defined(JPH_USE_SSE)
	return (_mm_movemask_pd(mValue.mLow) + (_mm_movemask_pd(mValue.mHigh) << 2)) & 0x7;
	return (inV2 - *this).LengthSq() <= inMaxDistSq;
#if defined(JPH_USE_AVX)
	return _mm256_mul_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_mul_pd(mValue.mLow, inV2.mValue.mLow), _mm_mul_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmulq_f64(mValue.val[0], inV2.mValue.val[0]), vmulq_f64(mValue.val[1], inV2.mValue.val[1]) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t mul = __riscv_vfmul_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(res.mF64, mul, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_mul_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	return DVec3({ _mm_mul_pd(mValue.mLow, v), _mm_mul_pd(mValue.mHigh, v) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmulq_n_f64(mValue.val[0], inV2), vmulq_n_f64(mValue.val[1], inV2) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t src = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t mul = __riscv_vfmul_vf_f64m2(src, inV2, 3);
	__riscv_vse64_v_f64m2(res.mF64, mul, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_mul_pd(_mm256_set1_pd(inV1), inV2.mValue);
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV1);
	return DVec3({ _mm_mul_pd(v, inV2.mValue.mLow), _mm_mul_pd(v, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmulq_n_f64(inV2.mValue.val[0], inV1), vmulq_n_f64(inV2.mValue.val[1], inV1) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t mul = __riscv_vfmul_vf_f64m2(v1, inV1, 3);
	__riscv_vse64_v_f64m2(res.mF64, mul, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_div_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	return DVec3({ _mm_div_pd(mValue.mLow, v), _mm_div_pd(mValue.mHigh, v) });
#elif defined(JPH_USE_NEON)
	float64x2_t v = vdupq_n_f64(inV2);
	return DVec3({ vdivq_f64(mValue.val[0], v), vdivq_f64(mValue.val[1], v) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t src = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t div = __riscv_vfdiv_vf_f64m2(src, inV2, 3);
	__riscv_vse64_v_f64m2(res.mF64, div, 3);
	return res;
#if defined(JPH_USE_AVX)
	mValue = _mm256_mul_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	mValue.mLow = _mm_mul_pd(mValue.mLow, v);
	mValue.mHigh = _mm_mul_pd(mValue.mHigh, v);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vmulq_n_f64(mValue.val[0], inV2);
	mValue.val[1] = vmulq_n_f64(mValue.val[1], inV2);
#elif defined(JPH_USE_RVV)
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t res = __riscv_vfmul_vf_f64m2(v1, inV2, 3);
	__riscv_vse64_v_f64m2(mF64, res, 3);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] *= inV2;
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
	return *this;
#if defined(JPH_USE_AVX)
	mValue = _mm256_mul_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_mul_pd(mValue.mLow, inV2.mValue.mLow);
	mValue.mHigh = _mm_mul_pd(mValue.mHigh, inV2.mValue.mHigh);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vmulq_f64(mValue.val[0], inV2.mValue.val[0]);
	mValue.val[1] = vmulq_f64(mValue.val[1], inV2.mValue.val[1]);
#elif defined(JPH_USE_RVV)
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t rvv_res = __riscv_vfmul_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(mF64, rvv_res, 3);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] *= inV2.mF64[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
	return *this;
#if defined(JPH_USE_AVX)
	mValue = _mm256_div_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	mValue.mLow = _mm_div_pd(mValue.mLow, v);
	mValue.mHigh = _mm_div_pd(mValue.mHigh, v);
#elif defined(JPH_USE_NEON)
	float64x2_t v = vdupq_n_f64(inV2);
	mValue.val[0] = vdivq_f64(mValue.val[0], v);
	mValue.val[1] = vdivq_f64(mValue.val[1], v);
#elif defined(JPH_USE_RVV)
	const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t res = __riscv_vfdiv_vf_f64m2(v, inV2, 3);
	__riscv_vse64_v_f64m2(mF64, res, 3);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] /= inV2;
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
	return *this;
#if defined(JPH_USE_AVX)
	return _mm256_add_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_add_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue)), _mm_add_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vaddq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue))), vaddq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue)) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat32m1_t v2_f32 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
	const vfloat64m2_t v2 = __riscv_vfwcvt_f_f_v_f64m2(v2_f32, 3);
	const vfloat64m2_t rvv_add = __riscv_vfadd_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(res.mF64, rvv_add, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_add_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_add_pd(mValue.mLow, inV2.mValue.mLow), _mm_add_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vaddq_f64(mValue.val[0], inV2.mValue.val[0]), vaddq_f64(mValue.val[1], inV2.mValue.val[1]) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t rvv_add = __riscv_vfadd_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(res.mF64, rvv_add, 3);
	return res;
#if defined(JPH_USE_AVX)
	mValue = _mm256_add_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_add_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue));
	mValue.mHigh = _mm_add_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vaddq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue)));
	mValue.val[1] = vaddq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue));
#elif defined(JPH_USE_RVV)
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat32m1_t v2_f32 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
	const vfloat64m2_t v2 = __riscv_vfwcvt_f_f_v_f64m2(v2_f32, 3);
	const vfloat64m2_t rvv_add = __riscv_vfadd_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(mF64, rvv_add, 3);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] += inV2.mF32[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
	return *this;
#if defined(JPH_USE_AVX)
	mValue = _mm256_add_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_add_pd(mValue.mLow, inV2.mValue.mLow);
	mValue.mHigh = _mm_add_pd(mValue.mHigh, inV2.mValue.mHigh);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vaddq_f64(mValue.val[0], inV2.mValue.val[0]);
	mValue.val[1] = vaddq_f64(mValue.val[1], inV2.mValue.val[1]);
#elif defined(JPH_USE_RVV)
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t rvv_add = __riscv_vfadd_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(mF64, rvv_add, 3);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] += inV2.mF64[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
	return *this;
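// When JPH_CROSS_PLATFORM_DETERMINISTIC is defined, negation is computed as 0.0 - x rather than by
// flipping the sign bit: the two differ for zero (0.0 - (+0.0) is +0.0, while -(+0.0) is -0.0), and
// the subtraction form produces the same result on every platform.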
#if defined(JPH_USE_AVX)
	return _mm256_sub_pd(_mm256_setzero_pd(), mValue);
#elif defined(JPH_USE_SSE)
	__m128d zero = _mm_setzero_pd();
	return DVec3({ _mm_sub_pd(zero, mValue.mLow), _mm_sub_pd(zero, mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	#ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
		float64x2_t zero = vdupq_n_f64(0);
		return DVec3({ vsubq_f64(zero, mValue.val[0]), vsubq_f64(zero, mValue.val[1]) });
	#else
		return DVec3({ vnegq_f64(mValue.val[0]), vnegq_f64(mValue.val[1]) });
	#endif
#elif defined(JPH_USE_RVV)
	#ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
		DVec3 res;
		const vfloat64m2_t rvv_zero = __riscv_vfmv_v_f_f64m2(0.0, 3);
		const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
		const vfloat64m2_t rvv_neg = __riscv_vfsub_vv_f64m2(rvv_zero, v, 3);
		__riscv_vse64_v_f64m2(res.mF64, rvv_neg, 3);
		return res;
	#else
		DVec3 res;
		const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
		const vfloat64m2_t rvv_neg = __riscv_vfsgnjn_vv_f64m2(v, v, 3);
		__riscv_vse64_v_f64m2(res.mF64, rvv_neg, 3);
		return res;
	#endif
#else
	#ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
		return DVec3(0.0 - mF64[0], 0.0 - mF64[1], 0.0 - mF64[2]);
	#else
		return DVec3(-mF64[0], -mF64[1], -mF64[2]);
	#endif
#endif
#if defined(JPH_USE_AVX)
	return _mm256_sub_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_sub_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue)), _mm_sub_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vsubq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue))), vsubq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue)) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat32m1_t v2_f32 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
	const vfloat64m2_t v2 = __riscv_vfwcvt_f_f_v_f64m2(v2_f32, 3);
	const vfloat64m2_t rvv_sub = __riscv_vfsub_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(res.mF64, rvv_sub, 3);
	return res;
#if defined(JPH_USE_AVX)
	return _mm256_sub_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_sub_pd(mValue.mLow, inV2.mValue.mLow), _mm_sub_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vsubq_f64(mValue.val[0], inV2.mValue.val[0]), vsubq_f64(mValue.val[1], inV2.mValue.val[1]) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t rvv_sub = __riscv_vfsub_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(res.mF64, rvv_sub, 3);
	return res;
#if defined(JPH_USE_AVX)
	mValue = _mm256_sub_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_sub_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue));
	mValue.mHigh = _mm_sub_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vsubq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue)));
	mValue.val[1] = vsubq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue));
#elif defined(JPH_USE_RVV)
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat32m1_t v2_f32 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
	const vfloat64m2_t v2 = __riscv_vfwcvt_f_f_v_f64m2(v2_f32, 3);
	const vfloat64m2_t rvv_sub = __riscv_vfsub_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(mF64, rvv_sub, 3);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] -= inV2.mF32[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
	return *this;
#if defined(JPH_USE_AVX)
	mValue = _mm256_sub_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_sub_pd(mValue.mLow, inV2.mValue.mLow);
	mValue.mHigh = _mm_sub_pd(mValue.mHigh, inV2.mValue.mHigh);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vsubq_f64(mValue.val[0], inV2.mValue.val[0]);
	mValue.val[1] = vsubq_f64(mValue.val[1], inV2.mValue.val[1]);
#elif defined(JPH_USE_RVV)
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t rvv_sub = __riscv_vfsub_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(mF64, rvv_sub, 3);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] -= inV2.mF64[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF64[3] = mF64[2];
	#endif
#endif
	return *this;
#if defined(JPH_USE_AVX)
	return _mm256_div_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_div_pd(mValue.mLow, inV2.mValue.mLow), _mm_div_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vdivq_f64(mValue.val[0], inV2.mValue.val[0]), vdivq_f64(mValue.val[1], inV2.mValue.val[1]) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t rvv_div = __riscv_vfdiv_vv_f64m2(v1, v2, 3);
	__riscv_vse64_v_f64m2(res.mF64, rvv_div, 3);
	return res;
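// Abs on AVX/SSE is computed as max(0 - x, x); RVV instead uses a sign-inject (vfsgnj) with +1.0,
// which simply clears the sign bit of each lane.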
#if defined(JPH_USE_AVX512)
	return _mm256_range_pd(mValue, mValue, 0b1000);
#elif defined(JPH_USE_AVX)
	return _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(), mValue), mValue);
#elif defined(JPH_USE_SSE)
	__m128d zero = _mm_setzero_pd();
	return DVec3({ _mm_max_pd(_mm_sub_pd(zero, mValue.mLow), mValue.mLow), _mm_max_pd(_mm_sub_pd(zero, mValue.mHigh), mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vabsq_f64(mValue.val[0]), vabsq_f64(mValue.val[1]) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t rvv_abs = __riscv_vfsgnj_vf_f64m2(v, 1.0, 3);
	__riscv_vse64_v_f64m2(res.mF64, rvv_abs, 3);
	return res;
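// Both Cross implementations below use the shuffle identity a x b = (a * b.yzx - a.yzx * b).yzx:
// two multiplies, one subtract and one final rotation. _MM_SHUFFLE(0, 0, 2, 1) and the { 1, 2, 0 }
// gather indices both encode the yzx rotation.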
#if defined(JPH_USE_AVX2)
	__m256d t1 = _mm256_permute4x64_pd(inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1));
	t1 = _mm256_mul_pd(t1, mValue);
	__m256d t2 = _mm256_permute4x64_pd(mValue, _MM_SHUFFLE(0, 0, 2, 1));
	t2 = _mm256_mul_pd(t2, inV2.mValue);
	__m256d t3 = _mm256_sub_pd(t1, t2);
	return _mm256_permute4x64_pd(t3, _MM_SHUFFLE(0, 0, 2, 1));
#elif defined(JPH_USE_RVV)
	const uint64 indices[3] = { 1, 2, 0 };
	const vuint64m2_t gather_indices = __riscv_vle64_v_u64m2(indices, 3);
	const vfloat64m2_t v0 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	vfloat64m2_t t0 = __riscv_vrgather_vv_f64m2(v1, gather_indices, 3);
	t0 = __riscv_vfmul_vv_f64m2(t0, v0, 3);
	vfloat64m2_t t1 = __riscv_vrgather_vv_f64m2(v0, gather_indices, 3);
	t1 = __riscv_vfmul_vv_f64m2(t1, v1, 3);
	const vfloat64m2_t sub = __riscv_vfsub_vv_f64m2(t0, t1, 3);
	const vfloat64m2_t cross = __riscv_vrgather_vv_f64m2(sub, gather_indices, 3);
	DVec3 cross_result;
	__riscv_vse64_v_f64m2(cross_result.mF64, cross, 3);
	return cross_result;
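// In Dot, vfredosum is the ordered reduction: lanes are summed strictly in index order (x + y + z),
// matching the associativity of the scalar loop and of the shuffle/add sequences in the x86 paths.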
#if defined(JPH_USE_AVX)
	__m256d mul = _mm256_mul_pd(mValue, inV2.mValue);
	__m128d xy = _mm256_castpd256_pd128(mul);
	__m128d yx = _mm_shuffle_pd(xy, xy, 1);
	__m128d sum = _mm_add_pd(xy, yx);
	__m128d zw = _mm256_extractf128_pd(mul, 1);
	sum = _mm_add_pd(sum, zw);
	return _mm_cvtsd_f64(sum);
#elif defined(JPH_USE_SSE)
	__m128d xy = _mm_mul_pd(mValue.mLow, inV2.mValue.mLow);
	__m128d yx = _mm_shuffle_pd(xy, xy, 1);
	__m128d sum = _mm_add_pd(xy, yx);
	__m128d z = _mm_mul_sd(mValue.mHigh, inV2.mValue.mHigh);
	sum = _mm_add_pd(sum, z);
	return _mm_cvtsd_f64(sum);
#elif defined(JPH_USE_NEON)
	float64x2_t mul_low = vmulq_f64(mValue.val[0], inV2.mValue.val[0]);
	float64x2_t mul_high = vmulq_f64(mValue.val[1], inV2.mValue.val[1]);
	return vaddvq_f64(mul_low) + vgetq_lane_f64(mul_high, 0);
#elif defined(JPH_USE_RVV)
	const vfloat64m1_t zeros = __riscv_vfmv_v_f_f64m1(0.0, 3);
	const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
	const vfloat64m2_t mul = __riscv_vfmul_vv_f64m2(v1, v2, 3);
	const vfloat64m1_t sum = __riscv_vfredosum_vs_f64m2_f64m1(mul, zeros, 3);
	return __riscv_vfmv_f_s_f64m1_f64(sum);
#else
	double dot = 0.0;
	for (int i = 0; i < 3; i++)
		dot += mF64[i] * inV2.mF64[i];
	return dot;
#endif
#if defined(JPH_USE_AVX)
	return _mm256_sqrt_pd(mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_sqrt_pd(mValue.mLow), _mm_sqrt_pd(mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vsqrtq_f64(mValue.val[0]), vsqrtq_f64(mValue.val[1]) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t rvv_sqrt = __riscv_vfsqrt_v_f64m2(v, 3);
	__riscv_vse64_v_f64m2(res.mF64, rvv_sqrt, 3);
	return res;
	return sqrt(Dot(*this));
	return abs(LengthSq() - 1.0) <= inTolerance;
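// IsNaN exploits the fact that a NaN never compares equal to itself: the RVV path counts the lanes
// where v == v holds (vcpop on the compare mask) and reports a NaN when fewer than 3 lanes pass.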
#if defined(JPH_USE_AVX512)
	return (_mm256_fpclass_pd_mask(mValue, 0b10000001) & 0x7) != 0;
#elif defined(JPH_USE_AVX)
	return (_mm256_movemask_pd(_mm256_cmp_pd(mValue, mValue, _CMP_UNORD_Q)) & 0x7) != 0;
#elif defined(JPH_USE_SSE)
	return ((_mm_movemask_pd(_mm_cmpunord_pd(mValue.mLow, mValue.mLow)) + (_mm_movemask_pd(_mm_cmpunord_pd(mValue.mHigh, mValue.mHigh)) << 2)) & 0x7) != 0;
#elif defined(JPH_USE_RVV)
	const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
	const vbool32_t mask = __riscv_vmfeq_vv_f64m2_b32(v, v, 3);
	const uint32 eq = __riscv_vcpop_m_b32(mask, 3);
	return eq != 3;
#else
	return isnan(mF64[0]) || isnan(mF64[1]) || isnan(mF64[2]);
#endif
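// GetSign bit trick: x & -1.0 keeps the sign bit of x, and the subsequent | 1.0 forces the
// magnitude to one, so every lane collapses to +1.0 or -1.0. Note that -0.0 yields -1.0, which is
// consistent with the std::signbit fallback.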
#if defined(JPH_USE_AVX512)
	return _mm256_fixupimm_pd(mValue, mValue, _mm256_set1_epi32(0xA9A90A00), 0);
#elif defined(JPH_USE_AVX)
	__m256d minus_one = _mm256_set1_pd(-1.0);
	__m256d one = _mm256_set1_pd(1.0);
	return _mm256_or_pd(_mm256_and_pd(mValue, minus_one), one);
#elif defined(JPH_USE_SSE)
	__m128d minus_one = _mm_set1_pd(-1.0);
	__m128d one = _mm_set1_pd(1.0);
	return DVec3({ _mm_or_pd(_mm_and_pd(mValue.mLow, minus_one), one), _mm_or_pd(_mm_and_pd(mValue.mHigh, minus_one), one) });
#elif defined(JPH_USE_NEON)
	uint64x2_t minus_one = vreinterpretq_u64_f64(vdupq_n_f64(-1.0));
	uint64x2_t one = vreinterpretq_u64_f64(vdupq_n_f64(1.0));
	return DVec3({ vreinterpretq_f64_u64(vorrq_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[0]), minus_one), one)),
				   vreinterpretq_f64_u64(vorrq_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[1]), minus_one), one)) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t rvv_in = __riscv_vle64_v_f64m2(mF64, 3);
	const vfloat64m2_t rvv_one = __riscv_vfmv_v_f_f64m2(1.0, 3);
	const vfloat64m2_t rvv_signs = __riscv_vfsgnj_vv_f64m2(rvv_one, rvv_in, 3);
	__riscv_vse64_v_f64m2(res.mF64, rvv_signs, 3);
	return res;
#else
	return DVec3(std::signbit(mF64[0])? -1.0 : 1.0,
				 std::signbit(mF64[1])? -1.0 : 1.0,
				 std::signbit(mF64[2])? -1.0 : 1.0);
#endif
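// A double carries 52 mantissa bits, a float only 23, so the low 52 - 23 = 29 bits are what a
// double-to-float conversion can lose. Clearing them below leaves a value that (exponent range
// permitting) converts to float exactly, i.e. the conversion rounds toward zero.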
	constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;

#if defined(JPH_USE_AVX)
	return _mm256_and_pd(mValue, _mm256_castsi256_pd(_mm256_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss))));
#elif defined(JPH_USE_SSE)
	__m128d mask = _mm_castsi128_pd(_mm_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss)));
	return DVec3({ _mm_and_pd(mValue.mLow, mask), _mm_and_pd(mValue.mHigh, mask) });
#elif defined(JPH_USE_NEON)
	uint64x2_t mask = vdupq_n_u64(~cDoubleToFloatMantissaLoss);
	return DVec3({ vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[0]), mask)),
				   vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[1]), mask)) });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t dvec = __riscv_vle64_v_f64m2(mF64, 3);
	const vuint64m2_t dvec_u64 = __riscv_vreinterpret_v_f64m2_u64m2(dvec);
	const vuint64m2_t chopped = __riscv_vand_vx_u64m2(dvec_u64, ~cDoubleToFloatMantissaLoss, 3);
	const vfloat64m2_t chopped_f64 = __riscv_vreinterpret_v_u64m2_f64m2(chopped);
	__riscv_vse64_v_f64m2(res.mF64, chopped_f64, 3);
	return res;
#else
	double x = BitCast<double>(BitCast<uint64>(mF64[0]) & ~cDoubleToFloatMantissaLoss);
	double y = BitCast<double>(BitCast<uint64>(mF64[1]) & ~cDoubleToFloatMantissaLoss);
	double z = BitCast<double>(BitCast<uint64>(mF64[2]) & ~cDoubleToFloatMantissaLoss);
	return DVec3(x, y, z);
#endif
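// Counterpart of PrepareRoundToZero: lanes whose low 29 mantissa bits are already zero convert to
// float exactly and are left untouched; all other lanes get those bits forced to ones, pushing the
// magnitude up so that the subsequent conversion cannot round back toward zero.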
	constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;

#if defined(JPH_USE_AVX512)
	__m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
	__mmask8 is_zero = _mm256_testn_epi64_mask(_mm256_castpd_si256(mValue), mantissa_loss);
	__m256d value_or_mantissa_loss = _mm256_or_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
	return _mm256_mask_blend_pd(is_zero, value_or_mantissa_loss, mValue);
#elif defined(JPH_USE_AVX)
	__m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
	__m256d value_and_mantissa_loss = _mm256_and_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
	__m256d is_zero = _mm256_cmp_pd(value_and_mantissa_loss, _mm256_setzero_pd(), _CMP_EQ_OQ);
	__m256d value_or_mantissa_loss = _mm256_or_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
	return _mm256_blendv_pd(value_or_mantissa_loss, mValue, is_zero);
#elif defined(JPH_USE_SSE4_1)
	__m128i mantissa_loss = _mm_set1_epi64x(cDoubleToFloatMantissaLoss);
	__m128d zero = _mm_setzero_pd();
	__m128d value_and_mantissa_loss_low = _mm_and_pd(mValue.mLow, _mm_castsi128_pd(mantissa_loss));
	__m128d is_zero_low = _mm_cmpeq_pd(value_and_mantissa_loss_low, zero);
	__m128d value_or_mantissa_loss_low = _mm_or_pd(mValue.mLow, _mm_castsi128_pd(mantissa_loss));
	__m128d value_and_mantissa_loss_high = _mm_and_pd(mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
	__m128d is_zero_high = _mm_cmpeq_pd(value_and_mantissa_loss_high, zero);
	__m128d value_or_mantissa_loss_high = _mm_or_pd(mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
	return DVec3({ _mm_blendv_pd(value_or_mantissa_loss_low, mValue.mLow, is_zero_low), _mm_blendv_pd(value_or_mantissa_loss_high, mValue.mHigh, is_zero_high) });
#elif defined(JPH_USE_NEON)
	uint64x2_t mantissa_loss = vdupq_n_u64(cDoubleToFloatMantissaLoss);
	float64x2_t zero = vdupq_n_f64(0.0);
	float64x2_t value_and_mantissa_loss_low = vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[0]), mantissa_loss));
	uint64x2_t is_zero_low = vceqq_f64(value_and_mantissa_loss_low, zero);
	float64x2_t value_or_mantissa_loss_low = vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(mValue.val[0]), mantissa_loss));
	float64x2_t value_and_mantissa_loss_high = vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[1]), mantissa_loss));
	float64x2_t value_low = vbslq_f64(is_zero_low, mValue.val[0], value_or_mantissa_loss_low);
	uint64x2_t is_zero_high = vceqq_f64(value_and_mantissa_loss_high, zero);
	float64x2_t value_or_mantissa_loss_high = vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(mValue.val[1]), mantissa_loss));
	float64x2_t value_high = vbslq_f64(is_zero_high, mValue.val[1], value_or_mantissa_loss_high);
	return DVec3({ value_low, value_high });
#elif defined(JPH_USE_RVV)
	DVec3 res;
	const vfloat64m2_t dvec = __riscv_vle64_v_f64m2(mF64, 3);
	const vuint64m2_t dvec_u64 = __riscv_vreinterpret_v_f64m2_u64m2(dvec);
	const vuint64m2_t and_loss = __riscv_vand_vx_u64m2(dvec_u64, cDoubleToFloatMantissaLoss, 3);
	const vuint64m2_t or_loss = __riscv_vor_vx_u64m2(dvec_u64, cDoubleToFloatMantissaLoss, 3);
	const vbool32_t is_zero = __riscv_vmseq_vx_u64m2_b32(and_loss, 0x0, 3);
	const vuint64m2_t select = __riscv_vmerge_vvm_u64m2(or_loss, dvec_u64, is_zero, 3);
	const vfloat64m2_t select_f64 = __riscv_vreinterpret_v_u64m2_f64m2(select);
	__riscv_vse64_v_f64m2(res.mF64, select_f64, 3);
	return res;
#else
	uint64 ux = BitCast<uint64>(mF64[0]);
	uint64 uy = BitCast<uint64>(mF64[1]);
	uint64 uz = BitCast<uint64>(mF64[2]);
	double x = BitCast<double>((ux & cDoubleToFloatMantissaLoss) == 0? ux : (ux | cDoubleToFloatMantissaLoss));
	double y = BitCast<double>((uy & cDoubleToFloatMantissaLoss) == 0? uy : (uy | cDoubleToFloatMantissaLoss));
	double z = BitCast<double>((uz & cDoubleToFloatMantissaLoss) == 0? uz : (uz | cDoubleToFloatMantissaLoss));
	return DVec3(x, y, z);
#endif