Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
DVec3.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
5#pragma once
6
8
9// Create a std::hash/JPH::Hash for DVec3
10JPH_MAKE_HASHABLE(JPH::DVec3, t.GetX(), t.GetY(), t.GetZ())
11
13
14DVec3::DVec3(Vec3Arg inRHS)
15{
16#if defined(JPH_USE_AVX)
17 mValue = _mm256_cvtps_pd(inRHS.mValue);
18#elif defined(JPH_USE_SSE)
19 mValue.mLow = _mm_cvtps_pd(inRHS.mValue);
20 mValue.mHigh = _mm_cvtps_pd(_mm_shuffle_ps(inRHS.mValue, inRHS.mValue, _MM_SHUFFLE(2, 2, 2, 2)));
21#elif defined(JPH_USE_NEON)
22 mValue.val[0] = vcvt_f64_f32(vget_low_f32(inRHS.mValue));
23 mValue.val[1] = vcvt_high_f64_f32(inRHS.mValue);
24#elif defined(JPH_USE_RVV)
25 const vfloat32m1_t src = __riscv_vle32_v_f32m1(inRHS.mF32, 3);
26 const vfloat64m2_t widened = __riscv_vfwcvt_f_f_v_f64m2(src, 3);
27 __riscv_vse64_v_f64m2(mF64, widened, 3);
28#else
29 mF64[0] = (double)inRHS.GetX();
30 mF64[1] = (double)inRHS.GetY();
31 mF64[2] = (double)inRHS.GetZ();
32 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
33 mF64[3] = mF64[2];
34 #endif
35#endif
36}
37
39 DVec3(Vec3(inRHS))
40{
41}
42
43DVec3::DVec3(double inX, double inY, double inZ)
44{
45#if defined(JPH_USE_AVX)
46 mValue = _mm256_set_pd(inZ, inZ, inY, inX); // Assure Z and W are the same
47#elif defined(JPH_USE_SSE)
48 mValue.mLow = _mm_set_pd(inY, inX);
49 mValue.mHigh = _mm_set1_pd(inZ);
50#elif defined(JPH_USE_NEON)
51 mValue.val[0] = vcombine_f64(vcreate_f64(BitCast<uint64>(inX)), vcreate_f64(BitCast<uint64>(inY)));
52 mValue.val[1] = vdupq_n_f64(inZ);
53#elif defined(JPH_USE_RVV)
54 vfloat64m2_t v = __riscv_vfmv_v_f_f64m2(inZ, 4);
55 v = __riscv_vfslide1up_vf_f64m2(v, inY, 4);
56 v = __riscv_vfslide1up_vf_f64m2(v, inX, 4);
57 __riscv_vse64_v_f64m2(mF64, v, 4);
58#else
59 mF64[0] = inX;
60 mF64[1] = inY;
61 mF64[2] = inZ;
62 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
63 mF64[3] = mF64[2];
64 #endif
65#endif
66}
67
69{
70#if defined(JPH_USE_AVX)
71 Type x = _mm256_castpd128_pd256(_mm_load_sd(&inV.x));
72 Type y = _mm256_castpd128_pd256(_mm_load_sd(&inV.y));
73 Type z = _mm256_broadcast_sd(&inV.z);
74 Type xy = _mm256_unpacklo_pd(x, y);
75 mValue = _mm256_blend_pd(xy, z, 0b1100); // Assure Z and W are the same
76#elif defined(JPH_USE_SSE)
77 mValue.mLow = _mm_loadu_pd(&inV.x);
78 mValue.mHigh = _mm_set1_pd(inV.z);
79#elif defined(JPH_USE_NEON)
80 mValue.val[0] = vld1q_f64(&inV.x);
81 mValue.val[1] = vdupq_n_f64(inV.z);
82#elif defined(JPH_USE_RVV)
83 vfloat64m2_t v = __riscv_vle64_v_f64m2(&inV.x, 3);
84 __riscv_vse64_v_f64m2(mF64, v, 3);
85#else
86 mF64[0] = inV.x;
87 mF64[1] = inV.y;
88 mF64[2] = inV.z;
89 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
90 mF64[3] = mF64[2];
91 #endif
92#endif
93}
94
95void DVec3::CheckW() const
96{
97#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
98 // Avoid asserts when both components are NaN
99 JPH_ASSERT(reinterpret_cast<const uint64 *>(mF64)[2] == reinterpret_cast<const uint64 *>(mF64)[3]);
100#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
101}
102
105{
106#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
107 #if defined(JPH_USE_AVX)
108 return _mm256_shuffle_pd(inValue, inValue, 2);
109 #elif defined(JPH_USE_SSE)
110 Type value;
111 value.mLow = inValue.mLow;
112 value.mHigh = _mm_shuffle_pd(inValue.mHigh, inValue.mHigh, 0);
113 return value;
114 #elif defined(JPH_USE_NEON)
115 Type value;
116 value.val[0] = inValue.val[0];
117 value.val[1] = vdupq_laneq_f64(inValue.val[1], 0);
118 return value;
119 #elif defined(JPH_USE_RVV)
120 Type value;
121 const vfloat64m2_t buffer = __riscv_vle64_v_f64m2(inValue.mData, 3);
122 __riscv_vse64_v_f64m2(value.mData, buffer, 3);
123 value.mData[3] = value.mData[2];
124 return value;
125 #else
126 Type value;
127 value.mData[0] = inValue.mData[0];
128 value.mData[1] = inValue.mData[1];
129 value.mData[2] = inValue.mData[2];
130 value.mData[3] = inValue.mData[2];
131 return value;
132 #endif
133#else
134 return inValue;
135#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
136}
137
139{
140#if defined(JPH_USE_AVX)
141 return _mm256_setzero_pd();
142#elif defined(JPH_USE_SSE)
143 __m128d zero = _mm_setzero_pd();
144 return DVec3({ zero, zero });
145#elif defined(JPH_USE_NEON)
146 float64x2_t zero = vdupq_n_f64(0.0);
147 return DVec3({ zero, zero });
148#elif defined(JPH_USE_RVV)
149 DVec3 vec;
150 const vfloat64m2_t v = __riscv_vfmv_v_f_f64m2(0.0, 3);
151 __riscv_vse64_v_f64m2(vec.mF64, v, 3);
152 return vec;
153#else
154 return DVec3(0, 0, 0);
155#endif
156}
157
159{
160#if defined(JPH_USE_AVX)
161 return _mm256_set1_pd(inV);
162#elif defined(JPH_USE_SSE)
163 __m128d value = _mm_set1_pd(inV);
164 return DVec3({ value, value });
165#elif defined(JPH_USE_NEON)
166 float64x2_t value = vdupq_n_f64(inV);
167 return DVec3({ value, value });
168#elif defined(JPH_USE_RVV)
169 DVec3 vec;
170 const vfloat64m2_t v = __riscv_vfmv_v_f_f64m2(inV, 3);
171 __riscv_vse64_v_f64m2(vec.mF64, v, 3);
172 return vec;
173#else
174 return DVec3(inV, inV, inV);
175#endif
176}
177
179{
180 return sReplicate(1.0);
181}
182
184{
185 return sReplicate(numeric_limits<double>::quiet_NaN());
186}
187
189{
190#if defined(JPH_USE_AVX)
191 Type v = _mm256_loadu_pd(&inV.x);
192#elif defined(JPH_USE_SSE)
193 Type v;
194 v.mLow = _mm_loadu_pd(&inV.x);
195 v.mHigh = _mm_set1_pd(inV.z);
196#elif defined(JPH_USE_NEON)
197 Type v = vld1q_f64_x2(&inV.x);
198#elif defined(JPH_USE_RVV)
199 Type v;
200 const vfloat64m2_t vec = __riscv_vle64_v_f64m2(&inV.x, 3);
201 __riscv_vse64_v_f64m2(v.mData, vec, 3);
202#else
203 Type v = { inV.x, inV.y, inV.z };
204#endif
205 return sFixW(v);
206}
207
209{
210#if defined(JPH_USE_AVX)
211 _mm_storeu_pd(&outV->x, _mm256_castpd256_pd128(mValue));
212 outV->z = mF64[2];
213#elif defined(JPH_USE_SSE)
214 _mm_storeu_pd(&outV->x, mValue.mLow);
215 outV->z = mF64[2];
216#elif defined(JPH_USE_NEON)
217 vst1q_f64(&outV->x, mValue.val[0]);
218 outV->z = mF64[2];
219#elif defined(JPH_USE_RVV)
220 const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
221 __riscv_vse64_v_f64m2(&outV->x, v, 3);
222#else
223 outV->x = mF64[0];
224 outV->y = mF64[1];
225 outV->z = mF64[2];
226#endif
227}
228
229DVec3::operator Vec3() const
230{
231#if defined(JPH_USE_AVX)
232 return _mm256_cvtpd_ps(mValue);
233#elif defined(JPH_USE_SSE)
234 __m128 low = _mm_cvtpd_ps(mValue.mLow);
235 __m128 high = _mm_cvtpd_ps(mValue.mHigh);
236 return _mm_shuffle_ps(low, high, _MM_SHUFFLE(1, 0, 1, 0));
237#elif defined(JPH_USE_NEON)
238 return vcvt_high_f32_f64(vcvtx_f32_f64(mValue.val[0]), mValue.val[1]);
239#elif defined(JPH_USE_RVV)
240 Vec3 v;
241 const vfloat64m2_t src = __riscv_vle64_v_f64m2(mF64, 3);
242 const vfloat32m1_t narrowed = __riscv_vfncvt_f_f_w_f32m1(src, 3);
243 __riscv_vse32_v_f32m1(v.mF32, narrowed, 3);
244 return v;
245#else
246 return Vec3((float)GetX(), (float)GetY(), (float)GetZ());
247#endif
248}
249
251{
252#if defined(JPH_USE_AVX)
253 return _mm256_min_pd(inV1.mValue, inV2.mValue);
254#elif defined(JPH_USE_SSE)
255 return DVec3({ _mm_min_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_min_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
256#elif defined(JPH_USE_NEON)
257 return DVec3({ vminq_f64(inV1.mValue.val[0], inV2.mValue.val[0]), vminq_f64(inV1.mValue.val[1], inV2.mValue.val[1]) });
258#elif defined(JPH_USE_RVV)
259 DVec3 res;
260 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
261 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
262 const vfloat64m2_t min = __riscv_vfmin_vv_f64m2(v1, v2, 3);
263 __riscv_vse64_v_f64m2(res.mF64, min, 3);
264 return res;
265#else
266 return DVec3(min(inV1.mF64[0], inV2.mF64[0]),
267 min(inV1.mF64[1], inV2.mF64[1]),
268 min(inV1.mF64[2], inV2.mF64[2]));
269#endif
270}
271
273{
274#if defined(JPH_USE_AVX)
275 return _mm256_max_pd(inV1.mValue, inV2.mValue);
276#elif defined(JPH_USE_SSE)
277 return DVec3({ _mm_max_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_max_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
278#elif defined(JPH_USE_NEON)
279 return DVec3({ vmaxq_f64(inV1.mValue.val[0], inV2.mValue.val[0]), vmaxq_f64(inV1.mValue.val[1], inV2.mValue.val[1]) });
280#elif defined(JPH_USE_RVV)
281 DVec3 res;
282 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
283 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
284 const vfloat64m2_t max = __riscv_vfmax_vv_f64m2(v1, v2, 3);
285 __riscv_vse64_v_f64m2(res.mF64, max, 3);
286 return res;
287#else
288 return DVec3(max(inV1.mF64[0], inV2.mF64[0]),
289 max(inV1.mF64[1], inV2.mF64[1]),
290 max(inV1.mF64[2], inV2.mF64[2]));
291#endif
292}
293
295{
296 return sMax(sMin(inV, inMax), inMin);
297}
298
300{
301#if defined(JPH_USE_AVX)
302 return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_EQ_OQ);
303#elif defined(JPH_USE_SSE)
304 return DVec3({ _mm_cmpeq_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpeq_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
305#elif defined(JPH_USE_NEON)
306 return DVec3({ vreinterpretq_f64_u64(vceqq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vceqq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
307#elif defined(JPH_USE_RVV)
308 DVec3 res;
309 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
310 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
311 const vbool32_t mask = __riscv_vmfeq_vv_f64m2_b32(v1, v2, 3);
312 const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
313 const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
314 __riscv_vse64_v_f64m2(res.mF64, merged, 3);
315 return res;
316#else
317 return DVec3(inV1.mF64[0] == inV2.mF64[0]? cTrue : cFalse,
318 inV1.mF64[1] == inV2.mF64[1]? cTrue : cFalse,
319 inV1.mF64[2] == inV2.mF64[2]? cTrue : cFalse);
320#endif
321}
322
324{
325#if defined(JPH_USE_AVX)
326 return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_LT_OQ);
327#elif defined(JPH_USE_SSE)
328 return DVec3({ _mm_cmplt_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmplt_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
329#elif defined(JPH_USE_NEON)
330 return DVec3({ vreinterpretq_f64_u64(vcltq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcltq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
331#elif defined(JPH_USE_RVV)
332 DVec3 res;
333 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
334 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
335 const vbool32_t mask = __riscv_vmflt_vv_f64m2_b32(v1, v2, 3);
336 const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
337 const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
338 __riscv_vse64_v_f64m2(res.mF64, merged, 3);
339 return res;
340#else
341 return DVec3(inV1.mF64[0] < inV2.mF64[0]? cTrue : cFalse,
342 inV1.mF64[1] < inV2.mF64[1]? cTrue : cFalse,
343 inV1.mF64[2] < inV2.mF64[2]? cTrue : cFalse);
344#endif
345}
346
348{
349#if defined(JPH_USE_AVX)
350 return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_LE_OQ);
351#elif defined(JPH_USE_SSE)
352 return DVec3({ _mm_cmple_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmple_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
353#elif defined(JPH_USE_NEON)
354 return DVec3({ vreinterpretq_f64_u64(vcleq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcleq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
355#elif defined(JPH_USE_RVV)
356 DVec3 res;
357 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
358 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
359 const vbool32_t mask = __riscv_vmfle_vv_f64m2_b32(v1, v2, 3);
360 const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
361 const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
362 __riscv_vse64_v_f64m2(res.mF64, merged, 3);
363 return res;
364#else
365 return DVec3(inV1.mF64[0] <= inV2.mF64[0]? cTrue : cFalse,
366 inV1.mF64[1] <= inV2.mF64[1]? cTrue : cFalse,
367 inV1.mF64[2] <= inV2.mF64[2]? cTrue : cFalse);
368#endif
369}
370
372{
373#if defined(JPH_USE_AVX)
374 return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_GT_OQ);
375#elif defined(JPH_USE_SSE)
376 return DVec3({ _mm_cmpgt_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpgt_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
377#elif defined(JPH_USE_NEON)
378 return DVec3({ vreinterpretq_f64_u64(vcgtq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcgtq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
379#elif defined(JPH_USE_RVV)
380 DVec3 res;
381 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
382 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
383 const vbool32_t mask = __riscv_vmfgt_vv_f64m2_b32(v1, v2, 3);
384 const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
385 const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
386 __riscv_vse64_v_f64m2(res.mF64, merged, 3);
387 return res;
388#else
389 return DVec3(inV1.mF64[0] > inV2.mF64[0]? cTrue : cFalse,
390 inV1.mF64[1] > inV2.mF64[1]? cTrue : cFalse,
391 inV1.mF64[2] > inV2.mF64[2]? cTrue : cFalse);
392#endif
393}
394
396{
397#if defined(JPH_USE_AVX)
398 return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_GE_OQ);
399#elif defined(JPH_USE_SSE)
400 return DVec3({ _mm_cmpge_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpge_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
401#elif defined(JPH_USE_NEON)
402 return DVec3({ vreinterpretq_f64_u64(vcgeq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcgeq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
403#elif defined(JPH_USE_RVV)
404 DVec3 res;
405 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV1.mF64, 3);
406 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
407 const vbool32_t mask = __riscv_vmfge_vv_f64m2_b32(v1, v2, 3);
408 const vfloat64m2_t zeros = __riscv_vfmv_v_f_f64m2(cFalse, 3);
409 const vfloat64m2_t merged = __riscv_vfmerge_vfm_f64m2(zeros, cTrue, mask, 3);
410 __riscv_vse64_v_f64m2(res.mF64, merged, 3);
411 return res;
412#else
413 return DVec3(inV1.mF64[0] >= inV2.mF64[0]? cTrue : cFalse,
414 inV1.mF64[1] >= inV2.mF64[1]? cTrue : cFalse,
415 inV1.mF64[2] >= inV2.mF64[2]? cTrue : cFalse);
416#endif
417}
418
420{
421#if defined(JPH_USE_AVX)
422 #ifdef JPH_USE_FMADD
423 return _mm256_fmadd_pd(inMul1.mValue, inMul2.mValue, inAdd.mValue);
424 #else
425 return _mm256_add_pd(_mm256_mul_pd(inMul1.mValue, inMul2.mValue), inAdd.mValue);
426 #endif
427#elif defined(JPH_USE_NEON)
428 return DVec3({ vmlaq_f64(inAdd.mValue.val[0], inMul1.mValue.val[0], inMul2.mValue.val[0]), vmlaq_f64(inAdd.mValue.val[1], inMul1.mValue.val[1], inMul2.mValue.val[1]) });
429#elif defined(JPH_USE_RVV)
430 DVec3 res;
431 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inMul1.mF64, 3);
432 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inMul2.mF64, 3);
433 const vfloat64m2_t rvv_add = __riscv_vle64_v_f64m2(inAdd.mF64, 3);
434 const vfloat64m2_t fmadd = __riscv_vfmacc_vv_f64m2(rvv_add, v1, v2, 3);
435 __riscv_vse64_v_f64m2(res.mF64, fmadd, 3);
436 return res;
437#else
438 return inMul1 * inMul2 + inAdd;
439#endif
440}
441
442DVec3 DVec3::sSelect(DVec3Arg inNotSet, DVec3Arg inSet, DVec3Arg inControl)
443{
444#if defined(JPH_USE_AVX)
445 return _mm256_blendv_pd(inNotSet.mValue, inSet.mValue, inControl.mValue);
446#elif defined(JPH_USE_SSE4_1)
447 Type v = { _mm_blendv_pd(inNotSet.mValue.mLow, inSet.mValue.mLow, inControl.mValue.mLow), _mm_blendv_pd(inNotSet.mValue.mHigh, inSet.mValue.mHigh, inControl.mValue.mHigh) };
448 return sFixW(v);
449#elif defined(JPH_USE_NEON)
450 Type v = { vbslq_f64(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_f64(inControl.mValue.val[0]), 63)), inSet.mValue.val[0], inNotSet.mValue.val[0]),
451 vbslq_f64(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_f64(inControl.mValue.val[1]), 63)), inSet.mValue.val[1], inNotSet.mValue.val[1]) };
452 return sFixW(v);
453#elif defined(JPH_USE_RVV)
454 DVec3 masked;
455 const vfloat64m2_t control_double = __riscv_vle64_v_f64m2(inControl.mF64, 3);
456 const vfloat64m2_t not_set = __riscv_vle64_v_f64m2(inNotSet.mF64, 3);
457 const vfloat64m2_t set = __riscv_vle64_v_f64m2(inSet.mF64, 3);
458 const vuint64m2_t control = __riscv_vreinterpret_v_f64m2_u64m2(control_double);
459
460 // Generate RVV bool mask from UVec4
461 const uint64 sign_bit_mask = 0x8000000000000000u;
462 const vuint64m2_t r = __riscv_vand_vx_u64m2(control, sign_bit_mask, 3);
463 const vbool32_t rvv_mask = __riscv_vmsne_vx_u64m2_b32(r, 0x0, 3);
464 const vfloat64m2_t merged = __riscv_vmerge_vvm_f64m2(not_set, set, rvv_mask, 3);
465 __riscv_vse64_v_f64m2(masked.mF64, merged, 3);
466 return masked;
467#else
468 DVec3 result;
469 for (int i = 0; i < 3; i++)
470 result.mF64[i] = (BitCast<uint64>(inControl.mF64[i]) & (uint64(1) << 63))? inSet.mF64[i] : inNotSet.mF64[i];
471#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
472 result.mF64[3] = result.mF64[2];
473#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
474 return result;
475#endif
476}
477
479{
480#if defined(JPH_USE_AVX)
481 return _mm256_or_pd(inV1.mValue, inV2.mValue);
482#elif defined(JPH_USE_SSE)
483 return DVec3({ _mm_or_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_or_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
484#elif defined(JPH_USE_NEON)
485 return DVec3({ vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(inV1.mValue.val[0]), vreinterpretq_u64_f64(inV2.mValue.val[0]))),
486 vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(inV1.mValue.val[1]), vreinterpretq_u64_f64(inV2.mValue.val[1]))) });
487#elif defined(JPH_USE_RVV)
488 DVec3 or_result;
489 const vuint64m2_t v1 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV1.mF64), 3);
490 const vuint64m2_t v2 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV2.mF64), 3);
491 const vuint64m2_t res = __riscv_vor_vv_u64m2(v1, v2, 3);
492 __riscv_vse64_v_u64m2(reinterpret_cast<uint64 *>(or_result.mF64), res, 3);
493 return or_result;
494#else
495 return DVec3(BitCast<double>(BitCast<uint64>(inV1.mF64[0]) | BitCast<uint64>(inV2.mF64[0])),
498#endif
499}
500
502{
503#if defined(JPH_USE_AVX)
504 return _mm256_xor_pd(inV1.mValue, inV2.mValue);
505#elif defined(JPH_USE_SSE)
506 return DVec3({ _mm_xor_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_xor_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
507#elif defined(JPH_USE_NEON)
508 return DVec3({ vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(inV1.mValue.val[0]), vreinterpretq_u64_f64(inV2.mValue.val[0]))),
509 vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(inV1.mValue.val[1]), vreinterpretq_u64_f64(inV2.mValue.val[1]))) });
510#elif defined(JPH_USE_RVV)
511 DVec3 xor_result;
512 const vuint64m2_t v1 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV1.mF64), 3);
513 const vuint64m2_t v2 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV2.mF64), 3);
514 const vuint64m2_t res = __riscv_vxor_vv_u64m2(v1, v2, 3);
515 __riscv_vse64_v_u64m2(reinterpret_cast<uint64 *>(xor_result.mF64), res, 3);
516 return xor_result;
517#else
518 return DVec3(BitCast<double>(BitCast<uint64>(inV1.mF64[0]) ^ BitCast<uint64>(inV2.mF64[0])),
521#endif
522}
523
525{
526#if defined(JPH_USE_AVX)
527 return _mm256_and_pd(inV1.mValue, inV2.mValue);
528#elif defined(JPH_USE_SSE)
529 return DVec3({ _mm_and_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_and_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
530#elif defined(JPH_USE_NEON)
531 return DVec3({ vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(inV1.mValue.val[0]), vreinterpretq_u64_f64(inV2.mValue.val[0]))),
532 vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(inV1.mValue.val[1]), vreinterpretq_u64_f64(inV2.mValue.val[1]))) });
533#elif defined(JPH_USE_RVV)
534 DVec3 and_result;
535 const vuint64m2_t v1 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV1.mF64), 3);
536 const vuint64m2_t v2 = __riscv_vle64_v_u64m2(reinterpret_cast<const uint64 *>(inV2.mF64), 3);
537 const vuint64m2_t res = __riscv_vand_vv_u64m2(v1, v2, 3);
538 __riscv_vse64_v_u64m2(reinterpret_cast<uint64 *>(and_result.mF64), res, 3);
539 return and_result;
540#else
541 return DVec3(BitCast<double>(BitCast<uint64>(inV1.mF64[0]) & BitCast<uint64>(inV2.mF64[0])),
544#endif
545}
546
548{
549#if defined(JPH_USE_AVX)
550 return _mm256_movemask_pd(mValue) & 0x7;
551#elif defined(JPH_USE_SSE)
552 return (_mm_movemask_pd(mValue.mLow) + (_mm_movemask_pd(mValue.mHigh) << 2)) & 0x7;
553#else
554 return int((BitCast<uint64>(mF64[0]) >> 63) | ((BitCast<uint64>(mF64[1]) >> 63) << 1) | ((BitCast<uint64>(mF64[2]) >> 63) << 2));
555#endif
556}
557
559{
560 return GetTrues() != 0;
561}
562
564{
565 return GetTrues() == 0x7;
566}
567
569{
570 return sEquals(*this, inV2).TestAllTrue();
571}
572
573bool DVec3::IsClose(DVec3Arg inV2, double inMaxDistSq) const
574{
575 return (inV2 - *this).LengthSq() <= inMaxDistSq;
576}
577
578bool DVec3::IsNearZero(double inMaxDistSq) const
579{
580 return LengthSq() <= inMaxDistSq;
581}
582
584{
585#if defined(JPH_USE_AVX)
586 return _mm256_mul_pd(mValue, inV2.mValue);
587#elif defined(JPH_USE_SSE)
588 return DVec3({ _mm_mul_pd(mValue.mLow, inV2.mValue.mLow), _mm_mul_pd(mValue.mHigh, inV2.mValue.mHigh) });
589#elif defined(JPH_USE_NEON)
590 return DVec3({ vmulq_f64(mValue.val[0], inV2.mValue.val[0]), vmulq_f64(mValue.val[1], inV2.mValue.val[1]) });
591#elif defined(JPH_USE_RVV)
592 DVec3 res;
593 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
594 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
595 const vfloat64m2_t mul = __riscv_vfmul_vv_f64m2(v1, v2, 3);
596 __riscv_vse64_v_f64m2(res.mF64, mul, 3);
597 return res;
598#else
599 return DVec3(mF64[0] * inV2.mF64[0], mF64[1] * inV2.mF64[1], mF64[2] * inV2.mF64[2]);
600#endif
601}
602
603DVec3 DVec3::operator * (double inV2) const
604{
605#if defined(JPH_USE_AVX)
606 return _mm256_mul_pd(mValue, _mm256_set1_pd(inV2));
607#elif defined(JPH_USE_SSE)
608 __m128d v = _mm_set1_pd(inV2);
609 return DVec3({ _mm_mul_pd(mValue.mLow, v), _mm_mul_pd(mValue.mHigh, v) });
610#elif defined(JPH_USE_NEON)
611 return DVec3({ vmulq_n_f64(mValue.val[0], inV2), vmulq_n_f64(mValue.val[1], inV2) });
612#elif defined(JPH_USE_RVV)
613 DVec3 res;
614 const vfloat64m2_t src = __riscv_vle64_v_f64m2(mF64, 3);
615 const vfloat64m2_t mul = __riscv_vfmul_vf_f64m2(src, inV2, 3);
616 __riscv_vse64_v_f64m2(res.mF64, mul, 3);
617 return res;
618#else
619 return DVec3(mF64[0] * inV2, mF64[1] * inV2, mF64[2] * inV2);
620#endif
621}
622
623DVec3 operator * (double inV1, DVec3Arg inV2)
624{
625#if defined(JPH_USE_AVX)
626 return _mm256_mul_pd(_mm256_set1_pd(inV1), inV2.mValue);
627#elif defined(JPH_USE_SSE)
628 __m128d v = _mm_set1_pd(inV1);
629 return DVec3({ _mm_mul_pd(v, inV2.mValue.mLow), _mm_mul_pd(v, inV2.mValue.mHigh) });
630#elif defined(JPH_USE_NEON)
631 return DVec3({ vmulq_n_f64(inV2.mValue.val[0], inV1), vmulq_n_f64(inV2.mValue.val[1], inV1) });
632#elif defined(JPH_USE_RVV)
633 DVec3 res;
634 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
635 const vfloat64m2_t mul = __riscv_vfmul_vf_f64m2(v1, inV1, 3);
636 __riscv_vse64_v_f64m2(res.mF64, mul, 3);
637 return res;
638#else
639 return DVec3(inV1 * inV2.mF64[0], inV1 * inV2.mF64[1], inV1 * inV2.mF64[2]);
640#endif
641}
642
643DVec3 DVec3::operator / (double inV2) const
644{
645#if defined(JPH_USE_AVX)
646 return _mm256_div_pd(mValue, _mm256_set1_pd(inV2));
647#elif defined(JPH_USE_SSE)
648 __m128d v = _mm_set1_pd(inV2);
649 return DVec3({ _mm_div_pd(mValue.mLow, v), _mm_div_pd(mValue.mHigh, v) });
650#elif defined(JPH_USE_NEON)
651 float64x2_t v = vdupq_n_f64(inV2);
652 return DVec3({ vdivq_f64(mValue.val[0], v), vdivq_f64(mValue.val[1], v) });
653#elif defined(JPH_USE_RVV)
654 DVec3 res;
655 const vfloat64m2_t src = __riscv_vle64_v_f64m2(mF64, 3);
656 const vfloat64m2_t div = __riscv_vfdiv_vf_f64m2(src, inV2, 3);
657 __riscv_vse64_v_f64m2(res.mF64, div, 3);
658 return res;
659#else
660 return DVec3(mF64[0] / inV2, mF64[1] / inV2, mF64[2] / inV2);
661#endif
662}
663
665{
666#if defined(JPH_USE_AVX)
667 mValue = _mm256_mul_pd(mValue, _mm256_set1_pd(inV2));
668#elif defined(JPH_USE_SSE)
669 __m128d v = _mm_set1_pd(inV2);
670 mValue.mLow = _mm_mul_pd(mValue.mLow, v);
671 mValue.mHigh = _mm_mul_pd(mValue.mHigh, v);
672#elif defined(JPH_USE_NEON)
673 mValue.val[0] = vmulq_n_f64(mValue.val[0], inV2);
674 mValue.val[1] = vmulq_n_f64(mValue.val[1], inV2);
675#elif defined(JPH_USE_RVV)
676 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
677 const vfloat64m2_t res = __riscv_vfmul_vf_f64m2(v1, inV2, 3);
678 __riscv_vse64_v_f64m2(mF64, res, 3);
679#else
680 for (int i = 0; i < 3; ++i)
681 mF64[i] *= inV2;
682 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
683 mF64[3] = mF64[2];
684 #endif
685#endif
686 return *this;
687}
688
690{
691#if defined(JPH_USE_AVX)
692 mValue = _mm256_mul_pd(mValue, inV2.mValue);
693#elif defined(JPH_USE_SSE)
694 mValue.mLow = _mm_mul_pd(mValue.mLow, inV2.mValue.mLow);
695 mValue.mHigh = _mm_mul_pd(mValue.mHigh, inV2.mValue.mHigh);
696#elif defined(JPH_USE_NEON)
697 mValue.val[0] = vmulq_f64(mValue.val[0], inV2.mValue.val[0]);
698 mValue.val[1] = vmulq_f64(mValue.val[1], inV2.mValue.val[1]);
699#elif defined(JPH_USE_RVV)
700 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
701 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
702 const vfloat64m2_t rvv_res = __riscv_vfmul_vv_f64m2(v1, v2, 3);
703 __riscv_vse64_v_f64m2(mF64, rvv_res, 3);
704#else
705 for (int i = 0; i < 3; ++i)
706 mF64[i] *= inV2.mF64[i];
707 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
708 mF64[3] = mF64[2];
709 #endif
710#endif
711 return *this;
712}
713
715{
716#if defined(JPH_USE_AVX)
717 mValue = _mm256_div_pd(mValue, _mm256_set1_pd(inV2));
718#elif defined(JPH_USE_SSE)
719 __m128d v = _mm_set1_pd(inV2);
720 mValue.mLow = _mm_div_pd(mValue.mLow, v);
721 mValue.mHigh = _mm_div_pd(mValue.mHigh, v);
722#elif defined(JPH_USE_NEON)
723 float64x2_t v = vdupq_n_f64(inV2);
724 mValue.val[0] = vdivq_f64(mValue.val[0], v);
725 mValue.val[1] = vdivq_f64(mValue.val[1], v);
726#elif defined(JPH_USE_RVV)
727 const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
728 const vfloat64m2_t res = __riscv_vfdiv_vf_f64m2(v, inV2, 3);
729 __riscv_vse64_v_f64m2(mF64, res, 3);
730#else
731 for (int i = 0; i < 3; ++i)
732 mF64[i] /= inV2;
733 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
734 mF64[3] = mF64[2];
735 #endif
736#endif
737 return *this;
738}
739
741{
742#if defined(JPH_USE_AVX)
743 return _mm256_add_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
744#elif defined(JPH_USE_SSE)
745 return DVec3({ _mm_add_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue)), _mm_add_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
746#elif defined(JPH_USE_NEON)
747 return DVec3({ vaddq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue))), vaddq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue)) });
748#elif defined(JPH_USE_RVV)
749 DVec3 res;
750 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
751 const vfloat32m1_t v2_f32 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
752 const vfloat64m2_t v2 = __riscv_vfwcvt_f_f_v_f64m2(v2_f32, 3);
753 const vfloat64m2_t rvv_add = __riscv_vfadd_vv_f64m2(v1, v2, 3);
754 __riscv_vse64_v_f64m2(res.mF64, rvv_add, 3);
755 return res;
756#else
757 return DVec3(mF64[0] + inV2.mF32[0], mF64[1] + inV2.mF32[1], mF64[2] + inV2.mF32[2]);
758#endif
759}
760
762{
763#if defined(JPH_USE_AVX)
764 return _mm256_add_pd(mValue, inV2.mValue);
765#elif defined(JPH_USE_SSE)
766 return DVec3({ _mm_add_pd(mValue.mLow, inV2.mValue.mLow), _mm_add_pd(mValue.mHigh, inV2.mValue.mHigh) });
767#elif defined(JPH_USE_NEON)
768 return DVec3({ vaddq_f64(mValue.val[0], inV2.mValue.val[0]), vaddq_f64(mValue.val[1], inV2.mValue.val[1]) });
769#elif defined(JPH_USE_RVV)
770 DVec3 res;
771 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
772 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
773 const vfloat64m2_t rvv_add = __riscv_vfadd_vv_f64m2(v1, v2, 3);
774 __riscv_vse64_v_f64m2(res.mF64, rvv_add, 3);
775 return res;
776#else
777 return DVec3(mF64[0] + inV2.mF64[0], mF64[1] + inV2.mF64[1], mF64[2] + inV2.mF64[2]);
778#endif
779}
780
782{
783#if defined(JPH_USE_AVX)
784 mValue = _mm256_add_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
785#elif defined(JPH_USE_SSE)
786 mValue.mLow = _mm_add_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue));
787 mValue.mHigh = _mm_add_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
788#elif defined(JPH_USE_NEON)
789 mValue.val[0] = vaddq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue)));
790 mValue.val[1] = vaddq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue));
791#elif defined(JPH_USE_RVV)
792 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
793 const vfloat32m1_t v2_f32 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
794 const vfloat64m2_t v2 = __riscv_vfwcvt_f_f_v_f64m2(v2_f32, 3);
795 const vfloat64m2_t rvv_add = __riscv_vfadd_vv_f64m2(v1, v2, 3);
796 __riscv_vse64_v_f64m2(mF64, rvv_add, 3);
797#else
798 for (int i = 0; i < 3; ++i)
799 mF64[i] += inV2.mF32[i];
800 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
801 mF64[3] = mF64[2];
802 #endif
803#endif
804 return *this;
805}
806
808{
809#if defined(JPH_USE_AVX)
810 mValue = _mm256_add_pd(mValue, inV2.mValue);
811#elif defined(JPH_USE_SSE)
812 mValue.mLow = _mm_add_pd(mValue.mLow, inV2.mValue.mLow);
813 mValue.mHigh = _mm_add_pd(mValue.mHigh, inV2.mValue.mHigh);
814#elif defined(JPH_USE_NEON)
815 mValue.val[0] = vaddq_f64(mValue.val[0], inV2.mValue.val[0]);
816 mValue.val[1] = vaddq_f64(mValue.val[1], inV2.mValue.val[1]);
817#elif defined(JPH_USE_RVV)
818 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
819 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
820 const vfloat64m2_t rvv_add = __riscv_vfadd_vv_f64m2(v1, v2, 3);
821 __riscv_vse64_v_f64m2(mF64, rvv_add, 3);
822#else
823 for (int i = 0; i < 3; ++i)
824 mF64[i] += inV2.mF64[i];
825 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
826 mF64[3] = mF64[2];
827 #endif
828#endif
829 return *this;
830}
831
833{
834#if defined(JPH_USE_AVX)
835 return _mm256_sub_pd(_mm256_setzero_pd(), mValue);
836#elif defined(JPH_USE_SSE)
837 __m128d zero = _mm_setzero_pd();
838 return DVec3({ _mm_sub_pd(zero, mValue.mLow), _mm_sub_pd(zero, mValue.mHigh) });
839#elif defined(JPH_USE_NEON)
840 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
841 float64x2_t zero = vdupq_n_f64(0);
842 return DVec3({ vsubq_f64(zero, mValue.val[0]), vsubq_f64(zero, mValue.val[1]) });
843 #else
844 return DVec3({ vnegq_f64(mValue.val[0]), vnegq_f64(mValue.val[1]) });
845 #endif
846#elif defined(JPH_USE_RVV)
847 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
848 DVec3 res;
849 const vfloat64m2_t rvv_zero = __riscv_vfmv_v_f_f64m2(0.0, 3);
850 const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
851 const vfloat64m2_t rvv_neg = __riscv_vfsub_vv_f64m2(rvv_zero, v, 3);
852 __riscv_vse64_v_f64m2(res.mF64, rvv_neg, 3);
853 return res;
854 #else
855 DVec3 res;
856 const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
857 const vfloat64m2_t rvv_neg = __riscv_vfsgnjn_vv_f64m2(v, v, 3);
858 __riscv_vse64_v_f64m2(res.mF64, rvv_neg, 3);
859 return res;
860 #endif
861#else
862 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
863 return DVec3(0.0 - mF64[0], 0.0 - mF64[1], 0.0 - mF64[2]);
864 #else
865 return DVec3(-mF64[0], -mF64[1], -mF64[2]);
866 #endif
867#endif
868}
869
// Component-wise subtraction of a single precision vector from this double precision vector.
// NOTE(review): the signature line (870) is missing from this listing; the body reads
// inV2.mValue / inV2.mF32 as floats and returns a new DVec3, so this is presumably
// DVec3::operator - (Vec3Arg inV2) const -- confirm against the header.
// Each path widens inV2 to double first and then subtracts in double precision.
871{
872#if defined(JPH_USE_AVX)
// Convert the 4 floats to 4 doubles, then subtract all lanes at once
873 return _mm256_sub_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
874#elif defined(JPH_USE_SSE)
// Low half: convert the two lowest floats (X, Y). High half: broadcast Z with the (2, 2, 2, 2)
// shuffle before converting, so both lanes of the high half are subtracted by Z.
875 return DVec3({ _mm_sub_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue)), _mm_sub_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
876#elif defined(JPH_USE_NEON)
// vcvt_f64_f32 widens float lanes 0-1, vcvt_high_f64_f32 widens float lanes 2-3
877 return DVec3({ vsubq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue))), vsubq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue)) });
878#elif defined(JPH_USE_RVV)
// vl = 3 throughout: only X, Y and Z are processed; res.mF64[3] is not written by this path
879 DVec3 res;
880 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
881 const vfloat32m1_t v2_f32 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
// Widening convert float -> double
882 const vfloat64m2_t v2 = __riscv_vfwcvt_f_f_v_f64m2(v2_f32, 3);
883 const vfloat64m2_t rvv_sub = __riscv_vfsub_vv_f64m2(v1, v2, 3);
884 __riscv_vse64_v_f64m2(res.mF64, rvv_sub, 3);
885 return res;
886#else
// Scalar fallback: the float operands are promoted to double by the subtraction
887 return DVec3(mF64[0] - inV2.mF32[0], mF64[1] - inV2.mF32[1], mF64[2] - inV2.mF32[2]);
888#endif
889}
890
// Component-wise subtraction of two double precision vectors, returning a new DVec3.
// NOTE(review): the signature line (891) is missing from this listing; the body reads
// inV2.mValue / inV2.mF64 as doubles, so this is presumably
// DVec3::operator - (DVec3Arg inV2) const -- confirm against the header.
892{
893#if defined(JPH_USE_AVX)
894 return _mm256_sub_pd(mValue, inV2.mValue);
895#elif defined(JPH_USE_SSE)
// Subtract the low and high 2-lane halves separately
896 return DVec3({ _mm_sub_pd(mValue.mLow, inV2.mValue.mLow), _mm_sub_pd(mValue.mHigh, inV2.mValue.mHigh) });
897#elif defined(JPH_USE_NEON)
898 return DVec3({ vsubq_f64(mValue.val[0], inV2.mValue.val[0]), vsubq_f64(mValue.val[1], inV2.mValue.val[1]) });
899#elif defined(JPH_USE_RVV)
// vl = 3 throughout: only X, Y and Z are processed; res.mF64[3] is not written by this path
900 DVec3 res;
901 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
902 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
903 const vfloat64m2_t rvv_sub = __riscv_vfsub_vv_f64m2(v1, v2, 3);
904 __riscv_vse64_v_f64m2(res.mF64, rvv_sub, 3);
905 return res;
906#else
907 return DVec3(mF64[0] - inV2.mF64[0], mF64[1] - inV2.mF64[1], mF64[2] - inV2.mF64[2]);
908#endif
909}
910
// In-place component-wise subtraction of a single precision vector: body of
// DVec3::operator -= (Vec3Arg inV2). (The signature line, 911, is missing from this listing.)
// inV2 is widened to double before subtracting. Returns *this.
912{
913#if defined(JPH_USE_AVX)
914 mValue = _mm256_sub_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
915#elif defined(JPH_USE_SSE)
// Low half gets (X, Y); the (2, 2, 2, 2) shuffle broadcasts Z so the high half is subtracted by (Z, Z)
916 mValue.mLow = _mm_sub_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue));
917 mValue.mHigh = _mm_sub_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
918#elif defined(JPH_USE_NEON)
919 mValue.val[0] = vsubq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue)));
920 mValue.val[1] = vsubq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue));
921#elif defined(JPH_USE_RVV)
// vl = 3: only X, Y and Z are updated; mF64[3] is left untouched by this path
922 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
923 const vfloat32m1_t v2_f32 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
924 const vfloat64m2_t v2 = __riscv_vfwcvt_f_f_v_f64m2(v2_f32, 3);
925 const vfloat64m2_t rvv_sub = __riscv_vfsub_vv_f64m2(v1, v2, 3);
926 __riscv_vse64_v_f64m2(mF64, rvv_sub, 3);
927#else
928 for (int i = 0; i < 3; ++i)
929 mF64[i] -= inV2.mF32[i];
// Copy the new Z into the 4th element so that W stays equal to Z
930 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
931 mF64[3] = mF64[2];
932 #endif
933#endif
934 return *this;
935}
936
// In-place component-wise subtraction of a double precision vector. Returns *this.
// NOTE(review): the signature line (937) is missing from this listing; the body reads
// inV2.mValue / inV2.mF64 as doubles and returns *this, so this is presumably
// DVec3::operator -= (DVec3Arg inV2) -- confirm against the header.
938{
939#if defined(JPH_USE_AVX)
940 mValue = _mm256_sub_pd(mValue, inV2.mValue);
941#elif defined(JPH_USE_SSE)
942 mValue.mLow = _mm_sub_pd(mValue.mLow, inV2.mValue.mLow);
943 mValue.mHigh = _mm_sub_pd(mValue.mHigh, inV2.mValue.mHigh);
944#elif defined(JPH_USE_NEON)
945 mValue.val[0] = vsubq_f64(mValue.val[0], inV2.mValue.val[0]);
946 mValue.val[1] = vsubq_f64(mValue.val[1], inV2.mValue.val[1]);
947#elif defined(JPH_USE_RVV)
// vl = 3: only X, Y and Z are updated; mF64[3] is left untouched by this path
948 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
949 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
950 const vfloat64m2_t rvv_sub = __riscv_vfsub_vv_f64m2(v1, v2, 3);
951 __riscv_vse64_v_f64m2(mF64, rvv_sub, 3);
952#else
953 for (int i = 0; i < 3; ++i)
954 mF64[i] -= inV2.mF64[i];
// Copy the new Z into the 4th element so that W stays equal to Z
955 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
956 mF64[3] = mF64[2];
957 #endif
958#endif
959 return *this;
960}
961
// Component-wise division of this vector by another double precision vector, returning a new DVec3.
// NOTE(review): the signature line (962) is missing from this listing; presumably
// DVec3::operator / (DVec3Arg inV2) const -- confirm against the header.
963{
// The AVX, SSE and NEON paths divide all four lanes, including the unused W lane, so the
// divisor's W is checked first (CheckW is documented as checking that W equals Z).
964 inV2.CheckW();
965#if defined(JPH_USE_AVX)
966 return _mm256_div_pd(mValue, inV2.mValue);
967#elif defined(JPH_USE_SSE)
968 return DVec3({ _mm_div_pd(mValue.mLow, inV2.mValue.mLow), _mm_div_pd(mValue.mHigh, inV2.mValue.mHigh) });
969#elif defined(JPH_USE_NEON)
970 return DVec3({ vdivq_f64(mValue.val[0], inV2.mValue.val[0]), vdivq_f64(mValue.val[1], inV2.mValue.val[1]) });
971#elif defined(JPH_USE_RVV)
// vl = 3 throughout: only X, Y and Z are divided; res.mF64[3] is not written by this path
972 DVec3 res;
973 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
974 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
975 const vfloat64m2_t rvv_div = __riscv_vfdiv_vv_f64m2(v1, v2, 3);
976 __riscv_vse64_v_f64m2(res.mF64, rvv_div, 3);
977 return res;
978#else
979 return DVec3(mF64[0] / inV2.mF64[0], mF64[1] / inV2.mF64[1], mF64[2] / inV2.mF64[2]);
980#endif
981}
982
// Absolute value of each component: body of DVec3::Abs() const.
// (The signature line, 983, is missing from this listing.)
984{
985#if defined(JPH_USE_AVX512)
// Both operands are the same vector, so the range selection just yields v.
// NOTE(review): imm8 0b1000 should be "min" (bits 1:0 = 00) with sign control
// "clear sign bit" (bits 3:2 = 10), i.e. |v| -- confirm against the VRANGEPD reference.
986 return _mm256_range_pd(mValue, mValue, 0b1000);
987#elif defined(JPH_USE_AVX)
// |v| computed as max(0 - v, v)
988 return _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(), mValue), mValue);
989#elif defined(JPH_USE_SSE)
// Same max(0 - v, v), applied to the low and high halves separately
990 __m128d zero = _mm_setzero_pd();
991 return DVec3({ _mm_max_pd(_mm_sub_pd(zero, mValue.mLow), mValue.mLow), _mm_max_pd(_mm_sub_pd(zero, mValue.mHigh), mValue.mHigh) });
992#elif defined(JPH_USE_NEON)
993 return DVec3({ vabsq_f64(mValue.val[0]), vabsq_f64(mValue.val[1]) });
994#elif defined(JPH_USE_RVV)
// vl = 3 throughout: only X, Y and Z are processed; res.mF64[3] is not written by this path
995 DVec3 res;
996 const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
// vfsgnj(v, 1.0): magnitude of v with the (positive) sign of 1.0
997 const vfloat64m2_t rvv_abs = __riscv_vfsgnj_vf_f64m2(v, 1.0, 3);
998 __riscv_vse64_v_f64m2(res.mF64, rvv_abs, 3);
999 return res;
1000#else
1001 return DVec3(abs(mF64[0]), abs(mF64[1]), abs(mF64[2]));
1002#endif
1003}
1004
// Component-wise reciprocal (1 / value): body of DVec3::Reciprocal() const.
// (The signature line, 1005, is missing from this listing.)
1006{
// Divides the all-ones vector by this vector's raw storage (mValue).
// NOTE(review): presumably mValue converts to DVec3 implicitly so that the
// DVec3 / DVec3 operator is selected -- confirm against the header.
1007 return sOne() / mValue;
1008}
1009
// Cross product of this vector with inV2: body of DVec3::Cross(DVec3Arg inV2) const.
// (The signature line, 1010, is missing from this listing.)
// Only AVX2 and RVV have a vectorised path; every other build (including AVX without AVX2,
// SSE and NEON) uses the scalar formula in the #else branch.
1011{
1012#if defined(JPH_USE_AVX2)
// _MM_SHUFFLE(0, 0, 2, 1) reorders the lanes (X, Y, Z, W) to (Y, Z, X, X).
// With a = *this and b = inV2:
//   t1 = b.yzx * a.xyz, t2 = a.yzx * b.xyz, t3 = t1 - t2 = (cross.z, cross.x, cross.y, ...)
// so applying the same reorder to t3 yields (cross.x, cross.y, cross.z, cross.z).
1013 __m256d t1 = _mm256_permute4x64_pd(inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
1014 t1 = _mm256_mul_pd(t1, mValue);
1015 __m256d t2 = _mm256_permute4x64_pd(mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
1016 t2 = _mm256_mul_pd(t2, inV2.mValue);
1017 __m256d t3 = _mm256_sub_pd(t1, t2);
1018 return _mm256_permute4x64_pd(t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
1019#elif defined(JPH_USE_RVV)
// Same algorithm as the AVX2 path, with the (Y, Z, X) reorder done by a register gather
// on indices { 1, 2, 0 }. vl = 3, so cross_result.mF64[3] is not written by this path.
1020 const uint64 indices[3] = { 1, 2, 0 };
1021 const vuint64m2_t gather_indices = __riscv_vle64_v_u64m2(indices, 3);
1022 const vfloat64m2_t v0 = __riscv_vle64_v_f64m2(mF64, 3);
1023 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
1024 vfloat64m2_t t0 = __riscv_vrgather_vv_f64m2(v1, gather_indices, 3);
1025 t0 = __riscv_vfmul_vv_f64m2(t0, v0, 3);
1026 vfloat64m2_t t1 = __riscv_vrgather_vv_f64m2(v0, gather_indices, 3);
1027 t1 = __riscv_vfmul_vv_f64m2(t1, v1, 3);
1028 const vfloat64m2_t sub = __riscv_vfsub_vv_f64m2(t0, t1, 3);
1029 const vfloat64m2_t cross = __riscv_vrgather_vv_f64m2(sub, gather_indices, 3);
1030
1031 DVec3 cross_result;
1032 __riscv_vse64_v_f64m2(cross_result.mF64, cross, 3);
1033 return cross_result;
1034#else
// Scalar fallback: the textbook component formula
1035 return DVec3(mF64[1] * inV2.mF64[2] - mF64[2] * inV2.mF64[1],
1036 mF64[2] * inV2.mF64[0] - mF64[0] * inV2.mF64[2],
1037 mF64[0] * inV2.mF64[1] - mF64[1] * inV2.mF64[0]);
1038#endif
1039}
1040
// Dot product of this vector with inV2: X*X' + Y*Y' + Z*Z'.
// The 4th (W) lane never contributes to the returned value in any of the paths below.
1041double DVec3::Dot(DVec3Arg inV2) const
1042{
1043#if defined(JPH_USE_AVX)
// Multiply all four lanes, then reduce: lane 0 of (xy + yx) is X*X' + Y*Y',
// and lane 0 of the upper 128 bits holds Z*Z'.
1044 __m256d mul = _mm256_mul_pd(mValue, inV2.mValue);
1045 __m128d xy = _mm256_castpd256_pd128(mul);
1046 __m128d yx = _mm_shuffle_pd(xy, xy, 1);
1047 __m128d sum = _mm_add_pd(xy, yx);
1048 __m128d zw = _mm256_extractf128_pd(mul, 1);
1049 sum = _mm_add_pd(sum, zw);
// Only lane 0 is returned
1050 return _mm_cvtsd_f64(sum);
1051#elif defined(JPH_USE_SSE)
1052 __m128d xy = _mm_mul_pd(mValue.mLow, inV2.mValue.mLow);
1053 __m128d yx = _mm_shuffle_pd(xy, xy, 1);
1054 __m128d sum = _mm_add_pd(xy, yx);
// _mm_mul_sd multiplies only lane 0 (Z); the upper lane is ignored because only lane 0 is returned
1055 __m128d z = _mm_mul_sd(mValue.mHigh, inV2.mValue.mHigh);
1056 sum = _mm_add_pd(sum, z);
1057 return _mm_cvtsd_f64(sum);
1058#elif defined(JPH_USE_NEON)
1059 float64x2_t mul_low = vmulq_f64(mValue.val[0], inV2.mValue.val[0]);
1060 float64x2_t mul_high = vmulq_f64(mValue.val[1], inV2.mValue.val[1]);
// Horizontal add of the (X, Y) products plus lane 0 (Z) of the high products
1061 return vaddvq_f64(mul_low) + vgetq_lane_f64(mul_high, 0);
1062#elif defined(JPH_USE_RVV)
// Multiply 3 elements, then do an ordered sum reduction seeded with 0.0
1063 const vfloat64m1_t zeros = __riscv_vfmv_v_f_f64m1(0.0, 3);
1064 const vfloat64m2_t v1 = __riscv_vle64_v_f64m2(mF64, 3);
1065 const vfloat64m2_t v2 = __riscv_vle64_v_f64m2(inV2.mF64, 3);
1066 const vfloat64m2_t mul = __riscv_vfmul_vv_f64m2(v1, v2, 3);
1067 const vfloat64m1_t sum = __riscv_vfredosum_vs_f64m2_f64m1(mul, zeros, 3);
1068 return __riscv_vfmv_f_s_f64m1_f64(sum);
1069#else
// Scalar fallback: accumulate the three products in order X, Y, Z
1070 double dot = 0.0;
1071 for (int i = 0; i < 3; i++)
1072 dot += mF64[i] * inV2.mF64[i];
1073 return dot;
1074#endif
1075}
1076
// Squared length of the vector, computed as the dot product of the vector with itself.
1077double DVec3::LengthSq() const
1078{
1079 return Dot(*this);
1080}
1081
// Component-wise square root: body of DVec3::Sqrt() const.
// (The signature line, 1082, is missing from this listing.)
1083{
1084#if defined(JPH_USE_AVX)
1085 return _mm256_sqrt_pd(mValue);
1086#elif defined(JPH_USE_SSE)
// Low and high 2-lane halves are processed separately
1087 return DVec3({ _mm_sqrt_pd(mValue.mLow), _mm_sqrt_pd(mValue.mHigh) });
1088#elif defined(JPH_USE_NEON)
1089 return DVec3({ vsqrtq_f64(mValue.val[0]), vsqrtq_f64(mValue.val[1]) });
1090#elif defined(JPH_USE_RVV)
// vl = 3 throughout: only X, Y and Z are processed; res.mF64[3] is not written by this path
1091 DVec3 res;
1092 const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
1093 const vfloat64m2_t rvv_sqrt = __riscv_vfsqrt_v_f64m2(v, 3);
1094 __riscv_vse64_v_f64m2(res.mF64, rvv_sqrt, 3);
1095 return res;
1096#else
1097 return DVec3(sqrt(mF64[0]), sqrt(mF64[1]), sqrt(mF64[2]));
1098#endif
1099}
1100
// Euclidean length of the vector: the square root of its squared length.
1101double DVec3::Length() const
1102{
1103 return sqrt(LengthSq());
1104}
1105
// Returns this vector scaled to unit length: body of DVec3::Normalized() const.
// (The signature line, 1106, is missing from this listing.)
1107{
// No zero-length check is made here: a vector whose Length() is 0 is divided by 0.0.
// Presumably callers must pass a non-zero vector -- TODO confirm.
1108 return *this / Length();
1109}
1110
// Returns true when the squared length of this vector is within inTolerance of 1.
// The comparison is made on the squared length, so no square root is taken.
1111bool DVec3::IsNormalized(double inTolerance) const
1112{
1113 return abs(Dot(*this) - 1.0) <= inTolerance;
1114}
1115
// Returns true if X, Y or Z is NaN. The 4th (W) lane is ignored by every path.
// There is no NEON branch: NEON builds use the scalar fallback.
1116bool DVec3::IsNaN() const
1117{
1118#if defined(JPH_USE_AVX512)
// NOTE(review): fpclass categories 0b10000001 should be bit 0 (QNaN) and bit 7 (SNaN) --
// confirm against the VFPCLASSPD reference. "& 0x7" keeps only the X, Y, Z lanes.
1119 return (_mm256_fpclass_pd_mask(mValue, 0b10000001) & 0x7) != 0;
1120#elif defined(JPH_USE_AVX)
// A value is unordered with itself only when it is NaN; "& 0x7" drops the W lane
1121 return (_mm256_movemask_pd(_mm256_cmp_pd(mValue, mValue, _CMP_UNORD_Q)) & 0x7) != 0;
1122#elif defined(JPH_USE_SSE)
// Combine the 2-bit masks of the low half (bits 0-1) and high half (bits 2-3), then drop W (bit 3)
1123 return ((_mm_movemask_pd(_mm_cmpunord_pd(mValue.mLow, mValue.mLow)) + (_mm_movemask_pd(_mm_cmpunord_pd(mValue.mHigh, mValue.mHigh)) << 2)) & 0x7) != 0;
1124#elif defined(JPH_USE_RVV)
// v == v is false only for NaN: count the elements that compare equal to themselves
1125 const vfloat64m2_t v = __riscv_vle64_v_f64m2(mF64, 3);
1126 const vbool32_t mask = __riscv_vmfeq_vv_f64m2_b32(v, v, 3);
1127 const uint32 eq = __riscv_vcpop_m_b32(mask, 3);
// Fewer than 3 self-equal elements means at least one NaN
1128 return eq != 3;
1129#else
1130 return isnan(mF64[0]) || isnan(mF64[1]) || isnan(mF64[2]);
1131#endif
1132}
1133
// Returns a vector holding the sign of each component as +1.0 or -1.0: body of
// DVec3::GetSign() const. (The signature line, 1134, is missing from this listing.)
// Except for the AVX512 path, every branch derives the result from the sign bit alone,
// so -0.0 yields -1.0 and +0.0 yields +1.0.
1135{
1136#if defined(JPH_USE_AVX512)
// NOTE(review): 0xA9A90A00 is the VFIXUPIMM token table, one 4-bit response per input class.
// Decoded (9 = -1, 10 = +1, 0 = keep destination) it appears to give +1 for both zeros
// and to pass NaN through, unlike the sign-bit based paths below -- confirm
// against the Intel reference before relying on this.
1137 return _mm256_fixupimm_pd(mValue, mValue, _mm256_set1_epi32(0xA9A90A00), 0);
1138#elif defined(JPH_USE_AVX)
// (v & bits(-1.0)) | bits(1.0): the AND keeps v's sign bit (and only exponent bits that 1.0
// also has set), the OR then fills in the bit pattern of 1.0, giving +1.0 or -1.0
1139 __m256d minus_one = _mm256_set1_pd(-1.0);
1140 __m256d one = _mm256_set1_pd(1.0);
1141 return _mm256_or_pd(_mm256_and_pd(mValue, minus_one), one);
1142#elif defined(JPH_USE_SSE)
// Same bit trick on the low and high halves
1143 __m128d minus_one = _mm_set1_pd(-1.0);
1144 __m128d one = _mm_set1_pd(1.0);
1145 return DVec3({ _mm_or_pd(_mm_and_pd(mValue.mLow, minus_one), one), _mm_or_pd(_mm_and_pd(mValue.mHigh, minus_one), one) });
1146#elif defined(JPH_USE_NEON)
// Same bit trick on integer reinterpretations; the float literals are widened to double by vdupq_n_f64
1147 uint64x2_t minus_one = vreinterpretq_u64_f64(vdupq_n_f64(-1.0f));
1148 uint64x2_t one = vreinterpretq_u64_f64(vdupq_n_f64(1.0f));
1149 return DVec3({ vreinterpretq_f64_u64(vorrq_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[0]), minus_one), one)),
1150 vreinterpretq_f64_u64(vorrq_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[1]), minus_one), one)) });
1151#elif defined(JPH_USE_RVV)
// vl = 3 throughout: only X, Y and Z are processed; res.mF64[3] is not written by this path
1152 DVec3 res;
1153 const vfloat64m2_t rvv_in = __riscv_vle64_v_f64m2(mF64, 3);
1154 const vfloat64m2_t rvv_one = __riscv_vfmv_v_f_f64m2(1.0, 3);
// vfsgnj(one, in): magnitude 1.0 with the sign of the input
1155 const vfloat64m2_t rvv_signs = __riscv_vfsgnj_vv_f64m2(rvv_one, rvv_in, 3);
1156 __riscv_vse64_v_f64m2(res.mF64, rvv_signs, 3);
1157 return res;
1158#else
1159 return DVec3(std::signbit(mF64[0])? -1.0 : 1.0,
1160 std::signbit(mF64[1])? -1.0 : 1.0,
1161 std::signbit(mF64[2])? -1.0 : 1.0);
1162#endif
1163}
1164
// Body of DVec3::PrepareRoundToZero() const. (The signature line, 1165, is missing from this listing.)
// Returns a copy of this vector in which the low 29 mantissa bits of each component are cleared.
// Clearing bits only ever reduces the magnitude, so each component moves towards zero.
1166{
1167 // Float has 23 bit mantissa, double 52 bit mantissa => we lose 29 bits when converting from double to float
// (1U << 29) - 1 == 0x1FFFFFFF; its complement, taken in 64 bits, keeps everything above bit 28
1168 constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;
1169
1170#if defined(JPH_USE_AVX)
1171 return _mm256_and_pd(mValue, _mm256_castsi256_pd(_mm256_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss))));
1172#elif defined(JPH_USE_SSE)
// Same mask applied to the low and high halves
1173 __m128d mask = _mm_castsi128_pd(_mm_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss)));
1174 return DVec3({ _mm_and_pd(mValue.mLow, mask), _mm_and_pd(mValue.mHigh, mask) });
1175#elif defined(JPH_USE_NEON)
1176 uint64x2_t mask = vdupq_n_u64(~cDoubleToFloatMantissaLoss);
1177 return DVec3({ vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[0]), mask)),
1178 vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[1]), mask)) });
1179#elif defined(JPH_USE_RVV)
// Reinterpret as integers, AND with the mask, reinterpret back.
// vl = 3, so res.mF64[3] is not written by this path.
1180 const vfloat64m2_t dvec = __riscv_vle64_v_f64m2(mF64, 3);
1181 const vuint64m2_t dvec_u64 = __riscv_vreinterpret_v_f64m2_u64m2(dvec);
1182 const vuint64m2_t chopped = __riscv_vand_vx_u64m2(dvec_u64, ~cDoubleToFloatMantissaLoss, 3);
1183 const vfloat64m2_t chopped_f64 = __riscv_vreinterpret_v_u64m2_f64m2(chopped);
1184
1185 DVec3 res;
1186 __riscv_vse64_v_f64m2(res.mF64, chopped_f64, 3);
1187 return res;
1188#else
// Scalar fallback: mask the bit pattern of each component individually
1189 double x = BitCast<double>(BitCast<uint64>(mF64[0]) & ~cDoubleToFloatMantissaLoss);
1190 double y = BitCast<double>(BitCast<uint64>(mF64[1]) & ~cDoubleToFloatMantissaLoss);
1191 double z = BitCast<double>(BitCast<uint64>(mF64[2]) & ~cDoubleToFloatMantissaLoss);
1192
1193 return DVec3(x, y, z);
1194#endif
1195}
1196
// Body of DVec3::PrepareRoundToInf() const. (The signature line, 1197, is missing from this listing.)
// For each component: if any of the low 29 mantissa bits is set, all 29 are set;
// if none is set, the component is returned unchanged.
// Setting bits only ever increases the magnitude, so a component moves away from zero.
// Builds with plain SSE (without SSE4.1) have no branch of their own and use the scalar fallback.
1198{
1199 // Float has 23 bit mantissa, double 52 bit mantissa => we lose 29 bits when converting from double to float
1200 constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;
1201
1202#if defined(JPH_USE_AVX512)
1203 __m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
// testn sets the mask bit of a lane when (value & mantissa_loss) == 0, as an integer test
1204 __mmask8 is_zero = _mm256_testn_epi64_mask(_mm256_castpd_si256(mValue), mantissa_loss);
1205 __m256d value_or_mantissa_loss = _mm256_or_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
// Where is_zero is set take the unchanged value, elsewhere the value with the low bits filled
1206 return _mm256_mask_blend_pd(is_zero, value_or_mantissa_loss, mValue);
1207#elif defined(JPH_USE_AVX)
1208 __m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
1209 __m256d value_and_mantissa_loss = _mm256_and_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
// NOTE(review): this and the SSE4.1 / NEON paths test the masked bits with a floating point
// compare against 0.0. The masked pattern is a denormal when non-zero, so this presumably
// relies on denormal inputs not being treated as zero -- TODO confirm.
1210 __m256d is_zero = _mm256_cmp_pd(value_and_mantissa_loss, _mm256_setzero_pd(), _CMP_EQ_OQ);
1211 __m256d value_or_mantissa_loss = _mm256_or_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
// blendv picks the second operand (unchanged value) in lanes where is_zero is set
1212 return _mm256_blendv_pd(value_or_mantissa_loss, mValue, is_zero);
1213#elif defined(JPH_USE_SSE4_1)
// Same as the AVX path, carried out on the low and high halves separately
1214 __m128i mantissa_loss = _mm_set1_epi64x(cDoubleToFloatMantissaLoss);
1215 __m128d zero = _mm_setzero_pd();
1216 __m128d value_and_mantissa_loss_low = _mm_and_pd(mValue.mLow, _mm_castsi128_pd(mantissa_loss));
1217 __m128d is_zero_low = _mm_cmpeq_pd(value_and_mantissa_loss_low, zero);
1218 __m128d value_or_mantissa_loss_low = _mm_or_pd(mValue.mLow, _mm_castsi128_pd(mantissa_loss));
1219 __m128d value_and_mantissa_loss_high = _mm_and_pd(mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
1220 __m128d is_zero_high = _mm_cmpeq_pd(value_and_mantissa_loss_high, zero);
1221 __m128d value_or_mantissa_loss_high = _mm_or_pd(mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
1222 return DVec3({ _mm_blendv_pd(value_or_mantissa_loss_low, mValue.mLow, is_zero_low), _mm_blendv_pd(value_or_mantissa_loss_high, mValue.mHigh, is_zero_high) });
1223#elif defined(JPH_USE_NEON)
1224 uint64x2_t mantissa_loss = vdupq_n_u64(cDoubleToFloatMantissaLoss);
1225 float64x2_t zero = vdupq_n_f64(0.0);
1226 float64x2_t value_and_mantissa_loss_low = vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[0]), mantissa_loss));
1227 uint64x2_t is_zero_low = vceqq_f64(value_and_mantissa_loss_low, zero);
1228 float64x2_t value_or_mantissa_loss_low = vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(mValue.val[0]), mantissa_loss));
1229 float64x2_t value_and_mantissa_loss_high = vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[1]), mantissa_loss));
// vbslq takes the first value operand (unchanged value) where the mask bits are set
1230 float64x2_t value_low = vbslq_f64(is_zero_low, mValue.val[0], value_or_mantissa_loss_low);
1231 uint64x2_t is_zero_high = vceqq_f64(value_and_mantissa_loss_high, zero);
1232 float64x2_t value_or_mantissa_loss_high = vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(mValue.val[1]), mantissa_loss));
1233 float64x2_t value_high = vbslq_f64(is_zero_high, mValue.val[1], value_or_mantissa_loss_high);
1234 return DVec3({ value_low, value_high });
1235#elif defined(JPH_USE_RVV)
// Integer version: test (bits & mask) == 0 and merge accordingly.
// vl = 3, so res.mF64[3] is not written by this path.
1236 const vfloat64m2_t dvec = __riscv_vle64_v_f64m2(mF64, 3);
1237 const vuint64m2_t dvec_u64 = __riscv_vreinterpret_v_f64m2_u64m2(dvec);
1238 const vuint64m2_t and_loss = __riscv_vand_vx_u64m2(dvec_u64, cDoubleToFloatMantissaLoss, 3);
1239 const vuint64m2_t or_loss = __riscv_vor_vx_u64m2(dvec_u64, cDoubleToFloatMantissaLoss, 3);
1240 const vbool32_t is_zero = __riscv_vmseq_vx_u64m2_b32(and_loss, 0x0, 3);
// vmerge takes the second operand (unchanged bits) where is_zero is set
1241 const vuint64m2_t select = __riscv_vmerge_vvm_u64m2(or_loss, dvec_u64, is_zero, 3);
1242 const vfloat64m2_t select_f64 = __riscv_vreinterpret_v_u64m2_f64m2(select);
1243
1244 DVec3 res;
1245 __riscv_vse64_v_f64m2(res.mF64, select_f64, 3);
1246 return res;
1247#else
// Scalar fallback: integer test and fill on the bit pattern of each component
1248 uint64 ux = BitCast<uint64>(mF64[0]);
1249 uint64 uy = BitCast<uint64>(mF64[1]);
1250 uint64 uz = BitCast<uint64>(mF64[2]);
1251
1252 double x = BitCast<double>((ux & cDoubleToFloatMantissaLoss) == 0? ux : (ux | cDoubleToFloatMantissaLoss));
1253 double y = BitCast<double>((uy & cDoubleToFloatMantissaLoss) == 0? uy : (uy | cDoubleToFloatMantissaLoss));
1254 double z = BitCast<double>((uz & cDoubleToFloatMantissaLoss) == 0? uz : (uz | cDoubleToFloatMantissaLoss));
1255
1256 return DVec3(x, y, z);
1257#endif
1258}
1259
// Converts to a float Vec3, rounding each component down: body of DVec3::ToVec3RoundDown() const.
// (The signature line, 1260, is missing from this listing.)
1261{
1262 DVec3 to_zero = PrepareRoundToZero();
1263 DVec3 to_inf = PrepareRoundToInf();
// sSelect(inNotSet, inSet, inControl): components that are < 0 take to_inf (away from zero,
// i.e. down for a negative number); all other components take to_zero
1264 return Vec3(DVec3::sSelect(to_zero, to_inf, DVec3::sLess(*this, DVec3::sZero())));
1265}
1266
// Converts to a float Vec3, rounding each component up: body of DVec3::ToVec3RoundUp() const.
// (The signature line, 1267, is missing from this listing.)
1268{
1269 DVec3 to_zero = PrepareRoundToZero();
1270 DVec3 to_inf = PrepareRoundToInf();
// sSelect(inNotSet, inSet, inControl): components that are < 0 take to_zero (towards zero,
// i.e. up for a negative number); all other components take to_inf
1271 return Vec3(DVec3::sSelect(to_inf, to_zero, DVec3::sLess(*this, DVec3::sZero())));
1272}
1273
std::uint64_t uint64
Definition Core.h:510
#define JPH_NAMESPACE_END
Definition Core.h:428
std::uint32_t uint32
Definition Core.h:508
#define JPH_NAMESPACE_BEGIN
Definition Core.h:422
DVec3 operator*(double inV1, DVec3Arg inV2)
Definition DVec3.inl:623
#define xy
Definition HLSLToCPP.h:511
#define yx
Definition HLSLToCPP.h:512
#define JPH_MAKE_HASHABLE(type,...)
Definition HashCombine.h:223
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
JPH_INLINE To BitCast(const From &inValue)
Definition Math.h:192
Definition DVec3.h:14
static JPH_INLINE DVec3 sLess(DVec3Arg inV1, DVec3Arg inV2)
Less than (component wise)
Definition DVec3.inl:323
double mF64[4]
Definition DVec3.h:283
static JPH_INLINE DVec3 sMax(DVec3Arg inV1, DVec3Arg inV2)
Return the maximum of each of the components.
Definition DVec3.inl:272
JPH_INLINE bool TestAnyTrue() const
Test if any of the components are true (true is when highest bit of component is set)
Definition DVec3.inl:558
JPH_INLINE Vec3 ToVec3RoundDown() const
Convert to float vector 3 rounding down.
Definition DVec3.inl:1260
static JPH_INLINE DVec3 sClamp(DVec3Arg inV, DVec3Arg inMin, DVec3Arg inMax)
Clamp a vector between min and max (component wise)
Definition DVec3.inl:294
static JPH_INLINE DVec3 sMin(DVec3Arg inV1, DVec3Arg inV2)
Return the minimum value of each of the components.
Definition DVec3.inl:250
JPH_INLINE int GetTrues() const
Store if X is true in bit 0, Y in bit 1, Z in bit 2 and W in bit 3 (true is when highest bit of component is set)
Definition DVec3.inl:547
static JPH_INLINE DVec3 sAnd(DVec3Arg inV1, DVec3Arg inV2)
Logical and (component wise)
Definition DVec3.inl:524
JPH_INLINE DVec3 & operator*=(double inV2)
Multiply vector with double.
Definition DVec3.inl:664
JPH_INLINE DVec3 Abs() const
Return the absolute value of each of the components.
Definition DVec3.inl:983
static JPH_INLINE DVec3 sFusedMultiplyAdd(DVec3Arg inMul1, DVec3Arg inMul2, DVec3Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition DVec3.inl:419
static JPH_INLINE Type sFixW(TypeArg inValue)
Internal helper function that ensures that the Z component is replicated to the W component to preven...
Definition DVec3.inl:104
JPH_INLINE DVec3 Sqrt() const
Component wise square root.
Definition DVec3.inl:1082
JPH_INLINE DVec3 GetSign() const
Get vector that contains the sign of each element (returns 1 if positive, -1 if negative)
Definition DVec3.inl:1134
Type mValue
Definition DVec3.h:282
static JPH_INLINE DVec3 sXor(DVec3Arg inV1, DVec3Arg inV2)
Logical xor (component wise)
Definition DVec3.inl:501
static JPH_INLINE DVec3 sOne()
Vector with all ones.
Definition DVec3.inl:178
static JPH_INLINE DVec3 sGreaterOrEqual(DVec3Arg inV1, DVec3Arg inV2)
Greater than or equal (component wise)
Definition DVec3.inl:395
JPH_INLINE DVec3 operator+(Vec3Arg inV2) const
Add two vectors (component wise)
Definition DVec3.inl:740
JPH_INLINE bool IsClose(DVec3Arg inV2, double inMaxDistSq=1.0e-24) const
Test if two vectors are close.
Definition DVec3.inl:573
JPH_INLINE bool IsNormalized(double inTolerance=1.0e-12) const
Test if vector is normalized.
Definition DVec3.inl:1111
static JPH_INLINE DVec3 sSelect(DVec3Arg inNotSet, DVec3Arg inSet, DVec3Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit of inControl = 1
Definition DVec3.inl:442
const Type & TypeArg
Definition DVec3.h:30
static JPH_INLINE DVec3 sNaN()
Vector with all NaN's.
Definition DVec3.inl:183
friend JPH_INLINE DVec3 operator*(double inV1, DVec3Arg inV2)
Multiply vector with double.
Definition DVec3.inl:623
static JPH_INLINE DVec3 sGreater(DVec3Arg inV1, DVec3Arg inV2)
Greater than (component wise)
Definition DVec3.inl:371
JPH_INLINE void StoreDouble3(Double3 *outV) const
Store 3 doubles to memory.
Definition DVec3.inl:208
static JPH_INLINE DVec3 sOr(DVec3Arg inV1, DVec3Arg inV2)
Logical or (component wise)
Definition DVec3.inl:478
static JPH_INLINE DVec3 sZero()
Vector with all zeros.
Definition DVec3.inl:138
JPH_INLINE bool TestAllTrue() const
Test if all components are true (true is when highest bit of component is set)
Definition DVec3.inl:563
JPH_INLINE double Length() const
Length of vector.
Definition DVec3.inl:1101
JPH_INLINE DVec3 operator-() const
Negate.
Definition DVec3.inl:832
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition DVec3.inl:1116
JPH_INLINE Vec3 ToVec3RoundUp() const
Convert to float vector 3 rounding up.
Definition DVec3.inl:1267
static const double cTrue
Representations of true and false for boolean operations.
Definition DVec3.h:277
DVec3()=default
Constructor.
JPH_INLINE void CheckW() const
Internal helper function that checks that W is equal to Z, so e.g. dividing by it should not generate...
Definition DVec3.inl:95
JPH_INLINE double LengthSq() const
Squared length of vector.
Definition DVec3.inl:1077
JPH_INLINE DVec3 Normalized() const
Normalize vector.
Definition DVec3.inl:1106
JPH_INLINE DVec3 operator/(double inV2) const
Divide vector by double.
Definition DVec3.inl:643
JPH_INLINE double Dot(DVec3Arg inV2) const
Dot product.
Definition DVec3.inl:1041
static JPH_INLINE DVec3 sReplicate(double inV)
Replicate inV across all components.
Definition DVec3.inl:158
static JPH_INLINE DVec3 sLessOrEqual(DVec3Arg inV1, DVec3Arg inV2)
Less than or equal (component wise)
Definition DVec3.inl:347
JPH_INLINE DVec3 PrepareRoundToInf() const
Prepare to convert to float vector 3 rounding towards positive/negative inf (returns DVec3 that can b...
Definition DVec3.inl:1197
JPH_INLINE DVec3 & operator+=(Vec3Arg inV2)
Add two vectors (component wise)
Definition DVec3.inl:781
static JPH_INLINE DVec3 sLoadDouble3Unsafe(const Double3 &inV)
Load 3 doubles from memory (reads 64 bits extra which it doesn't use)
Definition DVec3.inl:188
JPH_INLINE DVec3 & operator/=(double inV2)
Divide vector by double.
Definition DVec3.inl:714
JPH_INLINE DVec3 Cross(DVec3Arg inV2) const
Cross product.
Definition DVec3.inl:1010
JPH_INLINE DVec3 & operator-=(Vec3Arg inV2)
Subtract two vectors (component wise)
Definition DVec3.inl:911
JPH_INLINE DVec3 PrepareRoundToZero() const
Prepare to convert to float vector 3 rounding towards zero (returns DVec3 that can be converted to a ...
Definition DVec3.inl:1165
JPH_INLINE DVec3 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition DVec3.inl:1005
static JPH_INLINE DVec3 sEquals(DVec3Arg inV1, DVec3Arg inV2)
Equals (component wise)
Definition DVec3.inl:299
struct { double mData[4];} Type
Definition DVec3.h:29
JPH_INLINE bool IsNearZero(double inMaxDistSq=1.0e-24) const
Test if vector is near zero.
Definition DVec3.inl:578
JPH_INLINE bool operator==(DVec3Arg inV2) const
Comparison.
Definition DVec3.inl:568
static const double cFalse
Definition DVec3.h:278
Class that holds 3 doubles. Used as a storage class. Convert to DVec3 for calculations.
Definition Double3.h:13
double z
Definition Double3.h:40
double y
Definition Double3.h:39
double x
Definition Double3.h:38
Definition Vec3.h:17
Type mValue
Definition Vec3.h:299
float mF32[4]
Definition Vec3.h:300
Definition Vec4.h:14