Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
Vec3.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
5#include <Jolt/Math/Vec4.h>
6#include <Jolt/Math/UVec4.h>
8
10#include <random>
12
13// Create a std::hash/JPH::Hash for Vec3
14JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())
15
17
18void Vec3::CheckW() const
19{
20#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
21 // Avoid asserts when both components are NaN
22 JPH_ASSERT(reinterpret_cast<const uint32 *>(mF32)[2] == reinterpret_cast<const uint32 *>(mF32)[3]);
23#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
24}
25
26JPH_INLINE Vec3::Type Vec3::sFixW(Type inValue)
27{
28#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
29 #if defined(JPH_USE_SSE)
30 return _mm_shuffle_ps(inValue, inValue, _MM_SHUFFLE(2, 2, 1, 0));
31 #elif defined(JPH_USE_NEON)
32 return JPH_NEON_SHUFFLE_F32x4(inValue, inValue, 0, 1, 2, 2);
33 #elif defined(JPH_USE_RVV)
34 Type value;
35 const vfloat32m1_t v = __riscv_vle32_v_f32m1(inValue.mData, 3);
36 __riscv_vse32_v_f32m1(value.mData, v, 3);
37 value.mData[3] = value.mData[2];
38 return value;
39 #else
40 Type value;
41 value.mData[0] = inValue.mData[0];
42 value.mData[1] = inValue.mData[1];
43 value.mData[2] = inValue.mData[2];
44 value.mData[3] = inValue.mData[2];
45 return value;
46 #endif
47#else
48 return inValue;
49#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
50}
51
53 mValue(sFixW(inRHS.mValue))
54{
55}
56
57Vec3::Vec3(const Float3 &inV)
58{
59#if defined(JPH_USE_SSE)
60 Type x = _mm_load_ss(&inV.x);
61 Type y = _mm_load_ss(&inV.y);
62 Type z = _mm_load_ss(&inV.z);
63 Type xy = _mm_unpacklo_ps(x, y);
64 mValue = _mm_shuffle_ps(xy, z, _MM_SHUFFLE(0, 0, 1, 0)); // Assure Z and W are the same
65#elif defined(JPH_USE_NEON)
66 float32x2_t xy = vld1_f32(&inV.x);
67 float32x2_t zz = vdup_n_f32(inV.z); // Assure Z and W are the same
68 mValue = vcombine_f32(xy, zz);
69#elif defined(JPH_USE_RVV)
70 const vfloat32m1_t v = __riscv_vle32_v_f32m1(&inV.x, 3);
71 __riscv_vse32_v_f32m1(mF32, v, 3);
72 mF32[3] = inV.z;
73#else
74 mF32[0] = inV.x;
75 mF32[1] = inV.y;
76 mF32[2] = inV.z;
77 mF32[3] = inV.z; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables
78#endif
79}
80
81Vec3::Vec3(float inX, float inY, float inZ)
82{
83#if defined(JPH_USE_SSE)
84 mValue = _mm_set_ps(inZ, inZ, inY, inX);
85#elif defined(JPH_USE_NEON)
86 uint32x2_t xy = vcreate_u32(static_cast<uint64>(BitCast<uint32>(inX)) | (static_cast<uint64>(BitCast<uint32>(inY)) << 32));
87 uint32x2_t zz = vreinterpret_u32_f32(vdup_n_f32(inZ));
88 mValue = vreinterpretq_f32_u32(vcombine_u32(xy, zz));
89#elif defined(JPH_USE_RVV)
90 const float aggregated[4] = { inX, inY, inZ, inZ };
91 const vfloat32m1_t v = __riscv_vle32_v_f32m1(aggregated, 4);
92 __riscv_vse32_v_f32m1(mF32, v, 4);
93#else
94 mF32[0] = inX;
95 mF32[1] = inY;
96 mF32[2] = inZ;
97 mF32[3] = inZ; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables
98#endif
99}
100
101template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ>
103{
104 static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
105 static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
106 static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
107
108#if defined(JPH_USE_SSE)
109 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleZ, SwizzleZ, SwizzleY, SwizzleX)); // Assure Z and W are the same
110#elif defined(JPH_USE_NEON)
111 return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleZ);
112#elif defined(JPH_USE_RVV)
113 Vec3 v;
114 const vfloat32m1_t data = __riscv_vle32_v_f32m1(mF32, 4);
115 const uint32 stored_indices[4] = { SwizzleX, SwizzleY, SwizzleZ, SwizzleZ };
116 const vuint32m1_t index = __riscv_vle32_v_u32m1(stored_indices, 4);
117 const vfloat32m1_t swizzled = __riscv_vrgather_vv_f32m1(data, index, 4);
118 __riscv_vse32_v_f32m1(v.mF32, swizzled, 4);
119 return v;
120#else
121 return Vec3(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ]);
122#endif
123}
124
126{
127#if defined(JPH_USE_SSE)
128 return _mm_setzero_ps();
129#elif defined(JPH_USE_NEON)
130 return vdupq_n_f32(0);
131#elif defined(JPH_USE_RVV)
132 Vec3 v;
133 const vfloat32m1_t zero_vec = __riscv_vfmv_v_f_f32m1(0.0f, 3);
134 __riscv_vse32_v_f32m1(v.mF32, zero_vec, 3);
135 return v;
136#else
137 return Vec3(0, 0, 0);
138#endif
139}
140
142{
143#if defined(JPH_USE_SSE)
144 return _mm_set1_ps(inV);
145#elif defined(JPH_USE_NEON)
146 return vdupq_n_f32(inV);
147#elif defined(JPH_USE_RVV)
148 Vec3 vec;
149 const vfloat32m1_t v = __riscv_vfmv_v_f_f32m1(inV, 3);
150 __riscv_vse32_v_f32m1(vec.mF32, v, 3);
151 return vec;
152#else
153 return Vec3(inV, inV, inV);
154#endif
155}
156
158{
159 return sReplicate(1.0f);
160}
161
163{
164 return sReplicate(numeric_limits<float>::quiet_NaN());
165}
166
168{
169#if defined(JPH_USE_SSE)
170 Type v = _mm_loadu_ps(&inV.x);
171#elif defined(JPH_USE_NEON)
172 Type v = vld1q_f32(&inV.x);
173#elif defined(JPH_USE_RVV)
174 Type v;
175 const vfloat32m1_t rvv = __riscv_vle32_v_f32m1(&inV.x, 3);
176 __riscv_vse32_v_f32m1(v.mData, rvv, 3);
177#else
178 Type v = { inV.x, inV.y, inV.z };
179#endif
180 return sFixW(v);
181}
182
184{
185#if defined(JPH_USE_SSE)
186 return _mm_min_ps(inV1.mValue, inV2.mValue);
187#elif defined(JPH_USE_NEON)
188 return vminq_f32(inV1.mValue, inV2.mValue);
189#elif defined(JPH_USE_RVV)
190 Vec3 res;
191 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.mF32, 3);
192 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
193 const vfloat32m1_t min = __riscv_vfmin_vv_f32m1(v1, v2, 3);
194 __riscv_vse32_v_f32m1(res.mF32, min, 3);
195 return res;
196#else
197 return Vec3(min(inV1.mF32[0], inV2.mF32[0]),
198 min(inV1.mF32[1], inV2.mF32[1]),
199 min(inV1.mF32[2], inV2.mF32[2]));
200#endif
201}
202
204{
205#if defined(JPH_USE_SSE)
206 return _mm_max_ps(inV1.mValue, inV2.mValue);
207#elif defined(JPH_USE_NEON)
208 return vmaxq_f32(inV1.mValue, inV2.mValue);
209#elif defined(JPH_USE_RVV)
210 Vec3 res;
211 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.mF32, 3);
212 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
213 const vfloat32m1_t max = __riscv_vfmax_vv_f32m1(v1, v2, 3);
214 __riscv_vse32_v_f32m1(res.mF32, max, 3);
215 return res;
216#else
217 return Vec3(max(inV1.mF32[0], inV2.mF32[0]),
218 max(inV1.mF32[1], inV2.mF32[1]),
219 max(inV1.mF32[2], inV2.mF32[2]));
220#endif
221}
222
224{
225 return sMax(sMin(inV, inMax), inMin);
226}
227
229{
230#if defined(JPH_USE_SSE)
231 return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
232#elif defined(JPH_USE_NEON)
233 return vceqq_f32(inV1.mValue, inV2.mValue);
234#elif defined(JPH_USE_RVV)
235 UVec4 res;
236 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.mF32, 3);
237 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
238 const vbool32_t mask = __riscv_vmfeq_vv_f32m1_b32(v1, v2, 3);
239 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 3);
240 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 3);
241 __riscv_vse32_v_u32m1(res.mU32, merged, 3);
242 res.mU32[3] = res.mU32[2];
243 return res;
244#else
245 uint32 z = inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0;
246 return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
247 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
248 z,
249 z);
250#endif
251}
252
254{
255#if defined(JPH_USE_SSE)
256 return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
257#elif defined(JPH_USE_NEON)
258 return vcltq_f32(inV1.mValue, inV2.mValue);
259#elif defined(JPH_USE_RVV)
260 UVec4 res;
261 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.mF32, 3);
262 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
263 const vbool32_t mask = __riscv_vmflt_vv_f32m1_b32(v1, v2, 3);
264 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 3);
265 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 3);
266 __riscv_vse32_v_u32m1(res.mU32, merged, 3);
267 res.mU32[3] = res.mU32[2];
268 return res;
269#else
270 uint32 z = inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0;
271 return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
272 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
273 z,
274 z);
275#endif
276}
277
279{
280#if defined(JPH_USE_SSE)
281 return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
282#elif defined(JPH_USE_NEON)
283 return vcleq_f32(inV1.mValue, inV2.mValue);
284#elif defined(JPH_USE_RVV)
285 UVec4 res;
286 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.mF32, 3);
287 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
288 const vbool32_t mask = __riscv_vmfle_vv_f32m1_b32(v1, v2, 3);
289 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 3);
290 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 3);
291 __riscv_vse32_v_u32m1(res.mU32, merged, 3);
292 res.mU32[3] = res.mU32[2];
293 return res;
294#else
295 uint32 z = inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0;
296 return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
297 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
298 z,
299 z);
300#endif
301}
302
304{
305#if defined(JPH_USE_SSE)
306 return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
307#elif defined(JPH_USE_NEON)
308 return vcgtq_f32(inV1.mValue, inV2.mValue);
309#elif defined(JPH_USE_RVV)
310 UVec4 res;
311 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.mF32, 3);
312 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
313 const vbool32_t mask = __riscv_vmfgt_vv_f32m1_b32(v1, v2, 3);
314 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 3);
315 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 3);
316 __riscv_vse32_v_u32m1(res.mU32, merged, 3);
317 res.mU32[3] = res.mU32[2];
318 return res;
319#else
320 uint32 z = inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0;
321 return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
322 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
323 z,
324 z);
325#endif
326}
327
329{
330#if defined(JPH_USE_SSE)
331 return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
332#elif defined(JPH_USE_NEON)
333 return vcgeq_f32(inV1.mValue, inV2.mValue);
334#elif defined(JPH_USE_RVV)
335 UVec4 res;
336 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.mF32, 3);
337 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
338 const vbool32_t mask = __riscv_vmfge_vv_f32m1_b32(v1, v2, 3);
339 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 3);
340 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 3);
341 __riscv_vse32_v_u32m1(res.mU32, merged, 3);
342 res.mU32[3] = res.mU32[2];
343 return res;
344#else
345 uint32 z = inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0;
346 return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
347 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
348 z,
349 z);
350#endif
351}
352
354{
355#ifdef JPH_USE_FMADD
356 #ifdef JPH_USE_SSE
357 return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
358 #elif defined(JPH_USE_NEON)
359 return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
360 #elif defined(JPH_USE_RVV)
361 Vec3 res;
362 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inMul1.mF32, 3);
363 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inMul2.mF32, 3);
364 const vfloat32m1_t rvv_add = __riscv_vle32_v_f32m1(inAdd.mF32, 3);
365 const vfloat32m1_t fmadd = __riscv_vfmacc_vv_f32m1(rvv_add, v1, v2, 3);
366 __riscv_vse32_v_f32m1(res.mF32, fmadd, 3);
367 return res;
368 #else
369 return inMul1 * inMul2 + inAdd;
370 #endif
371#else
372 return inMul1 * inMul2 + inAdd;
373#endif
374}
375
376Vec3 Vec3::sSelect(Vec3Arg inNotSet, Vec3Arg inSet, UVec4Arg inControl)
377{
378#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
379 Type v = _mm_blendv_ps(inNotSet.mValue, inSet.mValue, _mm_castsi128_ps(inControl.mValue));
380 return sFixW(v);
381#elif defined(JPH_USE_SSE)
382 __m128 is_set = _mm_castsi128_ps(_mm_srai_epi32(inControl.mValue, 31));
383 Type v = _mm_or_ps(_mm_and_ps(is_set, inSet.mValue), _mm_andnot_ps(is_set, inNotSet.mValue));
384 return sFixW(v);
385#elif defined(JPH_USE_NEON)
386 Type v = vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inSet.mValue, inNotSet.mValue);
387 return sFixW(v);
388#elif defined(JPH_USE_RVV)
389 Vec3 masked;
390 const vuint32m1_t control = __riscv_vle32_v_u32m1(inControl.mU32, 3);
391 const vfloat32m1_t not_set = __riscv_vle32_v_f32m1(inNotSet.mF32, 3);
392 const vfloat32m1_t set = __riscv_vle32_v_f32m1(inSet.mF32, 3);
393
394 // Generate RVV bool mask from UVec4
395 const vuint32m1_t r = __riscv_vand_vx_u32m1(control, 0x80000000u, 3);
396 const vbool32_t rvv_mask = __riscv_vmsne_vx_u32m1_b32(r, 0x0, 3);
397 const vfloat32m1_t merged = __riscv_vmerge_vvm_f32m1(not_set, set, rvv_mask, 3);
398 __riscv_vse32_v_f32m1(masked.mF32, merged, 3);
399 return masked;
400#else
401 Vec3 result;
402 for (int i = 0; i < 3; i++)
403 result.mF32[i] = (inControl.mU32[i] & 0x80000000u) ? inSet.mF32[i] : inNotSet.mF32[i];
404#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
405 result.mF32[3] = result.mF32[2];
406#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
407 return result;
408#endif
409}
410
412{
413#if defined(JPH_USE_SSE)
414 return _mm_or_ps(inV1.mValue, inV2.mValue);
415#elif defined(JPH_USE_NEON)
416 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
417#elif defined(JPH_USE_RVV)
418 Vec3 or_result;
419 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(reinterpret_cast<const uint32 *>(inV1.mF32), 3);
420 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(reinterpret_cast<const uint32 *>(inV2.mF32), 3);
421 const vuint32m1_t res = __riscv_vor_vv_u32m1(v1, v2, 3);
422 __riscv_vse32_v_u32m1(reinterpret_cast<uint32 *>(or_result.mF32), res, 3);
423 return or_result;
424#else
426#endif
427}
428
430{
431#if defined(JPH_USE_SSE)
432 return _mm_xor_ps(inV1.mValue, inV2.mValue);
433#elif defined(JPH_USE_NEON)
434 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
435#elif defined(JPH_USE_RVV)
436 Vec3 xor_result;
437 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(reinterpret_cast<const uint32 *>(inV1.mF32), 3);
438 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(reinterpret_cast<const uint32 *>(inV2.mF32), 3);
439 const vuint32m1_t res = __riscv_vxor_vv_u32m1(v1, v2, 3);
440 __riscv_vse32_v_u32m1(reinterpret_cast<uint32 *>(xor_result.mF32), res, 3);
441 return xor_result;
442#else
444#endif
445}
446
448{
449#if defined(JPH_USE_SSE)
450 return _mm_and_ps(inV1.mValue, inV2.mValue);
451#elif defined(JPH_USE_NEON)
452 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
453#elif defined(JPH_USE_RVV)
454 Vec3 and_result;
455 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(reinterpret_cast<const uint32 *>(inV1.mF32), 3);
456 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(reinterpret_cast<const uint32 *>(inV2.mF32), 3);
457 const vuint32m1_t res = __riscv_vand_vv_u32m1(v1, v2, 3);
458 __riscv_vse32_v_u32m1(reinterpret_cast<uint32 *>(and_result.mF32), res, 3);
459 return and_result;
460#else
462#endif
463}
464
465Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
466{
467 Vec4 s, c;
468 Vec4(inTheta, inPhi, 0, 0).SinCos(s, c);
469 return Vec3(s.GetX() * c.GetY(), s.GetX() * s.GetY(), c.GetX());
470}
471
472template <class Random>
473Vec3 Vec3::sRandom(Random &inRandom)
474{
475 std::uniform_real_distribution<float> zero_to_one(0.0f, 1.0f);
476 float theta = JPH_PI * zero_to_one(inRandom);
477 float phi = 2.0f * JPH_PI * zero_to_one(inRandom);
478 return sUnitSpherical(theta, phi);
479}
480
482{
483 return sEquals(*this, inV2).TestAllXYZTrue();
484}
485
486bool Vec3::IsClose(Vec3Arg inV2, float inMaxDistSq) const
487{
488 return (inV2 - *this).LengthSq() <= inMaxDistSq;
489}
490
491bool Vec3::IsNearZero(float inMaxDistSq) const
492{
493 return LengthSq() <= inMaxDistSq;
494}
495
497{
498#if defined(JPH_USE_SSE)
499 return _mm_mul_ps(mValue, inV2.mValue);
500#elif defined(JPH_USE_NEON)
501 return vmulq_f32(mValue, inV2.mValue);
502#elif defined(JPH_USE_RVV)
503 Vec3 res;
504 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(mF32, 3);
505 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
506 const vfloat32m1_t mul = __riscv_vfmul_vv_f32m1(v1, v2, 3);
507 __riscv_vse32_v_f32m1(res.mF32, mul, 3);
508 return res;
509#else
510 return Vec3(mF32[0] * inV2.mF32[0], mF32[1] * inV2.mF32[1], mF32[2] * inV2.mF32[2]);
511#endif
512}
513
514Vec3 Vec3::operator * (float inV2) const
515{
516#if defined(JPH_USE_SSE)
517 return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
518#elif defined(JPH_USE_NEON)
519 return vmulq_n_f32(mValue, inV2);
520#elif defined(JPH_USE_RVV)
521 Vec3 res;
522 const vfloat32m1_t src = __riscv_vle32_v_f32m1(mF32, 3);
523 const vfloat32m1_t mul = __riscv_vfmul_vf_f32m1(src, inV2, 3);
524 __riscv_vse32_v_f32m1(res.mF32, mul, 3);
525 return res;
526#else
527 return Vec3(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2);
528#endif
529}
530
531Vec3 operator * (float inV1, Vec3Arg inV2)
532{
533#if defined(JPH_USE_SSE)
534 return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
535#elif defined(JPH_USE_NEON)
536 return vmulq_n_f32(inV2.mValue, inV1);
537#elif defined(JPH_USE_RVV)
538 Vec3 res;
539 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
540 const vfloat32m1_t mul = __riscv_vfmul_vf_f32m1(v1, inV1, 3);
541 __riscv_vse32_v_f32m1(res.mF32, mul, 3);
542 return res;
543#else
544 return Vec3(inV1 * inV2.mF32[0], inV1 * inV2.mF32[1], inV1 * inV2.mF32[2]);
545#endif
546}
547
548Vec3 Vec3::operator / (float inV2) const
549{
550#if defined(JPH_USE_SSE)
551 return _mm_div_ps(mValue, _mm_set1_ps(inV2));
552#elif defined(JPH_USE_NEON)
553 return vdivq_f32(mValue, vdupq_n_f32(inV2));
554#elif defined(JPH_USE_RVV)
555 Vec3 res;
556 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(mF32, 3);
557 const vfloat32m1_t div = __riscv_vfdiv_vf_f32m1(v1, inV2, 3);
558 __riscv_vse32_v_f32m1(res.mF32, div, 3);
559 return res;
560#else
561 return Vec3(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2);
562#endif
563}
564
566{
567#if defined(JPH_USE_SSE)
568 mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
569#elif defined(JPH_USE_NEON)
570 mValue = vmulq_n_f32(mValue, inV2);
571#elif defined(JPH_USE_RVV)
572 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(mF32, 3);
573 const vfloat32m1_t res = __riscv_vfmul_vf_f32m1(v1, inV2, 3);
574 __riscv_vse32_v_f32m1(mF32, res, 3);
575#else
576 for (int i = 0; i < 3; ++i)
577 mF32[i] *= inV2;
578 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
579 mF32[3] = mF32[2];
580 #endif
581#endif
582 return *this;
583}
584
586{
587#if defined(JPH_USE_SSE)
588 mValue = _mm_mul_ps(mValue, inV2.mValue);
589#elif defined(JPH_USE_NEON)
590 mValue = vmulq_f32(mValue, inV2.mValue);
591#elif defined(JPH_USE_RVV)
592 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(mF32, 3);
593 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
594 const vfloat32m1_t rvv_res = __riscv_vfmul_vv_f32m1(v1, v2, 3);
595 __riscv_vse32_v_f32m1(mF32, rvv_res, 3);
596#else
597 for (int i = 0; i < 3; ++i)
598 mF32[i] *= inV2.mF32[i];
599 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
600 mF32[3] = mF32[2];
601 #endif
602#endif
603 return *this;
604}
605
607{
608#if defined(JPH_USE_SSE)
609 mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
610#elif defined(JPH_USE_NEON)
611 mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
612#elif defined(JPH_USE_RVV)
613 const vfloat32m1_t v = __riscv_vle32_v_f32m1(mF32, 3);
614 const vfloat32m1_t res = __riscv_vfdiv_vf_f32m1(v, inV2, 3);
615 __riscv_vse32_v_f32m1(mF32, res, 3);
616#else
617 for (int i = 0; i < 3; ++i)
618 mF32[i] /= inV2;
619 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
620 mF32[3] = mF32[2];
621 #endif
622#endif
623 return *this;
624}
625
627{
628#if defined(JPH_USE_SSE)
629 return _mm_add_ps(mValue, inV2.mValue);
630#elif defined(JPH_USE_NEON)
631 return vaddq_f32(mValue, inV2.mValue);
632#elif defined(JPH_USE_RVV)
633 Vec3 res;
634 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(mF32, 3);
635 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
636 const vfloat32m1_t rvv_add = __riscv_vfadd_vv_f32m1(v1, v2, 3);
637 __riscv_vse32_v_f32m1(res.mF32, rvv_add, 3);
638 return res;
639#else
640 return Vec3(mF32[0] + inV2.mF32[0], mF32[1] + inV2.mF32[1], mF32[2] + inV2.mF32[2]);
641#endif
642}
643
645{
646#if defined(JPH_USE_SSE)
647 mValue = _mm_add_ps(mValue, inV2.mValue);
648#elif defined(JPH_USE_NEON)
649 mValue = vaddq_f32(mValue, inV2.mValue);
650#elif defined(JPH_USE_RVV)
651 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(mF32, 3);
652 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
653 const vfloat32m1_t rvv_add = __riscv_vfadd_vv_f32m1(v1, v2, 3);
654 __riscv_vse32_v_f32m1(mF32, rvv_add, 3);
655#else
656 for (int i = 0; i < 3; ++i)
657 mF32[i] += inV2.mF32[i];
658 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
659 mF32[3] = mF32[2];
660 #endif
661#endif
662 return *this;
663}
664
666{
667#if defined(JPH_USE_SSE)
668 return _mm_sub_ps(_mm_setzero_ps(), mValue);
669#elif defined(JPH_USE_NEON)
670 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
671 return vsubq_f32(vdupq_n_f32(0), mValue);
672 #else
673 return vnegq_f32(mValue);
674 #endif
675#elif defined(JPH_USE_RVV)
676 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
677 Vec3 res;
678 const vfloat32m1_t rvv_zero = __riscv_vfmv_v_f_f32m1(0.0f, 3);
679 const vfloat32m1_t v = __riscv_vle32_v_f32m1(mF32, 3);
680 const vfloat32m1_t rvv_neg = __riscv_vfsub_vv_f32m1(rvv_zero, v, 3);
681 __riscv_vse32_v_f32m1(res.mF32, rvv_neg, 3);
682 return res;
683 #else
684 Vec3 res;
685 const vfloat32m1_t v = __riscv_vle32_v_f32m1(mF32, 3);
686 const vfloat32m1_t rvv_neg = __riscv_vfsgnjn_vv_f32m1(v, v, 3);
687 __riscv_vse32_v_f32m1(res.mF32, rvv_neg, 3);
688 return res;
689 #endif
690#else
691 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
692 return Vec3(0.0f - mF32[0], 0.0f - mF32[1], 0.0f - mF32[2]);
693 #else
694 return Vec3(-mF32[0], -mF32[1], -mF32[2]);
695 #endif
696#endif
697}
698
700{
701#if defined(JPH_USE_SSE)
702 return _mm_sub_ps(mValue, inV2.mValue);
703#elif defined(JPH_USE_NEON)
704 return vsubq_f32(mValue, inV2.mValue);
705#elif defined(JPH_USE_RVV)
706 Vec3 res;
707 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(mF32, 3);
708 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
709 const vfloat32m1_t rvv_sub = __riscv_vfsub_vv_f32m1(v1, v2, 3);
710 __riscv_vse32_v_f32m1(res.mF32, rvv_sub, 3);
711 return res;
712#else
713 return Vec3(mF32[0] - inV2.mF32[0], mF32[1] - inV2.mF32[1], mF32[2] - inV2.mF32[2]);
714#endif
715}
716
718{
719#if defined(JPH_USE_SSE)
720 mValue = _mm_sub_ps(mValue, inV2.mValue);
721#elif defined(JPH_USE_NEON)
722 mValue = vsubq_f32(mValue, inV2.mValue);
723#elif defined(JPH_USE_RVV)
724 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(mF32, 3);
725 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
726 const vfloat32m1_t rvv_sub = __riscv_vfsub_vv_f32m1(v1, v2, 3);
727 __riscv_vse32_v_f32m1(mF32, rvv_sub, 3);
728#else
729 for (int i = 0; i < 3; ++i)
730 mF32[i] -= inV2.mF32[i];
731 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
732 mF32[3] = mF32[2];
733 #endif
734#endif
735 return *this;
736}
737
739{
740 inV2.CheckW(); // Check W equals Z to avoid div by zero
741#if defined(JPH_USE_SSE)
742 return _mm_div_ps(mValue, inV2.mValue);
743#elif defined(JPH_USE_NEON)
744 return vdivq_f32(mValue, inV2.mValue);
745#elif defined(JPH_USE_RVV)
746 Vec3 res;
747 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(mF32, 3);
748 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
749 const vfloat32m1_t rvv_div = __riscv_vfdiv_vv_f32m1(v1, v2, 3);
750 __riscv_vse32_v_f32m1(res.mF32, rvv_div, 3);
751 return res;
752#else
753 return Vec3(mF32[0] / inV2.mF32[0], mF32[1] / inV2.mF32[1], mF32[2] / inV2.mF32[2]);
754#endif
755}
756
758{
759#if defined(JPH_USE_SSE)
760 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
761#elif defined(JPH_USE_NEON)
762 return vdupq_laneq_f32(mValue, 0);
763#elif defined(JPH_USE_RVV)
764 Vec4 vec;
765 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(mF32[0], 4);
766 __riscv_vse32_v_f32m1(vec.mF32, splat, 4);
767 return vec;
768#else
769 return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
770#endif
771}
772
774{
775#if defined(JPH_USE_SSE)
776 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
777#elif defined(JPH_USE_NEON)
778 return vdupq_laneq_f32(mValue, 1);
779#elif defined(JPH_USE_RVV)
780 Vec4 vec;
781 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(mF32[1], 4);
782 __riscv_vse32_v_f32m1(vec.mF32, splat, 4);
783 return vec;
784#else
785 return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
786#endif
787}
788
790{
791#if defined(JPH_USE_SSE)
792 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
793#elif defined(JPH_USE_NEON)
794 return vdupq_laneq_f32(mValue, 2);
795#elif defined(JPH_USE_RVV)
796 Vec4 vec;
797 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(mF32[2], 4);
798 __riscv_vse32_v_f32m1(vec.mF32, splat, 4);
799 return vec;
800#else
801 return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
802#endif
803}
804
806{
807 return GetX() < GetY() ? (GetZ() < GetX() ? 2 : 0) : (GetZ() < GetY() ? 2 : 1);
808}
809
811{
812 return GetX() > GetY() ? (GetZ() > GetX() ? 2 : 0) : (GetZ() > GetY() ? 2 : 1);
813}
814
816{
817#if defined(JPH_USE_AVX512)
818 return _mm_range_ps(mValue, mValue, 0b1000);
819#elif defined(JPH_USE_SSE)
820 return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
821#elif defined(JPH_USE_NEON)
822 return vabsq_f32(mValue);
823#elif defined(JPH_USE_RVV)
824 Vec3 res;
825 const vfloat32m1_t v = __riscv_vle32_v_f32m1(mF32, 3);
826 const vfloat32m1_t rvv_abs = __riscv_vfsgnj_vf_f32m1(v, 1.0, 3);
827 __riscv_vse32_v_f32m1(res.mF32, rvv_abs, 3);
828 return res;
829#else
830 return Vec3(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]));
831#endif
832}
833
835{
836 return sOne() / mValue;
837}
838
840{
841#ifdef JPH_USE_FMADD
842 Vec3 cd = inC * inD;
843 Vec3 err = Vec3::sFusedMultiplyAdd(-inC, inD, cd);
844 Vec3 dop = Vec3::sFusedMultiplyAdd(inA, inB, -cd);
845 return dop + err;
846#else
847 return inA * inB - inC * inD;
848#endif
849}
850
852{
853#if defined(JPH_USE_SSE)
854 Type t1 = _mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
855 t1 = _mm_mul_ps(t1, mValue);
856 Type t2 = _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
857 t2 = _mm_mul_ps(t2, inV2.mValue);
858 Type t3 = _mm_sub_ps(t1, t2);
859 return _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
860#elif defined(JPH_USE_NEON)
861 Type t1 = JPH_NEON_SHUFFLE_F32x4(inV2.mValue, inV2.mValue, 1, 2, 0, 0); // Assure Z and W are the same
862 t1 = vmulq_f32(t1, mValue);
863 Type t2 = JPH_NEON_SHUFFLE_F32x4(mValue, mValue, 1, 2, 0, 0); // Assure Z and W are the same
864 t2 = vmulq_f32(t2, inV2.mValue);
865 Type t3 = vsubq_f32(t1, t2);
866 return JPH_NEON_SHUFFLE_F32x4(t3, t3, 1, 2, 0, 0); // Assure Z and W are the same
867#elif defined(JPH_USE_RVV)
868 const uint32 indices[3] = { 1, 2, 0 };
869 const vuint32m1_t gather_indices = __riscv_vle32_v_u32m1(indices, 3);
870 const vfloat32m1_t v0 = __riscv_vle32_v_f32m1(mF32, 3);
871 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV2.mF32, 3);
872 vfloat32m1_t t0 = __riscv_vrgather_vv_f32m1(v1, gather_indices, 3);
873 t0 = __riscv_vfmul_vv_f32m1(t0, v0, 3);
874 vfloat32m1_t t1 = __riscv_vrgather_vv_f32m1(v0, gather_indices, 3);
875 t1 = __riscv_vfmul_vv_f32m1(t1, v1, 3);
876 const vfloat32m1_t sub = __riscv_vfsub_vv_f32m1(t0, t1, 3);
877 const vfloat32m1_t cross = __riscv_vrgather_vv_f32m1(sub, gather_indices, 3);
878
879 Vec3 cross_result;
880 __riscv_vse32_v_f32m1(cross_result.mF32, cross, 3);
881 return cross_result;
882#else
883 return Vec3(mF32[1] * inV2.mF32[2] - mF32[2] * inV2.mF32[1],
884 mF32[2] * inV2.mF32[0] - mF32[0] * inV2.mF32[2],
885 mF32[0] * inV2.mF32[1] - mF32[1] * inV2.mF32[0]);
886#endif
887}
888
893
894float Vec3::ReduceSum() const
895{
896 // Ensure that we handle -0.0f correctly when cross platform deterministic behavior is required.
897#if defined(JPH_USE_SSE4_1)
898 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
899 Type val = _mm_blend_ps(mValue, _mm_setzero_ps(), 0x8); // [x, y, z, 0]
900 Type shuf = _mm_movehdup_ps(val); // [y, y, 0, 0]
901 Type sums = _mm_add_ps(val, shuf); // [x + y, y + y, z + 0, 0]
902 shuf = _mm_movehl_ps(shuf, sums); // [z + 0, 0, 0, 0]
903 #else
904 Type shuf = _mm_movehdup_ps(mValue); // [y, y, w, w]
905 Type sums = _mm_add_ps(mValue, shuf); // [x + y, y + y, z + w, w + w]
906 shuf = _mm_movehl_ps(mValue, mValue); // [z, w, z, w]
907 #endif
908 sums = _mm_add_ps(sums, shuf); // Deterministic: [(x + y) + (z + 0), ...], non-deterministic: [(x + y) + z, ...]
909 return _mm_cvtss_f32(sums);
910#elif defined(JPH_USE_NEON)
911 Type v = vsetq_lane_f32(0, mValue, 3);
912 return vaddvq_f32(v);
913#elif defined(JPH_USE_RVV)
914 const vfloat32m1_t zeros = __riscv_vfmv_v_f_f32m1(0.0f, 3);
915 const vfloat32m1_t v = __riscv_vle32_v_f32m1(mF32, 3);
916 const vfloat32m1_t sum = __riscv_vfredosum_vs_f32m1_f32m1(v, zeros, 3);
917 return __riscv_vfmv_f_s_f32m1_f32(sum);
918#else
919 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
920 return (mF32[0] + mF32[1]) + (mF32[2] + 0.0f);
921 #else
922 return mF32[0] + mF32[1] + mF32[2];
923 #endif
924#endif
925}
926
927float Vec3::Dot(Vec3Arg inV2) const
928{
929 return (*this * inV2).ReduceSum();
930}
931
933{
934 return Vec3::sReplicate(Dot(inV2));
935}
936
938{
939 return Vec4::sReplicate(Dot(inV2));
940}
941
942float Vec3::LengthSq() const
943{
944 return Dot(*this);
945}
946
947float Vec3::Length() const
948{
949 return JPH::Sqrt(LengthSq());
950}
951
953{
954#if defined(JPH_USE_SSE)
955 return _mm_sqrt_ps(mValue);
956#elif defined(JPH_USE_NEON)
957 return vsqrtq_f32(mValue);
958#elif defined(JPH_USE_RVV)
959 Vec3 res;
960 const vfloat32m1_t v = __riscv_vle32_v_f32m1(mF32, 3);
961 const vfloat32m1_t rvv_sqrt = __riscv_vfsqrt_v_f32m1(v, 3);
962 __riscv_vse32_v_f32m1(res.mF32, rvv_sqrt, 3);
963 return res;
964#else
965 return Vec3(JPH::Sqrt(mF32[0]), JPH::Sqrt(mF32[1]), JPH::Sqrt(mF32[2]));
966#endif
967}
968
970{
971 return *this / Length();
972}
973
975{
976#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
977 Type mul = _mm_mul_ps(mValue, mValue);
978 Type shuf = _mm_movehdup_ps(mul);
979 Type sums = _mm_add_ps(mul, shuf);
980 shuf = _mm_movehl_ps(mul, mul);
981 sums = _mm_add_ps(sums, shuf);
982 Type len_sq = _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(0, 0, 0, 0));
983 // clang with '-ffast-math' (which you should not use!) can generate _mm_rsqrt_ps
984 // instructions which produce INFs/NaNs when they get a denormal float as input.
985 // We therefore treat denormals as zero here.
986 Type is_zero = _mm_cmple_ps(len_sq, _mm_set1_ps(FLT_MIN));
987#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
988 if (_mm_movemask_ps(is_zero) == 0xf)
989 return inZeroValue;
990 else
991 return _mm_div_ps(mValue, _mm_sqrt_ps(len_sq));
992#else
993 return _mm_blendv_ps(_mm_div_ps(mValue, _mm_sqrt_ps(len_sq)), inZeroValue.mValue, is_zero);
994#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
995#elif defined(JPH_USE_NEON)
996 float32x4_t mul = vmulq_f32(mValue, mValue);
997 mul = vsetq_lane_f32(0, mul, 3);
998 float32x4_t len_sq = vdupq_n_f32(vaddvq_f32(mul));
999 uint32x4_t is_zero = vcleq_f32(len_sq, vdupq_n_f32(FLT_MIN));
1000 return vbslq_f32(is_zero, inZeroValue.mValue, vdivq_f32(mValue, vsqrtq_f32(len_sq)));
1001#elif defined(JPH_USE_RVV)
1002 const vfloat32m1_t src = __riscv_vle32_v_f32m1(mF32, 3);
1003 const vfloat32m1_t zeros = __riscv_vfmv_v_f_f32m1(0.0f, 3);
1004 const vfloat32m1_t mul = __riscv_vfmul_vv_f32m1(src, src, 3);
1005 const vfloat32m1_t sum = __riscv_vfredosum_vs_f32m1_f32m1(mul, zeros, 3);
1006 const float dot = __riscv_vfmv_f_s_f32m1_f32(sum);
1007 if (dot <= FLT_MIN)
1008 return inZeroValue;
1009
1010 const vfloat32m1_t splat = __riscv_vrgather_vx_f32m1(sum, 0, 3);
1011 const vfloat32m1_t length = __riscv_vfsqrt_v_f32m1(splat, 3);
1012
1013 Vec3 v;
1014 const vfloat32m1_t norm = __riscv_vfdiv_vv_f32m1(src, length, 3);
1015 __riscv_vse32_v_f32m1(v.mF32, norm, 3);
1016 return v;
1017#else
1018 float len_sq = LengthSq();
1019 if (len_sq <= FLT_MIN)
1020 return inZeroValue;
1021 else
1022 return *this / JPH::Sqrt(len_sq);
1023#endif
1024}
1025
1026bool Vec3::IsNormalized(float inTolerance) const
1027{
1028 return abs(LengthSq() - 1.0f) <= inTolerance;
1029}
1030
1031bool Vec3::IsNaN() const
1032{
1033#if defined(JPH_USE_AVX512)
1034 return (_mm_fpclass_ps_mask(mValue, 0b10000001) & 0x7) != 0;
1035#elif defined(JPH_USE_SSE)
1036 return (_mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) & 0x7) != 0;
1037#elif defined(JPH_USE_NEON)
1038 uint32x4_t mask = JPH_NEON_UINT32x4(1, 1, 1, 0);
1039 uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
1040 return vaddvq_u32(vandq_u32(is_equal, mask)) != 3;
1041#elif defined(JPH_USE_RVV)
1042 const vfloat32m1_t v = __riscv_vle32_v_f32m1(mF32, 3);
1043 const vbool32_t mask = __riscv_vmfeq_vv_f32m1_b32(v, v, 3);
1044 const uint32 eq = __riscv_vcpop_m_b32(mask, 3);
1045 return eq != 3;
1046#else
1047 return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]);
1048#endif
1049}
1050
1051void Vec3::StoreFloat3(Float3 *outV) const
1052{
1053#if defined(JPH_USE_SSE)
1054 _mm_store_ss(&outV->x, mValue);
1056 _mm_store_ss(&outV->y, t.mValue);
1058 _mm_store_ss(&outV->z, t.mValue);
1059#elif defined(JPH_USE_NEON)
1060 float32x2_t xy = vget_low_f32(mValue);
1061 vst1_f32(&outV->x, xy);
1062 vst1q_lane_f32(&outV->z, mValue, 2);
1063#elif defined(JPH_USE_RVV)
1064 const vfloat32m1_t v = __riscv_vle32_v_f32m1(mF32, 3);
1065 __riscv_vse32_v_f32m1(&outV->x, v, 3);
1066#else
1067 outV->x = mF32[0];
1068 outV->y = mF32[1];
1069 outV->z = mF32[2];
1070#endif
1071}
1072
1074{
1075#if defined(JPH_USE_SSE)
1076 return _mm_cvttps_epi32(mValue);
1077#elif defined(JPH_USE_NEON)
1078 return vcvtq_u32_f32(mValue);
1079#elif defined(JPH_USE_RVV)
1080 UVec4 res;
1081 const vfloat32m1_t v = __riscv_vle32_v_f32m1(mF32, 4);
1082 const vuint32m1_t cast = __riscv_vfcvt_rtz_xu_f_v_u32m1(v, 4);
1083 __riscv_vse32_v_u32m1(res.mU32, cast, 4);
1084 return res;
1085#else
1086 return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
1087#endif
1088}
1089
1091{
1092#if defined(JPH_USE_SSE)
1093 return UVec4(_mm_castps_si128(mValue));
1094#elif defined(JPH_USE_NEON)
1095 return vreinterpretq_u32_f32(mValue);
1096#else
1097 return *reinterpret_cast<const UVec4 *>(this);
1098#endif
1099}
1100
1101float Vec3::ReduceMin() const
1102{
1105 return v.GetX();
1106}
1107
1108float Vec3::ReduceMax() const
1109{
1112 return v.GetX();
1113}
1114
1116{
1117#if defined(JPH_USE_SSE)
1118 // Build both perpendicular candidates without explicit masking:
1119 // perp_x = [z, 0, -x, 0] (used when |x| > |y|)
1120 // perp_y = [0, z, -y, 0] (used when |x| <= |y|)
1121 __m128 zero = _mm_setzero_ps();
1122 __m128 neg = _mm_sub_ps(zero, mValue);
1123 __m128 perp_x = _mm_shuffle_ps(_mm_unpackhi_ps(mValue, zero), neg, _MM_SHUFFLE(0, 0, 1, 0));
1124 __m128 perp_y = _mm_shuffle_ps(_mm_unpackhi_ps(zero, mValue), neg, _MM_SHUFFLE(1, 1, 1, 0));
1125
1126 // Compare squared components instead of absolute values (saves the abs computation).
1127 __m128 sq = _mm_mul_ps(mValue, mValue);
1128 __m128 xx = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 0));
1129 __m128 yy = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(1, 1, 1, 1));
1130 __m128 zz = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(2, 2, 2, 2));
1131 __m128 x_gt_y = _mm_cmpgt_ps(xx, yy);
1132
1133 // Select perpendicular based on |x| > |y|.
1134#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
1135 __m128 result = _mm_blendv_ps(perp_y, perp_x, x_gt_y);
1136#else
1137 __m128 result = _mm_or_ps(_mm_and_ps(x_gt_y, perp_x), _mm_andnot_ps(x_gt_y, perp_y));
1138#endif
1139
1140 // Normalize. Since the result has only two nonzero components; one of x^2 / y^2 plus z^2; the squared length is max(xx, yy) + zz. All lanes of the sqrt input are identical.
1141 __m128 len = _mm_sqrt_ps(_mm_add_ps(_mm_max_ps(xx, yy), zz));
1142 return _mm_div_ps(result, len);
1143#else
1144 float x = mF32[0], y = mF32[1], z = mF32[2];
1145 float xx = x * x, yy = y * y, zz = z * z;
1146#ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
1147 Vec3 perp_x(z, 0.0f, 0.0f - x);
1148 Vec3 perp_y(0.0f, z, 0.0f - y);
1149#else
1150 Vec3 perp_x(z, 0.0f, -x);
1151 Vec3 perp_y(0.0f, z, -y);
1152#endif // JPH_CROSS_PLATFORM_DETERMINISTIC
1153 return (xx > yy ? perp_x : perp_y) / JPH::Sqrt(max(xx, yy) + zz);
1154#endif // JPH_USE_SSE
1155}
1156
1158{
1159#if defined(JPH_USE_AVX512)
1160 Type one = _mm_set1_ps(1.0f);
1161 return _mm_or_ps(_mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90100), 0), one);
1162#elif defined(JPH_USE_SSE)
1163 Type minus_one = _mm_set1_ps(-1.0f);
1164 Type one = _mm_set1_ps(1.0f);
1165 return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
1166#elif defined(JPH_USE_NEON)
1167 Type minus_one = vdupq_n_f32(-1.0f);
1168 Type one = vdupq_n_f32(1.0f);
1169 return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(mValue), vreinterpretq_u32_f32(minus_one)), vreinterpretq_u32_f32(one)));
1170#elif defined(JPH_USE_RVV)
1171 Vec3 res;
1172 const vfloat32m1_t rvv_in = __riscv_vle32_v_f32m1(mF32, 3);
1173 const vfloat32m1_t rvv_one = __riscv_vfmv_v_f_f32m1(1.0, 3);
1174 const vfloat32m1_t rvv_signs = __riscv_vfsgnj_vv_f32m1(rvv_one, rvv_in, 3);
1175 __riscv_vse32_v_f32m1(res.mF32, rvv_signs, 3);
1176 return res;
1177#else
1178 return Vec3(std::signbit(mF32[0])? -1.0f : 1.0f,
1179 std::signbit(mF32[1])? -1.0f : 1.0f,
1180 std::signbit(mF32[2])? -1.0f : 1.0f);
1181#endif
1182}
1183
1184template <int X, int Y, int Z>
1185JPH_INLINE Vec3 Vec3::FlipSign() const
1186{
1187 static_assert(X == 1 || X == -1, "X must be 1 or -1");
1188 static_assert(Y == 1 || Y == -1, "Y must be 1 or -1");
1189 static_assert(Z == 1 || Z == -1, "Z must be 1 or -1");
1190 return Vec3::sXor(*this, Vec3(X > 0? 0.0f : -0.0f, Y > 0? 0.0f : -0.0f, Z > 0? 0.0f : -0.0f));
1191}
1192
1194{
1195 constexpr float cOneOverSqrt2 = 0.70710678f;
1196 constexpr uint cNumBits = 14;
1197 constexpr uint cMask = (1 << cNumBits) - 1;
1198 constexpr uint cMaxValue = cMask - 1; // Need odd number of buckets to quantize to or else we can't encode 0
1199 constexpr float cScale = float(cMaxValue) / (2.0f * cOneOverSqrt2);
1200
1201 // Store sign bit
1202 Vec3 v = *this;
1203 uint32 max_element = v.Abs().GetHighestComponentIndex();
1204 uint32 value = 0;
1205 if (v[max_element] < 0.0f)
1206 {
1207 value = 0x80000000u;
1208 v = -v;
1209 }
1210
1211 // Store highest component
1212 value |= max_element << 29;
1213
1214 // Store the other two components in a compressed format
1215 UVec4 compressed = Vec3::sClamp((v + Vec3::sReplicate(cOneOverSqrt2)) * cScale + Vec3::sReplicate(0.5f), Vec3::sZero(), Vec3::sReplicate(cMaxValue)).ToInt();
1216 switch (max_element)
1217 {
1218 case 0:
1219 compressed = compressed.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_UNUSED, SWIZZLE_UNUSED>();
1220 break;
1221
1222 case 1:
1223 compressed = compressed.Swizzle<SWIZZLE_X, SWIZZLE_Z, SWIZZLE_UNUSED, SWIZZLE_UNUSED>();
1224 break;
1225 }
1226
1227 value |= compressed.GetX();
1228 value |= compressed.GetY() << cNumBits;
1229 return value;
1230}
1231
1233{
1234 constexpr float cOneOverSqrt2 = 0.70710678f;
1235 constexpr uint cNumBits = 14;
1236 constexpr uint cMask = (1u << cNumBits) - 1;
1237 constexpr uint cMaxValue = cMask - 1; // Need odd number of buckets to quantize to or else we can't encode 0
1238 constexpr int cHalfMaxValue = int(cMaxValue >> 1);
1239 constexpr float cScale = 2.0f * cOneOverSqrt2 / float(cMaxValue);
1240
1241 // Restore two components
1242 Vec3 v = Vec3(float(int(inValue & cMask) - cHalfMaxValue), float(int((inValue >> cNumBits) & cMask) - cHalfMaxValue), 0) * cScale;
1243 JPH_ASSERT(v.GetZ() == 0.0f);
1244
1245 // Restore the highest component
1246 v.SetZ(JPH::Sqrt(max(1.0f - v.LengthSq(), 0.0f)));
1247
1248 // Extract sign
1249 if ((inValue & 0x80000000u) != 0)
1250 v = -v;
1251
1252 // Swizzle the components in place
1253 switch ((inValue >> 29) & 3)
1254 {
1255 case 0:
1257 break;
1258
1259 case 1:
1261 break;
1262 }
1263
1264 return v;
1265}
1266
#define JPH_SUPPRESS_WARNINGS_STD_BEGIN
Definition Core.h:439
#define JPH_SUPPRESS_WARNINGS_STD_END
Definition Core.h:452
std::uint64_t uint64
Definition Core.h:515
unsigned int uint
Definition Core.h:510
#define JPH_NAMESPACE_END
Definition Core.h:434
std::uint32_t uint32
Definition Core.h:513
#define JPH_NAMESPACE_BEGIN
Definition Core.h:428
#define xy
Definition HLSLToCPP.h:511
#define JPH_MAKE_HASHABLE(type,...)
Definition HashCombine.h:223
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
JPH_INLINE constexpr To BitCast(const From &inValue)
Simple implementation of C++20 std::bit_cast.
Definition Math.h:239
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_X
Use the X component.
Definition Swizzle.h:12
@ SWIZZLE_UNUSED
We always use the Z component when we don't specifically want to initialize a value,...
Definition Swizzle.h:16
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Vec3 operator*(float inV1, Vec3Arg inV2)
Definition Vec3.inl:531
Class that holds 3 floats. Used as a storage class. Convert to Vec3 for calculations.
Definition Float3.h:13
float y
Definition Float3.h:39
float z
Definition Float3.h:40
float x
Definition Float3.h:38
Definition UVec4.h:12
JPH_INLINE UVec4 Swizzle() const
Swizzle the elements in inV.
JPH_INLINE uint32 GetY() const
Definition UVec4.h:103
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:292
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:250
JPH_INLINE bool TestAllXYZTrue() const
Test if X, Y and Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:663
Type mValue
Definition UVec4.h:223
JPH_INLINE uint32 GetX() const
Get individual components.
Definition UVec4.h:102
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:271
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:527
uint32 mU32[4]
Definition UVec4.h:224
Definition Vec3.h:17
JPH_INLINE bool IsClose(Vec3Arg inV2, float inMaxDistSq=1.0e-12f) const
Test if two vectors are close.
Definition Vec3.inl:486
static JPH_INLINE Vec3 sMax(Vec3Arg inV1, Vec3Arg inV2)
Return the maximum of each of the components.
Definition Vec3.inl:203
JPH_INLINE float Dot(Vec3Arg inV2) const
Dot product.
Definition Vec3.inl:927
JPH_INLINE Vec3 Normalized() const
Normalize vector.
Definition Vec3.inl:969
static JPH_INLINE Type sFixW(Type inValue)
Internal helper function that ensures that the Z component is replicated to the W component to preven...
Vec4::Type Type
Definition Vec3.h:27
JPH_INLINE bool operator==(Vec3Arg inV2) const
Comparison.
Definition Vec3.inl:481
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec3.inl:757
static JPH_INLINE Vec3 sMin(Vec3Arg inV1, Vec3Arg inV2)
Return the minimum value of each of the components.
Definition Vec3.inl:183
JPH_INLINE Vec3 Cross(Vec3Arg inV2) const
Cross product.
Definition Vec3.inl:851
JPH_INLINE Vec3 GetNormalizedPerpendicular() const
Get normalized vector that is perpendicular to this vector.
Definition Vec3.inl:1115
static Vec3 sRandom(Random &inRandom)
Get random unit vector.
Definition Vec3.inl:473
JPH_INLINE float GetX() const
Get individual components.
Definition Vec3.h:127
JPH_INLINE bool IsNormalized(float inTolerance=1.0e-6f) const
Test if vector is normalized.
Definition Vec3.inl:1026
static JPH_INLINE Vec3 sXor(Vec3Arg inV1, Vec3Arg inV2)
Logical xor (component wise)
Definition Vec3.inl:429
static JPH_INLINE Vec3 sDecompressUnitVector(uint32 inValue)
Decompress a unit vector from a 32 bit value.
Definition Vec3.inl:1232
JPH_INLINE float Length() const
Length of vector.
Definition Vec3.inl:947
static JPH_INLINE UVec4 sGreaterOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Greater than or equal (component wise)
Definition Vec3.inl:328
JPH_INLINE float ReduceMin() const
Get the minimum of X, Y and Z.
Definition Vec3.inl:1101
JPH_INLINE Vec3 & operator-=(Vec3Arg inV2)
Subtract two float vectors (component wise)
Definition Vec3.inl:717
JPH_INLINE float ReduceMax() const
Get the maximum of X, Y and Z.
Definition Vec3.inl:1108
static JPH_INLINE Vec3 sDifferenceOfProducts(Vec3Arg inA, Vec3Arg inB, Vec3Arg inC, Vec3Arg inD)
Calculates inA * inB - inC * inD with more precision when FMA instructions are available....
Definition Vec3.inl:839
static JPH_INLINE UVec4 sLessOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Less than or equal (component wise)
Definition Vec3.inl:278
JPH_INLINE Vec3 operator/(float inV2) const
Divide vector by float.
Definition Vec3.inl:548
friend JPH_INLINE Vec3 operator*(float inV1, Vec3Arg inV2)
Multiply vector with float.
Definition Vec3.inl:531
JPH_INLINE int GetLowestComponentIndex() const
Get index of component with lowest value.
Definition Vec3.inl:805
JPH_INLINE Vec3 & operator/=(float inV2)
Divide vector by float.
Definition Vec3.inl:606
JPH_INLINE Vec4 DotV4(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition Vec3.inl:937
JPH_INLINE Vec3 Abs() const
Return the absolute value of each of the components.
Definition Vec3.inl:815
static JPH_INLINE Vec3 sOne()
Vector with all ones.
Definition Vec3.inl:157
JPH_INLINE Vec3 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition Vec3.inl:834
JPH_INLINE Vec3 NormalizedOr(Vec3Arg inZeroValue) const
Normalize vector or return inZeroValue if the length of the vector is zero.
Definition Vec3.inl:974
JPH_INLINE Vec3 FlipSign() const
Flips the signs of the components, e.g. FlipSign<-1, 1, -1>() will flip the signs of the X and Z comp...
Definition Vec3.inl:1185
JPH_INLINE Vec3 operator+(Vec3Arg inV2) const
Add two float vectors (component wise)
Definition Vec3.inl:626
JPH_INLINE float ReduceSum() const
Get the sum of X, Y and Z.
Definition Vec3.inl:894
JPH_INLINE uint32 CompressUnitVector() const
Compress a unit vector to a 32 bit value, precision is around 10^-4.
Definition Vec3.inl:1193
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec3.inl:789
JPH_INLINE Vec3 CrossPrecise(Vec3Arg inV2) const
Cross product (more precise version when FMA is available)
Definition Vec3.inl:889
static JPH_INLINE Vec3 sOr(Vec3Arg inV1, Vec3Arg inV2)
Logical or (component wise)
Definition Vec3.inl:411
static JPH_INLINE UVec4 sGreater(Vec3Arg inV1, Vec3Arg inV2)
Greater than (component wise)
Definition Vec3.inl:303
JPH_INLINE void SetZ(float inZ)
Definition Vec3.h:135
static JPH_INLINE Vec3 sAnd(Vec3Arg inV1, Vec3Arg inV2)
Logical and (component wise)
Definition Vec3.inl:447
JPH_INLINE void CheckW() const
Internal helper function that checks that W is equal to Z, so e.g. dividing by it should not generate...
static JPH_INLINE Vec3 sUnitSpherical(float inTheta, float inPhi)
Definition Vec3.inl:465
JPH_INLINE UVec4 ToInt() const
Convert each component from a float to an int.
Definition Vec3.inl:1073
Type mValue
Definition Vec3.h:308
JPH_INLINE float GetY() const
Definition Vec3.h:128
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec3.inl:773
JPH_INLINE Vec3 operator-() const
Negate.
Definition Vec3.inl:665
JPH_INLINE void StoreFloat3(Float3 *outV) const
Store 3 floats to memory.
Definition Vec3.inl:1051
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec3.inl:942
float mF32[4]
Definition Vec3.h:309
static JPH_INLINE UVec4 sEquals(Vec3Arg inV1, Vec3Arg inV2)
Equals (component wise)
Definition Vec3.inl:228
JPH_INLINE bool IsNearZero(float inMaxDistSq=1.0e-12f) const
Test if vector is near zero.
Definition Vec3.inl:491
static JPH_INLINE Vec3 sZero()
Vector with all zeros.
Definition Vec3.inl:125
static JPH_INLINE UVec4 sLess(Vec3Arg inV1, Vec3Arg inV2)
Less than (component wise)
Definition Vec3.inl:253
static JPH_INLINE Vec3 sReplicate(float inV)
Replicate inV across all components.
Definition Vec3.inl:141
static JPH_INLINE Vec3 sClamp(Vec3Arg inV, Vec3Arg inMin, Vec3Arg inMax)
Clamp a vector between min and max (component wise)
Definition Vec3.inl:223
JPH_INLINE Vec3 & operator*=(float inV2)
Multiply vector with float.
Definition Vec3.inl:565
JPH_INLINE Vec3 & operator+=(Vec3Arg inV2)
Add two float vectors (component wise)
Definition Vec3.inl:644
static JPH_INLINE Vec3 sSelect(Vec3Arg inNotSet, Vec3Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit ...
Definition Vec3.inl:376
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition Vec3.inl:1031
JPH_INLINE Vec3 Sqrt() const
Component wise square root.
Definition Vec3.inl:952
JPH_INLINE UVec4 ReinterpretAsInt() const
Reinterpret Vec3 as a UVec4 (doesn't change the bits)
Definition Vec3.inl:1090
JPH_INLINE Vec3 DotV(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y and Z components.
Definition Vec3.inl:932
static JPH_INLINE Vec3 sLoadFloat3Unsafe(const Float3 &inV)
Load 3 floats from memory (reads 32 bits extra which it doesn't use)
Definition Vec3.inl:167
JPH_INLINE float GetZ() const
Definition Vec3.h:129
JPH_INLINE Vec3 GetSign() const
Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
Definition Vec3.inl:1157
static JPH_INLINE Vec3 sNaN()
Vector with all NaN's.
Definition Vec3.inl:162
Vec3()=default
Constructor.
JPH_INLINE int GetHighestComponentIndex() const
Get index of component with highest value.
Definition Vec3.inl:810
static JPH_INLINE Vec3 sFusedMultiplyAdd(Vec3Arg inMul1, Vec3Arg inMul2, Vec3Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition Vec3.inl:353
JPH_INLINE Vec3 Swizzle() const
Swizzle the elements in inV.
Definition Vec4.h:14
float mF32[4]
Definition Vec4.h:318
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:119
JPH_INLINE float GetY() const
Definition Vec4.h:120
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:97
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:1171