Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
Vec3.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
5#include <Jolt/Math/Vec4.h>
6#include <Jolt/Math/UVec4.h>
8
10#include <random>
12
13// Create a std::hash/JPH::Hash for Vec3
14JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())
15
17
18void Vec3::CheckW() const
19{
20#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
21 // Avoid asserts when both components are NaN
22 JPH_ASSERT(reinterpret_cast<const uint32 *>(mF32)[2] == reinterpret_cast<const uint32 *>(mF32)[3]);
23#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
24}
25
26JPH_INLINE Vec3::Type Vec3::sFixW(Type inValue)
27{
28#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
29 #if defined(JPH_USE_SSE)
30 return _mm_shuffle_ps(inValue, inValue, _MM_SHUFFLE(2, 2, 1, 0));
31 #elif defined(JPH_USE_NEON)
32 return JPH_NEON_SHUFFLE_F32x4(inValue, inValue, 0, 1, 2, 2);
33 #else
34 Type value;
35 value.mData[0] = inValue.mData[0];
36 value.mData[1] = inValue.mData[1];
37 value.mData[2] = inValue.mData[2];
38 value.mData[3] = inValue.mData[2];
39 return value;
40 #endif
41#else
42 return inValue;
43#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
44}
45
47 mValue(sFixW(inRHS.mValue))
48{
49}
50
51Vec3::Vec3(const Float3 &inV)
52{
53#if defined(JPH_USE_SSE)
54 Type x = _mm_load_ss(&inV.x);
55 Type y = _mm_load_ss(&inV.y);
56 Type z = _mm_load_ss(&inV.z);
57 Type xy = _mm_unpacklo_ps(x, y);
58 mValue = _mm_shuffle_ps(xy, z, _MM_SHUFFLE(0, 0, 1, 0)); // Assure Z and W are the same
59#elif defined(JPH_USE_NEON)
60 float32x2_t xy = vld1_f32(&inV.x);
61 float32x2_t zz = vdup_n_f32(inV.z); // Assure Z and W are the same
62 mValue = vcombine_f32(xy, zz);
63#else
64 mF32[0] = inV[0];
65 mF32[1] = inV[1];
66 mF32[2] = inV[2];
67 mF32[3] = inV[2]; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables
68#endif
69}
70
71Vec3::Vec3(float inX, float inY, float inZ)
72{
73#if defined(JPH_USE_SSE)
74 mValue = _mm_set_ps(inZ, inZ, inY, inX);
75#elif defined(JPH_USE_NEON)
76 uint32x2_t xy = vcreate_u32(static_cast<uint64>(BitCast<uint32>(inX)) | (static_cast<uint64>(BitCast<uint32>(inY)) << 32));
77 uint32x2_t zz = vreinterpret_u32_f32(vdup_n_f32(inZ));
78 mValue = vreinterpretq_f32_u32(vcombine_u32(xy, zz));
79#else
80 mF32[0] = inX;
81 mF32[1] = inY;
82 mF32[2] = inZ;
83 mF32[3] = inZ; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables
84#endif
85}
86
87template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ>
89{
90 static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
91 static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
92 static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
93
94#if defined(JPH_USE_SSE)
95 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleZ, SwizzleZ, SwizzleY, SwizzleX)); // Assure Z and W are the same
96#elif defined(JPH_USE_NEON)
97 return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleZ);
98#else
99 return Vec3(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ]);
100#endif
101}
102
104{
105#if defined(JPH_USE_SSE)
106 return _mm_setzero_ps();
107#elif defined(JPH_USE_NEON)
108 return vdupq_n_f32(0);
109#else
110 return Vec3(0, 0, 0);
111#endif
112}
113
115{
116#if defined(JPH_USE_SSE)
117 return _mm_set1_ps(inV);
118#elif defined(JPH_USE_NEON)
119 return vdupq_n_f32(inV);
120#else
121 return Vec3(inV, inV, inV);
122#endif
123}
124
126{
127 return sReplicate(numeric_limits<float>::quiet_NaN());
128}
129
131{
132#if defined(JPH_USE_SSE)
133 Type v = _mm_loadu_ps(&inV.x);
134#elif defined(JPH_USE_NEON)
135 Type v = vld1q_f32(&inV.x);
136#else
137 Type v = { inV.x, inV.y, inV.z };
138#endif
139 return sFixW(v);
140}
141
143{
144#if defined(JPH_USE_SSE)
145 return _mm_min_ps(inV1.mValue, inV2.mValue);
146#elif defined(JPH_USE_NEON)
147 return vminq_f32(inV1.mValue, inV2.mValue);
148#else
149 return Vec3(min(inV1.mF32[0], inV2.mF32[0]),
150 min(inV1.mF32[1], inV2.mF32[1]),
151 min(inV1.mF32[2], inV2.mF32[2]));
152#endif
153}
154
156{
157#if defined(JPH_USE_SSE)
158 return _mm_max_ps(inV1.mValue, inV2.mValue);
159#elif defined(JPH_USE_NEON)
160 return vmaxq_f32(inV1.mValue, inV2.mValue);
161#else
162 return Vec3(max(inV1.mF32[0], inV2.mF32[0]),
163 max(inV1.mF32[1], inV2.mF32[1]),
164 max(inV1.mF32[2], inV2.mF32[2]));
165#endif
166}
167
169{
170 return sMax(sMin(inV, inMax), inMin);
171}
172
174{
175#if defined(JPH_USE_SSE)
176 return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
177#elif defined(JPH_USE_NEON)
178 return vceqq_f32(inV1.mValue, inV2.mValue);
179#else
180 uint32 z = inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0;
181 return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
182 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
183 z,
184 z);
185#endif
186}
187
189{
190#if defined(JPH_USE_SSE)
191 return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
192#elif defined(JPH_USE_NEON)
193 return vcltq_f32(inV1.mValue, inV2.mValue);
194#else
195 uint32 z = inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0;
196 return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
197 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
198 z,
199 z);
200#endif
201}
202
204{
205#if defined(JPH_USE_SSE)
206 return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
207#elif defined(JPH_USE_NEON)
208 return vcleq_f32(inV1.mValue, inV2.mValue);
209#else
210 uint32 z = inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0;
211 return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
212 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
213 z,
214 z);
215#endif
216}
217
219{
220#if defined(JPH_USE_SSE)
221 return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
222#elif defined(JPH_USE_NEON)
223 return vcgtq_f32(inV1.mValue, inV2.mValue);
224#else
225 uint32 z = inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0;
226 return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
227 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
228 z,
229 z);
230#endif
231}
232
234{
235#if defined(JPH_USE_SSE)
236 return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
237#elif defined(JPH_USE_NEON)
238 return vcgeq_f32(inV1.mValue, inV2.mValue);
239#else
240 uint32 z = inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0;
241 return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
242 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
243 z,
244 z);
245#endif
246}
247
249{
250#if defined(JPH_USE_SSE)
251 #ifdef JPH_USE_FMADD
252 return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
253 #else
254 return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
255 #endif
256#elif defined(JPH_USE_NEON)
257 return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
258#else
259 return Vec3(inMul1.mF32[0] * inMul2.mF32[0] + inAdd.mF32[0],
260 inMul1.mF32[1] * inMul2.mF32[1] + inAdd.mF32[1],
261 inMul1.mF32[2] * inMul2.mF32[2] + inAdd.mF32[2]);
262#endif
263}
264
265Vec3 Vec3::sSelect(Vec3Arg inNotSet, Vec3Arg inSet, UVec4Arg inControl)
266{
267#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
268 Type v = _mm_blendv_ps(inNotSet.mValue, inSet.mValue, _mm_castsi128_ps(inControl.mValue));
269 return sFixW(v);
270#elif defined(JPH_USE_SSE)
271 __m128 is_set = _mm_castsi128_ps(_mm_srai_epi32(inControl.mValue, 31));
272 Type v = _mm_or_ps(_mm_and_ps(is_set, inSet.mValue), _mm_andnot_ps(is_set, inNotSet.mValue));
273 return sFixW(v);
274#elif defined(JPH_USE_NEON)
275 Type v = vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inSet.mValue, inNotSet.mValue);
276 return sFixW(v);
277#else
278 Vec3 result;
279 for (int i = 0; i < 3; i++)
280 result.mF32[i] = (inControl.mU32[i] & 0x80000000u) ? inSet.mF32[i] : inNotSet.mF32[i];
281#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
282 result.mF32[3] = result.mF32[2];
283#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
284 return result;
285#endif
286}
287
289{
290#if defined(JPH_USE_SSE)
291 return _mm_or_ps(inV1.mValue, inV2.mValue);
292#elif defined(JPH_USE_NEON)
293 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
294#else
296#endif
297}
298
300{
301#if defined(JPH_USE_SSE)
302 return _mm_xor_ps(inV1.mValue, inV2.mValue);
303#elif defined(JPH_USE_NEON)
304 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
305#else
307#endif
308}
309
311{
312#if defined(JPH_USE_SSE)
313 return _mm_and_ps(inV1.mValue, inV2.mValue);
314#elif defined(JPH_USE_NEON)
315 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
316#else
318#endif
319}
320
321Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
322{
323 Vec4 s, c;
324 Vec4(inTheta, inPhi, 0, 0).SinCos(s, c);
325 return Vec3(s.GetX() * c.GetY(), s.GetX() * s.GetY(), c.GetX());
326}
327
328template <class Random>
329Vec3 Vec3::sRandom(Random &inRandom)
330{
331 std::uniform_real_distribution<float> zero_to_one(0.0f, 1.0f);
332 float theta = JPH_PI * zero_to_one(inRandom);
333 float phi = 2.0f * JPH_PI * zero_to_one(inRandom);
334 return sUnitSpherical(theta, phi);
335}
336
338{
339 return sEquals(*this, inV2).TestAllXYZTrue();
340}
341
342bool Vec3::IsClose(Vec3Arg inV2, float inMaxDistSq) const
343{
344 return (inV2 - *this).LengthSq() <= inMaxDistSq;
345}
346
347bool Vec3::IsNearZero(float inMaxDistSq) const
348{
349 return LengthSq() <= inMaxDistSq;
350}
351
353{
354#if defined(JPH_USE_SSE)
355 return _mm_mul_ps(mValue, inV2.mValue);
356#elif defined(JPH_USE_NEON)
357 return vmulq_f32(mValue, inV2.mValue);
358#else
359 return Vec3(mF32[0] * inV2.mF32[0], mF32[1] * inV2.mF32[1], mF32[2] * inV2.mF32[2]);
360#endif
361}
362
363Vec3 Vec3::operator * (float inV2) const
364{
365#if defined(JPH_USE_SSE)
366 return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
367#elif defined(JPH_USE_NEON)
368 return vmulq_n_f32(mValue, inV2);
369#else
370 return Vec3(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2);
371#endif
372}
373
374Vec3 operator * (float inV1, Vec3Arg inV2)
375{
376#if defined(JPH_USE_SSE)
377 return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
378#elif defined(JPH_USE_NEON)
379 return vmulq_n_f32(inV2.mValue, inV1);
380#else
381 return Vec3(inV1 * inV2.mF32[0], inV1 * inV2.mF32[1], inV1 * inV2.mF32[2]);
382#endif
383}
384
385Vec3 Vec3::operator / (float inV2) const
386{
387#if defined(JPH_USE_SSE)
388 return _mm_div_ps(mValue, _mm_set1_ps(inV2));
389#elif defined(JPH_USE_NEON)
390 return vdivq_f32(mValue, vdupq_n_f32(inV2));
391#else
392 return Vec3(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2);
393#endif
394}
395
397{
398#if defined(JPH_USE_SSE)
399 mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
400#elif defined(JPH_USE_NEON)
401 mValue = vmulq_n_f32(mValue, inV2);
402#else
403 for (int i = 0; i < 3; ++i)
404 mF32[i] *= inV2;
405 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
406 mF32[3] = mF32[2];
407 #endif
408#endif
409 return *this;
410}
411
413{
414#if defined(JPH_USE_SSE)
415 mValue = _mm_mul_ps(mValue, inV2.mValue);
416#elif defined(JPH_USE_NEON)
417 mValue = vmulq_f32(mValue, inV2.mValue);
418#else
419 for (int i = 0; i < 3; ++i)
420 mF32[i] *= inV2.mF32[i];
421 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
422 mF32[3] = mF32[2];
423 #endif
424#endif
425 return *this;
426}
427
429{
430#if defined(JPH_USE_SSE)
431 mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
432#elif defined(JPH_USE_NEON)
433 mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
434#else
435 for (int i = 0; i < 3; ++i)
436 mF32[i] /= inV2;
437 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
438 mF32[3] = mF32[2];
439 #endif
440#endif
441 return *this;
442}
443
445{
446#if defined(JPH_USE_SSE)
447 return _mm_add_ps(mValue, inV2.mValue);
448#elif defined(JPH_USE_NEON)
449 return vaddq_f32(mValue, inV2.mValue);
450#else
451 return Vec3(mF32[0] + inV2.mF32[0], mF32[1] + inV2.mF32[1], mF32[2] + inV2.mF32[2]);
452#endif
453}
454
456{
457#if defined(JPH_USE_SSE)
458 mValue = _mm_add_ps(mValue, inV2.mValue);
459#elif defined(JPH_USE_NEON)
460 mValue = vaddq_f32(mValue, inV2.mValue);
461#else
462 for (int i = 0; i < 3; ++i)
463 mF32[i] += inV2.mF32[i];
464 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
465 mF32[3] = mF32[2];
466 #endif
467#endif
468 return *this;
469}
470
472{
473#if defined(JPH_USE_SSE)
474 return _mm_sub_ps(_mm_setzero_ps(), mValue);
475#elif defined(JPH_USE_NEON)
476 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
477 return vsubq_f32(vdupq_n_f32(0), mValue);
478 #else
479 return vnegq_f32(mValue);
480 #endif
481#else
482 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
483 return Vec3(0.0f - mF32[0], 0.0f - mF32[1], 0.0f - mF32[2]);
484 #else
485 return Vec3(-mF32[0], -mF32[1], -mF32[2]);
486 #endif
487#endif
488}
489
491{
492#if defined(JPH_USE_SSE)
493 return _mm_sub_ps(mValue, inV2.mValue);
494#elif defined(JPH_USE_NEON)
495 return vsubq_f32(mValue, inV2.mValue);
496#else
497 return Vec3(mF32[0] - inV2.mF32[0], mF32[1] - inV2.mF32[1], mF32[2] - inV2.mF32[2]);
498#endif
499}
500
502{
503#if defined(JPH_USE_SSE)
504 mValue = _mm_sub_ps(mValue, inV2.mValue);
505#elif defined(JPH_USE_NEON)
506 mValue = vsubq_f32(mValue, inV2.mValue);
507#else
508 for (int i = 0; i < 3; ++i)
509 mF32[i] -= inV2.mF32[i];
510 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
511 mF32[3] = mF32[2];
512 #endif
513#endif
514 return *this;
515}
516
518{
519 inV2.CheckW(); // Check W equals Z to avoid div by zero
520#if defined(JPH_USE_SSE)
521 return _mm_div_ps(mValue, inV2.mValue);
522#elif defined(JPH_USE_NEON)
523 return vdivq_f32(mValue, inV2.mValue);
524#else
525 return Vec3(mF32[0] / inV2.mF32[0], mF32[1] / inV2.mF32[1], mF32[2] / inV2.mF32[2]);
526#endif
527}
528
530{
531#if defined(JPH_USE_SSE)
532 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
533#elif defined(JPH_USE_NEON)
534 return vdupq_laneq_f32(mValue, 0);
535#else
536 return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
537#endif
538}
539
541{
542#if defined(JPH_USE_SSE)
543 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
544#elif defined(JPH_USE_NEON)
545 return vdupq_laneq_f32(mValue, 1);
546#else
547 return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
548#endif
549}
550
552{
553#if defined(JPH_USE_SSE)
554 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
555#elif defined(JPH_USE_NEON)
556 return vdupq_laneq_f32(mValue, 2);
557#else
558 return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
559#endif
560}
561
563{
564 return GetX() < GetY() ? (GetZ() < GetX() ? 2 : 0) : (GetZ() < GetY() ? 2 : 1);
565}
566
568{
569 return GetX() > GetY() ? (GetZ() > GetX() ? 2 : 0) : (GetZ() > GetY() ? 2 : 1);
570}
571
573{
574#if defined(JPH_USE_AVX512)
575 return _mm_range_ps(mValue, mValue, 0b1000);
576#elif defined(JPH_USE_SSE)
577 return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
578#elif defined(JPH_USE_NEON)
579 return vabsq_f32(mValue);
580#else
581 return Vec3(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]));
582#endif
583}
584
586{
587 return sReplicate(1.0f) / mValue;
588}
589
591{
592#if defined(JPH_USE_SSE)
593 Type t1 = _mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
594 t1 = _mm_mul_ps(t1, mValue);
595 Type t2 = _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
596 t2 = _mm_mul_ps(t2, inV2.mValue);
597 Type t3 = _mm_sub_ps(t1, t2);
598 return _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
599#elif defined(JPH_USE_NEON)
600 Type t1 = JPH_NEON_SHUFFLE_F32x4(inV2.mValue, inV2.mValue, 1, 2, 0, 0); // Assure Z and W are the same
601 t1 = vmulq_f32(t1, mValue);
602 Type t2 = JPH_NEON_SHUFFLE_F32x4(mValue, mValue, 1, 2, 0, 0); // Assure Z and W are the same
603 t2 = vmulq_f32(t2, inV2.mValue);
604 Type t3 = vsubq_f32(t1, t2);
605 return JPH_NEON_SHUFFLE_F32x4(t3, t3, 1, 2, 0, 0); // Assure Z and W are the same
606#else
607 return Vec3(mF32[1] * inV2.mF32[2] - mF32[2] * inV2.mF32[1],
608 mF32[2] * inV2.mF32[0] - mF32[0] * inV2.mF32[2],
609 mF32[0] * inV2.mF32[1] - mF32[1] * inV2.mF32[0]);
610#endif
611}
612
614{
615#if defined(JPH_USE_SSE4_1)
616 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
617#elif defined(JPH_USE_NEON)
618 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
619 mul = vsetq_lane_f32(0, mul, 3);
620 return vdupq_n_f32(vaddvq_f32(mul));
621#else
622 float dot = 0.0f;
623 for (int i = 0; i < 3; i++)
624 dot += mF32[i] * inV2.mF32[i];
625 return Vec3::sReplicate(dot);
626#endif
627}
628
630{
631#if defined(JPH_USE_SSE4_1)
632 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
633#elif defined(JPH_USE_NEON)
634 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
635 mul = vsetq_lane_f32(0, mul, 3);
636 return vdupq_n_f32(vaddvq_f32(mul));
637#else
638 float dot = 0.0f;
639 for (int i = 0; i < 3; i++)
640 dot += mF32[i] * inV2.mF32[i];
641 return Vec4::sReplicate(dot);
642#endif
643}
644
645float Vec3::Dot(Vec3Arg inV2) const
646{
647#if defined(JPH_USE_SSE4_1)
648 return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0x7f));
649#elif defined(JPH_USE_NEON)
650 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
651 mul = vsetq_lane_f32(0, mul, 3);
652 return vaddvq_f32(mul);
653#else
654 float dot = 0.0f;
655 for (int i = 0; i < 3; i++)
656 dot += mF32[i] * inV2.mF32[i];
657 return dot;
658#endif
659}
660
661float Vec3::LengthSq() const
662{
663#if defined(JPH_USE_SSE4_1)
664 return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0x7f));
665#elif defined(JPH_USE_NEON)
666 float32x4_t mul = vmulq_f32(mValue, mValue);
667 mul = vsetq_lane_f32(0, mul, 3);
668 return vaddvq_f32(mul);
669#else
670 float len_sq = 0.0f;
671 for (int i = 0; i < 3; i++)
672 len_sq += mF32[i] * mF32[i];
673 return len_sq;
674#endif
675}
676
677float Vec3::Length() const
678{
679#if defined(JPH_USE_SSE4_1)
680 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0x7f)));
681#elif defined(JPH_USE_NEON)
682 float32x4_t mul = vmulq_f32(mValue, mValue);
683 mul = vsetq_lane_f32(0, mul, 3);
684 float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
685 return vget_lane_f32(vsqrt_f32(sum), 0);
686#else
687 return sqrt(LengthSq());
688#endif
689}
690
692{
693#if defined(JPH_USE_SSE)
694 return _mm_sqrt_ps(mValue);
695#elif defined(JPH_USE_NEON)
696 return vsqrtq_f32(mValue);
697#else
698 return Vec3(sqrt(mF32[0]), sqrt(mF32[1]), sqrt(mF32[2]));
699#endif
700}
701
703{
704#if defined(JPH_USE_SSE4_1)
705 return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0x7f)));
706#elif defined(JPH_USE_NEON)
707 float32x4_t mul = vmulq_f32(mValue, mValue);
708 mul = vsetq_lane_f32(0, mul, 3);
709 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
710 return vdivq_f32(mValue, vsqrtq_f32(sum));
711#else
712 return *this / Length();
713#endif
714}
715
717{
718#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
719 Type len_sq = _mm_dp_ps(mValue, mValue, 0x7f);
720 // clang with '-ffast-math' (which you should not use!) can generate _mm_rsqrt_ps
721 // instructions which produce INFs/NaNs when they get a denormal float as input.
722 // We therefore treat denormals as zero here.
723 Type is_zero = _mm_cmple_ps(len_sq, _mm_set1_ps(FLT_MIN));
724#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
725 if (_mm_movemask_ps(is_zero) == 0xf)
726 return inZeroValue;
727 else
728 return _mm_div_ps(mValue, _mm_sqrt_ps(len_sq));
729#else
730 return _mm_blendv_ps(_mm_div_ps(mValue, _mm_sqrt_ps(len_sq)), inZeroValue.mValue, is_zero);
731#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
732#elif defined(JPH_USE_NEON)
733 float32x4_t mul = vmulq_f32(mValue, mValue);
734 mul = vsetq_lane_f32(0, mul, 3);
735 float32x4_t len_sq = vdupq_n_f32(vaddvq_f32(mul));
736 uint32x4_t is_zero = vcleq_f32(len_sq, vdupq_n_f32(FLT_MIN));
737 return vbslq_f32(is_zero, inZeroValue.mValue, vdivq_f32(mValue, vsqrtq_f32(len_sq)));
738#else
739 float len_sq = LengthSq();
740 if (len_sq <= FLT_MIN)
741 return inZeroValue;
742 else
743 return *this / sqrt(len_sq);
744#endif
745}
746
747bool Vec3::IsNormalized(float inTolerance) const
748{
749 return abs(LengthSq() - 1.0f) <= inTolerance;
750}
751
752bool Vec3::IsNaN() const
753{
754#if defined(JPH_USE_AVX512)
755 return (_mm_fpclass_ps_mask(mValue, 0b10000001) & 0x7) != 0;
756#elif defined(JPH_USE_SSE)
757 return (_mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) & 0x7) != 0;
758#elif defined(JPH_USE_NEON)
759 uint32x4_t mask = JPH_NEON_UINT32x4(1, 1, 1, 0);
760 uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
761 return vaddvq_u32(vandq_u32(is_equal, mask)) != 3;
762#else
763 return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]);
764#endif
765}
766
767void Vec3::StoreFloat3(Float3 *outV) const
768{
769#if defined(JPH_USE_SSE)
770 _mm_store_ss(&outV->x, mValue);
771 Vec3 t = Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_UNUSED>();
772 _mm_store_ss(&outV->y, t.mValue);
774 _mm_store_ss(&outV->z, t.mValue);
775#elif defined(JPH_USE_NEON)
776 float32x2_t xy = vget_low_f32(mValue);
777 vst1_f32(&outV->x, xy);
778 vst1q_lane_f32(&outV->z, mValue, 2);
779#else
780 outV->x = mF32[0];
781 outV->y = mF32[1];
782 outV->z = mF32[2];
783#endif
784}
785
787{
788#if defined(JPH_USE_SSE)
789 return _mm_cvttps_epi32(mValue);
790#elif defined(JPH_USE_NEON)
791 return vcvtq_u32_f32(mValue);
792#else
793 return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
794#endif
795}
796
798{
799#if defined(JPH_USE_SSE)
800 return UVec4(_mm_castps_si128(mValue));
801#elif defined(JPH_USE_NEON)
802 return vreinterpretq_u32_f32(mValue);
803#else
804 return *reinterpret_cast<const UVec4 *>(this);
805#endif
806}
807
808float Vec3::ReduceMin() const
809{
810 Vec3 v = sMin(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
812 return v.GetX();
813}
814
815float Vec3::ReduceMax() const
816{
817 Vec3 v = sMax(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
819 return v.GetX();
820}
821
823{
824 if (abs(mF32[0]) > abs(mF32[1]))
825 {
826 float len = sqrt(mF32[0] * mF32[0] + mF32[2] * mF32[2]);
827 return Vec3(mF32[2], 0.0f, -mF32[0]) / len;
828 }
829 else
830 {
831 float len = sqrt(mF32[1] * mF32[1] + mF32[2] * mF32[2]);
832 return Vec3(0.0f, mF32[2], -mF32[1]) / len;
833 }
834}
835
837{
838#if defined(JPH_USE_AVX512)
839 return _mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90A00), 0);
840#elif defined(JPH_USE_SSE)
841 Type minus_one = _mm_set1_ps(-1.0f);
842 Type one = _mm_set1_ps(1.0f);
843 return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
844#elif defined(JPH_USE_NEON)
845 Type minus_one = vdupq_n_f32(-1.0f);
846 Type one = vdupq_n_f32(1.0f);
847 return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(mValue), vreinterpretq_u32_f32(minus_one)), vreinterpretq_u32_f32(one)));
848#else
849 return Vec3(std::signbit(mF32[0])? -1.0f : 1.0f,
850 std::signbit(mF32[1])? -1.0f : 1.0f,
851 std::signbit(mF32[2])? -1.0f : 1.0f);
852#endif
853}
854
#define JPH_SUPPRESS_WARNINGS_STD_BEGIN
Definition Core.h:419
#define JPH_SUPPRESS_WARNINGS_STD_END
Definition Core.h:431
std::uint64_t uint64
Definition Core.h:485
#define JPH_NAMESPACE_END
Definition Core.h:414
std::uint32_t uint32
Definition Core.h:484
#define JPH_NAMESPACE_BEGIN
Definition Core.h:408
#define JPH_MAKE_HASHABLE(type,...)
Definition HashCombine.h:223
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_UNUSED
We always use the Z component when we don't specifically want to initialize a value,...
Definition Swizzle.h:16
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Vec3 operator*(float inV1, Vec3Arg inV2)
Definition Vec3.inl:374
Class that holds 3 floats. Used as a storage class. Convert to Vec3 for calculations.
Definition Float3.h:13
float y
Definition Float3.h:39
float z
Definition Float3.h:40
float x
Definition Float3.h:38
Definition UVec4.h:12
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:202
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:174
JPH_INLINE bool TestAllXYZTrue() const
Test if X, Y and Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:413
Type mValue
Definition UVec4.h:211
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:188
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:340
uint32 mU32[4]
Definition UVec4.h:212
Definition Vec3.h:17
JPH_INLINE bool IsClose(Vec3Arg inV2, float inMaxDistSq=1.0e-12f) const
Test if two vectors are close.
Definition Vec3.inl:342
static JPH_INLINE Vec3 sMax(Vec3Arg inV1, Vec3Arg inV2)
Return the maximum of each of the components.
Definition Vec3.inl:155
JPH_INLINE float Dot(Vec3Arg inV2) const
Dot product.
Definition Vec3.inl:645
JPH_INLINE Vec3 Normalized() const
Normalize vector.
Definition Vec3.inl:702
static JPH_INLINE Type sFixW(Type inValue)
Internal helper function that ensures that the Z component is replicated to the W component to prevent divisions by zero.
Vec4::Type Type
Definition Vec3.h:27
JPH_INLINE bool operator==(Vec3Arg inV2) const
Comparison.
Definition Vec3.inl:337
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec3.inl:529
static JPH_INLINE Vec3 sMin(Vec3Arg inV1, Vec3Arg inV2)
Return the minimum value of each of the components.
Definition Vec3.inl:142
JPH_INLINE Vec3 Cross(Vec3Arg inV2) const
Cross product.
Definition Vec3.inl:590
JPH_INLINE Vec3 GetNormalizedPerpendicular() const
Get normalized vector that is perpendicular to this vector.
Definition Vec3.inl:822
static Vec3 sRandom(Random &inRandom)
Get random unit vector.
Definition Vec3.inl:329
JPH_INLINE float GetX() const
Get individual components.
Definition Vec3.h:124
JPH_INLINE bool IsNormalized(float inTolerance=1.0e-6f) const
Test if vector is normalized.
Definition Vec3.inl:747
static JPH_INLINE Vec3 sXor(Vec3Arg inV1, Vec3Arg inV2)
Logical xor (component wise)
Definition Vec3.inl:299
JPH_INLINE float Length() const
Length of vector.
Definition Vec3.inl:677
static JPH_INLINE UVec4 sGreaterOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Greater than or equal (component wise)
Definition Vec3.inl:233
JPH_INLINE float ReduceMin() const
Get the minimum of X, Y and Z.
Definition Vec3.inl:808
JPH_INLINE Vec3 & operator-=(Vec3Arg inV2)
Subtract two float vectors (component wise)
Definition Vec3.inl:501
JPH_INLINE float ReduceMax() const
Get the maximum of X, Y and Z.
Definition Vec3.inl:815
static JPH_INLINE UVec4 sLessOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Less than or equal (component wise)
Definition Vec3.inl:203
JPH_INLINE Vec3 operator/(float inV2) const
Divide vector by float.
Definition Vec3.inl:385
friend JPH_INLINE Vec3 operator*(float inV1, Vec3Arg inV2)
Multiply vector with float.
Definition Vec3.inl:374
JPH_INLINE int GetLowestComponentIndex() const
Get index of component with lowest value.
Definition Vec3.inl:562
JPH_INLINE Vec3 & operator/=(float inV2)
Divide vector by float.
Definition Vec3.inl:428
JPH_INLINE Vec4 DotV4(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition Vec3.inl:629
JPH_INLINE Vec3 Abs() const
Return the absolute value of each of the components.
Definition Vec3.inl:572
JPH_INLINE Vec3 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition Vec3.inl:585
JPH_INLINE Vec3 NormalizedOr(Vec3Arg inZeroValue) const
Normalize vector or return inZeroValue if the length of the vector is zero.
Definition Vec3.inl:716
JPH_INLINE Vec3 operator+(Vec3Arg inV2) const
Add two float vectors (component wise)
Definition Vec3.inl:444
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec3.inl:551
static JPH_INLINE Vec3 sOr(Vec3Arg inV1, Vec3Arg inV2)
Logical or (component wise)
Definition Vec3.inl:288
static JPH_INLINE UVec4 sGreater(Vec3Arg inV1, Vec3Arg inV2)
Greater than (component wise)
Definition Vec3.inl:218
static JPH_INLINE Vec3 sAnd(Vec3Arg inV1, Vec3Arg inV2)
Logical and (component wise)
Definition Vec3.inl:310
JPH_INLINE void CheckW() const
Internal helper function that checks that W is equal to Z, so e.g. dividing by it should not generate a division by zero.
static JPH_INLINE Vec3 sUnitSpherical(float inTheta, float inPhi)
Definition Vec3.inl:321
JPH_INLINE UVec4 ToInt() const
Convert each component from a float to an int.
Definition Vec3.inl:786
Type mValue
Definition Vec3.h:286
JPH_INLINE float GetY() const
Definition Vec3.h:125
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec3.inl:540
JPH_INLINE Vec3 operator-() const
Negate.
Definition Vec3.inl:471
JPH_INLINE void StoreFloat3(Float3 *outV) const
Store 3 floats to memory.
Definition Vec3.inl:767
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec3.inl:661
float mF32[4]
Definition Vec3.h:287
static JPH_INLINE UVec4 sEquals(Vec3Arg inV1, Vec3Arg inV2)
Equals (component wise)
Definition Vec3.inl:173
JPH_INLINE bool IsNearZero(float inMaxDistSq=1.0e-12f) const
Test if vector is near zero.
Definition Vec3.inl:347
static JPH_INLINE Vec3 sZero()
Vector with all zeros.
Definition Vec3.inl:103
static JPH_INLINE UVec4 sLess(Vec3Arg inV1, Vec3Arg inV2)
Less than (component wise)
Definition Vec3.inl:188
static JPH_INLINE Vec3 sReplicate(float inV)
Replicate inV across all components.
Definition Vec3.inl:114
static JPH_INLINE Vec3 sClamp(Vec3Arg inV, Vec3Arg inMin, Vec3Arg inMax)
Clamp a vector between min and max (component wise)
Definition Vec3.inl:168
JPH_INLINE Vec3 & operator*=(float inV2)
Multiply vector with float.
Definition Vec3.inl:396
JPH_INLINE Vec3 & operator+=(Vec3Arg inV2)
Add two float vectors (component wise)
Definition Vec3.inl:455
static JPH_INLINE Vec3 sSelect(Vec3Arg inNotSet, Vec3Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit of inControl = 1.
Definition Vec3.inl:265
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition Vec3.inl:752
JPH_INLINE Vec3 Sqrt() const
Component wise square root.
Definition Vec3.inl:691
JPH_INLINE UVec4 ReinterpretAsInt() const
Reinterpret Vec3 as a UVec4 (doesn't change the bits)
Definition Vec3.inl:797
JPH_INLINE Vec3 DotV(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y and Z components.
Definition Vec3.inl:613
static JPH_INLINE Vec3 sLoadFloat3Unsafe(const Float3 &inV)
Load 3 floats from memory (reads 32 bits extra which it doesn't use)
Definition Vec3.inl:130
JPH_INLINE float GetZ() const
Definition Vec3.h:126
JPH_INLINE Vec3 GetSign() const
Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
Definition Vec3.inl:836
static JPH_INLINE Vec3 sNaN()
Vector with all NaN's.
Definition Vec3.inl:125
Vec3()=default
Constructor.
JPH_INLINE int GetHighestComponentIndex() const
Get index of component with highest value.
Definition Vec3.inl:567
static JPH_INLINE Vec3 sFusedMultiplyAdd(Vec3Arg inMul1, Vec3Arg inMul2, Vec3Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition Vec3.inl:248
JPH_INLINE Vec3 Swizzle() const
Swizzle the elements in inV.
Definition Vec4.h:14
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:113
JPH_INLINE float GetY() const
Definition Vec4.h:114
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:74
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:778