Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
Vec3.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
5#include <Jolt/Math/Vec4.h>
6#include <Jolt/Math/UVec4.h>
8
10#include <random>
12
13// Create a std::hash/JPH::Hash for Vec3
14JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())
15
17
18void Vec3::CheckW() const
19{
20#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
21 // Avoid asserts when both components are NaN
22 JPH_ASSERT(reinterpret_cast<const uint32 *>(mF32)[2] == reinterpret_cast<const uint32 *>(mF32)[3]);
23#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
24}
25
26JPH_INLINE Vec3::Type Vec3::sFixW(Type inValue)
27{
28#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
29 #if defined(JPH_USE_SSE)
30 return _mm_shuffle_ps(inValue, inValue, _MM_SHUFFLE(2, 2, 1, 0));
31 #elif defined(JPH_USE_NEON)
32 return JPH_NEON_SHUFFLE_F32x4(inValue, inValue, 0, 1, 2, 2);
33 #else
34 Type value;
35 value.mData[0] = inValue.mData[0];
36 value.mData[1] = inValue.mData[1];
37 value.mData[2] = inValue.mData[2];
38 value.mData[3] = inValue.mData[2];
39 return value;
40 #endif
41#else
42 return inValue;
43#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
44}
45
47 mValue(sFixW(inRHS.mValue))
48{
49}
50
51Vec3::Vec3(const Float3 &inV)
52{
53#if defined(JPH_USE_SSE)
54 Type x = _mm_load_ss(&inV.x);
55 Type y = _mm_load_ss(&inV.y);
56 Type z = _mm_load_ss(&inV.z);
57 Type xy = _mm_unpacklo_ps(x, y);
58 mValue = _mm_shuffle_ps(xy, z, _MM_SHUFFLE(0, 0, 1, 0)); // Assure Z and W are the same
59#elif defined(JPH_USE_NEON)
60 float32x2_t xy = vld1_f32(&inV.x);
61 float32x2_t zz = vdup_n_f32(inV.z); // Assure Z and W are the same
62 mValue = vcombine_f32(xy, zz);
63#else
64 mF32[0] = inV[0];
65 mF32[1] = inV[1];
66 mF32[2] = inV[2];
67 mF32[3] = inV[2]; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables
68#endif
69}
70
71Vec3::Vec3(float inX, float inY, float inZ)
72{
73#if defined(JPH_USE_SSE)
74 mValue = _mm_set_ps(inZ, inZ, inY, inX);
75#elif defined(JPH_USE_NEON)
76 uint32x2_t xy = vcreate_u32(static_cast<uint64>(BitCast<uint32>(inX)) | (static_cast<uint64>(BitCast<uint32>(inY)) << 32));
77 uint32x2_t zz = vreinterpret_u32_f32(vdup_n_f32(inZ));
78 mValue = vreinterpretq_f32_u32(vcombine_u32(xy, zz));
79#else
80 mF32[0] = inX;
81 mF32[1] = inY;
82 mF32[2] = inZ;
83 mF32[3] = inZ; // Not strictly needed when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED is off but prevents warnings about uninitialized variables
84#endif
85}
86
87template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ>
89{
90 static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
91 static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
92 static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
93
94#if defined(JPH_USE_SSE)
95 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleZ, SwizzleZ, SwizzleY, SwizzleX)); // Assure Z and W are the same
96#elif defined(JPH_USE_NEON)
97 return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleZ);
98#else
99 return Vec3(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ]);
100#endif
101}
102
104{
105#if defined(JPH_USE_SSE)
106 return _mm_setzero_ps();
107#elif defined(JPH_USE_NEON)
108 return vdupq_n_f32(0);
109#else
110 return Vec3(0, 0, 0);
111#endif
112}
113
115{
116#if defined(JPH_USE_SSE)
117 return _mm_set1_ps(inV);
118#elif defined(JPH_USE_NEON)
119 return vdupq_n_f32(inV);
120#else
121 return Vec3(inV, inV, inV);
122#endif
123}
124
126{
127 return sReplicate(1.0f);
128}
129
131{
132 return sReplicate(numeric_limits<float>::quiet_NaN());
133}
134
136{
137#if defined(JPH_USE_SSE)
138 Type v = _mm_loadu_ps(&inV.x);
139#elif defined(JPH_USE_NEON)
140 Type v = vld1q_f32(&inV.x);
141#else
142 Type v = { inV.x, inV.y, inV.z };
143#endif
144 return sFixW(v);
145}
146
148{
149#if defined(JPH_USE_SSE)
150 return _mm_min_ps(inV1.mValue, inV2.mValue);
151#elif defined(JPH_USE_NEON)
152 return vminq_f32(inV1.mValue, inV2.mValue);
153#else
154 return Vec3(min(inV1.mF32[0], inV2.mF32[0]),
155 min(inV1.mF32[1], inV2.mF32[1]),
156 min(inV1.mF32[2], inV2.mF32[2]));
157#endif
158}
159
161{
162#if defined(JPH_USE_SSE)
163 return _mm_max_ps(inV1.mValue, inV2.mValue);
164#elif defined(JPH_USE_NEON)
165 return vmaxq_f32(inV1.mValue, inV2.mValue);
166#else
167 return Vec3(max(inV1.mF32[0], inV2.mF32[0]),
168 max(inV1.mF32[1], inV2.mF32[1]),
169 max(inV1.mF32[2], inV2.mF32[2]));
170#endif
171}
172
174{
175 return sMax(sMin(inV, inMax), inMin);
176}
177
179{
180#if defined(JPH_USE_SSE)
181 return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
182#elif defined(JPH_USE_NEON)
183 return vceqq_f32(inV1.mValue, inV2.mValue);
184#else
185 uint32 z = inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0;
186 return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
187 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
188 z,
189 z);
190#endif
191}
192
194{
195#if defined(JPH_USE_SSE)
196 return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
197#elif defined(JPH_USE_NEON)
198 return vcltq_f32(inV1.mValue, inV2.mValue);
199#else
200 uint32 z = inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0;
201 return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
202 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
203 z,
204 z);
205#endif
206}
207
209{
210#if defined(JPH_USE_SSE)
211 return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
212#elif defined(JPH_USE_NEON)
213 return vcleq_f32(inV1.mValue, inV2.mValue);
214#else
215 uint32 z = inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0;
216 return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
217 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
218 z,
219 z);
220#endif
221}
222
224{
225#if defined(JPH_USE_SSE)
226 return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
227#elif defined(JPH_USE_NEON)
228 return vcgtq_f32(inV1.mValue, inV2.mValue);
229#else
230 uint32 z = inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0;
231 return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
232 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
233 z,
234 z);
235#endif
236}
237
239{
240#if defined(JPH_USE_SSE)
241 return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
242#elif defined(JPH_USE_NEON)
243 return vcgeq_f32(inV1.mValue, inV2.mValue);
244#else
245 uint32 z = inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0;
246 return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
247 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
248 z,
249 z);
250#endif
251}
252
254{
255#if defined(JPH_USE_SSE)
256 #ifdef JPH_USE_FMADD
257 return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
258 #else
259 return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
260 #endif
261#elif defined(JPH_USE_NEON)
262 return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
263#else
264 return Vec3(inMul1.mF32[0] * inMul2.mF32[0] + inAdd.mF32[0],
265 inMul1.mF32[1] * inMul2.mF32[1] + inAdd.mF32[1],
266 inMul1.mF32[2] * inMul2.mF32[2] + inAdd.mF32[2]);
267#endif
268}
269
270Vec3 Vec3::sSelect(Vec3Arg inNotSet, Vec3Arg inSet, UVec4Arg inControl)
271{
272#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
273 Type v = _mm_blendv_ps(inNotSet.mValue, inSet.mValue, _mm_castsi128_ps(inControl.mValue));
274 return sFixW(v);
275#elif defined(JPH_USE_SSE)
276 __m128 is_set = _mm_castsi128_ps(_mm_srai_epi32(inControl.mValue, 31));
277 Type v = _mm_or_ps(_mm_and_ps(is_set, inSet.mValue), _mm_andnot_ps(is_set, inNotSet.mValue));
278 return sFixW(v);
279#elif defined(JPH_USE_NEON)
280 Type v = vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inSet.mValue, inNotSet.mValue);
281 return sFixW(v);
282#else
283 Vec3 result;
284 for (int i = 0; i < 3; i++)
285 result.mF32[i] = (inControl.mU32[i] & 0x80000000u) ? inSet.mF32[i] : inNotSet.mF32[i];
286#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
287 result.mF32[3] = result.mF32[2];
288#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
289 return result;
290#endif
291}
292
294{
295#if defined(JPH_USE_SSE)
296 return _mm_or_ps(inV1.mValue, inV2.mValue);
297#elif defined(JPH_USE_NEON)
298 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
299#else
301#endif
302}
303
305{
306#if defined(JPH_USE_SSE)
307 return _mm_xor_ps(inV1.mValue, inV2.mValue);
308#elif defined(JPH_USE_NEON)
309 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
310#else
312#endif
313}
314
316{
317#if defined(JPH_USE_SSE)
318 return _mm_and_ps(inV1.mValue, inV2.mValue);
319#elif defined(JPH_USE_NEON)
320 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
321#else
323#endif
324}
325
326Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
327{
328 Vec4 s, c;
329 Vec4(inTheta, inPhi, 0, 0).SinCos(s, c);
330 return Vec3(s.GetX() * c.GetY(), s.GetX() * s.GetY(), c.GetX());
331}
332
333template <class Random>
334Vec3 Vec3::sRandom(Random &inRandom)
335{
336 std::uniform_real_distribution<float> zero_to_one(0.0f, 1.0f);
337 float theta = JPH_PI * zero_to_one(inRandom);
338 float phi = 2.0f * JPH_PI * zero_to_one(inRandom);
339 return sUnitSpherical(theta, phi);
340}
341
343{
344 return sEquals(*this, inV2).TestAllXYZTrue();
345}
346
347bool Vec3::IsClose(Vec3Arg inV2, float inMaxDistSq) const
348{
349 return (inV2 - *this).LengthSq() <= inMaxDistSq;
350}
351
352bool Vec3::IsNearZero(float inMaxDistSq) const
353{
354 return LengthSq() <= inMaxDistSq;
355}
356
358{
359#if defined(JPH_USE_SSE)
360 return _mm_mul_ps(mValue, inV2.mValue);
361#elif defined(JPH_USE_NEON)
362 return vmulq_f32(mValue, inV2.mValue);
363#else
364 return Vec3(mF32[0] * inV2.mF32[0], mF32[1] * inV2.mF32[1], mF32[2] * inV2.mF32[2]);
365#endif
366}
367
368Vec3 Vec3::operator * (float inV2) const
369{
370#if defined(JPH_USE_SSE)
371 return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
372#elif defined(JPH_USE_NEON)
373 return vmulq_n_f32(mValue, inV2);
374#else
375 return Vec3(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2);
376#endif
377}
378
379Vec3 operator * (float inV1, Vec3Arg inV2)
380{
381#if defined(JPH_USE_SSE)
382 return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
383#elif defined(JPH_USE_NEON)
384 return vmulq_n_f32(inV2.mValue, inV1);
385#else
386 return Vec3(inV1 * inV2.mF32[0], inV1 * inV2.mF32[1], inV1 * inV2.mF32[2]);
387#endif
388}
389
390Vec3 Vec3::operator / (float inV2) const
391{
392#if defined(JPH_USE_SSE)
393 return _mm_div_ps(mValue, _mm_set1_ps(inV2));
394#elif defined(JPH_USE_NEON)
395 return vdivq_f32(mValue, vdupq_n_f32(inV2));
396#else
397 return Vec3(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2);
398#endif
399}
400
402{
403#if defined(JPH_USE_SSE)
404 mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
405#elif defined(JPH_USE_NEON)
406 mValue = vmulq_n_f32(mValue, inV2);
407#else
408 for (int i = 0; i < 3; ++i)
409 mF32[i] *= inV2;
410 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
411 mF32[3] = mF32[2];
412 #endif
413#endif
414 return *this;
415}
416
418{
419#if defined(JPH_USE_SSE)
420 mValue = _mm_mul_ps(mValue, inV2.mValue);
421#elif defined(JPH_USE_NEON)
422 mValue = vmulq_f32(mValue, inV2.mValue);
423#else
424 for (int i = 0; i < 3; ++i)
425 mF32[i] *= inV2.mF32[i];
426 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
427 mF32[3] = mF32[2];
428 #endif
429#endif
430 return *this;
431}
432
434{
435#if defined(JPH_USE_SSE)
436 mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
437#elif defined(JPH_USE_NEON)
438 mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
439#else
440 for (int i = 0; i < 3; ++i)
441 mF32[i] /= inV2;
442 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
443 mF32[3] = mF32[2];
444 #endif
445#endif
446 return *this;
447}
448
450{
451#if defined(JPH_USE_SSE)
452 return _mm_add_ps(mValue, inV2.mValue);
453#elif defined(JPH_USE_NEON)
454 return vaddq_f32(mValue, inV2.mValue);
455#else
456 return Vec3(mF32[0] + inV2.mF32[0], mF32[1] + inV2.mF32[1], mF32[2] + inV2.mF32[2]);
457#endif
458}
459
461{
462#if defined(JPH_USE_SSE)
463 mValue = _mm_add_ps(mValue, inV2.mValue);
464#elif defined(JPH_USE_NEON)
465 mValue = vaddq_f32(mValue, inV2.mValue);
466#else
467 for (int i = 0; i < 3; ++i)
468 mF32[i] += inV2.mF32[i];
469 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
470 mF32[3] = mF32[2];
471 #endif
472#endif
473 return *this;
474}
475
477{
478#if defined(JPH_USE_SSE)
479 return _mm_sub_ps(_mm_setzero_ps(), mValue);
480#elif defined(JPH_USE_NEON)
481 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
482 return vsubq_f32(vdupq_n_f32(0), mValue);
483 #else
484 return vnegq_f32(mValue);
485 #endif
486#else
487 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
488 return Vec3(0.0f - mF32[0], 0.0f - mF32[1], 0.0f - mF32[2]);
489 #else
490 return Vec3(-mF32[0], -mF32[1], -mF32[2]);
491 #endif
492#endif
493}
494
496{
497#if defined(JPH_USE_SSE)
498 return _mm_sub_ps(mValue, inV2.mValue);
499#elif defined(JPH_USE_NEON)
500 return vsubq_f32(mValue, inV2.mValue);
501#else
502 return Vec3(mF32[0] - inV2.mF32[0], mF32[1] - inV2.mF32[1], mF32[2] - inV2.mF32[2]);
503#endif
504}
505
507{
508#if defined(JPH_USE_SSE)
509 mValue = _mm_sub_ps(mValue, inV2.mValue);
510#elif defined(JPH_USE_NEON)
511 mValue = vsubq_f32(mValue, inV2.mValue);
512#else
513 for (int i = 0; i < 3; ++i)
514 mF32[i] -= inV2.mF32[i];
515 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
516 mF32[3] = mF32[2];
517 #endif
518#endif
519 return *this;
520}
521
523{
524 inV2.CheckW(); // Check W equals Z to avoid div by zero
525#if defined(JPH_USE_SSE)
526 return _mm_div_ps(mValue, inV2.mValue);
527#elif defined(JPH_USE_NEON)
528 return vdivq_f32(mValue, inV2.mValue);
529#else
530 return Vec3(mF32[0] / inV2.mF32[0], mF32[1] / inV2.mF32[1], mF32[2] / inV2.mF32[2]);
531#endif
532}
533
535{
536#if defined(JPH_USE_SSE)
537 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
538#elif defined(JPH_USE_NEON)
539 return vdupq_laneq_f32(mValue, 0);
540#else
541 return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
542#endif
543}
544
546{
547#if defined(JPH_USE_SSE)
548 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
549#elif defined(JPH_USE_NEON)
550 return vdupq_laneq_f32(mValue, 1);
551#else
552 return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
553#endif
554}
555
557{
558#if defined(JPH_USE_SSE)
559 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
560#elif defined(JPH_USE_NEON)
561 return vdupq_laneq_f32(mValue, 2);
562#else
563 return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
564#endif
565}
566
568{
569 return GetX() < GetY() ? (GetZ() < GetX() ? 2 : 0) : (GetZ() < GetY() ? 2 : 1);
570}
571
573{
574 return GetX() > GetY() ? (GetZ() > GetX() ? 2 : 0) : (GetZ() > GetY() ? 2 : 1);
575}
576
578{
579#if defined(JPH_USE_AVX512)
580 return _mm_range_ps(mValue, mValue, 0b1000);
581#elif defined(JPH_USE_SSE)
582 return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
583#elif defined(JPH_USE_NEON)
584 return vabsq_f32(mValue);
585#else
586 return Vec3(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]));
587#endif
588}
589
591{
592 return sOne() / mValue;
593}
594
596{
597#if defined(JPH_USE_SSE)
598 Type t1 = _mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
599 t1 = _mm_mul_ps(t1, mValue);
600 Type t2 = _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
601 t2 = _mm_mul_ps(t2, inV2.mValue);
602 Type t3 = _mm_sub_ps(t1, t2);
603 return _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
604#elif defined(JPH_USE_NEON)
605 Type t1 = JPH_NEON_SHUFFLE_F32x4(inV2.mValue, inV2.mValue, 1, 2, 0, 0); // Assure Z and W are the same
606 t1 = vmulq_f32(t1, mValue);
607 Type t2 = JPH_NEON_SHUFFLE_F32x4(mValue, mValue, 1, 2, 0, 0); // Assure Z and W are the same
608 t2 = vmulq_f32(t2, inV2.mValue);
609 Type t3 = vsubq_f32(t1, t2);
610 return JPH_NEON_SHUFFLE_F32x4(t3, t3, 1, 2, 0, 0); // Assure Z and W are the same
611#else
612 return Vec3(mF32[1] * inV2.mF32[2] - mF32[2] * inV2.mF32[1],
613 mF32[2] * inV2.mF32[0] - mF32[0] * inV2.mF32[2],
614 mF32[0] * inV2.mF32[1] - mF32[1] * inV2.mF32[0]);
615#endif
616}
617
619{
620#if defined(JPH_USE_SSE4_1)
621 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
622#elif defined(JPH_USE_NEON)
623 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
624 mul = vsetq_lane_f32(0, mul, 3);
625 return vdupq_n_f32(vaddvq_f32(mul));
626#else
627 float dot = 0.0f;
628 for (int i = 0; i < 3; i++)
629 dot += mF32[i] * inV2.mF32[i];
630 return Vec3::sReplicate(dot);
631#endif
632}
633
635{
636#if defined(JPH_USE_SSE4_1)
637 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
638#elif defined(JPH_USE_NEON)
639 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
640 mul = vsetq_lane_f32(0, mul, 3);
641 return vdupq_n_f32(vaddvq_f32(mul));
642#else
643 float dot = 0.0f;
644 for (int i = 0; i < 3; i++)
645 dot += mF32[i] * inV2.mF32[i];
646 return Vec4::sReplicate(dot);
647#endif
648}
649
650float Vec3::Dot(Vec3Arg inV2) const
651{
652#if defined(JPH_USE_SSE4_1)
653 return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0x7f));
654#elif defined(JPH_USE_NEON)
655 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
656 mul = vsetq_lane_f32(0, mul, 3);
657 return vaddvq_f32(mul);
658#else
659 float dot = 0.0f;
660 for (int i = 0; i < 3; i++)
661 dot += mF32[i] * inV2.mF32[i];
662 return dot;
663#endif
664}
665
666float Vec3::LengthSq() const
667{
668#if defined(JPH_USE_SSE4_1)
669 return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0x7f));
670#elif defined(JPH_USE_NEON)
671 float32x4_t mul = vmulq_f32(mValue, mValue);
672 mul = vsetq_lane_f32(0, mul, 3);
673 return vaddvq_f32(mul);
674#else
675 float len_sq = 0.0f;
676 for (int i = 0; i < 3; i++)
677 len_sq += mF32[i] * mF32[i];
678 return len_sq;
679#endif
680}
681
682float Vec3::Length() const
683{
684#if defined(JPH_USE_SSE4_1)
685 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0x7f)));
686#elif defined(JPH_USE_NEON)
687 float32x4_t mul = vmulq_f32(mValue, mValue);
688 mul = vsetq_lane_f32(0, mul, 3);
689 float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
690 return vget_lane_f32(vsqrt_f32(sum), 0);
691#else
692 return sqrt(LengthSq());
693#endif
694}
695
697{
698#if defined(JPH_USE_SSE)
699 return _mm_sqrt_ps(mValue);
700#elif defined(JPH_USE_NEON)
701 return vsqrtq_f32(mValue);
702#else
703 return Vec3(sqrt(mF32[0]), sqrt(mF32[1]), sqrt(mF32[2]));
704#endif
705}
706
708{
709#if defined(JPH_USE_SSE4_1)
710 return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0x7f)));
711#elif defined(JPH_USE_NEON)
712 float32x4_t mul = vmulq_f32(mValue, mValue);
713 mul = vsetq_lane_f32(0, mul, 3);
714 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
715 return vdivq_f32(mValue, vsqrtq_f32(sum));
716#else
717 return *this / Length();
718#endif
719}
720
722{
723#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
724 Type len_sq = _mm_dp_ps(mValue, mValue, 0x7f);
725 // clang with '-ffast-math' (which you should not use!) can generate _mm_rsqrt_ps
726 // instructions which produce INFs/NaNs when they get a denormal float as input.
727 // We therefore treat denormals as zero here.
728 Type is_zero = _mm_cmple_ps(len_sq, _mm_set1_ps(FLT_MIN));
729#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
730 if (_mm_movemask_ps(is_zero) == 0xf)
731 return inZeroValue;
732 else
733 return _mm_div_ps(mValue, _mm_sqrt_ps(len_sq));
734#else
735 return _mm_blendv_ps(_mm_div_ps(mValue, _mm_sqrt_ps(len_sq)), inZeroValue.mValue, is_zero);
736#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
737#elif defined(JPH_USE_NEON)
738 float32x4_t mul = vmulq_f32(mValue, mValue);
739 mul = vsetq_lane_f32(0, mul, 3);
740 float32x4_t len_sq = vdupq_n_f32(vaddvq_f32(mul));
741 uint32x4_t is_zero = vcleq_f32(len_sq, vdupq_n_f32(FLT_MIN));
742 return vbslq_f32(is_zero, inZeroValue.mValue, vdivq_f32(mValue, vsqrtq_f32(len_sq)));
743#else
744 float len_sq = LengthSq();
745 if (len_sq <= FLT_MIN)
746 return inZeroValue;
747 else
748 return *this / sqrt(len_sq);
749#endif
750}
751
752bool Vec3::IsNormalized(float inTolerance) const
753{
754 return abs(LengthSq() - 1.0f) <= inTolerance;
755}
756
757bool Vec3::IsNaN() const
758{
759#if defined(JPH_USE_AVX512)
760 return (_mm_fpclass_ps_mask(mValue, 0b10000001) & 0x7) != 0;
761#elif defined(JPH_USE_SSE)
762 return (_mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) & 0x7) != 0;
763#elif defined(JPH_USE_NEON)
764 uint32x4_t mask = JPH_NEON_UINT32x4(1, 1, 1, 0);
765 uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
766 return vaddvq_u32(vandq_u32(is_equal, mask)) != 3;
767#else
768 return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]);
769#endif
770}
771
772void Vec3::StoreFloat3(Float3 *outV) const
773{
774#if defined(JPH_USE_SSE)
775 _mm_store_ss(&outV->x, mValue);
776 Vec3 t = Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_UNUSED>();
777 _mm_store_ss(&outV->y, t.mValue);
779 _mm_store_ss(&outV->z, t.mValue);
780#elif defined(JPH_USE_NEON)
781 float32x2_t xy = vget_low_f32(mValue);
782 vst1_f32(&outV->x, xy);
783 vst1q_lane_f32(&outV->z, mValue, 2);
784#else
785 outV->x = mF32[0];
786 outV->y = mF32[1];
787 outV->z = mF32[2];
788#endif
789}
790
792{
793#if defined(JPH_USE_SSE)
794 return _mm_cvttps_epi32(mValue);
795#elif defined(JPH_USE_NEON)
796 return vcvtq_u32_f32(mValue);
797#else
798 return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
799#endif
800}
801
803{
804#if defined(JPH_USE_SSE)
805 return UVec4(_mm_castps_si128(mValue));
806#elif defined(JPH_USE_NEON)
807 return vreinterpretq_u32_f32(mValue);
808#else
809 return *reinterpret_cast<const UVec4 *>(this);
810#endif
811}
812
813float Vec3::ReduceMin() const
814{
815 Vec3 v = sMin(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
817 return v.GetX();
818}
819
820float Vec3::ReduceMax() const
821{
822 Vec3 v = sMax(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
824 return v.GetX();
825}
826
828{
829 if (abs(mF32[0]) > abs(mF32[1]))
830 {
831 float len = sqrt(mF32[0] * mF32[0] + mF32[2] * mF32[2]);
832 return Vec3(mF32[2], 0.0f, -mF32[0]) / len;
833 }
834 else
835 {
836 float len = sqrt(mF32[1] * mF32[1] + mF32[2] * mF32[2]);
837 return Vec3(0.0f, mF32[2], -mF32[1]) / len;
838 }
839}
840
842{
843#if defined(JPH_USE_AVX512)
844 return _mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90A00), 0);
845#elif defined(JPH_USE_SSE)
846 Type minus_one = _mm_set1_ps(-1.0f);
847 Type one = _mm_set1_ps(1.0f);
848 return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
849#elif defined(JPH_USE_NEON)
850 Type minus_one = vdupq_n_f32(-1.0f);
851 Type one = vdupq_n_f32(1.0f);
852 return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(mValue), vreinterpretq_u32_f32(minus_one)), vreinterpretq_u32_f32(one)));
853#else
854 return Vec3(std::signbit(mF32[0])? -1.0f : 1.0f,
855 std::signbit(mF32[1])? -1.0f : 1.0f,
856 std::signbit(mF32[2])? -1.0f : 1.0f);
857#endif
858}
859
#define JPH_SUPPRESS_WARNINGS_STD_BEGIN
Definition Core.h:419
#define JPH_SUPPRESS_WARNINGS_STD_END
Definition Core.h:431
std::uint64_t uint64
Definition Core.h:485
#define JPH_NAMESPACE_END
Definition Core.h:414
std::uint32_t uint32
Definition Core.h:484
#define JPH_NAMESPACE_BEGIN
Definition Core.h:408
#define JPH_MAKE_HASHABLE(type,...)
Definition HashCombine.h:223
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_UNUSED
We always use the Z component when we don't specifically want to initialize a value,...
Definition Swizzle.h:16
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Vec3 operator*(float inV1, Vec3Arg inV2)
Definition Vec3.inl:379
Class that holds 3 floats. Used as a storage class. Convert to Vec3 for calculations.
Definition Float3.h:13
float y
Definition Float3.h:39
float z
Definition Float3.h:40
float x
Definition Float3.h:38
Definition UVec4.h:12
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:202
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:174
JPH_INLINE bool TestAllXYZTrue() const
Test if X, Y and Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:413
Type mValue
Definition UVec4.h:211
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:188
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:340
uint32 mU32[4]
Definition UVec4.h:212
Definition Vec3.h:17
JPH_INLINE bool IsClose(Vec3Arg inV2, float inMaxDistSq=1.0e-12f) const
Test if two vectors are close.
Definition Vec3.inl:347
static JPH_INLINE Vec3 sMax(Vec3Arg inV1, Vec3Arg inV2)
Return the maximum of each of the components.
Definition Vec3.inl:160
JPH_INLINE float Dot(Vec3Arg inV2) const
Dot product.
Definition Vec3.inl:650
JPH_INLINE Vec3 Normalized() const
Normalize vector.
Definition Vec3.inl:707
static JPH_INLINE Type sFixW(Type inValue)
Internal helper function that ensures that the Z component is replicated to the W component to prevent divisions by zero.
Vec4::Type Type
Definition Vec3.h:27
JPH_INLINE bool operator==(Vec3Arg inV2) const
Comparison.
Definition Vec3.inl:342
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec3.inl:534
static JPH_INLINE Vec3 sMin(Vec3Arg inV1, Vec3Arg inV2)
Return the minimum value of each of the components.
Definition Vec3.inl:147
JPH_INLINE Vec3 Cross(Vec3Arg inV2) const
Cross product.
Definition Vec3.inl:595
JPH_INLINE Vec3 GetNormalizedPerpendicular() const
Get normalized vector that is perpendicular to this vector.
Definition Vec3.inl:827
static Vec3 sRandom(Random &inRandom)
Get random unit vector.
Definition Vec3.inl:334
JPH_INLINE float GetX() const
Get individual components.
Definition Vec3.h:127
JPH_INLINE bool IsNormalized(float inTolerance=1.0e-6f) const
Test if vector is normalized.
Definition Vec3.inl:752
static JPH_INLINE Vec3 sXor(Vec3Arg inV1, Vec3Arg inV2)
Logical xor (component wise)
Definition Vec3.inl:304
JPH_INLINE float Length() const
Length of vector.
Definition Vec3.inl:682
static JPH_INLINE UVec4 sGreaterOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Greater than or equal (component wise)
Definition Vec3.inl:238
JPH_INLINE float ReduceMin() const
Get the minimum of X, Y and Z.
Definition Vec3.inl:813
JPH_INLINE Vec3 & operator-=(Vec3Arg inV2)
Subtract two float vectors (component wise)
Definition Vec3.inl:506
JPH_INLINE float ReduceMax() const
Get the maximum of X, Y and Z.
Definition Vec3.inl:820
static JPH_INLINE UVec4 sLessOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Less than or equal (component wise)
Definition Vec3.inl:208
JPH_INLINE Vec3 operator/(float inV2) const
Divide vector by float.
Definition Vec3.inl:390
friend JPH_INLINE Vec3 operator*(float inV1, Vec3Arg inV2)
Multiply vector with float.
Definition Vec3.inl:379
JPH_INLINE int GetLowestComponentIndex() const
Get index of component with lowest value.
Definition Vec3.inl:567
JPH_INLINE Vec3 & operator/=(float inV2)
Divide vector by float.
Definition Vec3.inl:433
JPH_INLINE Vec4 DotV4(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition Vec3.inl:634
JPH_INLINE Vec3 Abs() const
Return the absolute value of each of the components.
Definition Vec3.inl:577
static JPH_INLINE Vec3 sOne()
Vector with all ones.
Definition Vec3.inl:125
JPH_INLINE Vec3 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition Vec3.inl:590
JPH_INLINE Vec3 NormalizedOr(Vec3Arg inZeroValue) const
Normalize vector or return inZeroValue if the length of the vector is zero.
Definition Vec3.inl:721
JPH_INLINE Vec3 operator+(Vec3Arg inV2) const
Add two float vectors (component wise)
Definition Vec3.inl:449
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec3.inl:556
static JPH_INLINE Vec3 sOr(Vec3Arg inV1, Vec3Arg inV2)
Logical or (component wise)
Definition Vec3.inl:293
static JPH_INLINE UVec4 sGreater(Vec3Arg inV1, Vec3Arg inV2)
Greater than (component wise)
Definition Vec3.inl:223
static JPH_INLINE Vec3 sAnd(Vec3Arg inV1, Vec3Arg inV2)
Logical and (component wise)
Definition Vec3.inl:315
JPH_INLINE void CheckW() const
Internal helper function that checks that W is equal to Z, so e.g. dividing by it should not generate a division by zero.
static JPH_INLINE Vec3 sUnitSpherical(float inTheta, float inPhi)
Definition Vec3.inl:326
JPH_INLINE UVec4 ToInt() const
Convert each component from a float to an int.
Definition Vec3.inl:791
Type mValue
Definition Vec3.h:289
JPH_INLINE float GetY() const
Definition Vec3.h:128
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec3.inl:545
JPH_INLINE Vec3 operator-() const
Negate.
Definition Vec3.inl:476
JPH_INLINE void StoreFloat3(Float3 *outV) const
Store 3 floats to memory.
Definition Vec3.inl:772
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec3.inl:666
float mF32[4]
Definition Vec3.h:290
static JPH_INLINE UVec4 sEquals(Vec3Arg inV1, Vec3Arg inV2)
Equals (component wise)
Definition Vec3.inl:178
JPH_INLINE bool IsNearZero(float inMaxDistSq=1.0e-12f) const
Test if vector is near zero.
Definition Vec3.inl:352
static JPH_INLINE Vec3 sZero()
Vector with all zeros.
Definition Vec3.inl:103
static JPH_INLINE UVec4 sLess(Vec3Arg inV1, Vec3Arg inV2)
Less than (component wise)
Definition Vec3.inl:193
static JPH_INLINE Vec3 sReplicate(float inV)
Replicate inV across all components.
Definition Vec3.inl:114
static JPH_INLINE Vec3 sClamp(Vec3Arg inV, Vec3Arg inMin, Vec3Arg inMax)
Clamp a vector between min and max (component wise)
Definition Vec3.inl:173
JPH_INLINE Vec3 & operator*=(float inV2)
Multiply vector with float.
Definition Vec3.inl:401
JPH_INLINE Vec3 & operator+=(Vec3Arg inV2)
Add two float vectors (component wise)
Definition Vec3.inl:460
static JPH_INLINE Vec3 sSelect(Vec3Arg inNotSet, Vec3Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit of inControl = 1.
Definition Vec3.inl:270
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition Vec3.inl:757
JPH_INLINE Vec3 Sqrt() const
Component wise square root.
Definition Vec3.inl:696
JPH_INLINE UVec4 ReinterpretAsInt() const
Reinterpret Vec3 as a UVec4 (doesn't change the bits)
Definition Vec3.inl:802
JPH_INLINE Vec3 DotV(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y and Z components.
Definition Vec3.inl:618
static JPH_INLINE Vec3 sLoadFloat3Unsafe(const Float3 &inV)
Load 3 floats from memory (reads 32 bits extra which it doesn't use)
Definition Vec3.inl:135
JPH_INLINE float GetZ() const
Definition Vec3.h:129
JPH_INLINE Vec3 GetSign() const
Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
Definition Vec3.inl:841
static JPH_INLINE Vec3 sNaN()
Vector with all NaN's.
Definition Vec3.inl:130
Vec3()=default
Constructor.
JPH_INLINE int GetHighestComponentIndex() const
Get index of component with highest value.
Definition Vec3.inl:572
static JPH_INLINE Vec3 sFusedMultiplyAdd(Vec3Arg inMul1, Vec3Arg inMul2, Vec3Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition Vec3.inl:253
JPH_INLINE Vec3 Swizzle() const
Swizzle the elements in inV.
Definition Vec4.h:14
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:116
JPH_INLINE float GetY() const
Definition Vec4.h:117
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:74
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:783