Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
Vec3.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
5#include <Jolt/Math/Vec4.h>
6#include <Jolt/Math/UVec4.h>
8
10#include <random>
12
13// Create a std::hash/JPH::Hash for Vec3
14JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())
15
17
18void Vec3::CheckW() const
19{
20#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
21 // Avoid asserts when both components are NaN
22 JPH_ASSERT(reinterpret_cast<const uint32 *>(mF32)[2] == reinterpret_cast<const uint32 *>(mF32)[3]);
23#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
24}
25
26JPH_INLINE Vec3::Type Vec3::sFixW(Type inValue)
27{
28#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
29 #if defined(JPH_USE_SSE)
30 return _mm_shuffle_ps(inValue, inValue, _MM_SHUFFLE(2, 2, 1, 0));
31 #elif defined(JPH_USE_NEON)
32 return JPH_NEON_SHUFFLE_F32x4(inValue, inValue, 0, 1, 2, 2);
33 #else
34 Type value;
35 value.mData[0] = inValue.mData[0];
36 value.mData[1] = inValue.mData[1];
37 value.mData[2] = inValue.mData[2];
38 value.mData[3] = inValue.mData[2];
39 return value;
40 #endif
41#else
42 return inValue;
43#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
44}
45
47 mValue(sFixW(inRHS.mValue))
48{
49}
50
51Vec3::Vec3(const Float3 &inV)
52{
53#if defined(JPH_USE_SSE)
54 Type x = _mm_load_ss(&inV.x);
55 Type y = _mm_load_ss(&inV.y);
56 Type z = _mm_load_ss(&inV.z);
57 Type xy = _mm_unpacklo_ps(x, y);
58 mValue = _mm_shuffle_ps(xy, z, _MM_SHUFFLE(0, 0, 1, 0)); // Assure Z and W are the same
59#elif defined(JPH_USE_NEON)
60 float32x2_t xy = vld1_f32(&inV.x);
61 float32x2_t zz = vdup_n_f32(inV.z); // Assure Z and W are the same
62 mValue = vcombine_f32(xy, zz);
63#else
64 mF32[0] = inV[0];
65 mF32[1] = inV[1];
66 mF32[2] = inV[2];
67 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
68 mF32[3] = inV[2];
69 #endif
70#endif
71}
72
73Vec3::Vec3(float inX, float inY, float inZ)
74{
75#if defined(JPH_USE_SSE)
76 mValue = _mm_set_ps(inZ, inZ, inY, inX);
77#elif defined(JPH_USE_NEON)
78 uint32x2_t xy = vcreate_u32(static_cast<uint64>(BitCast<uint32>(inX)) | (static_cast<uint64>(BitCast<uint32>(inY)) << 32));
79 uint32x2_t zz = vreinterpret_u32_f32(vdup_n_f32(inZ));
80 mValue = vreinterpretq_f32_u32(vcombine_u32(xy, zz));
81#else
82 mF32[0] = inX;
83 mF32[1] = inY;
84 mF32[2] = inZ;
85 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
86 mF32[3] = inZ;
87 #endif
88#endif
89}
90
91template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ>
93{
94 static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
95 static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
96 static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
97
98#if defined(JPH_USE_SSE)
99 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleZ, SwizzleZ, SwizzleY, SwizzleX)); // Assure Z and W are the same
100#elif defined(JPH_USE_NEON)
101 return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleZ);
102#else
103 return Vec3(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ]);
104#endif
105}
106
108{
109#if defined(JPH_USE_SSE)
110 return _mm_setzero_ps();
111#elif defined(JPH_USE_NEON)
112 return vdupq_n_f32(0);
113#else
114 return Vec3(0, 0, 0);
115#endif
116}
117
119{
120#if defined(JPH_USE_SSE)
121 return _mm_set1_ps(inV);
122#elif defined(JPH_USE_NEON)
123 return vdupq_n_f32(inV);
124#else
125 return Vec3(inV, inV, inV);
126#endif
127}
128
130{
131 return sReplicate(numeric_limits<float>::quiet_NaN());
132}
133
135{
136#if defined(JPH_USE_SSE)
137 Type v = _mm_loadu_ps(&inV.x);
138#elif defined(JPH_USE_NEON)
139 Type v = vld1q_f32(&inV.x);
140#else
141 Type v = { inV.x, inV.y, inV.z };
142#endif
143 return sFixW(v);
144}
145
147{
148#if defined(JPH_USE_SSE)
149 return _mm_min_ps(inV1.mValue, inV2.mValue);
150#elif defined(JPH_USE_NEON)
151 return vminq_f32(inV1.mValue, inV2.mValue);
152#else
153 return Vec3(min(inV1.mF32[0], inV2.mF32[0]),
154 min(inV1.mF32[1], inV2.mF32[1]),
155 min(inV1.mF32[2], inV2.mF32[2]));
156#endif
157}
158
160{
161#if defined(JPH_USE_SSE)
162 return _mm_max_ps(inV1.mValue, inV2.mValue);
163#elif defined(JPH_USE_NEON)
164 return vmaxq_f32(inV1.mValue, inV2.mValue);
165#else
166 return Vec3(max(inV1.mF32[0], inV2.mF32[0]),
167 max(inV1.mF32[1], inV2.mF32[1]),
168 max(inV1.mF32[2], inV2.mF32[2]));
169#endif
170}
171
173{
174 return sMax(sMin(inV, inMax), inMin);
175}
176
178{
179#if defined(JPH_USE_SSE)
180 return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
181#elif defined(JPH_USE_NEON)
182 return vceqq_f32(inV1.mValue, inV2.mValue);
183#else
184 uint32 z = inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0;
185 return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
186 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
187 z,
188 z);
189#endif
190}
191
193{
194#if defined(JPH_USE_SSE)
195 return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
196#elif defined(JPH_USE_NEON)
197 return vcltq_f32(inV1.mValue, inV2.mValue);
198#else
199 uint32 z = inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0;
200 return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
201 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
202 z,
203 z);
204#endif
205}
206
208{
209#if defined(JPH_USE_SSE)
210 return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
211#elif defined(JPH_USE_NEON)
212 return vcleq_f32(inV1.mValue, inV2.mValue);
213#else
214 uint32 z = inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0;
215 return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
216 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
217 z,
218 z);
219#endif
220}
221
223{
224#if defined(JPH_USE_SSE)
225 return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
226#elif defined(JPH_USE_NEON)
227 return vcgtq_f32(inV1.mValue, inV2.mValue);
228#else
229 uint32 z = inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0;
230 return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
231 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
232 z,
233 z);
234#endif
235}
236
238{
239#if defined(JPH_USE_SSE)
240 return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
241#elif defined(JPH_USE_NEON)
242 return vcgeq_f32(inV1.mValue, inV2.mValue);
243#else
244 uint32 z = inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0;
245 return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
246 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
247 z,
248 z);
249#endif
250}
251
253{
254#if defined(JPH_USE_SSE)
255 #ifdef JPH_USE_FMADD
256 return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
257 #else
258 return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
259 #endif
260#elif defined(JPH_USE_NEON)
261 return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
262#else
263 return Vec3(inMul1.mF32[0] * inMul2.mF32[0] + inAdd.mF32[0],
264 inMul1.mF32[1] * inMul2.mF32[1] + inAdd.mF32[1],
265 inMul1.mF32[2] * inMul2.mF32[2] + inAdd.mF32[2]);
266#endif
267}
268
269Vec3 Vec3::sSelect(Vec3Arg inNotSet, Vec3Arg inSet, UVec4Arg inControl)
270{
271#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
272 Type v = _mm_blendv_ps(inNotSet.mValue, inSet.mValue, _mm_castsi128_ps(inControl.mValue));
273 return sFixW(v);
274#elif defined(JPH_USE_SSE)
275 __m128 is_set = _mm_castsi128_ps(_mm_srai_epi32(inControl.mValue, 31));
276 Type v = _mm_or_ps(_mm_and_ps(is_set, inSet.mValue), _mm_andnot_ps(is_set, inNotSet.mValue));
277 return sFixW(v);
278#elif defined(JPH_USE_NEON)
279 Type v = vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inSet.mValue, inNotSet.mValue);
280 return sFixW(v);
281#else
282 Vec3 result;
283 for (int i = 0; i < 3; i++)
284 result.mF32[i] = (inControl.mU32[i] & 0x80000000u) ? inSet.mF32[i] : inNotSet.mF32[i];
285#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
286 result.mF32[3] = result.mF32[2];
287#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
288 return result;
289#endif
290}
291
293{
294#if defined(JPH_USE_SSE)
295 return _mm_or_ps(inV1.mValue, inV2.mValue);
296#elif defined(JPH_USE_NEON)
297 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
298#else
300#endif
301}
302
304{
305#if defined(JPH_USE_SSE)
306 return _mm_xor_ps(inV1.mValue, inV2.mValue);
307#elif defined(JPH_USE_NEON)
308 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
309#else
311#endif
312}
313
315{
316#if defined(JPH_USE_SSE)
317 return _mm_and_ps(inV1.mValue, inV2.mValue);
318#elif defined(JPH_USE_NEON)
319 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
320#else
322#endif
323}
324
325Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
326{
327 Vec4 s, c;
328 Vec4(inTheta, inPhi, 0, 0).SinCos(s, c);
329 return Vec3(s.GetX() * c.GetY(), s.GetX() * s.GetY(), c.GetX());
330}
331
332template <class Random>
333Vec3 Vec3::sRandom(Random &inRandom)
334{
335 std::uniform_real_distribution<float> zero_to_one(0.0f, 1.0f);
336 float theta = JPH_PI * zero_to_one(inRandom);
337 float phi = 2.0f * JPH_PI * zero_to_one(inRandom);
338 return sUnitSpherical(theta, phi);
339}
340
342{
343 return sEquals(*this, inV2).TestAllXYZTrue();
344}
345
346bool Vec3::IsClose(Vec3Arg inV2, float inMaxDistSq) const
347{
348 return (inV2 - *this).LengthSq() <= inMaxDistSq;
349}
350
351bool Vec3::IsNearZero(float inMaxDistSq) const
352{
353 return LengthSq() <= inMaxDistSq;
354}
355
357{
358#if defined(JPH_USE_SSE)
359 return _mm_mul_ps(mValue, inV2.mValue);
360#elif defined(JPH_USE_NEON)
361 return vmulq_f32(mValue, inV2.mValue);
362#else
363 return Vec3(mF32[0] * inV2.mF32[0], mF32[1] * inV2.mF32[1], mF32[2] * inV2.mF32[2]);
364#endif
365}
366
367Vec3 Vec3::operator * (float inV2) const
368{
369#if defined(JPH_USE_SSE)
370 return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
371#elif defined(JPH_USE_NEON)
372 return vmulq_n_f32(mValue, inV2);
373#else
374 return Vec3(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2);
375#endif
376}
377
378Vec3 operator * (float inV1, Vec3Arg inV2)
379{
380#if defined(JPH_USE_SSE)
381 return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
382#elif defined(JPH_USE_NEON)
383 return vmulq_n_f32(inV2.mValue, inV1);
384#else
385 return Vec3(inV1 * inV2.mF32[0], inV1 * inV2.mF32[1], inV1 * inV2.mF32[2]);
386#endif
387}
388
389Vec3 Vec3::operator / (float inV2) const
390{
391#if defined(JPH_USE_SSE)
392 return _mm_div_ps(mValue, _mm_set1_ps(inV2));
393#elif defined(JPH_USE_NEON)
394 return vdivq_f32(mValue, vdupq_n_f32(inV2));
395#else
396 return Vec3(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2);
397#endif
398}
399
401{
402#if defined(JPH_USE_SSE)
403 mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
404#elif defined(JPH_USE_NEON)
405 mValue = vmulq_n_f32(mValue, inV2);
406#else
407 for (int i = 0; i < 3; ++i)
408 mF32[i] *= inV2;
409 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
410 mF32[3] = mF32[2];
411 #endif
412#endif
413 return *this;
414}
415
417{
418#if defined(JPH_USE_SSE)
419 mValue = _mm_mul_ps(mValue, inV2.mValue);
420#elif defined(JPH_USE_NEON)
421 mValue = vmulq_f32(mValue, inV2.mValue);
422#else
423 for (int i = 0; i < 3; ++i)
424 mF32[i] *= inV2.mF32[i];
425 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
426 mF32[3] = mF32[2];
427 #endif
428#endif
429 return *this;
430}
431
433{
434#if defined(JPH_USE_SSE)
435 mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
436#elif defined(JPH_USE_NEON)
437 mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
438#else
439 for (int i = 0; i < 3; ++i)
440 mF32[i] /= inV2;
441 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
442 mF32[3] = mF32[2];
443 #endif
444#endif
445 return *this;
446}
447
449{
450#if defined(JPH_USE_SSE)
451 return _mm_add_ps(mValue, inV2.mValue);
452#elif defined(JPH_USE_NEON)
453 return vaddq_f32(mValue, inV2.mValue);
454#else
455 return Vec3(mF32[0] + inV2.mF32[0], mF32[1] + inV2.mF32[1], mF32[2] + inV2.mF32[2]);
456#endif
457}
458
460{
461#if defined(JPH_USE_SSE)
462 mValue = _mm_add_ps(mValue, inV2.mValue);
463#elif defined(JPH_USE_NEON)
464 mValue = vaddq_f32(mValue, inV2.mValue);
465#else
466 for (int i = 0; i < 3; ++i)
467 mF32[i] += inV2.mF32[i];
468 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
469 mF32[3] = mF32[2];
470 #endif
471#endif
472 return *this;
473}
474
476{
477#if defined(JPH_USE_SSE)
478 return _mm_sub_ps(_mm_setzero_ps(), mValue);
479#elif defined(JPH_USE_NEON)
480 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
481 return vsubq_f32(vdupq_n_f32(0), mValue);
482 #else
483 return vnegq_f32(mValue);
484 #endif
485#else
486 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
487 return Vec3(0.0f - mF32[0], 0.0f - mF32[1], 0.0f - mF32[2]);
488 #else
489 return Vec3(-mF32[0], -mF32[1], -mF32[2]);
490 #endif
491#endif
492}
493
495{
496#if defined(JPH_USE_SSE)
497 return _mm_sub_ps(mValue, inV2.mValue);
498#elif defined(JPH_USE_NEON)
499 return vsubq_f32(mValue, inV2.mValue);
500#else
501 return Vec3(mF32[0] - inV2.mF32[0], mF32[1] - inV2.mF32[1], mF32[2] - inV2.mF32[2]);
502#endif
503}
504
506{
507#if defined(JPH_USE_SSE)
508 mValue = _mm_sub_ps(mValue, inV2.mValue);
509#elif defined(JPH_USE_NEON)
510 mValue = vsubq_f32(mValue, inV2.mValue);
511#else
512 for (int i = 0; i < 3; ++i)
513 mF32[i] -= inV2.mF32[i];
514 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
515 mF32[3] = mF32[2];
516 #endif
517#endif
518 return *this;
519}
520
522{
523 inV2.CheckW(); // Check W equals Z to avoid div by zero
524#if defined(JPH_USE_SSE)
525 return _mm_div_ps(mValue, inV2.mValue);
526#elif defined(JPH_USE_NEON)
527 return vdivq_f32(mValue, inV2.mValue);
528#else
529 return Vec3(mF32[0] / inV2.mF32[0], mF32[1] / inV2.mF32[1], mF32[2] / inV2.mF32[2]);
530#endif
531}
532
534{
535#if defined(JPH_USE_SSE)
536 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
537#elif defined(JPH_USE_NEON)
538 return vdupq_laneq_f32(mValue, 0);
539#else
540 return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
541#endif
542}
543
545{
546#if defined(JPH_USE_SSE)
547 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
548#elif defined(JPH_USE_NEON)
549 return vdupq_laneq_f32(mValue, 1);
550#else
551 return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
552#endif
553}
554
556{
557#if defined(JPH_USE_SSE)
558 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
559#elif defined(JPH_USE_NEON)
560 return vdupq_laneq_f32(mValue, 2);
561#else
562 return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
563#endif
564}
565
567{
568 return GetX() < GetY() ? (GetZ() < GetX() ? 2 : 0) : (GetZ() < GetY() ? 2 : 1);
569}
570
572{
573 return GetX() > GetY() ? (GetZ() > GetX() ? 2 : 0) : (GetZ() > GetY() ? 2 : 1);
574}
575
577{
578#if defined(JPH_USE_AVX512)
579 return _mm_range_ps(mValue, mValue, 0b1000);
580#elif defined(JPH_USE_SSE)
581 return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
582#elif defined(JPH_USE_NEON)
583 return vabsq_f32(mValue);
584#else
585 return Vec3(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]));
586#endif
587}
588
590{
591 return sReplicate(1.0f) / mValue;
592}
593
595{
596#if defined(JPH_USE_SSE)
597 Type t1 = _mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
598 t1 = _mm_mul_ps(t1, mValue);
599 Type t2 = _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
600 t2 = _mm_mul_ps(t2, inV2.mValue);
601 Type t3 = _mm_sub_ps(t1, t2);
602 return _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
603#elif defined(JPH_USE_NEON)
604 Type t1 = JPH_NEON_SHUFFLE_F32x4(inV2.mValue, inV2.mValue, 1, 2, 0, 0); // Assure Z and W are the same
605 t1 = vmulq_f32(t1, mValue);
606 Type t2 = JPH_NEON_SHUFFLE_F32x4(mValue, mValue, 1, 2, 0, 0); // Assure Z and W are the same
607 t2 = vmulq_f32(t2, inV2.mValue);
608 Type t3 = vsubq_f32(t1, t2);
609 return JPH_NEON_SHUFFLE_F32x4(t3, t3, 1, 2, 0, 0); // Assure Z and W are the same
610#else
611 return Vec3(mF32[1] * inV2.mF32[2] - mF32[2] * inV2.mF32[1],
612 mF32[2] * inV2.mF32[0] - mF32[0] * inV2.mF32[2],
613 mF32[0] * inV2.mF32[1] - mF32[1] * inV2.mF32[0]);
614#endif
615}
616
618{
619#if defined(JPH_USE_SSE4_1)
620 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
621#elif defined(JPH_USE_NEON)
622 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
623 mul = vsetq_lane_f32(0, mul, 3);
624 return vdupq_n_f32(vaddvq_f32(mul));
625#else
626 float dot = 0.0f;
627 for (int i = 0; i < 3; i++)
628 dot += mF32[i] * inV2.mF32[i];
629 return Vec3::sReplicate(dot);
630#endif
631}
632
634{
635#if defined(JPH_USE_SSE4_1)
636 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
637#elif defined(JPH_USE_NEON)
638 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
639 mul = vsetq_lane_f32(0, mul, 3);
640 return vdupq_n_f32(vaddvq_f32(mul));
641#else
642 float dot = 0.0f;
643 for (int i = 0; i < 3; i++)
644 dot += mF32[i] * inV2.mF32[i];
645 return Vec4::sReplicate(dot);
646#endif
647}
648
649float Vec3::Dot(Vec3Arg inV2) const
650{
651#if defined(JPH_USE_SSE4_1)
652 return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0x7f));
653#elif defined(JPH_USE_NEON)
654 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
655 mul = vsetq_lane_f32(0, mul, 3);
656 return vaddvq_f32(mul);
657#else
658 float dot = 0.0f;
659 for (int i = 0; i < 3; i++)
660 dot += mF32[i] * inV2.mF32[i];
661 return dot;
662#endif
663}
664
665float Vec3::LengthSq() const
666{
667#if defined(JPH_USE_SSE4_1)
668 return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0x7f));
669#elif defined(JPH_USE_NEON)
670 float32x4_t mul = vmulq_f32(mValue, mValue);
671 mul = vsetq_lane_f32(0, mul, 3);
672 return vaddvq_f32(mul);
673#else
674 float len_sq = 0.0f;
675 for (int i = 0; i < 3; i++)
676 len_sq += mF32[i] * mF32[i];
677 return len_sq;
678#endif
679}
680
681float Vec3::Length() const
682{
683#if defined(JPH_USE_SSE4_1)
684 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0x7f)));
685#elif defined(JPH_USE_NEON)
686 float32x4_t mul = vmulq_f32(mValue, mValue);
687 mul = vsetq_lane_f32(0, mul, 3);
688 float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
689 return vget_lane_f32(vsqrt_f32(sum), 0);
690#else
691 return sqrt(LengthSq());
692#endif
693}
694
696{
697#if defined(JPH_USE_SSE)
698 return _mm_sqrt_ps(mValue);
699#elif defined(JPH_USE_NEON)
700 return vsqrtq_f32(mValue);
701#else
702 return Vec3(sqrt(mF32[0]), sqrt(mF32[1]), sqrt(mF32[2]));
703#endif
704}
705
707{
708#if defined(JPH_USE_SSE4_1)
709 return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0x7f)));
710#elif defined(JPH_USE_NEON)
711 float32x4_t mul = vmulq_f32(mValue, mValue);
712 mul = vsetq_lane_f32(0, mul, 3);
713 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
714 return vdivq_f32(mValue, vsqrtq_f32(sum));
715#else
716 return *this / Length();
717#endif
718}
719
721{
722#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
723 Type len_sq = _mm_dp_ps(mValue, mValue, 0x7f);
724 // clang with '-ffast-math' (which you should not use!) can generate _mm_rsqrt_ps
725 // instructions which produce INFs/NaNs when they get a denormal float as input.
726 // We therefore treat denormals as zero here.
727 Type is_zero = _mm_cmple_ps(len_sq, _mm_set1_ps(FLT_MIN));
728#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
729 if (_mm_movemask_ps(is_zero) == 0xf)
730 return inZeroValue;
731 else
732 return _mm_div_ps(mValue, _mm_sqrt_ps(len_sq));
733#else
734 return _mm_blendv_ps(_mm_div_ps(mValue, _mm_sqrt_ps(len_sq)), inZeroValue.mValue, is_zero);
735#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
736#elif defined(JPH_USE_NEON)
737 float32x4_t mul = vmulq_f32(mValue, mValue);
738 mul = vsetq_lane_f32(0, mul, 3);
739 float32x4_t len_sq = vdupq_n_f32(vaddvq_f32(mul));
740 uint32x4_t is_zero = vcleq_f32(len_sq, vdupq_n_f32(FLT_MIN));
741 return vbslq_f32(is_zero, inZeroValue.mValue, vdivq_f32(mValue, vsqrtq_f32(len_sq)));
742#else
743 float len_sq = LengthSq();
744 if (len_sq <= FLT_MIN)
745 return inZeroValue;
746 else
747 return *this / sqrt(len_sq);
748#endif
749}
750
751bool Vec3::IsNormalized(float inTolerance) const
752{
753 return abs(LengthSq() - 1.0f) <= inTolerance;
754}
755
756bool Vec3::IsNaN() const
757{
758#if defined(JPH_USE_AVX512)
759 return (_mm_fpclass_ps_mask(mValue, 0b10000001) & 0x7) != 0;
760#elif defined(JPH_USE_SSE)
761 return (_mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) & 0x7) != 0;
762#elif defined(JPH_USE_NEON)
763 uint32x4_t mask = JPH_NEON_UINT32x4(1, 1, 1, 0);
764 uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
765 return vaddvq_u32(vandq_u32(is_equal, mask)) != 3;
766#else
767 return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]);
768#endif
769}
770
771void Vec3::StoreFloat3(Float3 *outV) const
772{
773#if defined(JPH_USE_SSE)
774 _mm_store_ss(&outV->x, mValue);
775 Vec3 t = Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_UNUSED>();
776 _mm_store_ss(&outV->y, t.mValue);
778 _mm_store_ss(&outV->z, t.mValue);
779#elif defined(JPH_USE_NEON)
780 float32x2_t xy = vget_low_f32(mValue);
781 vst1_f32(&outV->x, xy);
782 vst1q_lane_f32(&outV->z, mValue, 2);
783#else
784 outV->x = mF32[0];
785 outV->y = mF32[1];
786 outV->z = mF32[2];
787#endif
788}
789
791{
792#if defined(JPH_USE_SSE)
793 return _mm_cvttps_epi32(mValue);
794#elif defined(JPH_USE_NEON)
795 return vcvtq_u32_f32(mValue);
796#else
797 return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
798#endif
799}
800
802{
803#if defined(JPH_USE_SSE)
804 return UVec4(_mm_castps_si128(mValue));
805#elif defined(JPH_USE_NEON)
806 return vreinterpretq_u32_f32(mValue);
807#else
808 return *reinterpret_cast<const UVec4 *>(this);
809#endif
810}
811
812float Vec3::ReduceMin() const
813{
814 Vec3 v = sMin(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
816 return v.GetX();
817}
818
819float Vec3::ReduceMax() const
820{
821 Vec3 v = sMax(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
823 return v.GetX();
824}
825
827{
828 if (abs(mF32[0]) > abs(mF32[1]))
829 {
830 float len = sqrt(mF32[0] * mF32[0] + mF32[2] * mF32[2]);
831 return Vec3(mF32[2], 0.0f, -mF32[0]) / len;
832 }
833 else
834 {
835 float len = sqrt(mF32[1] * mF32[1] + mF32[2] * mF32[2]);
836 return Vec3(0.0f, mF32[2], -mF32[1]) / len;
837 }
838}
839
841{
842#if defined(JPH_USE_AVX512)
843 return _mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90A00), 0);
844#elif defined(JPH_USE_SSE)
845 Type minus_one = _mm_set1_ps(-1.0f);
846 Type one = _mm_set1_ps(1.0f);
847 return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
848#elif defined(JPH_USE_NEON)
849 Type minus_one = vdupq_n_f32(-1.0f);
850 Type one = vdupq_n_f32(1.0f);
851 return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(mValue), vreinterpretq_u32_f32(minus_one)), vreinterpretq_u32_f32(one)));
852#else
853 return Vec3(std::signbit(mF32[0])? -1.0f : 1.0f,
854 std::signbit(mF32[1])? -1.0f : 1.0f,
855 std::signbit(mF32[2])? -1.0f : 1.0f);
856#endif
857}
858
#define JPH_SUPPRESS_WARNINGS_STD_BEGIN
Definition Core.h:384
#define JPH_SUPPRESS_WARNINGS_STD_END
Definition Core.h:396
std::uint64_t uint64
Definition Core.h:457
#define JPH_NAMESPACE_END
Definition Core.h:379
std::uint32_t uint32
Definition Core.h:456
#define JPH_NAMESPACE_BEGIN
Definition Core.h:373
#define JPH_MAKE_HASHABLE(type,...)
Definition HashCombine.h:191
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_UNUSED
We always use the Z component when we don't specifically want to initialize a value,...
Definition Swizzle.h:16
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Vec3 operator*(float inV1, Vec3Arg inV2)
Definition Vec3.inl:378
Class that holds 3 floats. Used as a storage class. Convert to Vec3 for calculations.
Definition Float3.h:13
float y
Definition Float3.h:39
float z
Definition Float3.h:40
float x
Definition Float3.h:38
Definition UVec4.h:12
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:202
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:174
JPH_INLINE bool TestAllXYZTrue() const
Test if X, Y and Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:413
Type mValue
Definition UVec4.h:211
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:188
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:340
uint32 mU32[4]
Definition UVec4.h:212
Definition Vec3.h:17
JPH_INLINE bool IsClose(Vec3Arg inV2, float inMaxDistSq=1.0e-12f) const
Test if two vectors are close.
Definition Vec3.inl:346
static JPH_INLINE Vec3 sMax(Vec3Arg inV1, Vec3Arg inV2)
Return the maximum of each of the components.
Definition Vec3.inl:159
JPH_INLINE float Dot(Vec3Arg inV2) const
Dot product.
Definition Vec3.inl:649
JPH_INLINE Vec3 Normalized() const
Normalize vector.
Definition Vec3.inl:706
static JPH_INLINE Type sFixW(Type inValue)
Internal helper function that ensures that the Z component is replicated to the W component to preven...
Vec4::Type Type
Definition Vec3.h:27
JPH_INLINE bool operator==(Vec3Arg inV2) const
Comparison.
Definition Vec3.inl:341
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec3.inl:533
static JPH_INLINE Vec3 sMin(Vec3Arg inV1, Vec3Arg inV2)
Return the minimum value of each of the components.
Definition Vec3.inl:146
JPH_INLINE Vec3 Cross(Vec3Arg inV2) const
Cross product.
Definition Vec3.inl:594
JPH_INLINE Vec3 GetNormalizedPerpendicular() const
Get normalized vector that is perpendicular to this vector.
Definition Vec3.inl:826
static Vec3 sRandom(Random &inRandom)
Get random unit vector.
Definition Vec3.inl:333
JPH_INLINE float GetX() const
Get individual components.
Definition Vec3.h:124
JPH_INLINE bool IsNormalized(float inTolerance=1.0e-6f) const
Test if vector is normalized.
Definition Vec3.inl:751
static JPH_INLINE Vec3 sXor(Vec3Arg inV1, Vec3Arg inV2)
Logical xor (component wise)
Definition Vec3.inl:303
JPH_INLINE float Length() const
Length of vector.
Definition Vec3.inl:681
static JPH_INLINE UVec4 sGreaterOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Greater than or equal (component wise)
Definition Vec3.inl:237
JPH_INLINE float ReduceMin() const
Get the minimum of X, Y and Z.
Definition Vec3.inl:812
JPH_INLINE Vec3 & operator-=(Vec3Arg inV2)
Subtract two float vectors (component wise)
Definition Vec3.inl:505
JPH_INLINE float ReduceMax() const
Get the maximum of X, Y and Z.
Definition Vec3.inl:819
static JPH_INLINE UVec4 sLessOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Less than or equal (component wise)
Definition Vec3.inl:207
JPH_INLINE Vec3 operator/(float inV2) const
Divide vector by float.
Definition Vec3.inl:389
friend JPH_INLINE Vec3 operator*(float inV1, Vec3Arg inV2)
Multiply vector with float.
Definition Vec3.inl:378
JPH_INLINE int GetLowestComponentIndex() const
Get index of component with lowest value.
Definition Vec3.inl:566
JPH_INLINE Vec3 & operator/=(float inV2)
Divide vector by float.
Definition Vec3.inl:432
JPH_INLINE Vec4 DotV4(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition Vec3.inl:633
JPH_INLINE Vec3 Abs() const
Return the absolute value of each of the components.
Definition Vec3.inl:576
JPH_INLINE Vec3 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition Vec3.inl:589
JPH_INLINE Vec3 NormalizedOr(Vec3Arg inZeroValue) const
Normalize vector or return inZeroValue if the length of the vector is zero.
Definition Vec3.inl:720
JPH_INLINE Vec3 operator+(Vec3Arg inV2) const
Add two float vectors (component wise)
Definition Vec3.inl:448
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec3.inl:555
static JPH_INLINE Vec3 sOr(Vec3Arg inV1, Vec3Arg inV2)
Logical or (component wise)
Definition Vec3.inl:292
static JPH_INLINE UVec4 sGreater(Vec3Arg inV1, Vec3Arg inV2)
Greater than (component wise)
Definition Vec3.inl:222
static JPH_INLINE Vec3 sAnd(Vec3Arg inV1, Vec3Arg inV2)
Logical and (component wise)
Definition Vec3.inl:314
JPH_INLINE void CheckW() const
Internal helper function that checks that W is equal to Z, so e.g. dividing by it should not generate...
static JPH_INLINE Vec3 sUnitSpherical(float inTheta, float inPhi)
Definition Vec3.inl:325
JPH_INLINE UVec4 ToInt() const
Convert each component from a float to an int.
Definition Vec3.inl:790
Type mValue
Definition Vec3.h:286
JPH_INLINE float GetY() const
Definition Vec3.h:125
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec3.inl:544
JPH_INLINE Vec3 operator-() const
Negate.
Definition Vec3.inl:475
JPH_INLINE void StoreFloat3(Float3 *outV) const
Store 3 floats to memory.
Definition Vec3.inl:771
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec3.inl:665
float mF32[4]
Definition Vec3.h:287
static JPH_INLINE UVec4 sEquals(Vec3Arg inV1, Vec3Arg inV2)
Equals (component wise)
Definition Vec3.inl:177
JPH_INLINE bool IsNearZero(float inMaxDistSq=1.0e-12f) const
Test if vector is near zero.
Definition Vec3.inl:351
static JPH_INLINE Vec3 sZero()
Vector with all zeros.
Definition Vec3.inl:107
static JPH_INLINE UVec4 sLess(Vec3Arg inV1, Vec3Arg inV2)
Less than (component wise)
Definition Vec3.inl:192
static JPH_INLINE Vec3 sReplicate(float inV)
Replicate inV across all components.
Definition Vec3.inl:118
static JPH_INLINE Vec3 sClamp(Vec3Arg inV, Vec3Arg inMin, Vec3Arg inMax)
Clamp a vector between min and max (component wise)
Definition Vec3.inl:172
JPH_INLINE Vec3 & operator*=(float inV2)
Multiply vector with float.
Definition Vec3.inl:400
JPH_INLINE Vec3 & operator+=(Vec3Arg inV2)
Add two float vectors (component wise)
Definition Vec3.inl:459
static JPH_INLINE Vec3 sSelect(Vec3Arg inNotSet, Vec3Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit ...
Definition Vec3.inl:269
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition Vec3.inl:756
JPH_INLINE Vec3 Sqrt() const
Component wise square root.
Definition Vec3.inl:695
JPH_INLINE UVec4 ReinterpretAsInt() const
Reinterpret Vec3 as a UVec4 (doesn't change the bits)
Definition Vec3.inl:801
JPH_INLINE Vec3 DotV(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y and Z components.
Definition Vec3.inl:617
static JPH_INLINE Vec3 sLoadFloat3Unsafe(const Float3 &inV)
Load 3 floats from memory (reads 32 bits extra which it doesn't use)
Definition Vec3.inl:134
JPH_INLINE float GetZ() const
Definition Vec3.h:126
JPH_INLINE Vec3 GetSign() const
Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
Definition Vec3.inl:840
static JPH_INLINE Vec3 sNaN()
Vector with all NaN's.
Definition Vec3.inl:129
Vec3()=default
Constructor.
JPH_INLINE int GetHighestComponentIndex() const
Get index of component with highest value.
Definition Vec3.inl:571
static JPH_INLINE Vec3 sFusedMultiplyAdd(Vec3Arg inMul1, Vec3Arg inMul2, Vec3Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition Vec3.inl:252
JPH_INLINE Vec3 Swizzle() const
Swizzle the elements in inV.
Definition Vec4.h:14
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:113
JPH_INLINE float GetY() const
Definition Vec4.h:114
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:74
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:778