Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
Vec3.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
5#include <Jolt/Math/Vec4.h>
6#include <Jolt/Math/UVec4.h>
8
10#include <random>
12
13// Create a std::hash for Vec3
14JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())
15
17
18void Vec3::CheckW() const
19{
20#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
21 // Avoid asserts when both components are NaN
22 JPH_ASSERT(reinterpret_cast<const uint32 *>(mF32)[2] == reinterpret_cast<const uint32 *>(mF32)[3]);
23#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
24}
25
26JPH_INLINE Vec3::Type Vec3::sFixW(Type inValue)
27{
28#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
29 #if defined(JPH_USE_SSE)
30 return _mm_shuffle_ps(inValue, inValue, _MM_SHUFFLE(2, 2, 1, 0));
31 #elif defined(JPH_USE_NEON)
32 return JPH_NEON_SHUFFLE_F32x4(inValue, inValue, 0, 1, 2, 2);
33 #else
34 Type value;
35 value.mData[0] = inValue.mData[0];
36 value.mData[1] = inValue.mData[1];
37 value.mData[2] = inValue.mData[2];
38 value.mData[3] = inValue.mData[2];
39 return value;
40 #endif
41#else
42 return inValue;
43#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
44}
45
47 mValue(sFixW(inRHS.mValue))
48{
49}
50
51Vec3::Vec3(const Float3 &inV)
52{
53#if defined(JPH_USE_SSE)
54 Type x = _mm_load_ss(&inV.x);
55 Type y = _mm_load_ss(&inV.y);
56 Type z = _mm_load_ss(&inV.z);
57 Type xy = _mm_unpacklo_ps(x, y);
58 mValue = _mm_shuffle_ps(xy, z, _MM_SHUFFLE(0, 0, 1, 0)); // Assure Z and W are the same
59#elif defined(JPH_USE_NEON)
60 float32x2_t xy = vld1_f32(&inV.x);
61 float32x2_t zz = vdup_n_f32(inV.z); // Assure Z and W are the same
62 mValue = vcombine_f32(xy, zz);
63#else
64 mF32[0] = inV[0];
65 mF32[1] = inV[1];
66 mF32[2] = inV[2];
67 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
68 mF32[3] = inV[2];
69 #endif
70#endif
71}
72
73Vec3::Vec3(float inX, float inY, float inZ)
74{
75#if defined(JPH_USE_SSE)
76 mValue = _mm_set_ps(inZ, inZ, inY, inX);
77#elif defined(JPH_USE_NEON)
78 uint32x2_t xy = vcreate_u32(static_cast<uint64>(BitCast<uint32>(inX)) | (static_cast<uint64>(BitCast<uint32>(inY)) << 32));
79 uint32x2_t zz = vreinterpret_u32_f32(vdup_n_f32(inZ));
80 mValue = vreinterpretq_f32_u32(vcombine_u32(xy, zz));
81#else
82 mF32[0] = inX;
83 mF32[1] = inY;
84 mF32[2] = inZ;
85 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
86 mF32[3] = inZ;
87 #endif
88#endif
89}
90
91template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ>
93{
94 static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
95 static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
96 static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
97
98#if defined(JPH_USE_SSE)
99 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleZ, SwizzleZ, SwizzleY, SwizzleX)); // Assure Z and W are the same
100#elif defined(JPH_USE_NEON)
101 return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleZ);
102#else
103 return Vec3(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ]);
104#endif
105}
106
108{
109#if defined(JPH_USE_SSE)
110 return _mm_setzero_ps();
111#elif defined(JPH_USE_NEON)
112 return vdupq_n_f32(0);
113#else
114 return Vec3(0, 0, 0);
115#endif
116}
117
119{
120#if defined(JPH_USE_SSE)
121 return _mm_set1_ps(inV);
122#elif defined(JPH_USE_NEON)
123 return vdupq_n_f32(inV);
124#else
125 return Vec3(inV, inV, inV);
126#endif
127}
128
130{
131 return sReplicate(numeric_limits<float>::quiet_NaN());
132}
133
135{
136#if defined(JPH_USE_SSE)
137 Type v = _mm_loadu_ps(&inV.x);
138#elif defined(JPH_USE_NEON)
139 Type v = vld1q_f32(&inV.x);
140#else
141 Type v = { inV.x, inV.y, inV.z };
142#endif
143 return sFixW(v);
144}
145
147{
148#if defined(JPH_USE_SSE)
149 return _mm_min_ps(inV1.mValue, inV2.mValue);
150#elif defined(JPH_USE_NEON)
151 return vminq_f32(inV1.mValue, inV2.mValue);
152#else
153 return Vec3(min(inV1.mF32[0], inV2.mF32[0]),
154 min(inV1.mF32[1], inV2.mF32[1]),
155 min(inV1.mF32[2], inV2.mF32[2]));
156#endif
157}
158
160{
161#if defined(JPH_USE_SSE)
162 return _mm_max_ps(inV1.mValue, inV2.mValue);
163#elif defined(JPH_USE_NEON)
164 return vmaxq_f32(inV1.mValue, inV2.mValue);
165#else
166 return Vec3(max(inV1.mF32[0], inV2.mF32[0]),
167 max(inV1.mF32[1], inV2.mF32[1]),
168 max(inV1.mF32[2], inV2.mF32[2]));
169#endif
170}
171
173{
174 return sMax(sMin(inV, inMax), inMin);
175}
176
178{
179#if defined(JPH_USE_SSE)
180 return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
181#elif defined(JPH_USE_NEON)
182 return vceqq_f32(inV1.mValue, inV2.mValue);
183#else
184 uint32 z = inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0;
185 return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
186 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
187 z,
188 z);
189#endif
190}
191
193{
194#if defined(JPH_USE_SSE)
195 return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
196#elif defined(JPH_USE_NEON)
197 return vcltq_f32(inV1.mValue, inV2.mValue);
198#else
199 uint32 z = inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0;
200 return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
201 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
202 z,
203 z);
204#endif
205}
206
208{
209#if defined(JPH_USE_SSE)
210 return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
211#elif defined(JPH_USE_NEON)
212 return vcleq_f32(inV1.mValue, inV2.mValue);
213#else
214 uint32 z = inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0;
215 return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
216 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
217 z,
218 z);
219#endif
220}
221
223{
224#if defined(JPH_USE_SSE)
225 return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
226#elif defined(JPH_USE_NEON)
227 return vcgtq_f32(inV1.mValue, inV2.mValue);
228#else
229 uint32 z = inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0;
230 return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
231 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
232 z,
233 z);
234#endif
235}
236
238{
239#if defined(JPH_USE_SSE)
240 return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
241#elif defined(JPH_USE_NEON)
242 return vcgeq_f32(inV1.mValue, inV2.mValue);
243#else
244 uint32 z = inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0;
245 return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
246 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
247 z,
248 z);
249#endif
250}
251
253{
254#if defined(JPH_USE_SSE)
255 #ifdef JPH_USE_FMADD
256 return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
257 #else
258 return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
259 #endif
260#elif defined(JPH_USE_NEON)
261 return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
262#else
263 return Vec3(inMul1.mF32[0] * inMul2.mF32[0] + inAdd.mF32[0],
264 inMul1.mF32[1] * inMul2.mF32[1] + inAdd.mF32[1],
265 inMul1.mF32[2] * inMul2.mF32[2] + inAdd.mF32[2]);
266#endif
267}
268
270{
271#if defined(JPH_USE_SSE4_1)
272 Type v = _mm_blendv_ps(inV1.mValue, inV2.mValue, _mm_castsi128_ps(inControl.mValue));
273 return sFixW(v);
274#elif defined(JPH_USE_NEON)
275 Type v = vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inV2.mValue, inV1.mValue);
276 return sFixW(v);
277#else
278 Vec3 result;
279 for (int i = 0; i < 3; i++)
280 result.mF32[i] = inControl.mU32[i] ? inV2.mF32[i] : inV1.mF32[i];
281#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
282 result.mF32[3] = result.mF32[2];
283#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
284 return result;
285#endif
286}
287
289{
290#if defined(JPH_USE_SSE)
291 return _mm_or_ps(inV1.mValue, inV2.mValue);
292#elif defined(JPH_USE_NEON)
293 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
294#else
296#endif
297}
298
300{
301#if defined(JPH_USE_SSE)
302 return _mm_xor_ps(inV1.mValue, inV2.mValue);
303#elif defined(JPH_USE_NEON)
304 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
305#else
307#endif
308}
309
311{
312#if defined(JPH_USE_SSE)
313 return _mm_and_ps(inV1.mValue, inV2.mValue);
314#elif defined(JPH_USE_NEON)
315 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
316#else
318#endif
319}
320
321Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
322{
323 Vec4 s, c;
324 Vec4(inTheta, inPhi, 0, 0).SinCos(s, c);
325 return Vec3(s.GetX() * c.GetY(), s.GetX() * s.GetY(), c.GetX());
326}
327
328template <class Random>
329Vec3 Vec3::sRandom(Random &inRandom)
330{
331 std::uniform_real_distribution<float> zero_to_one(0.0f, 1.0f);
332 float theta = JPH_PI * zero_to_one(inRandom);
333 float phi = 2.0f * JPH_PI * zero_to_one(inRandom);
334 return sUnitSpherical(theta, phi);
335}
336
338{
339 return sEquals(*this, inV2).TestAllXYZTrue();
340}
341
342bool Vec3::IsClose(Vec3Arg inV2, float inMaxDistSq) const
343{
344 return (inV2 - *this).LengthSq() <= inMaxDistSq;
345}
346
347bool Vec3::IsNearZero(float inMaxDistSq) const
348{
349 return LengthSq() <= inMaxDistSq;
350}
351
353{
354#if defined(JPH_USE_SSE)
355 return _mm_mul_ps(mValue, inV2.mValue);
356#elif defined(JPH_USE_NEON)
357 return vmulq_f32(mValue, inV2.mValue);
358#else
359 return Vec3(mF32[0] * inV2.mF32[0], mF32[1] * inV2.mF32[1], mF32[2] * inV2.mF32[2]);
360#endif
361}
362
363Vec3 Vec3::operator * (float inV2) const
364{
365#if defined(JPH_USE_SSE)
366 return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
367#elif defined(JPH_USE_NEON)
368 return vmulq_n_f32(mValue, inV2);
369#else
370 return Vec3(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2);
371#endif
372}
373
374Vec3 operator * (float inV1, Vec3Arg inV2)
375{
376#if defined(JPH_USE_SSE)
377 return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
378#elif defined(JPH_USE_NEON)
379 return vmulq_n_f32(inV2.mValue, inV1);
380#else
381 return Vec3(inV1 * inV2.mF32[0], inV1 * inV2.mF32[1], inV1 * inV2.mF32[2]);
382#endif
383}
384
385Vec3 Vec3::operator / (float inV2) const
386{
387#if defined(JPH_USE_SSE)
388 return _mm_div_ps(mValue, _mm_set1_ps(inV2));
389#elif defined(JPH_USE_NEON)
390 return vdivq_f32(mValue, vdupq_n_f32(inV2));
391#else
392 return Vec3(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2);
393#endif
394}
395
397{
398#if defined(JPH_USE_SSE)
399 mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
400#elif defined(JPH_USE_NEON)
401 mValue = vmulq_n_f32(mValue, inV2);
402#else
403 for (int i = 0; i < 3; ++i)
404 mF32[i] *= inV2;
405 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
406 mF32[3] = mF32[2];
407 #endif
408#endif
409 return *this;
410}
411
413{
414#if defined(JPH_USE_SSE)
415 mValue = _mm_mul_ps(mValue, inV2.mValue);
416#elif defined(JPH_USE_NEON)
417 mValue = vmulq_f32(mValue, inV2.mValue);
418#else
419 for (int i = 0; i < 3; ++i)
420 mF32[i] *= inV2.mF32[i];
421 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
422 mF32[3] = mF32[2];
423 #endif
424#endif
425 return *this;
426}
427
429{
430#if defined(JPH_USE_SSE)
431 mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
432#elif defined(JPH_USE_NEON)
433 mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
434#else
435 for (int i = 0; i < 3; ++i)
436 mF32[i] /= inV2;
437 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
438 mF32[3] = mF32[2];
439 #endif
440#endif
441 return *this;
442}
443
445{
446#if defined(JPH_USE_SSE)
447 return _mm_add_ps(mValue, inV2.mValue);
448#elif defined(JPH_USE_NEON)
449 return vaddq_f32(mValue, inV2.mValue);
450#else
451 return Vec3(mF32[0] + inV2.mF32[0], mF32[1] + inV2.mF32[1], mF32[2] + inV2.mF32[2]);
452#endif
453}
454
456{
457#if defined(JPH_USE_SSE)
458 mValue = _mm_add_ps(mValue, inV2.mValue);
459#elif defined(JPH_USE_NEON)
460 mValue = vaddq_f32(mValue, inV2.mValue);
461#else
462 for (int i = 0; i < 3; ++i)
463 mF32[i] += inV2.mF32[i];
464 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
465 mF32[3] = mF32[2];
466 #endif
467#endif
468 return *this;
469}
470
472{
473#if defined(JPH_USE_SSE)
474 return _mm_sub_ps(_mm_setzero_ps(), mValue);
475#elif defined(JPH_USE_NEON)
476 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
477 return vsubq_f32(vdupq_n_f32(0), mValue);
478 #else
479 return vnegq_f32(mValue);
480 #endif
481#else
482 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
483 return Vec3(0.0f - mF32[0], 0.0f - mF32[1], 0.0f - mF32[2]);
484 #else
485 return Vec3(-mF32[0], -mF32[1], -mF32[2]);
486 #endif
487#endif
488}
489
491{
492#if defined(JPH_USE_SSE)
493 return _mm_sub_ps(mValue, inV2.mValue);
494#elif defined(JPH_USE_NEON)
495 return vsubq_f32(mValue, inV2.mValue);
496#else
497 return Vec3(mF32[0] - inV2.mF32[0], mF32[1] - inV2.mF32[1], mF32[2] - inV2.mF32[2]);
498#endif
499}
500
502{
503#if defined(JPH_USE_SSE)
504 mValue = _mm_sub_ps(mValue, inV2.mValue);
505#elif defined(JPH_USE_NEON)
506 mValue = vsubq_f32(mValue, inV2.mValue);
507#else
508 for (int i = 0; i < 3; ++i)
509 mF32[i] -= inV2.mF32[i];
510 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
511 mF32[3] = mF32[2];
512 #endif
513#endif
514 return *this;
515}
516
518{
519 inV2.CheckW(); // Check W equals Z to avoid div by zero
520#if defined(JPH_USE_SSE)
521 return _mm_div_ps(mValue, inV2.mValue);
522#elif defined(JPH_USE_NEON)
523 return vdivq_f32(mValue, inV2.mValue);
524#else
525 return Vec3(mF32[0] / inV2.mF32[0], mF32[1] / inV2.mF32[1], mF32[2] / inV2.mF32[2]);
526#endif
527}
528
530{
531#if defined(JPH_USE_SSE)
532 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
533#elif defined(JPH_USE_NEON)
534 return vdupq_laneq_f32(mValue, 0);
535#else
536 return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
537#endif
538}
539
541{
542#if defined(JPH_USE_SSE)
543 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
544#elif defined(JPH_USE_NEON)
545 return vdupq_laneq_f32(mValue, 1);
546#else
547 return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
548#endif
549}
550
552{
553#if defined(JPH_USE_SSE)
554 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
555#elif defined(JPH_USE_NEON)
556 return vdupq_laneq_f32(mValue, 2);
557#else
558 return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
559#endif
560}
561
563{
564 return GetX() < GetY() ? (GetZ() < GetX() ? 2 : 0) : (GetZ() < GetY() ? 2 : 1);
565}
566
568{
569 return GetX() > GetY() ? (GetZ() > GetX() ? 2 : 0) : (GetZ() > GetY() ? 2 : 1);
570}
571
573{
574#if defined(JPH_USE_AVX512)
575 return _mm_range_ps(mValue, mValue, 0b1000);
576#elif defined(JPH_USE_SSE)
577 return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
578#elif defined(JPH_USE_NEON)
579 return vabsq_f32(mValue);
580#else
581 return Vec3(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]));
582#endif
583}
584
586{
587 return sReplicate(1.0f) / mValue;
588}
589
591{
592#if defined(JPH_USE_SSE)
593 Type t1 = _mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
594 t1 = _mm_mul_ps(t1, mValue);
595 Type t2 = _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
596 t2 = _mm_mul_ps(t2, inV2.mValue);
597 Type t3 = _mm_sub_ps(t1, t2);
598 return _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
599#elif defined(JPH_USE_NEON)
600 Type t1 = JPH_NEON_SHUFFLE_F32x4(inV2.mValue, inV2.mValue, 1, 2, 0, 0); // Assure Z and W are the same
601 t1 = vmulq_f32(t1, mValue);
602 Type t2 = JPH_NEON_SHUFFLE_F32x4(mValue, mValue, 1, 2, 0, 0); // Assure Z and W are the same
603 t2 = vmulq_f32(t2, inV2.mValue);
604 Type t3 = vsubq_f32(t1, t2);
605 return JPH_NEON_SHUFFLE_F32x4(t3, t3, 1, 2, 0, 0); // Assure Z and W are the same
606#else
607 return Vec3(mF32[1] * inV2.mF32[2] - mF32[2] * inV2.mF32[1],
608 mF32[2] * inV2.mF32[0] - mF32[0] * inV2.mF32[2],
609 mF32[0] * inV2.mF32[1] - mF32[1] * inV2.mF32[0]);
610#endif
611}
612
614{
615#if defined(JPH_USE_SSE4_1)
616 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
617#elif defined(JPH_USE_NEON)
618 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
619 mul = vsetq_lane_f32(0, mul, 3);
620 return vdupq_n_f32(vaddvq_f32(mul));
621#else
622 float dot = 0.0f;
623 for (int i = 0; i < 3; i++)
624 dot += mF32[i] * inV2.mF32[i];
625 return Vec3::sReplicate(dot);
626#endif
627}
628
630{
631#if defined(JPH_USE_SSE4_1)
632 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
633#elif defined(JPH_USE_NEON)
634 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
635 mul = vsetq_lane_f32(0, mul, 3);
636 return vdupq_n_f32(vaddvq_f32(mul));
637#else
638 float dot = 0.0f;
639 for (int i = 0; i < 3; i++)
640 dot += mF32[i] * inV2.mF32[i];
641 return Vec4::sReplicate(dot);
642#endif
643}
644
645float Vec3::Dot(Vec3Arg inV2) const
646{
647#if defined(JPH_USE_SSE4_1)
648 return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0x7f));
649#elif defined(JPH_USE_NEON)
650 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
651 mul = vsetq_lane_f32(0, mul, 3);
652 return vaddvq_f32(mul);
653#else
654 float dot = 0.0f;
655 for (int i = 0; i < 3; i++)
656 dot += mF32[i] * inV2.mF32[i];
657 return dot;
658#endif
659}
660
661float Vec3::LengthSq() const
662{
663#if defined(JPH_USE_SSE4_1)
664 return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0x7f));
665#elif defined(JPH_USE_NEON)
666 float32x4_t mul = vmulq_f32(mValue, mValue);
667 mul = vsetq_lane_f32(0, mul, 3);
668 return vaddvq_f32(mul);
669#else
670 float len_sq = 0.0f;
671 for (int i = 0; i < 3; i++)
672 len_sq += mF32[i] * mF32[i];
673 return len_sq;
674#endif
675}
676
677float Vec3::Length() const
678{
679#if defined(JPH_USE_SSE4_1)
680 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0x7f)));
681#elif defined(JPH_USE_NEON)
682 float32x4_t mul = vmulq_f32(mValue, mValue);
683 mul = vsetq_lane_f32(0, mul, 3);
684 float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
685 return vget_lane_f32(vsqrt_f32(sum), 0);
686#else
687 return sqrt(LengthSq());
688#endif
689}
690
692{
693#if defined(JPH_USE_SSE)
694 return _mm_sqrt_ps(mValue);
695#elif defined(JPH_USE_NEON)
696 return vsqrtq_f32(mValue);
697#else
698 return Vec3(sqrt(mF32[0]), sqrt(mF32[1]), sqrt(mF32[2]));
699#endif
700}
701
703{
704#if defined(JPH_USE_SSE4_1)
705 return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0x7f)));
706#elif defined(JPH_USE_NEON)
707 float32x4_t mul = vmulq_f32(mValue, mValue);
708 mul = vsetq_lane_f32(0, mul, 3);
709 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
710 return vdivq_f32(mValue, vsqrtq_f32(sum));
711#else
712 return *this / Length();
713#endif
714}
715
717{
718#if defined(JPH_USE_SSE4_1)
719 Type len_sq = _mm_dp_ps(mValue, mValue, 0x7f);
720 Type is_zero = _mm_cmpeq_ps(len_sq, _mm_setzero_ps());
721#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
722 if (_mm_movemask_ps(is_zero) == 0xf)
723 return inZeroValue;
724 else
725 return _mm_div_ps(mValue, _mm_sqrt_ps(len_sq));
726#else
727 return _mm_blendv_ps(_mm_div_ps(mValue, _mm_sqrt_ps(len_sq)), inZeroValue.mValue, is_zero);
728#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
729#elif defined(JPH_USE_NEON)
730 float32x4_t mul = vmulq_f32(mValue, mValue);
731 mul = vsetq_lane_f32(0, mul, 3);
732 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
733 float32x4_t len = vsqrtq_f32(sum);
734 uint32x4_t is_zero = vceqq_f32(len, vdupq_n_f32(0));
735 return vbslq_f32(is_zero, inZeroValue.mValue, vdivq_f32(mValue, len));
736#else
737 float len_sq = LengthSq();
738 if (len_sq == 0.0f)
739 return inZeroValue;
740 else
741 return *this / sqrt(len_sq);
742#endif
743}
744
745bool Vec3::IsNormalized(float inTolerance) const
746{
747 return abs(LengthSq() - 1.0f) <= inTolerance;
748}
749
750bool Vec3::IsNaN() const
751{
752#if defined(JPH_USE_AVX512)
753 return (_mm_fpclass_ps_mask(mValue, 0b10000001) & 0x7) != 0;
754#elif defined(JPH_USE_SSE)
755 return (_mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) & 0x7) != 0;
756#elif defined(JPH_USE_NEON)
757 uint32x4_t mask = JPH_NEON_UINT32x4(1, 1, 1, 0);
758 uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
759 return vaddvq_u32(vandq_u32(is_equal, mask)) != 3;
760#else
761 return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]);
762#endif
763}
764
765void Vec3::StoreFloat3(Float3 *outV) const
766{
767#if defined(JPH_USE_SSE)
768 _mm_store_ss(&outV->x, mValue);
769 Vec3 t = Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_UNUSED>();
770 _mm_store_ss(&outV->y, t.mValue);
772 _mm_store_ss(&outV->z, t.mValue);
773#elif defined(JPH_USE_NEON)
774 float32x2_t xy = vget_low_f32(mValue);
775 vst1_f32(&outV->x, xy);
776 vst1q_lane_f32(&outV->z, mValue, 2);
777#else
778 outV->x = mF32[0];
779 outV->y = mF32[1];
780 outV->z = mF32[2];
781#endif
782}
783
785{
786#if defined(JPH_USE_SSE)
787 return _mm_cvttps_epi32(mValue);
788#elif defined(JPH_USE_NEON)
789 return vcvtq_u32_f32(mValue);
790#else
791 return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
792#endif
793}
794
796{
797#if defined(JPH_USE_SSE)
798 return UVec4(_mm_castps_si128(mValue));
799#elif defined(JPH_USE_NEON)
800 return vreinterpretq_u32_f32(mValue);
801#else
802 return *reinterpret_cast<const UVec4 *>(this);
803#endif
804}
805
806float Vec3::ReduceMin() const
807{
808 Vec3 v = sMin(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
810 return v.GetX();
811}
812
813float Vec3::ReduceMax() const
814{
815 Vec3 v = sMax(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
817 return v.GetX();
818}
819
821{
822 if (abs(mF32[0]) > abs(mF32[1]))
823 {
824 float len = sqrt(mF32[0] * mF32[0] + mF32[2] * mF32[2]);
825 return Vec3(mF32[2], 0.0f, -mF32[0]) / len;
826 }
827 else
828 {
829 float len = sqrt(mF32[1] * mF32[1] + mF32[2] * mF32[2]);
830 return Vec3(0.0f, mF32[2], -mF32[1]) / len;
831 }
832}
833
835{
836#if defined(JPH_USE_AVX512)
837 return _mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90A00), 0);
838#elif defined(JPH_USE_SSE)
839 Type minus_one = _mm_set1_ps(-1.0f);
840 Type one = _mm_set1_ps(1.0f);
841 return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
842#elif defined(JPH_USE_NEON)
843 Type minus_one = vdupq_n_f32(-1.0f);
844 Type one = vdupq_n_f32(1.0f);
845 return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(mValue), vreinterpretq_u32_f32(minus_one)), vreinterpretq_u32_f32(one)));
846#else
847 return Vec3(std::signbit(mF32[0])? -1.0f : 1.0f,
848 std::signbit(mF32[1])? -1.0f : 1.0f,
849 std::signbit(mF32[2])? -1.0f : 1.0f);
850#endif
851}
852
#define JPH_SUPPRESS_WARNINGS_STD_BEGIN
Definition Core.h:383
#define JPH_SUPPRESS_WARNINGS_STD_END
Definition Core.h:395
std::uint64_t uint64
Definition Core.h:456
#define JPH_NAMESPACE_END
Definition Core.h:378
std::uint32_t uint32
Definition Core.h:455
#define JPH_NAMESPACE_BEGIN
Definition Core.h:372
#define JPH_MAKE_HASHABLE(type,...)
Definition HashCombine.h:87
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_UNUSED
We always use the Z component when we don't specifically want to initialize a value,...
Definition Swizzle.h:16
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Vec3 operator*(float inV1, Vec3Arg inV2)
Definition Vec3.inl:374
Class that holds 3 floats. Used as a storage class. Convert to Vec3 for calculations.
Definition Float3.h:13
float y
Definition Float3.h:39
float z
Definition Float3.h:40
float x
Definition Float3.h:38
Definition UVec4.h:12
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:199
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:171
JPH_INLINE bool TestAllXYZTrue() const
Test if X, Y and Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:410
Type mValue
Definition UVec4.h:211
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:185
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:337
uint32 mU32[4]
Definition UVec4.h:212
Definition Vec3.h:17
JPH_INLINE bool IsClose(Vec3Arg inV2, float inMaxDistSq=1.0e-12f) const
Test if two vectors are close.
Definition Vec3.inl:342
static JPH_INLINE Vec3 sMax(Vec3Arg inV1, Vec3Arg inV2)
Return the maximum of each of the components.
Definition Vec3.inl:159
JPH_INLINE float Dot(Vec3Arg inV2) const
Dot product.
Definition Vec3.inl:645
JPH_INLINE Vec3 Normalized() const
Normalize vector.
Definition Vec3.inl:702
static JPH_INLINE Type sFixW(Type inValue)
Internal helper function that ensures that the Z component is replicated to the W component to preven...
Vec4::Type Type
Definition Vec3.h:27
JPH_INLINE bool operator==(Vec3Arg inV2) const
Comparison.
Definition Vec3.inl:337
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec3.inl:529
static JPH_INLINE Vec3 sMin(Vec3Arg inV1, Vec3Arg inV2)
Return the minimum value of each of the components.
Definition Vec3.inl:146
JPH_INLINE Vec3 Cross(Vec3Arg inV2) const
Cross product.
Definition Vec3.inl:590
JPH_INLINE Vec3 GetNormalizedPerpendicular() const
Get normalized vector that is perpendicular to this vector.
Definition Vec3.inl:820
static Vec3 sRandom(Random &inRandom)
Get random unit vector.
Definition Vec3.inl:329
JPH_INLINE float GetX() const
Get individual components.
Definition Vec3.h:124
JPH_INLINE bool IsNormalized(float inTolerance=1.0e-6f) const
Test if vector is normalized.
Definition Vec3.inl:745
static JPH_INLINE Vec3 sXor(Vec3Arg inV1, Vec3Arg inV2)
Logical xor (component wise)
Definition Vec3.inl:299
JPH_INLINE float Length() const
Length of vector.
Definition Vec3.inl:677
static JPH_INLINE UVec4 sGreaterOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Greater than or equal (component wise)
Definition Vec3.inl:237
JPH_INLINE float ReduceMin() const
Get the minimum of X, Y and Z.
Definition Vec3.inl:806
JPH_INLINE Vec3 & operator-=(Vec3Arg inV2)
Subtract two float vectors (component wise)
Definition Vec3.inl:501
JPH_INLINE float ReduceMax() const
Get the maximum of X, Y and Z.
Definition Vec3.inl:813
static JPH_INLINE UVec4 sLessOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Less than or equal (component wise)
Definition Vec3.inl:207
JPH_INLINE Vec3 operator/(float inV2) const
Divide vector by float.
Definition Vec3.inl:385
friend JPH_INLINE Vec3 operator*(float inV1, Vec3Arg inV2)
Multiply vector with float.
Definition Vec3.inl:374
JPH_INLINE int GetLowestComponentIndex() const
Get index of component with lowest value.
Definition Vec3.inl:562
JPH_INLINE Vec3 & operator/=(float inV2)
Divide vector by float.
Definition Vec3.inl:428
JPH_INLINE Vec4 DotV4(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition Vec3.inl:629
JPH_INLINE Vec3 Abs() const
Return the absolute value of each of the components.
Definition Vec3.inl:572
JPH_INLINE Vec3 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition Vec3.inl:585
JPH_INLINE Vec3 NormalizedOr(Vec3Arg inZeroValue) const
Normalize vector or return inZeroValue if the length of the vector is zero.
Definition Vec3.inl:716
JPH_INLINE Vec3 operator+(Vec3Arg inV2) const
Add two float vectors (component wise)
Definition Vec3.inl:444
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec3.inl:551
static JPH_INLINE Vec3 sOr(Vec3Arg inV1, Vec3Arg inV2)
Logical or (component wise)
Definition Vec3.inl:288
static JPH_INLINE UVec4 sGreater(Vec3Arg inV1, Vec3Arg inV2)
Greater than (component wise)
Definition Vec3.inl:222
static JPH_INLINE Vec3 sAnd(Vec3Arg inV1, Vec3Arg inV2)
Logical and (component wise)
Definition Vec3.inl:310
JPH_INLINE void CheckW() const
Internal helper function that checks that W is equal to Z, so e.g. dividing by it should not generate...
static JPH_INLINE Vec3 sSelect(Vec3Arg inV1, Vec3Arg inV2, UVec4Arg inControl)
Component wise select, returns inV1 when highest bit of inControl = 0 and inV2 when highest bit of in...
Definition Vec3.inl:269
static JPH_INLINE Vec3 sUnitSpherical(float inTheta, float inPhi)
Definition Vec3.inl:321
JPH_INLINE UVec4 ToInt() const
Convert each component from a float to an int.
Definition Vec3.inl:784
Type mValue
Definition Vec3.h:286
JPH_INLINE float GetY() const
Definition Vec3.h:125
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec3.inl:540
JPH_INLINE Vec3 operator-() const
Negate.
Definition Vec3.inl:471
JPH_INLINE void StoreFloat3(Float3 *outV) const
Store 3 floats to memory.
Definition Vec3.inl:765
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec3.inl:661
float mF32[4]
Definition Vec3.h:287
static JPH_INLINE UVec4 sEquals(Vec3Arg inV1, Vec3Arg inV2)
Equals (component wise)
Definition Vec3.inl:177
JPH_INLINE bool IsNearZero(float inMaxDistSq=1.0e-12f) const
Test if vector is near zero.
Definition Vec3.inl:347
static JPH_INLINE Vec3 sZero()
Vector with all zeros.
Definition Vec3.inl:107
static JPH_INLINE UVec4 sLess(Vec3Arg inV1, Vec3Arg inV2)
Less than (component wise)
Definition Vec3.inl:192
static JPH_INLINE Vec3 sReplicate(float inV)
Replicate inV across all components.
Definition Vec3.inl:118
static JPH_INLINE Vec3 sClamp(Vec3Arg inV, Vec3Arg inMin, Vec3Arg inMax)
Clamp a vector between min and max (component wise)
Definition Vec3.inl:172
JPH_INLINE Vec3 & operator*=(float inV2)
Multiply vector with float.
Definition Vec3.inl:396
JPH_INLINE Vec3 & operator+=(Vec3Arg inV2)
Add two float vectors (component wise)
Definition Vec3.inl:455
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition Vec3.inl:750
JPH_INLINE Vec3 Sqrt() const
Component wise square root.
Definition Vec3.inl:691
JPH_INLINE UVec4 ReinterpretAsInt() const
Reinterpret Vec3 as a UVec4 (doesn't change the bits)
Definition Vec3.inl:795
JPH_INLINE Vec3 DotV(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y and Z components.
Definition Vec3.inl:613
static JPH_INLINE Vec3 sLoadFloat3Unsafe(const Float3 &inV)
Load 3 floats from memory (reads 32 bits extra which it doesn't use)
Definition Vec3.inl:134
JPH_INLINE float GetZ() const
Definition Vec3.h:126
JPH_INLINE Vec3 GetSign() const
Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
Definition Vec3.inl:834
static JPH_INLINE Vec3 sNaN()
Vector with all NaN's.
Definition Vec3.inl:129
Vec3()=default
Constructor.
JPH_INLINE int GetHighestComponentIndex() const
Get index of component with highest value.
Definition Vec3.inl:567
static JPH_INLINE Vec3 sFusedMultiplyAdd(Vec3Arg inMul1, Vec3Arg inMul2, Vec3Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition Vec3.inl:252
JPH_INLINE Vec3 Swizzle() const
Swizzle the elements in inV.
Definition Vec4.h:14
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:113
JPH_INLINE float GetY() const
Definition Vec4.h:114
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:74
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:775