Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
Vec4.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
6#include <Jolt/Math/Vec3.h>
7#include <Jolt/Math/UVec4.h>
8
10
11// Constructor
13 mValue(inRHS.mValue)
14{
15}
16
17Vec4::Vec4(Vec3Arg inRHS, float inW)
18{
19#if defined(JPH_USE_SSE4_1)
20 mValue = _mm_blend_ps(inRHS.mValue, _mm_set1_ps(inW), 8);
21#elif defined(JPH_USE_NEON)
22 mValue = vsetq_lane_f32(inW, inRHS.mValue, 3);
23#else
24 for (int i = 0; i < 3; i++)
25 mF32[i] = inRHS.mF32[i];
26 mF32[3] = inW;
27#endif
28}
29
30Vec4::Vec4(float inX, float inY, float inZ, float inW)
31{
32#if defined(JPH_USE_SSE)
33 mValue = _mm_set_ps(inW, inZ, inY, inX);
34#elif defined(JPH_USE_NEON)
35 uint32x2_t xy = vcreate_u32(static_cast<uint64>(BitCast<uint32>(inX)) | (static_cast<uint64>(BitCast<uint32>(inY)) << 32));
36 uint32x2_t zw = vcreate_u32(static_cast<uint64>(BitCast<uint32>(inZ)) | (static_cast<uint64>(BitCast<uint32>(inW)) << 32));
37 mValue = vreinterpretq_f32_u32(vcombine_u32(xy, zw));
38#else
39 mF32[0] = inX;
40 mF32[1] = inY;
41 mF32[2] = inZ;
42 mF32[3] = inW;
43#endif
44}
45
46template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
48{
49 static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
50 static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
51 static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
52 static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
53
54#if defined(JPH_USE_SSE)
55 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
56#elif defined(JPH_USE_NEON)
57 return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
58#else
59 return Vec4(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ], mF32[SwizzleW]);
60#endif
61}
62
64{
65#if defined(JPH_USE_SSE)
66 return _mm_setzero_ps();
67#elif defined(JPH_USE_NEON)
68 return vdupq_n_f32(0);
69#else
70 return Vec4(0, 0, 0, 0);
71#endif
72}
73
75{
76#if defined(JPH_USE_SSE)
77 return _mm_set1_ps(inV);
78#elif defined(JPH_USE_NEON)
79 return vdupq_n_f32(inV);
80#else
81 return Vec4(inV, inV, inV, inV);
82#endif
83}
84
86{
87 return sReplicate(1.0f);
88}
89
91{
92 return sReplicate(numeric_limits<float>::quiet_NaN());
93}
94
96{
97#if defined(JPH_USE_SSE)
98 return _mm_loadu_ps(&inV->x);
99#elif defined(JPH_USE_NEON)
100 return vld1q_f32(&inV->x);
101#else
102 return Vec4(inV->x, inV->y, inV->z, inV->w);
103#endif
104}
105
107{
108#if defined(JPH_USE_SSE)
109 return _mm_load_ps(&inV->x);
110#elif defined(JPH_USE_NEON)
111 return vld1q_f32(&inV->x);
112#else
113 return Vec4(inV->x, inV->y, inV->z, inV->w);
114#endif
115}
116
117template <const int Scale>
118Vec4 Vec4::sGatherFloat4(const float *inBase, UVec4Arg inOffsets)
119{
120#if defined(JPH_USE_SSE)
121 #ifdef JPH_USE_AVX2
122 return _mm_i32gather_ps(inBase, inOffsets.mValue, Scale);
123 #else
124 const uint8 *base = reinterpret_cast<const uint8 *>(inBase);
125 Type x = _mm_load_ss(reinterpret_cast<const float *>(base + inOffsets.GetX() * Scale));
126 Type y = _mm_load_ss(reinterpret_cast<const float *>(base + inOffsets.GetY() * Scale));
127 Type xy = _mm_unpacklo_ps(x, y);
128 Type z = _mm_load_ss(reinterpret_cast<const float *>(base + inOffsets.GetZ() * Scale));
129 Type w = _mm_load_ss(reinterpret_cast<const float *>(base + inOffsets.GetW() * Scale));
130 Type zw = _mm_unpacklo_ps(z, w);
131 return _mm_movelh_ps(xy, zw);
132 #endif
133#else
134 const uint8 *base = reinterpret_cast<const uint8 *>(inBase);
135 float x = *reinterpret_cast<const float *>(base + inOffsets.GetX() * Scale);
136 float y = *reinterpret_cast<const float *>(base + inOffsets.GetY() * Scale);
137 float z = *reinterpret_cast<const float *>(base + inOffsets.GetZ() * Scale);
138 float w = *reinterpret_cast<const float *>(base + inOffsets.GetW() * Scale);
139 return Vec4(x, y, z, w);
140#endif
141}
142
144{
145#if defined(JPH_USE_SSE)
146 return _mm_min_ps(inV1.mValue, inV2.mValue);
147#elif defined(JPH_USE_NEON)
148 return vminq_f32(inV1.mValue, inV2.mValue);
149#else
150 return Vec4(min(inV1.mF32[0], inV2.mF32[0]),
151 min(inV1.mF32[1], inV2.mF32[1]),
152 min(inV1.mF32[2], inV2.mF32[2]),
153 min(inV1.mF32[3], inV2.mF32[3]));
154#endif
155}
156
158{
159#if defined(JPH_USE_SSE)
160 return _mm_max_ps(inV1.mValue, inV2.mValue);
161#elif defined(JPH_USE_NEON)
162 return vmaxq_f32(inV1.mValue, inV2.mValue);
163#else
164 return Vec4(max(inV1.mF32[0], inV2.mF32[0]),
165 max(inV1.mF32[1], inV2.mF32[1]),
166 max(inV1.mF32[2], inV2.mF32[2]),
167 max(inV1.mF32[3], inV2.mF32[3]));
168#endif
169}
170
172{
173 return sMax(sMin(inV, inMax), inMin);
174}
175
177{
178#if defined(JPH_USE_SSE)
179 return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
180#elif defined(JPH_USE_NEON)
181 return vceqq_f32(inV1.mValue, inV2.mValue);
182#else
183 return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
184 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
185 inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0,
186 inV1.mF32[3] == inV2.mF32[3]? 0xffffffffu : 0);
187#endif
188}
189
191{
192#if defined(JPH_USE_SSE)
193 return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
194#elif defined(JPH_USE_NEON)
195 return vcltq_f32(inV1.mValue, inV2.mValue);
196#else
197 return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
198 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
199 inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0,
200 inV1.mF32[3] < inV2.mF32[3]? 0xffffffffu : 0);
201#endif
202}
203
205{
206#if defined(JPH_USE_SSE)
207 return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
208#elif defined(JPH_USE_NEON)
209 return vcleq_f32(inV1.mValue, inV2.mValue);
210#else
211 return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
212 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
213 inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0,
214 inV1.mF32[3] <= inV2.mF32[3]? 0xffffffffu : 0);
215#endif
216}
217
219{
220#if defined(JPH_USE_SSE)
221 return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
222#elif defined(JPH_USE_NEON)
223 return vcgtq_f32(inV1.mValue, inV2.mValue);
224#else
225 return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
226 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
227 inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0,
228 inV1.mF32[3] > inV2.mF32[3]? 0xffffffffu : 0);
229#endif
230}
231
233{
234#if defined(JPH_USE_SSE)
235 return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
236#elif defined(JPH_USE_NEON)
237 return vcgeq_f32(inV1.mValue, inV2.mValue);
238#else
239 return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
240 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
241 inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0,
242 inV1.mF32[3] >= inV2.mF32[3]? 0xffffffffu : 0);
243#endif
244}
245
247{
248#if defined(JPH_USE_SSE)
249 #ifdef JPH_USE_FMADD
250 return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
251 #else
252 return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
253 #endif
254#elif defined(JPH_USE_NEON)
255 return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
256#else
257 return Vec4(inMul1.mF32[0] * inMul2.mF32[0] + inAdd.mF32[0],
258 inMul1.mF32[1] * inMul2.mF32[1] + inAdd.mF32[1],
259 inMul1.mF32[2] * inMul2.mF32[2] + inAdd.mF32[2],
260 inMul1.mF32[3] * inMul2.mF32[3] + inAdd.mF32[3]);
261#endif
262}
263
264Vec4 Vec4::sSelect(Vec4Arg inNotSet, Vec4Arg inSet, UVec4Arg inControl)
265{
266#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
267 return _mm_blendv_ps(inNotSet.mValue, inSet.mValue, _mm_castsi128_ps(inControl.mValue));
268#elif defined(JPH_USE_SSE)
269 __m128 is_set = _mm_castsi128_ps(_mm_srai_epi32(inControl.mValue, 31));
270 return _mm_or_ps(_mm_and_ps(is_set, inSet.mValue), _mm_andnot_ps(is_set, inNotSet.mValue));
271#elif defined(JPH_USE_NEON)
272 return vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inSet.mValue, inNotSet.mValue);
273#else
274 Vec4 result;
275 for (int i = 0; i < 4; i++)
276 result.mF32[i] = (inControl.mU32[i] & 0x80000000u) ? inSet.mF32[i] : inNotSet.mF32[i];
277 return result;
278#endif
279}
280
282{
283#if defined(JPH_USE_SSE)
284 return _mm_or_ps(inV1.mValue, inV2.mValue);
285#elif defined(JPH_USE_NEON)
286 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
287#else
289#endif
290}
291
293{
294#if defined(JPH_USE_SSE)
295 return _mm_xor_ps(inV1.mValue, inV2.mValue);
296#elif defined(JPH_USE_NEON)
297 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
298#else
300#endif
301}
302
304{
305#if defined(JPH_USE_SSE)
306 return _mm_and_ps(inV1.mValue, inV2.mValue);
307#elif defined(JPH_USE_NEON)
308 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
309#else
311#endif
312}
313
314void Vec4::sSort4(Vec4 &ioValue, UVec4 &ioIndex)
315{
316 // Pass 1, test 1st vs 3rd, 2nd vs 4th
319 UVec4 c1 = sLess(ioValue, v1).Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W>();
320 ioValue = sSelect(ioValue, v1, c1);
321 ioIndex = UVec4::sSelect(ioIndex, i1, c1);
322
323 // Pass 2, test 1st vs 2nd, 3rd vs 4th
326 UVec4 c2 = sLess(ioValue, v2).Swizzle<SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_W, SWIZZLE_W>();
327 ioValue = sSelect(ioValue, v2, c2);
328 ioIndex = UVec4::sSelect(ioIndex, i2, c2);
329
330 // Pass 3, test 2nd vs 3rd component
333 UVec4 c3 = sLess(ioValue, v3).Swizzle<SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_W>();
334 ioValue = sSelect(ioValue, v3, c3);
335 ioIndex = UVec4::sSelect(ioIndex, i3, c3);
336}
337
338void Vec4::sSort4Reverse(Vec4 &ioValue, UVec4 &ioIndex)
339{
340 // Pass 1, test 1st vs 3rd, 2nd vs 4th
344 ioValue = sSelect(ioValue, v1, c1);
345 ioIndex = UVec4::sSelect(ioIndex, i1, c1);
346
347 // Pass 2, test 1st vs 2nd, 3rd vs 4th
351 ioValue = sSelect(ioValue, v2, c2);
352 ioIndex = UVec4::sSelect(ioIndex, i2, c2);
353
354 // Pass 3, test 2nd vs 3rd component
358 ioValue = sSelect(ioValue, v3, c3);
359 ioIndex = UVec4::sSelect(ioIndex, i3, c3);
360}
361
363{
364 return sEquals(*this, inV2).TestAllTrue();
365}
366
367bool Vec4::IsClose(Vec4Arg inV2, float inMaxDistSq) const
368{
369 return (inV2 - *this).LengthSq() <= inMaxDistSq;
370}
371
372bool Vec4::IsNearZero(float inMaxDistSq) const
373{
374 return LengthSq() <= inMaxDistSq;
375}
376
377bool Vec4::IsNormalized(float inTolerance) const
378{
379 return abs(LengthSq() - 1.0f) <= inTolerance;
380}
381
382bool Vec4::IsNaN() const
383{
384#if defined(JPH_USE_AVX512)
385 return _mm_fpclass_ps_mask(mValue, 0b10000001) != 0;
386#elif defined(JPH_USE_SSE)
387 return _mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) != 0;
388#elif defined(JPH_USE_NEON)
389 uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
390 return vaddvq_u32(vshrq_n_u32(is_equal, 31)) != 4;
391#else
392 return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]) || isnan(mF32[3]);
393#endif
394}
395
397{
398#if defined(JPH_USE_SSE)
399 return _mm_mul_ps(mValue, inV2.mValue);
400#elif defined(JPH_USE_NEON)
401 return vmulq_f32(mValue, inV2.mValue);
402#else
403 return Vec4(mF32[0] * inV2.mF32[0],
404 mF32[1] * inV2.mF32[1],
405 mF32[2] * inV2.mF32[2],
406 mF32[3] * inV2.mF32[3]);
407#endif
408}
409
410Vec4 Vec4::operator * (float inV2) const
411{
412#if defined(JPH_USE_SSE)
413 return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
414#elif defined(JPH_USE_NEON)
415 return vmulq_n_f32(mValue, inV2);
416#else
417 return Vec4(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2, mF32[3] * inV2);
418#endif
419}
420
422Vec4 operator * (float inV1, Vec4Arg inV2)
423{
424#if defined(JPH_USE_SSE)
425 return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
426#elif defined(JPH_USE_NEON)
427 return vmulq_n_f32(inV2.mValue, inV1);
428#else
429 return Vec4(inV1 * inV2.mF32[0],
430 inV1 * inV2.mF32[1],
431 inV1 * inV2.mF32[2],
432 inV1 * inV2.mF32[3]);
433#endif
434}
435
436Vec4 Vec4::operator / (float inV2) const
437{
438#if defined(JPH_USE_SSE)
439 return _mm_div_ps(mValue, _mm_set1_ps(inV2));
440#elif defined(JPH_USE_NEON)
441 return vdivq_f32(mValue, vdupq_n_f32(inV2));
442#else
443 return Vec4(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2, mF32[3] / inV2);
444#endif
445}
446
448{
449#if defined(JPH_USE_SSE)
450 mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
451#elif defined(JPH_USE_NEON)
452 mValue = vmulq_n_f32(mValue, inV2);
453#else
454 for (int i = 0; i < 4; ++i)
455 mF32[i] *= inV2;
456#endif
457 return *this;
458}
459
461{
462#if defined(JPH_USE_SSE)
463 mValue = _mm_mul_ps(mValue, inV2.mValue);
464#elif defined(JPH_USE_NEON)
465 mValue = vmulq_f32(mValue, inV2.mValue);
466#else
467 for (int i = 0; i < 4; ++i)
468 mF32[i] *= inV2.mF32[i];
469#endif
470 return *this;
471}
472
474{
475#if defined(JPH_USE_SSE)
476 mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
477#elif defined(JPH_USE_NEON)
478 mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
479#else
480 for (int i = 0; i < 4; ++i)
481 mF32[i] /= inV2;
482#endif
483 return *this;
484}
485
487{
488#if defined(JPH_USE_SSE)
489 return _mm_add_ps(mValue, inV2.mValue);
490#elif defined(JPH_USE_NEON)
491 return vaddq_f32(mValue, inV2.mValue);
492#else
493 return Vec4(mF32[0] + inV2.mF32[0],
494 mF32[1] + inV2.mF32[1],
495 mF32[2] + inV2.mF32[2],
496 mF32[3] + inV2.mF32[3]);
497#endif
498}
499
501{
502#if defined(JPH_USE_SSE)
503 mValue = _mm_add_ps(mValue, inV2.mValue);
504#elif defined(JPH_USE_NEON)
505 mValue = vaddq_f32(mValue, inV2.mValue);
506#else
507 for (int i = 0; i < 4; ++i)
508 mF32[i] += inV2.mF32[i];
509#endif
510 return *this;
511}
512
514{
515#if defined(JPH_USE_SSE)
516 return _mm_sub_ps(_mm_setzero_ps(), mValue);
517#elif defined(JPH_USE_NEON)
518 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
519 return vsubq_f32(vdupq_n_f32(0), mValue);
520 #else
521 return vnegq_f32(mValue);
522 #endif
523#else
524 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
525 return Vec4(0.0f - mF32[0], 0.0f - mF32[1], 0.0f - mF32[2], 0.0f - mF32[3]);
526 #else
527 return Vec4(-mF32[0], -mF32[1], -mF32[2], -mF32[3]);
528 #endif
529#endif
530}
531
533{
534#if defined(JPH_USE_SSE)
535 return _mm_sub_ps(mValue, inV2.mValue);
536#elif defined(JPH_USE_NEON)
537 return vsubq_f32(mValue, inV2.mValue);
538#else
539 return Vec4(mF32[0] - inV2.mF32[0],
540 mF32[1] - inV2.mF32[1],
541 mF32[2] - inV2.mF32[2],
542 mF32[3] - inV2.mF32[3]);
543#endif
544}
545
547{
548#if defined(JPH_USE_SSE)
549 mValue = _mm_sub_ps(mValue, inV2.mValue);
550#elif defined(JPH_USE_NEON)
551 mValue = vsubq_f32(mValue, inV2.mValue);
552#else
553 for (int i = 0; i < 4; ++i)
554 mF32[i] -= inV2.mF32[i];
555#endif
556 return *this;
557}
558
560{
561#if defined(JPH_USE_SSE)
562 return _mm_div_ps(mValue, inV2.mValue);
563#elif defined(JPH_USE_NEON)
564 return vdivq_f32(mValue, inV2.mValue);
565#else
566 return Vec4(mF32[0] / inV2.mF32[0],
567 mF32[1] / inV2.mF32[1],
568 mF32[2] / inV2.mF32[2],
569 mF32[3] / inV2.mF32[3]);
570#endif
571}
572
574{
575#if defined(JPH_USE_SSE)
576 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
577#elif defined(JPH_USE_NEON)
578 return vdupq_laneq_f32(mValue, 0);
579#else
580 return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
581#endif
582}
583
585{
586#if defined(JPH_USE_SSE)
587 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
588#elif defined(JPH_USE_NEON)
589 return vdupq_laneq_f32(mValue, 1);
590#else
591 return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
592#endif
593}
594
596{
597#if defined(JPH_USE_SSE)
598 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
599#elif defined(JPH_USE_NEON)
600 return vdupq_laneq_f32(mValue, 2);
601#else
602 return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
603#endif
604}
605
607{
608#if defined(JPH_USE_SSE)
609 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(3, 3, 3, 3));
610#elif defined(JPH_USE_NEON)
611 return vdupq_laneq_f32(mValue, 3);
612#else
613 return Vec4(mF32[3], mF32[3], mF32[3], mF32[3]);
614#endif
615}
616
618{
619#if defined(JPH_USE_SSE)
620 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
621#elif defined(JPH_USE_NEON)
622 return vdupq_laneq_f32(mValue, 0);
623#else
624 return Vec3(mF32[0], mF32[0], mF32[0]);
625#endif
626}
627
629{
630#if defined(JPH_USE_SSE)
631 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
632#elif defined(JPH_USE_NEON)
633 return vdupq_laneq_f32(mValue, 1);
634#else
635 return Vec3(mF32[1], mF32[1], mF32[1]);
636#endif
637}
638
640{
641#if defined(JPH_USE_SSE)
642 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
643#elif defined(JPH_USE_NEON)
644 return vdupq_laneq_f32(mValue, 2);
645#else
646 return Vec3(mF32[2], mF32[2], mF32[2]);
647#endif
648}
649
651{
652#if defined(JPH_USE_SSE)
653 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(3, 3, 3, 3));
654#elif defined(JPH_USE_NEON)
655 return vdupq_laneq_f32(mValue, 3);
656#else
657 return Vec3(mF32[3], mF32[3], mF32[3]);
658#endif
659}
660
662{
663 // Get the minimum value in all 4 components
665 value = Vec4::sMin(value, value.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y>());
666
667 // Compare with the original vector to find which component is equal to the minimum value
668 return CountTrailingZeros(Vec4::sEquals(*this, value).GetTrues());
669}
670
672{
673 // Get the maximum value in all 4 components
675 value = Vec4::sMax(value, value.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y>());
676
677 // Compare with the original vector to find which component is equal to the maximum value
678 return CountTrailingZeros(Vec4::sEquals(*this, value).GetTrues());
679}
680
682{
683#if defined(JPH_USE_AVX512)
684 return _mm_range_ps(mValue, mValue, 0b1000);
685#elif defined(JPH_USE_SSE)
686 return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
687#elif defined(JPH_USE_NEON)
688 return vabsq_f32(mValue);
689#else
690 return Vec4(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]), abs(mF32[3]));
691#endif
692}
693
695{
696 return sOne() / mValue;
697}
698
700{
701#if defined(JPH_USE_SSE4_1)
702 return _mm_dp_ps(mValue, inV2.mValue, 0xff);
703#elif defined(JPH_USE_NEON)
704 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
705 return vdupq_n_f32(vaddvq_f32(mul));
706#else
707 // Brackets placed so that the order is consistent with the vectorized version
708 return Vec4::sReplicate((mF32[0] * inV2.mF32[0] + mF32[1] * inV2.mF32[1]) + (mF32[2] * inV2.mF32[2] + mF32[3] * inV2.mF32[3]));
709#endif
710}
711
712float Vec4::Dot(Vec4Arg inV2) const
713{
714#if defined(JPH_USE_SSE4_1)
715 return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0xff));
716#elif defined(JPH_USE_NEON)
717 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
718 return vaddvq_f32(mul);
719#else
720 // Brackets placed so that the order is consistent with the vectorized version
721 return (mF32[0] * inV2.mF32[0] + mF32[1] * inV2.mF32[1]) + (mF32[2] * inV2.mF32[2] + mF32[3] * inV2.mF32[3]);
722#endif
723}
724
725float Vec4::LengthSq() const
726{
727#if defined(JPH_USE_SSE4_1)
728 return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0xff));
729#elif defined(JPH_USE_NEON)
730 float32x4_t mul = vmulq_f32(mValue, mValue);
731 return vaddvq_f32(mul);
732#else
733 // Brackets placed so that the order is consistent with the vectorized version
734 return (mF32[0] * mF32[0] + mF32[1] * mF32[1]) + (mF32[2] * mF32[2] + mF32[3] * mF32[3]);
735#endif
736}
737
738float Vec4::Length() const
739{
740#if defined(JPH_USE_SSE4_1)
741 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0xff)));
742#elif defined(JPH_USE_NEON)
743 float32x4_t mul = vmulq_f32(mValue, mValue);
744 float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
745 return vget_lane_f32(vsqrt_f32(sum), 0);
746#else
747 // Brackets placed so that the order is consistent with the vectorized version
748 return sqrt((mF32[0] * mF32[0] + mF32[1] * mF32[1]) + (mF32[2] * mF32[2] + mF32[3] * mF32[3]));
749#endif
750}
751
753{
754#if defined(JPH_USE_SSE)
755 return _mm_sqrt_ps(mValue);
756#elif defined(JPH_USE_NEON)
757 return vsqrtq_f32(mValue);
758#else
759 return Vec4(sqrt(mF32[0]), sqrt(mF32[1]), sqrt(mF32[2]), sqrt(mF32[3]));
760#endif
761}
762
763
765{
766#if defined(JPH_USE_AVX512)
767 return _mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90A00), 0);
768#elif defined(JPH_USE_SSE)
769 Type minus_one = _mm_set1_ps(-1.0f);
770 Type one = _mm_set1_ps(1.0f);
771 return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
772#elif defined(JPH_USE_NEON)
773 Type minus_one = vdupq_n_f32(-1.0f);
774 Type one = vdupq_n_f32(1.0f);
775 return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(mValue), vreinterpretq_u32_f32(minus_one)), vreinterpretq_u32_f32(one)));
776#else
777 return Vec4(std::signbit(mF32[0])? -1.0f : 1.0f,
778 std::signbit(mF32[1])? -1.0f : 1.0f,
779 std::signbit(mF32[2])? -1.0f : 1.0f,
780 std::signbit(mF32[3])? -1.0f : 1.0f);
781#endif
782}
783
784template <int X, int Y, int Z, int W>
785JPH_INLINE Vec4 Vec4::FlipSign() const
786{
787 static_assert(X == 1 || X == -1, "X must be 1 or -1");
788 static_assert(Y == 1 || Y == -1, "Y must be 1 or -1");
789 static_assert(Z == 1 || Z == -1, "Z must be 1 or -1");
790 static_assert(W == 1 || W == -1, "W must be 1 or -1");
791 return Vec4::sXor(*this, Vec4(X > 0? 0.0f : -0.0f, Y > 0? 0.0f : -0.0f, Z > 0? 0.0f : -0.0f, W > 0? 0.0f : -0.0f));
792}
793
795{
796#if defined(JPH_USE_SSE4_1)
797 return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0xff)));
798#elif defined(JPH_USE_NEON)
799 float32x4_t mul = vmulq_f32(mValue, mValue);
800 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
801 return vdivq_f32(mValue, vsqrtq_f32(sum));
802#else
803 return *this / Length();
804#endif
805}
806
807void Vec4::StoreFloat4(Float4 *outV) const
808{
809#if defined(JPH_USE_SSE)
810 _mm_storeu_ps(&outV->x, mValue);
811#elif defined(JPH_USE_NEON)
812 vst1q_f32(&outV->x, mValue);
813#else
814 for (int i = 0; i < 4; ++i)
815 (&outV->x)[i] = mF32[i];
816#endif
817}
818
820{
821#if defined(JPH_USE_SSE)
822 return _mm_cvttps_epi32(mValue);
823#elif defined(JPH_USE_NEON)
824 return vcvtq_u32_f32(mValue);
825#else
826 return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
827#endif
828}
829
831{
832#if defined(JPH_USE_SSE)
833 return UVec4(_mm_castps_si128(mValue));
834#elif defined(JPH_USE_NEON)
835 return vreinterpretq_u32_f32(mValue);
836#else
837 return *reinterpret_cast<const UVec4 *>(this);
838#endif
839}
840
842{
843#if defined(JPH_USE_SSE)
844 return _mm_movemask_ps(mValue);
845#elif defined(JPH_USE_NEON)
846 int32x4_t shift = JPH_NEON_INT32x4(0, 1, 2, 3);
847 return vaddvq_u32(vshlq_u32(vshrq_n_u32(vreinterpretq_u32_f32(mValue), 31), shift));
848#else
849 return (std::signbit(mF32[0])? 1 : 0) | (std::signbit(mF32[1])? 2 : 0) | (std::signbit(mF32[2])? 4 : 0) | (std::signbit(mF32[3])? 8 : 0);
850#endif
851}
852
859
866
867void Vec4::SinCos(Vec4 &outSin, Vec4 &outCos) const
868{
869 // Implementation based on sinf.c from the cephes library, combines sinf and cosf in a single function, changes octants to quadrants and vectorizes it
870 // Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
871
872 // Make argument positive and remember sign for sin only since cos is symmetric around x (highest bit of a float is the sign bit)
873 UVec4 sin_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
874 Vec4 x = Vec4::sXor(*this, sin_sign.ReinterpretAsFloat());
875
876 // x / (PI / 2) rounded to nearest int gives us the quadrant closest to x
877 UVec4 quadrant = (0.6366197723675814f * x + Vec4::sReplicate(0.5f)).ToInt();
878
879 // Make x relative to the closest quadrant.
880 // This does x = x - quadrant * PI / 2 using a two step Cody-Waite argument reduction.
881 // This improves the accuracy of the result by avoiding loss of significant bits in the subtraction.
882 // We start with x = x - quadrant * PI / 2, PI / 2 in hexadecimal notation is 0x3fc90fdb, we remove the lowest 16 bits to
883 // get 0x3fc90000 (= 1.5703125) this means we can now multiply with a number of up to 2^16 without losing any bits.
884 // This leaves us with: x = (x - quadrant * 1.5703125) - quadrant * (PI / 2 - 1.5703125).
885 // PI / 2 - 1.5703125 in hexadecimal is 0x39fdaa22, stripping the lowest 12 bits we get 0x39fda000 (= 0.0004837512969970703125)
886 // This leaves uw with: x = ((x - quadrant * 1.5703125) - quadrant * 0.0004837512969970703125) - quadrant * (PI / 2 - 1.5703125 - 0.0004837512969970703125)
887 // See: https://stackoverflow.com/questions/42455143/sine-cosine-modular-extended-precision-arithmetic
888 // After this we have x in the range [-PI / 4, PI / 4].
889 Vec4 float_quadrant = quadrant.ToFloat();
890 x = ((x - float_quadrant * 1.5703125f) - float_quadrant * 0.0004837512969970703125f) - float_quadrant * 7.549789948768648e-8f;
891
892 // Calculate x2 = x^2
893 Vec4 x2 = x * x;
894
895 // Taylor expansion:
896 // Cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! + ... = (((x2/8!- 1/6!) * x2 + 1/4!) * x2 - 1/2!) * x2 + 1
897 Vec4 taylor_cos = ((2.443315711809948e-5f * x2 - Vec4::sReplicate(1.388731625493765e-3f)) * x2 + Vec4::sReplicate(4.166664568298827e-2f)) * x2 * x2 - 0.5f * x2 + Vec4::sOne();
898 // Sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... = ((-x2/7! + 1/5!) * x2 - 1/3!) * x2 * x + x
899 Vec4 taylor_sin = ((-1.9515295891e-4f * x2 + Vec4::sReplicate(8.3321608736e-3f)) * x2 - Vec4::sReplicate(1.6666654611e-1f)) * x2 * x + x;
900
901 // The lowest 2 bits of quadrant indicate the quadrant that we are in.
902 // Let x be the original input value and x' our value that has been mapped to the range [-PI / 4, PI / 4].
903 // since cos(x) = sin(x - PI / 2) and since we want to use the Taylor expansion as close as possible to 0,
904 // we can alternate between using the Taylor expansion for sin and cos according to the following table:
905 //
906 // quadrant sin(x) cos(x)
907 // XXX00b sin(x') cos(x')
908 // XXX01b cos(x') -sin(x')
909 // XXX10b -sin(x') -cos(x')
910 // XXX11b -cos(x') sin(x')
911 //
912 // So: sin_sign = bit2, cos_sign = bit1 ^ bit2, bit1 determines if we use sin or cos Taylor expansion
913 UVec4 bit1 = quadrant.LogicalShiftLeft<31>();
914 UVec4 bit2 = UVec4::sAnd(quadrant.LogicalShiftLeft<30>(), UVec4::sReplicate(0x80000000U));
915
916 // Select which one of the results is sin and which one is cos
917 Vec4 s = Vec4::sSelect(taylor_sin, taylor_cos, bit1);
918 Vec4 c = Vec4::sSelect(taylor_cos, taylor_sin, bit1);
919
920 // Update the signs
921 sin_sign = UVec4::sXor(sin_sign, bit2);
922 UVec4 cos_sign = UVec4::sXor(bit1, bit2);
923
924 // Correct the signs
925 outSin = Vec4::sXor(s, sin_sign.ReinterpretAsFloat());
926 outCos = Vec4::sXor(c, cos_sign.ReinterpretAsFloat());
927}
928
930{
931 // Implementation based on tanf.c from the cephes library, see Vec4::SinCos for further details
932 // Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
933
934 // Make argument positive
935 UVec4 tan_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
936 Vec4 x = Vec4::sXor(*this, tan_sign.ReinterpretAsFloat());
937
938 // x / (PI / 2) rounded to nearest int gives us the quadrant closest to x
939 UVec4 quadrant = (0.6366197723675814f * x + Vec4::sReplicate(0.5f)).ToInt();
940
941 // Remap x to range [-PI / 4, PI / 4], see Vec4::SinCos
942 Vec4 float_quadrant = quadrant.ToFloat();
943 x = ((x - float_quadrant * 1.5703125f) - float_quadrant * 0.0004837512969970703125f) - float_quadrant * 7.549789948768648e-8f;
944
945 // Calculate x2 = x^2
946 Vec4 x2 = x * x;
947
948 // Roughly equivalent to the Taylor expansion:
949 // Tan(x) = x + x^3/3 + 2*x^5/15 + 17*x^7/315 + 62*x^9/2835 + ...
950 Vec4 tan =
951 (((((9.38540185543e-3f * x2 + Vec4::sReplicate(3.11992232697e-3f)) * x2 + Vec4::sReplicate(2.44301354525e-2f)) * x2
952 + Vec4::sReplicate(5.34112807005e-2f)) * x2 + Vec4::sReplicate(1.33387994085e-1f)) * x2 + Vec4::sReplicate(3.33331568548e-1f)) * x2 * x + x;
953
954 // For the 2nd and 4th quadrant we need to invert the value
955 UVec4 bit1 = quadrant.LogicalShiftLeft<31>();
956 tan = Vec4::sSelect(tan, Vec4::sReplicate(-1.0f) / (tan JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(+ Vec4::sReplicate(FLT_MIN))), bit1); // Add small epsilon to prevent div by zero, works because tan is always positive
957
958 // Put the sign back
959 return Vec4::sXor(tan, tan_sign.ReinterpretAsFloat());
960}
961
963{
964 // Implementation based on asinf.c from the cephes library
965 // Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
966
967 // Make argument positive
968 UVec4 asin_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
969 Vec4 a = Vec4::sXor(*this, asin_sign.ReinterpretAsFloat());
970
971 // ASin is not defined outside the range [-1, 1] but it often happens that a value is slightly above 1 so we just clamp here
972 a = Vec4::sMin(a, Vec4::sOne());
973
974 // When |x| <= 0.5 we use the asin approximation as is
975 Vec4 z1 = a * a;
976 Vec4 x1 = a;
977
978 // When |x| > 0.5 we use the identity asin(x) = PI / 2 - 2 * asin(sqrt((1 - x) / 2))
979 Vec4 z2 = 0.5f * (Vec4::sOne() - a);
980 Vec4 x2 = z2.Sqrt();
981
982 // Select which of the two situations we have
983 UVec4 greater = Vec4::sGreater(a, Vec4::sReplicate(0.5f));
984 Vec4 z = Vec4::sSelect(z1, z2, greater);
985 Vec4 x = Vec4::sSelect(x1, x2, greater);
986
987 // Polynomial approximation of asin
988 z = ((((4.2163199048e-2f * z + Vec4::sReplicate(2.4181311049e-2f)) * z + Vec4::sReplicate(4.5470025998e-2f)) * z + Vec4::sReplicate(7.4953002686e-2f)) * z + Vec4::sReplicate(1.6666752422e-1f)) * z * x + x;
989
990 // If |x| > 0.5 we need to apply the remainder of the identity above
991 z = Vec4::sSelect(z, Vec4::sReplicate(0.5f * JPH_PI) - (z + z), greater);
992
993 // Put the sign back
994 return Vec4::sXor(z, asin_sign.ReinterpretAsFloat());
995}
996
998{
999 // Not the most accurate, but simple
1000 return Vec4::sReplicate(0.5f * JPH_PI) - ASin();
1001}
1002
1004{
1005 // Implementation based on atanf.c from the cephes library
1006 // Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
1007
1008 // Make argument positive
1009 UVec4 atan_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
1010 Vec4 x = Vec4::sXor(*this, atan_sign.ReinterpretAsFloat());
1011 Vec4 y = Vec4::sZero();
1012
1013 // If x > Tan(PI / 8)
1014 UVec4 greater1 = Vec4::sGreater(x, Vec4::sReplicate(0.4142135623730950f));
1015 Vec4 x1 = (x - Vec4::sOne()) / (x + Vec4::sOne());
1016
1017 // If x > Tan(3 * PI / 8)
1018 UVec4 greater2 = Vec4::sGreater(x, Vec4::sReplicate(2.414213562373095f));
1019 Vec4 x2 = Vec4::sReplicate(-1.0f) / (x JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(+ Vec4::sReplicate(FLT_MIN))); // Add small epsilon to prevent div by zero, works because x is always positive
1020
1021 // Apply first if
1022 x = Vec4::sSelect(x, x1, greater1);
1023 y = Vec4::sSelect(y, Vec4::sReplicate(0.25f * JPH_PI), greater1);
1024
1025 // Apply second if
1026 x = Vec4::sSelect(x, x2, greater2);
1027 y = Vec4::sSelect(y, Vec4::sReplicate(0.5f * JPH_PI), greater2);
1028
1029 // Polynomial approximation
1030 Vec4 z = x * x;
1031 y += (((8.05374449538e-2f * z - Vec4::sReplicate(1.38776856032e-1f)) * z + Vec4::sReplicate(1.99777106478e-1f)) * z - Vec4::sReplicate(3.33329491539e-1f)) * z * x + x;
1032
1033 // Put the sign back
1034 return Vec4::sXor(y, atan_sign.ReinterpretAsFloat());
1035}
1036
1038{
1039 UVec4 sign_mask = UVec4::sReplicate(0x80000000U);
1040
1041 // Determine absolute value and sign of y
1042 UVec4 y_sign = UVec4::sAnd(inY.ReinterpretAsInt(), sign_mask);
1043 Vec4 y_abs = Vec4::sXor(inY, y_sign.ReinterpretAsFloat());
1044
1045 // Determine absolute value and sign of x
1046 UVec4 x_sign = UVec4::sAnd(inX.ReinterpretAsInt(), sign_mask);
1047 Vec4 x_abs = Vec4::sXor(inX, x_sign.ReinterpretAsFloat());
1048
1049 // Always divide smallest / largest to avoid dividing by zero
1050 UVec4 x_is_numerator = Vec4::sLess(x_abs, y_abs);
1051 Vec4 numerator = Vec4::sSelect(y_abs, x_abs, x_is_numerator);
1052 Vec4 denominator = Vec4::sSelect(x_abs, y_abs, x_is_numerator);
1053 Vec4 atan = (numerator / denominator).ATan();
1054
1055 // If we calculated x / y instead of y / x the result is PI / 2 - result (note that this is true because we know the result is positive because the input was positive)
1056 atan = Vec4::sSelect(atan, Vec4::sReplicate(0.5f * JPH_PI) - atan, x_is_numerator);
1057
1058 // Now we need to map to the correct quadrant
1059 // x_sign y_sign result
1060 // +1 +1 atan
1061 // -1 +1 -atan + PI
1062 // -1 -1 atan - PI
1063 // +1 -1 -atan
1064 // This can be written as: x_sign * y_sign * (atan - (x_sign < 0? PI : 0))
1065 atan -= Vec4::sAnd(x_sign.ArithmeticShiftRight<31>().ReinterpretAsFloat(), Vec4::sReplicate(JPH_PI));
1066 atan = Vec4::sXor(atan, UVec4::sXor(x_sign, y_sign).ReinterpretAsFloat());
1067 return atan;
1068}
1069
1071{
1072 constexpr float cOneOverSqrt2 = 0.70710678f;
1073 constexpr uint cNumBits = 9;
1074 constexpr uint cMask = (1 << cNumBits) - 1;
1075
1076 // Store sign bit
1077 Vec4 v = *this;
1078 uint32 max_element = v.Abs().GetHighestComponentIndex();
1079 uint32 value = 0;
1080 if (v[max_element] < 0.0f)
1081 {
1082 value = 0x80000000u;
1083 v = -v;
1084 }
1085
1086 // Store highest component
1087 value |= max_element << 29;
1088
1089 // Store the other three components in a compressed format
1090 UVec4 compressed = Vec4::sClamp((v + Vec4::sReplicate(cOneOverSqrt2)) * (float(cMask) / (2.0f * cOneOverSqrt2)) + Vec4::sReplicate(0.5f), Vec4::sZero(), Vec4::sReplicate(cMask)).ToInt();
1091 switch (max_element)
1092 {
1093 case 0:
1094 compressed = compressed.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED>();
1095 break;
1096
1097 case 1:
1098 compressed = compressed.Swizzle<SWIZZLE_X, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED>();
1099 break;
1100
1101 case 2:
1102 compressed = compressed.Swizzle<SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W, SWIZZLE_UNUSED>();
1103 break;
1104 }
1105
1106 value |= compressed.GetX();
1107 value |= compressed.GetY() << cNumBits;
1108 value |= compressed.GetZ() << 2 * cNumBits;
1109 return value;
1110}
1111
1113{
1114 constexpr float cOneOverSqrt2 = 0.70710678f;
1115 constexpr uint cNumBits = 9;
1116 constexpr uint cMask = (1u << cNumBits) - 1;
1117
1118 // Restore three components
1119 Vec4 v = Vec4(UVec4(inValue & cMask, (inValue >> cNumBits) & cMask, (inValue >> (2 * cNumBits)) & cMask, 0).ToFloat()) * (2.0f * cOneOverSqrt2 / float(cMask)) - Vec4(cOneOverSqrt2, cOneOverSqrt2, cOneOverSqrt2, 0.0f);
1120 JPH_ASSERT(v.GetW() == 0.0f);
1121
1122 // Restore the highest component
1123 v.SetW(sqrt(max(1.0f - v.LengthSq(), 0.0f)));
1124
1125 // Extract sign
1126 if ((inValue & 0x80000000u) != 0)
1127 v = -v;
1128
1129 // Swizzle the components in place
1130 switch ((inValue >> 29) & 3)
1131 {
1132 case 0:
1134 break;
1135
1136 case 1:
1138 break;
1139
1140 case 2:
1142 break;
1143 }
1144
1145 return v;
1146}
1147
std::uint8_t uint8
Definition Core.h:493
std::uint64_t uint64
Definition Core.h:496
unsigned int uint
Definition Core.h:492
#define JPH_NAMESPACE_END
Definition Core.h:419
std::uint32_t uint32
Definition Core.h:495
#define JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(...)
Definition Core.h:560
#define JPH_NAMESPACE_BEGIN
Definition Core.h:413
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
uint CountTrailingZeros(uint32 inValue)
Compute number of trailing zero bits (how many low bits are zero)
Definition Math.h:98
JPH_INLINE To BitCast(const From &inValue)
Definition Math.h:192
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_W
Use the W component.
Definition Swizzle.h:15
@ SWIZZLE_X
Use the X component.
Definition Swizzle.h:12
@ SWIZZLE_UNUSED
We always use the Z component when we don't specifically want to initialize a value,...
Definition Swizzle.h:16
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Vec4 operator*(float inV1, Vec4Arg inV2)
Multiply vector with float.
Definition Vec4.inl:422
Class that holds 4 float values. Convert to Vec4 to perform calculations.
Definition Float4.h:11
float x
Definition Float4.h:26
float y
Definition Float4.h:27
float z
Definition Float4.h:28
float w
Definition Float4.h:29
Definition UVec4.h:12
JPH_INLINE UVec4 Swizzle() const
Swizzle the elements of this vector.
JPH_INLINE uint32 GetZ() const
Definition UVec4.h:104
JPH_INLINE UVec4 LogicalShiftLeft() const
Shift all components by Count bits to the left (filling with zeros from the right)
static JPH_INLINE UVec4 sSelect(UVec4Arg inNotSet, UVec4Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit of inControl = 1.
Definition UVec4.inl:157
JPH_INLINE uint32 GetY() const
Definition UVec4.h:103
static JPH_INLINE UVec4 sReplicate(uint32 inV)
Replicate int inV across all components.
Definition UVec4.inl:56
JPH_INLINE bool TestAllTrue() const
Test if all components are true (true is when highest bit of component is set)
Definition UVec4.inl:463
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:202
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:174
JPH_INLINE uint32 GetW() const
Definition UVec4.h:105
Type mValue
Definition UVec4.h:223
JPH_INLINE uint32 GetX() const
Get individual components.
Definition UVec4.h:102
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:188
JPH_INLINE UVec4 ArithmeticShiftRight() const
Shift all components by Count bits to the right (shifting in the value of the highest bit)
JPH_INLINE Vec4 ToFloat() const
Convert each component from an int to a float.
Definition UVec4.inl:356
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:367
uint32 mU32[4]
Definition UVec4.h:224
Definition Vec3.h:17
Type mValue
Definition Vec3.h:299
float mF32[4]
Definition Vec3.h:300
Definition Vec4.h:14
JPH_INLINE bool IsNearZero(float inMaxDistSq=1.0e-12f) const
Test if vector is near zero.
Definition Vec4.inl:372
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec4.inl:573
static JPH_INLINE void sSort4(Vec4 &ioValue, UVec4 &ioIndex)
Definition Vec4.inl:314
Vec4 ATan() const
Calculate the arc tangent for each element of this vector (returns value in the range [-PI / 2, PI / 2])
Definition Vec4.inl:1003
static JPH_INLINE UVec4 sGreater(Vec4Arg inV1, Vec4Arg inV2)
Greater than (component wise)
Definition Vec4.inl:218
float mF32[4]
Definition Vec4.h:312
JPH_INLINE Vec3 SplatW3() const
Replicate the W component to all components.
Definition Vec4.inl:650
JPH_INLINE Vec4 operator-() const
Negate.
Definition Vec4.inl:513
Vec4()=default
Constructor.
static JPH_INLINE Vec4 sAnd(Vec4Arg inV1, Vec4Arg inV2)
Logical and (component wise)
Definition Vec4.inl:303
static JPH_INLINE Vec4 sLoadFloat4Aligned(const Float4 *inV)
Load 4 floats from memory, 16 bytes aligned.
Definition Vec4.inl:106
static Vec4 sATan2(Vec4Arg inY, Vec4Arg inX)
Calculate the arc tangent of y / x using the signs of the arguments to determine the correct quadrant (returns value in the range [-PI, PI])
Definition Vec4.inl:1037
JPH_INLINE void SetW(float inW)
Definition Vec4.h:129
JPH_INLINE Vec4 GetSign() const
Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
Definition Vec4.inl:764
Vec4 ASin() const
Definition Vec4.inl:962
JPH_INLINE Vec4 FlipSign() const
Flips the signs of the components, e.g. FlipSign<-1, 1, -1, 1>() will flip the signs of the X and Z components.
Definition Vec4.inl:785
static JPH_INLINE Vec4 sXor(Vec4Arg inV1, Vec4Arg inV2)
Logical xor (component wise)
Definition Vec4.inl:292
JPH_INLINE Vec4 Abs() const
Return the absolute value of each of the components.
Definition Vec4.inl:681
JPH_INLINE Vec4 operator/(float inV2) const
Divide vector by float.
Definition Vec4.inl:436
Vec4 Tan() const
Calculate the tangent for each element of this vector (input in radians)
Definition Vec4.inl:929
JPH_INLINE float GetW() const
Definition Vec4.h:122
JPH_INLINE UVec4 ToInt() const
Convert each component from a float to an int.
Definition Vec4.inl:819
JPH_INLINE Vec4 & operator+=(Vec4Arg inV2)
Add two float vectors (component wise)
Definition Vec4.inl:500
static JPH_INLINE UVec4 sLessOrEqual(Vec4Arg inV1, Vec4Arg inV2)
Less than or equal (component wise)
Definition Vec4.inl:204
static JPH_INLINE UVec4 sLess(Vec4Arg inV1, Vec4Arg inV2)
Less than (component wise)
Definition Vec4.inl:190
JPH_INLINE int GetLowestComponentIndex() const
Get index of component with lowest value.
Definition Vec4.inl:661
JPH_INLINE float Length() const
Length of vector.
Definition Vec4.inl:738
static JPH_INLINE void sSort4Reverse(Vec4 &ioValue, UVec4 &ioIndex)
Definition Vec4.inl:338
static JPH_INLINE Vec4 sOne()
Vector with all ones.
Definition Vec4.inl:85
static JPH_INLINE Vec4 sFusedMultiplyAdd(Vec4Arg inMul1, Vec4Arg inMul2, Vec4Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition Vec4.inl:246
JPH_INLINE Vec4 Normalized() const
Normalize vector.
Definition Vec4.inl:794
static JPH_INLINE UVec4 sEquals(Vec4Arg inV1, Vec4Arg inV2)
Equals (component wise)
Definition Vec4.inl:176
JPH_INLINE float ReduceMax() const
Get the maximum of X, Y, Z and W.
Definition Vec4.inl:860
JPH_INLINE Vec4 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition Vec4.inl:694
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec4.inl:584
JPH_INLINE UVec4 ReinterpretAsInt() const
Reinterpret Vec4 as a UVec4 (doesn't change the bits)
Definition Vec4.inl:830
static JPH_INLINE UVec4 sGreaterOrEqual(Vec4Arg inV1, Vec4Arg inV2)
Greater than or equal (component wise)
Definition Vec4.inl:232
static JPH_INLINE Vec4 sMin(Vec4Arg inV1, Vec4Arg inV2)
Return the minimum value of each of the components.
Definition Vec4.inl:143
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec4.inl:595
JPH_INLINE Vec4 Sqrt() const
Component wise square root.
Definition Vec4.inl:752
JPH_INLINE Vec4 & operator*=(float inV2)
Multiply vector with float.
Definition Vec4.inl:447
static JPH_INLINE Vec4 sGatherFloat4(const float *inBase, UVec4Arg inOffsets)
Gather 4 floats from memory at inBase + inOffsets[i] * Scale.
JPH_INLINE Vec4 operator+(Vec4Arg inV2) const
Add two float vectors (component wise)
Definition Vec4.inl:486
JPH_INLINE Vec4 & operator/=(float inV2)
Divide vector by float.
Definition Vec4.inl:473
JPH_INLINE bool IsNormalized(float inTolerance=1.0e-6f) const
Test if vector is normalized.
Definition Vec4.inl:377
JPH_INLINE bool operator==(Vec4Arg inV2) const
Comparison.
Definition Vec4.inl:362
JPH_INLINE Vec4 SplatW() const
Replicate the W component to all components.
Definition Vec4.inl:606
JPH_INLINE Vec4 DotV(Vec4Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition Vec4.inl:699
JPH_INLINE bool IsClose(Vec4Arg inV2, float inMaxDistSq=1.0e-12f) const
Test if two vectors are close.
Definition Vec4.inl:367
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:119
static JPH_INLINE Vec4 sLoadFloat4(const Float4 *inV)
Load 4 floats from memory.
Definition Vec4.inl:95
static JPH_INLINE Vec4 sZero()
Vector with all zeros.
Definition Vec4.inl:63
JPH_INLINE Vec4 Swizzle() const
Swizzle the elements of this vector.
struct { float mData[4];} Type
Definition Vec4.h:24
static JPH_INLINE Vec4 sOr(Vec4Arg inV1, Vec4Arg inV2)
Logical or (component wise)
Definition Vec4.inl:281
JPH_INLINE float ReduceMin() const
Get the minimum of X, Y, Z and W.
Definition Vec4.inl:853
Type mValue
Definition Vec4.h:311
static JPH_INLINE Vec4 sDecompressUnitVector(uint32 inValue)
Decompress a unit vector from a 32 bit value.
Definition Vec4.inl:1112
JPH_INLINE uint32 CompressUnitVector() const
Compress a unit vector to a 32 bit value, precision is around 0.5 * 10^-3.
Definition Vec4.inl:1070
JPH_INLINE Vec4 & operator-=(Vec4Arg inV2)
Subtract two float vectors (component wise)
Definition Vec4.inl:546
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec4.inl:725
static JPH_INLINE Vec4 sMax(Vec4Arg inV1, Vec4Arg inV2)
Return the maximum of each of the components.
Definition Vec4.inl:157
JPH_INLINE float Dot(Vec4Arg inV2) const
Dot product.
Definition Vec4.inl:712
JPH_INLINE Vec3 SplatZ3() const
Replicate the Z component to all components.
Definition Vec4.inl:639
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition Vec4.inl:382
JPH_INLINE Vec3 SplatX3() const
Replicate the X component to all components.
Definition Vec4.inl:617
static JPH_INLINE Vec4 sNaN()
Vector with all NaN's.
Definition Vec4.inl:90
Vec4 ACos() const
Definition Vec4.inl:997
static JPH_INLINE Vec4 sSelect(Vec4Arg inNotSet, Vec4Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit of inControl = 1.
Definition Vec4.inl:264
JPH_INLINE int GetSignBits() const
Store if X is negative in bit 0, Y in bit 1, Z in bit 2 and W in bit 3.
Definition Vec4.inl:841
JPH_INLINE int GetHighestComponentIndex() const
Get index of component with highest value.
Definition Vec4.inl:671
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:74
JPH_INLINE Vec3 SplatY3() const
Replicate the Y component to all components.
Definition Vec4.inl:628
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:867
JPH_INLINE void StoreFloat4(Float4 *outV) const
Store 4 floats to memory.
Definition Vec4.inl:807
static JPH_INLINE Vec4 sClamp(Vec4Arg inV, Vec4Arg inMin, Vec4Arg inMax)
Clamp a vector between min and max (component wise)
Definition Vec4.inl:171
friend JPH_INLINE Vec4 operator*(float inV1, Vec4Arg inV2)
Multiply vector with float.
Definition Vec4.inl:422