Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
Vec4.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
6#include <Jolt/Math/Vec3.h>
7#include <Jolt/Math/UVec4.h>
8
10
11// Constructor
13 mValue(inRHS.mValue)
14{
15}
16
// Construct from a Vec3 (x, y, z) and a separate w component
Vec4::Vec4(Vec3Arg inRHS, float inW)
{
#if defined(JPH_USE_SSE4_1)
	// Blend mask 8 (0b1000) takes lane 3 (w) from the broadcasted inW, lanes 0..2 from inRHS
	mValue = _mm_blend_ps(inRHS.mValue, _mm_set1_ps(inW), 8);
#elif defined(JPH_USE_NEON)
	// Insert inW into lane 3, keeping x, y, z from inRHS
	mValue = vsetq_lane_f32(inW, inRHS.mValue, 3);
#else
	// Scalar fallback: copy x, y, z then set w
	for (int i = 0; i < 3; i++)
		mF32[i] = inRHS.mF32[i];
	mF32[3] = inW;
#endif
}
29
// Construct from 4 individual float components
Vec4::Vec4(float inX, float inY, float inZ, float inW)
{
#if defined(JPH_USE_SSE)
	// Note: _mm_set_ps takes its arguments from high lane to low lane, so w comes first
	mValue = _mm_set_ps(inW, inZ, inY, inX);
#elif defined(JPH_USE_NEON)
	// Pack the float bit patterns into two 64-bit halves, then combine into one 128-bit register
	uint32x2_t xy = vcreate_u32(static_cast<uint64>(BitCast<uint32>(inX)) | (static_cast<uint64>(BitCast<uint32>(inY)) << 32));
	uint32x2_t zw = vcreate_u32(static_cast<uint64>(BitCast<uint32>(inZ)) | (static_cast<uint64>(BitCast<uint32>(inW)) << 32));
	mValue = vreinterpretq_f32_u32(vcombine_u32(xy, zw));
#else
	mF32[0] = inX;
	mF32[1] = inY;
	mF32[2] = inZ;
	mF32[3] = inW;
#endif
}
45
46template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
48{
49 static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
50 static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
51 static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
52 static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
53
54#if defined(JPH_USE_SSE)
55 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
56#elif defined(JPH_USE_NEON)
57 return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
58#else
59 return Vec4(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ], mF32[SwizzleW]);
60#endif
61}
62
64{
65#if defined(JPH_USE_SSE)
66 return _mm_setzero_ps();
67#elif defined(JPH_USE_NEON)
68 return vdupq_n_f32(0);
69#else
70 return Vec4(0, 0, 0, 0);
71#endif
72}
73
75{
76#if defined(JPH_USE_SSE)
77 return _mm_set1_ps(inV);
78#elif defined(JPH_USE_NEON)
79 return vdupq_n_f32(inV);
80#else
81 return Vec4(inV, inV, inV, inV);
82#endif
83}
84
86{
87 return sReplicate(numeric_limits<float>::quiet_NaN());
88}
89
91{
92#if defined(JPH_USE_SSE)
93 return _mm_loadu_ps(&inV->x);
94#elif defined(JPH_USE_NEON)
95 return vld1q_f32(&inV->x);
96#else
97 return Vec4(inV->x, inV->y, inV->z, inV->w);
98#endif
99}
100
102{
103#if defined(JPH_USE_SSE)
104 return _mm_load_ps(&inV->x);
105#elif defined(JPH_USE_NEON)
106 return vld1q_f32(&inV->x);
107#else
108 return Vec4(inV->x, inV->y, inV->z, inV->w);
109#endif
110}
111
// Gather 4 floats from memory: component i is read at byte address inBase + inOffsets[i] * Scale.
// Note that inOffsets holds byte offsets (scaled by the template parameter Scale), not float indices.
template <const int Scale>
Vec4 Vec4::sGatherFloat4(const float *inBase, UVec4Arg inOffsets)
{
#if defined(JPH_USE_SSE)
	#ifdef JPH_USE_AVX2
		// Hardware gather instruction
		return _mm_i32gather_ps(inBase, inOffsets.mValue, Scale);
	#else
		// Emulate the gather with 4 scalar loads, merging (x, y) and (z, w) pairs first, then the full vector
		const uint8 *base = reinterpret_cast<const uint8 *>(inBase);
		Type x = _mm_load_ss(reinterpret_cast<const float *>(base + inOffsets.GetX() * Scale));
		Type y = _mm_load_ss(reinterpret_cast<const float *>(base + inOffsets.GetY() * Scale));
		Type xy = _mm_unpacklo_ps(x, y);
		Type z = _mm_load_ss(reinterpret_cast<const float *>(base + inOffsets.GetZ() * Scale));
		Type w = _mm_load_ss(reinterpret_cast<const float *>(base + inOffsets.GetW() * Scale));
		Type zw = _mm_unpacklo_ps(z, w);
		return _mm_movelh_ps(xy, zw);
	#endif
#else
	// Scalar fallback (also used on NEON, which has no gather): 4 individual loads
	const uint8 *base = reinterpret_cast<const uint8 *>(inBase);
	float x = *reinterpret_cast<const float *>(base + inOffsets.GetX() * Scale);
	float y = *reinterpret_cast<const float *>(base + inOffsets.GetY() * Scale);
	float z = *reinterpret_cast<const float *>(base + inOffsets.GetZ() * Scale);
	float w = *reinterpret_cast<const float *>(base + inOffsets.GetW() * Scale);
	return Vec4(x, y, z, w);
#endif
}
137
139{
140#if defined(JPH_USE_SSE)
141 return _mm_min_ps(inV1.mValue, inV2.mValue);
142#elif defined(JPH_USE_NEON)
143 return vminq_f32(inV1.mValue, inV2.mValue);
144#else
145 return Vec4(min(inV1.mF32[0], inV2.mF32[0]),
146 min(inV1.mF32[1], inV2.mF32[1]),
147 min(inV1.mF32[2], inV2.mF32[2]),
148 min(inV1.mF32[3], inV2.mF32[3]));
149#endif
150}
151
153{
154#if defined(JPH_USE_SSE)
155 return _mm_max_ps(inV1.mValue, inV2.mValue);
156#elif defined(JPH_USE_NEON)
157 return vmaxq_f32(inV1.mValue, inV2.mValue);
158#else
159 return Vec4(max(inV1.mF32[0], inV2.mF32[0]),
160 max(inV1.mF32[1], inV2.mF32[1]),
161 max(inV1.mF32[2], inV2.mF32[2]),
162 max(inV1.mF32[3], inV2.mF32[3]));
163#endif
164}
165
167{
168#if defined(JPH_USE_SSE)
169 return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
170#elif defined(JPH_USE_NEON)
171 return vceqq_f32(inV1.mValue, inV2.mValue);
172#else
173 return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
174 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
175 inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0,
176 inV1.mF32[3] == inV2.mF32[3]? 0xffffffffu : 0);
177#endif
178}
179
181{
182#if defined(JPH_USE_SSE)
183 return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
184#elif defined(JPH_USE_NEON)
185 return vcltq_f32(inV1.mValue, inV2.mValue);
186#else
187 return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
188 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
189 inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0,
190 inV1.mF32[3] < inV2.mF32[3]? 0xffffffffu : 0);
191#endif
192}
193
195{
196#if defined(JPH_USE_SSE)
197 return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
198#elif defined(JPH_USE_NEON)
199 return vcleq_f32(inV1.mValue, inV2.mValue);
200#else
201 return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
202 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
203 inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0,
204 inV1.mF32[3] <= inV2.mF32[3]? 0xffffffffu : 0);
205#endif
206}
207
209{
210#if defined(JPH_USE_SSE)
211 return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
212#elif defined(JPH_USE_NEON)
213 return vcgtq_f32(inV1.mValue, inV2.mValue);
214#else
215 return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
216 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
217 inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0,
218 inV1.mF32[3] > inV2.mF32[3]? 0xffffffffu : 0);
219#endif
220}
221
223{
224#if defined(JPH_USE_SSE)
225 return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
226#elif defined(JPH_USE_NEON)
227 return vcgeq_f32(inV1.mValue, inV2.mValue);
228#else
229 return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
230 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
231 inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0,
232 inV1.mF32[3] >= inV2.mF32[3]? 0xffffffffu : 0);
233#endif
234}
235
237{
238#if defined(JPH_USE_SSE)
239 #ifdef JPH_USE_FMADD
240 return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
241 #else
242 return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
243 #endif
244#elif defined(JPH_USE_NEON)
245 return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
246#else
247 return Vec4(inMul1.mF32[0] * inMul2.mF32[0] + inAdd.mF32[0],
248 inMul1.mF32[1] * inMul2.mF32[1] + inAdd.mF32[1],
249 inMul1.mF32[2] * inMul2.mF32[2] + inAdd.mF32[2],
250 inMul1.mF32[3] * inMul2.mF32[3] + inAdd.mF32[3]);
251#endif
252}
253
// Component wise select: returns inNotSet[i] when the highest bit of inControl[i] is 0, inSet[i] when it is 1
Vec4 Vec4::sSelect(Vec4Arg inNotSet, Vec4Arg inSet, UVec4Arg inControl)
{
#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
	// blendv selects per lane based on the sign bit of the control operand
	return _mm_blendv_ps(inNotSet.mValue, inSet.mValue, _mm_castsi128_ps(inControl.mValue));
#elif defined(JPH_USE_SSE)
	// Arithmetic shift right by 31 broadcasts the sign bit across the lane, producing an all-ones/all-zeros mask
	__m128 is_set = _mm_castsi128_ps(_mm_srai_epi32(inControl.mValue, 31));
	return _mm_or_ps(_mm_and_ps(is_set, inSet.mValue), _mm_andnot_ps(is_set, inNotSet.mValue));
#elif defined(JPH_USE_NEON)
	// Same sign-bit broadcast trick, followed by a bitwise select
	return vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inSet.mValue, inNotSet.mValue);
#else
	// Scalar fallback: test the sign bit of each control lane explicitly
	Vec4 result;
	for (int i = 0; i < 4; i++)
		result.mF32[i] = (inControl.mU32[i] & 0x80000000u) ? inSet.mF32[i] : inNotSet.mF32[i];
	return result;
#endif
}
270
272{
273#if defined(JPH_USE_SSE)
274 return _mm_or_ps(inV1.mValue, inV2.mValue);
275#elif defined(JPH_USE_NEON)
276 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
277#else
279#endif
280}
281
283{
284#if defined(JPH_USE_SSE)
285 return _mm_xor_ps(inV1.mValue, inV2.mValue);
286#elif defined(JPH_USE_NEON)
287 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
288#else
290#endif
291}
292
294{
295#if defined(JPH_USE_SSE)
296 return _mm_and_ps(inV1.mValue, inV2.mValue);
297#elif defined(JPH_USE_NEON)
298 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
299#else
301#endif
302}
303
304void Vec4::sSort4(Vec4 &ioValue, UVec4 &ioIndex)
305{
306 // Pass 1, test 1st vs 3rd, 2nd vs 4th
309 UVec4 c1 = sLess(ioValue, v1).Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W>();
310 ioValue = sSelect(ioValue, v1, c1);
311 ioIndex = UVec4::sSelect(ioIndex, i1, c1);
312
313 // Pass 2, test 1st vs 2nd, 3rd vs 4th
316 UVec4 c2 = sLess(ioValue, v2).Swizzle<SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_W, SWIZZLE_W>();
317 ioValue = sSelect(ioValue, v2, c2);
318 ioIndex = UVec4::sSelect(ioIndex, i2, c2);
319
320 // Pass 3, test 2nd vs 3rd component
323 UVec4 c3 = sLess(ioValue, v3).Swizzle<SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_W>();
324 ioValue = sSelect(ioValue, v3, c3);
325 ioIndex = UVec4::sSelect(ioIndex, i3, c3);
326}
327
328void Vec4::sSort4Reverse(Vec4 &ioValue, UVec4 &ioIndex)
329{
330 // Pass 1, test 1st vs 3rd, 2nd vs 4th
334 ioValue = sSelect(ioValue, v1, c1);
335 ioIndex = UVec4::sSelect(ioIndex, i1, c1);
336
337 // Pass 2, test 1st vs 2nd, 3rd vs 4th
341 ioValue = sSelect(ioValue, v2, c2);
342 ioIndex = UVec4::sSelect(ioIndex, i2, c2);
343
344 // Pass 3, test 2nd vs 3rd component
348 ioValue = sSelect(ioValue, v3, c3);
349 ioIndex = UVec4::sSelect(ioIndex, i3, c3);
350}
351
353{
354 return sEquals(*this, inV2).TestAllTrue();
355}
356
357bool Vec4::IsClose(Vec4Arg inV2, float inMaxDistSq) const
358{
359 return (inV2 - *this).LengthSq() <= inMaxDistSq;
360}
361
362bool Vec4::IsNormalized(float inTolerance) const
363{
364 return abs(LengthSq() - 1.0f) <= inTolerance;
365}
366
// Test if any of the 4 components is a NaN
bool Vec4::IsNaN() const
{
#if defined(JPH_USE_AVX512)
	// Category mask 0b10000001 tests for quiet NaN (bit 0) and signaling NaN (bit 7)
	return _mm_fpclass_ps_mask(mValue, 0b10000001) != 0;
#elif defined(JPH_USE_SSE)
	// cmpunord sets a lane to all-ones when either operand is NaN; comparing mValue with itself flags NaN lanes
	return _mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) != 0;
#elif defined(JPH_USE_NEON)
	uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
	return vaddvq_u32(vshrq_n_u32(is_equal, 31)) != 4;
#else
	return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]) || isnan(mF32[3]);
#endif
}
380
382{
383#if defined(JPH_USE_SSE)
384 return _mm_mul_ps(mValue, inV2.mValue);
385#elif defined(JPH_USE_NEON)
386 return vmulq_f32(mValue, inV2.mValue);
387#else
388 return Vec4(mF32[0] * inV2.mF32[0],
389 mF32[1] * inV2.mF32[1],
390 mF32[2] * inV2.mF32[2],
391 mF32[3] * inV2.mF32[3]);
392#endif
393}
394
395Vec4 Vec4::operator * (float inV2) const
396{
397#if defined(JPH_USE_SSE)
398 return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
399#elif defined(JPH_USE_NEON)
400 return vmulq_n_f32(mValue, inV2);
401#else
402 return Vec4(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2, mF32[3] * inV2);
403#endif
404}
405
407Vec4 operator * (float inV1, Vec4Arg inV2)
408{
409#if defined(JPH_USE_SSE)
410 return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
411#elif defined(JPH_USE_NEON)
412 return vmulq_n_f32(inV2.mValue, inV1);
413#else
414 return Vec4(inV1 * inV2.mF32[0],
415 inV1 * inV2.mF32[1],
416 inV1 * inV2.mF32[2],
417 inV1 * inV2.mF32[3]);
418#endif
419}
420
421Vec4 Vec4::operator / (float inV2) const
422{
423#if defined(JPH_USE_SSE)
424 return _mm_div_ps(mValue, _mm_set1_ps(inV2));
425#elif defined(JPH_USE_NEON)
426 return vdivq_f32(mValue, vdupq_n_f32(inV2));
427#else
428 return Vec4(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2, mF32[3] / inV2);
429#endif
430}
431
433{
434#if defined(JPH_USE_SSE)
435 mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
436#elif defined(JPH_USE_NEON)
437 mValue = vmulq_n_f32(mValue, inV2);
438#else
439 for (int i = 0; i < 4; ++i)
440 mF32[i] *= inV2;
441#endif
442 return *this;
443}
444
446{
447#if defined(JPH_USE_SSE)
448 mValue = _mm_mul_ps(mValue, inV2.mValue);
449#elif defined(JPH_USE_NEON)
450 mValue = vmulq_f32(mValue, inV2.mValue);
451#else
452 for (int i = 0; i < 4; ++i)
453 mF32[i] *= inV2.mF32[i];
454#endif
455 return *this;
456}
457
459{
460#if defined(JPH_USE_SSE)
461 mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
462#elif defined(JPH_USE_NEON)
463 mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
464#else
465 for (int i = 0; i < 4; ++i)
466 mF32[i] /= inV2;
467#endif
468 return *this;
469}
470
472{
473#if defined(JPH_USE_SSE)
474 return _mm_add_ps(mValue, inV2.mValue);
475#elif defined(JPH_USE_NEON)
476 return vaddq_f32(mValue, inV2.mValue);
477#else
478 return Vec4(mF32[0] + inV2.mF32[0],
479 mF32[1] + inV2.mF32[1],
480 mF32[2] + inV2.mF32[2],
481 mF32[3] + inV2.mF32[3]);
482#endif
483}
484
486{
487#if defined(JPH_USE_SSE)
488 mValue = _mm_add_ps(mValue, inV2.mValue);
489#elif defined(JPH_USE_NEON)
490 mValue = vaddq_f32(mValue, inV2.mValue);
491#else
492 for (int i = 0; i < 4; ++i)
493 mF32[i] += inV2.mF32[i];
494#endif
495 return *this;
496}
497
499{
500#if defined(JPH_USE_SSE)
501 return _mm_sub_ps(_mm_setzero_ps(), mValue);
502#elif defined(JPH_USE_NEON)
503 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
504 return vsubq_f32(vdupq_n_f32(0), mValue);
505 #else
506 return vnegq_f32(mValue);
507 #endif
508#else
509 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
510 return Vec4(0.0f - mF32[0], 0.0f - mF32[1], 0.0f - mF32[2], 0.0f - mF32[3]);
511 #else
512 return Vec4(-mF32[0], -mF32[1], -mF32[2], -mF32[3]);
513 #endif
514#endif
515}
516
518{
519#if defined(JPH_USE_SSE)
520 return _mm_sub_ps(mValue, inV2.mValue);
521#elif defined(JPH_USE_NEON)
522 return vsubq_f32(mValue, inV2.mValue);
523#else
524 return Vec4(mF32[0] - inV2.mF32[0],
525 mF32[1] - inV2.mF32[1],
526 mF32[2] - inV2.mF32[2],
527 mF32[3] - inV2.mF32[3]);
528#endif
529}
530
532{
533#if defined(JPH_USE_SSE)
534 mValue = _mm_sub_ps(mValue, inV2.mValue);
535#elif defined(JPH_USE_NEON)
536 mValue = vsubq_f32(mValue, inV2.mValue);
537#else
538 for (int i = 0; i < 4; ++i)
539 mF32[i] -= inV2.mF32[i];
540#endif
541 return *this;
542}
543
545{
546#if defined(JPH_USE_SSE)
547 return _mm_div_ps(mValue, inV2.mValue);
548#elif defined(JPH_USE_NEON)
549 return vdivq_f32(mValue, inV2.mValue);
550#else
551 return Vec4(mF32[0] / inV2.mF32[0],
552 mF32[1] / inV2.mF32[1],
553 mF32[2] / inV2.mF32[2],
554 mF32[3] / inV2.mF32[3]);
555#endif
556}
557
559{
560#if defined(JPH_USE_SSE)
561 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
562#elif defined(JPH_USE_NEON)
563 return vdupq_laneq_f32(mValue, 0);
564#else
565 return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
566#endif
567}
568
570{
571#if defined(JPH_USE_SSE)
572 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
573#elif defined(JPH_USE_NEON)
574 return vdupq_laneq_f32(mValue, 1);
575#else
576 return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
577#endif
578}
579
581{
582#if defined(JPH_USE_SSE)
583 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
584#elif defined(JPH_USE_NEON)
585 return vdupq_laneq_f32(mValue, 2);
586#else
587 return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
588#endif
589}
590
592{
593#if defined(JPH_USE_SSE)
594 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(3, 3, 3, 3));
595#elif defined(JPH_USE_NEON)
596 return vdupq_laneq_f32(mValue, 3);
597#else
598 return Vec4(mF32[3], mF32[3], mF32[3], mF32[3]);
599#endif
600}
601
603{
604#if defined(JPH_USE_AVX512)
605 return _mm_range_ps(mValue, mValue, 0b1000);
606#elif defined(JPH_USE_SSE)
607 return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
608#elif defined(JPH_USE_NEON)
609 return vabsq_f32(mValue);
610#else
611 return Vec4(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]), abs(mF32[3]));
612#endif
613}
614
616{
617 return sReplicate(1.0f) / mValue;
618}
619
621{
622#if defined(JPH_USE_SSE4_1)
623 return _mm_dp_ps(mValue, inV2.mValue, 0xff);
624#elif defined(JPH_USE_NEON)
625 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
626 return vdupq_n_f32(vaddvq_f32(mul));
627#else
628 // Brackets placed so that the order is consistent with the vectorized version
629 return Vec4::sReplicate((mF32[0] * inV2.mF32[0] + mF32[1] * inV2.mF32[1]) + (mF32[2] * inV2.mF32[2] + mF32[3] * inV2.mF32[3]));
630#endif
631}
632
// Dot product of this vector and inV2, returned as a scalar
float Vec4::Dot(Vec4Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
	// Mask 0xff: multiply all 4 lanes and broadcast the sum to all lanes, then extract lane 0
	return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0xff));
#elif defined(JPH_USE_NEON)
	// Component wise multiply followed by a horizontal add across all 4 lanes
	float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
	return vaddvq_f32(mul);
#else
	// Brackets placed so that the order is consistent with the vectorized version
	return (mF32[0] * inV2.mF32[0] + mF32[1] * inV2.mF32[1]) + (mF32[2] * inV2.mF32[2] + mF32[3] * inV2.mF32[3]);
#endif
}
645
// Squared length of this vector (dot product with itself)
float Vec4::LengthSq() const
{
#if defined(JPH_USE_SSE4_1)
	// Dot product of the vector with itself, result extracted from lane 0
	return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0xff));
#elif defined(JPH_USE_NEON)
	// Square component wise, then horizontal add across all 4 lanes
	float32x4_t mul = vmulq_f32(mValue, mValue);
	return vaddvq_f32(mul);
#else
	// Brackets placed so that the order is consistent with the vectorized version
	return (mF32[0] * mF32[0] + mF32[1] * mF32[1]) + (mF32[2] * mF32[2] + mF32[3] * mF32[3]);
#endif
}
658
// Length of this vector (square root of the squared length)
float Vec4::Length() const
{
#if defined(JPH_USE_SSE4_1)
	// Dot product with self gives the squared length, then take the scalar square root of lane 0
	return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0xff)));
#elif defined(JPH_USE_NEON)
	// Square, horizontal add, then square root on a 2-lane vector (only lane 0 is used)
	float32x4_t mul = vmulq_f32(mValue, mValue);
	float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
	return vget_lane_f32(vsqrt_f32(sum), 0);
#else
	// Brackets placed so that the order is consistent with the vectorized version
	return sqrt((mF32[0] * mF32[0] + mF32[1] * mF32[1]) + (mF32[2] * mF32[2] + mF32[3] * mF32[3]));
#endif
}
672
674{
675#if defined(JPH_USE_SSE)
676 return _mm_sqrt_ps(mValue);
677#elif defined(JPH_USE_NEON)
678 return vsqrtq_f32(mValue);
679#else
680 return Vec4(sqrt(mF32[0]), sqrt(mF32[1]), sqrt(mF32[2]), sqrt(mF32[3]));
681#endif
682}
683
684
686{
687#if defined(JPH_USE_AVX512)
688 return _mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90A00), 0);
689#elif defined(JPH_USE_SSE)
690 Type minus_one = _mm_set1_ps(-1.0f);
691 Type one = _mm_set1_ps(1.0f);
692 return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
693#elif defined(JPH_USE_NEON)
694 Type minus_one = vdupq_n_f32(-1.0f);
695 Type one = vdupq_n_f32(1.0f);
696 return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(mValue), vreinterpretq_u32_f32(minus_one)), vreinterpretq_u32_f32(one)));
697#else
698 return Vec4(std::signbit(mF32[0])? -1.0f : 1.0f,
699 std::signbit(mF32[1])? -1.0f : 1.0f,
700 std::signbit(mF32[2])? -1.0f : 1.0f,
701 std::signbit(mF32[3])? -1.0f : 1.0f);
702#endif
703}
704
706{
707#if defined(JPH_USE_SSE4_1)
708 return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0xff)));
709#elif defined(JPH_USE_NEON)
710 float32x4_t mul = vmulq_f32(mValue, mValue);
711 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
712 return vdivq_f32(mValue, vsqrtq_f32(sum));
713#else
714 return *this / Length();
715#endif
716}
717
// Store all 4 components to outV (no alignment requirement on outV)
void Vec4::StoreFloat4(Float4 *outV) const
{
#if defined(JPH_USE_SSE)
	// Unaligned store of all 4 lanes
	_mm_storeu_ps(&outV->x, mValue);
#elif defined(JPH_USE_NEON)
	vst1q_f32(&outV->x, mValue);
#else
	// Scalar fallback: Float4 members x, y, z, w are addressed contiguously from &outV->x
	for (int i = 0; i < 4; ++i)
		(&outV->x)[i] = mF32[i];
#endif
}
729
731{
732#if defined(JPH_USE_SSE)
733 return _mm_cvttps_epi32(mValue);
734#elif defined(JPH_USE_NEON)
735 return vcvtq_u32_f32(mValue);
736#else
737 return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
738#endif
739}
740
742{
743#if defined(JPH_USE_SSE)
744 return UVec4(_mm_castps_si128(mValue));
745#elif defined(JPH_USE_NEON)
746 return vreinterpretq_u32_f32(mValue);
747#else
748 return *reinterpret_cast<const UVec4 *>(this);
749#endif
750}
751
753{
754#if defined(JPH_USE_SSE)
755 return _mm_movemask_ps(mValue);
756#elif defined(JPH_USE_NEON)
757 int32x4_t shift = JPH_NEON_INT32x4(0, 1, 2, 3);
758 return vaddvq_u32(vshlq_u32(vshrq_n_u32(vreinterpretq_u32_f32(mValue), 31), shift));
759#else
760 return (std::signbit(mF32[0])? 1 : 0) | (std::signbit(mF32[1])? 2 : 0) | (std::signbit(mF32[2])? 4 : 0) | (std::signbit(mF32[3])? 8 : 0);
761#endif
762}
763
764float Vec4::ReduceMin() const
765{
766 Vec4 v = sMin(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_W, SWIZZLE_UNUSED>());
768 return v.GetX();
769}
770
771float Vec4::ReduceMax() const
772{
773 Vec4 v = sMax(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_W, SWIZZLE_UNUSED>());
775 return v.GetX();
776}
777
// Calculate the sine and cosine of all 4 components simultaneously, writing the results to outSin and outCos
void Vec4::SinCos(Vec4 &outSin, Vec4 &outCos) const
{
	// Implementation based on sinf.c from the cephes library, combines sinf and cosf in a single function, changes octants to quadrants and vectorizes it
	// Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)

	// Make argument positive and remember sign for sin only since cos is symmetric around x (highest bit of a float is the sign bit)
	UVec4 sin_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
	Vec4 x = Vec4::sXor(*this, sin_sign.ReinterpretAsFloat());

	// x / (PI / 2) rounded to nearest int gives us the quadrant closest to x
	UVec4 quadrant = (0.6366197723675814f * x + Vec4::sReplicate(0.5f)).ToInt();

	// Make x relative to the closest quadrant.
	// This does x = x - quadrant * PI / 2 using a two step Cody-Waite argument reduction.
	// This improves the accuracy of the result by avoiding loss of significant bits in the subtraction.
	// We start with x = x - quadrant * PI / 2, PI / 2 in hexadecimal notation is 0x3fc90fdb, we remove the lowest 16 bits to
	// get 0x3fc90000 (= 1.5703125) this means we can now multiply with a number of up to 2^16 without losing any bits.
	// This leaves us with: x = (x - quadrant * 1.5703125) - quadrant * (PI / 2 - 1.5703125).
	// PI / 2 - 1.5703125 in hexadecimal is 0x39fdaa22, stripping the lowest 12 bits we get 0x39fda000 (= 0.0004837512969970703125)
	// This leaves us with: x = ((x - quadrant * 1.5703125) - quadrant * 0.0004837512969970703125) - quadrant * (PI / 2 - 1.5703125 - 0.0004837512969970703125)
	// See: https://stackoverflow.com/questions/42455143/sine-cosine-modular-extended-precision-arithmetic
	// After this we have x in the range [-PI / 4, PI / 4].
	Vec4 float_quadrant = quadrant.ToFloat();
	x = ((x - float_quadrant * 1.5703125f) - float_quadrant * 0.0004837512969970703125f) - float_quadrant * 7.549789948768648e-8f;

	// Calculate x2 = x^2
	Vec4 x2 = x * x;

	// Taylor expansion:
	// Cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! + ... = (((x2/8!- 1/6!) * x2 + 1/4!) * x2 - 1/2!) * x2 + 1
	Vec4 taylor_cos = ((2.443315711809948e-5f * x2 - Vec4::sReplicate(1.388731625493765e-3f)) * x2 + Vec4::sReplicate(4.166664568298827e-2f)) * x2 * x2 - 0.5f * x2 + Vec4::sReplicate(1.0f);
	// Sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... = ((-x2/7! + 1/5!) * x2 - 1/3!) * x2 * x + x
	Vec4 taylor_sin = ((-1.9515295891e-4f * x2 + Vec4::sReplicate(8.3321608736e-3f)) * x2 - Vec4::sReplicate(1.6666654611e-1f)) * x2 * x + x;

	// The lowest 2 bits of quadrant indicate the quadrant that we are in.
	// Let x be the original input value and x' our value that has been mapped to the range [-PI / 4, PI / 4].
	// since cos(x) = sin(x - PI / 2) and since we want to use the Taylor expansion as close as possible to 0,
	// we can alternate between using the Taylor expansion for sin and cos according to the following table:
	//
	// quadrant	 sin(x)		 cos(x)
	// XXX00b	 sin(x')	 cos(x')
	// XXX01b	 cos(x')	-sin(x')
	// XXX10b	-sin(x')	-cos(x')
	// XXX11b	-cos(x')	 sin(x')
	//
	// So: sin_sign = bit2, cos_sign = bit1 ^ bit2, bit1 determines if we use sin or cos Taylor expansion
	UVec4 bit1 = quadrant.LogicalShiftLeft<31>();
	UVec4 bit2 = UVec4::sAnd(quadrant.LogicalShiftLeft<30>(), UVec4::sReplicate(0x80000000U));

	// Select which one of the results is sin and which one is cos
	Vec4 s = Vec4::sSelect(taylor_sin, taylor_cos, bit1);
	Vec4 c = Vec4::sSelect(taylor_cos, taylor_sin, bit1);

	// Update the signs
	sin_sign = UVec4::sXor(sin_sign, bit2);
	UVec4 cos_sign = UVec4::sXor(bit1, bit2);

	// Correct the signs
	outSin = Vec4::sXor(s, sin_sign.ReinterpretAsFloat());
	outCos = Vec4::sXor(c, cos_sign.ReinterpretAsFloat());
}
839
841{
842 // Implementation based on tanf.c from the cephes library, see Vec4::SinCos for further details
843 // Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
844
845 // Make argument positive
846 UVec4 tan_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
847 Vec4 x = Vec4::sXor(*this, tan_sign.ReinterpretAsFloat());
848
849 // x / (PI / 2) rounded to nearest int gives us the quadrant closest to x
850 UVec4 quadrant = (0.6366197723675814f * x + Vec4::sReplicate(0.5f)).ToInt();
851
852 // Remap x to range [-PI / 4, PI / 4], see Vec4::SinCos
853 Vec4 float_quadrant = quadrant.ToFloat();
854 x = ((x - float_quadrant * 1.5703125f) - float_quadrant * 0.0004837512969970703125f) - float_quadrant * 7.549789948768648e-8f;
855
856 // Calculate x2 = x^2
857 Vec4 x2 = x * x;
858
859 // Roughly equivalent to the Taylor expansion:
860 // Tan(x) = x + x^3/3 + 2*x^5/15 + 17*x^7/315 + 62*x^9/2835 + ...
861 Vec4 tan =
862 (((((9.38540185543e-3f * x2 + Vec4::sReplicate(3.11992232697e-3f)) * x2 + Vec4::sReplicate(2.44301354525e-2f)) * x2
863 + Vec4::sReplicate(5.34112807005e-2f)) * x2 + Vec4::sReplicate(1.33387994085e-1f)) * x2 + Vec4::sReplicate(3.33331568548e-1f)) * x2 * x + x;
864
865 // For the 2nd and 4th quadrant we need to invert the value
866 UVec4 bit1 = quadrant.LogicalShiftLeft<31>();
867 tan = Vec4::sSelect(tan, Vec4::sReplicate(-1.0f) / (tan JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(+ Vec4::sReplicate(FLT_MIN))), bit1); // Add small epsilon to prevent div by zero, works because tan is always positive
868
869 // Put the sign back
870 return Vec4::sXor(tan, tan_sign.ReinterpretAsFloat());
871}
872
874{
875 // Implementation based on asinf.c from the cephes library
876 // Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
877
878 // Make argument positive
879 UVec4 asin_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
880 Vec4 a = Vec4::sXor(*this, asin_sign.ReinterpretAsFloat());
881
882 // ASin is not defined outside the range [-1, 1] but it often happens that a value is slightly above 1 so we just clamp here
883 a = Vec4::sMin(a, Vec4::sReplicate(1.0f));
884
885 // When |x| <= 0.5 we use the asin approximation as is
886 Vec4 z1 = a * a;
887 Vec4 x1 = a;
888
889 // When |x| > 0.5 we use the identity asin(x) = PI / 2 - 2 * asin(sqrt((1 - x) / 2))
890 Vec4 z2 = 0.5f * (Vec4::sReplicate(1.0f) - a);
891 Vec4 x2 = z2.Sqrt();
892
893 // Select which of the two situations we have
894 UVec4 greater = Vec4::sGreater(a, Vec4::sReplicate(0.5f));
895 Vec4 z = Vec4::sSelect(z1, z2, greater);
896 Vec4 x = Vec4::sSelect(x1, x2, greater);
897
898 // Polynomial approximation of asin
899 z = ((((4.2163199048e-2f * z + Vec4::sReplicate(2.4181311049e-2f)) * z + Vec4::sReplicate(4.5470025998e-2f)) * z + Vec4::sReplicate(7.4953002686e-2f)) * z + Vec4::sReplicate(1.6666752422e-1f)) * z * x + x;
900
901 // If |x| > 0.5 we need to apply the remainder of the identity above
902 z = Vec4::sSelect(z, Vec4::sReplicate(0.5f * JPH_PI) - (z + z), greater);
903
904 // Put the sign back
905 return Vec4::sXor(z, asin_sign.ReinterpretAsFloat());
906}
907
909{
910 // Not the most accurate, but simple
911 return Vec4::sReplicate(0.5f * JPH_PI) - ASin();
912}
913
915{
916 // Implementation based on atanf.c from the cephes library
917 // Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
918
919 // Make argument positive
920 UVec4 atan_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
921 Vec4 x = Vec4::sXor(*this, atan_sign.ReinterpretAsFloat());
922 Vec4 y = Vec4::sZero();
923
924 // If x > Tan(PI / 8)
925 UVec4 greater1 = Vec4::sGreater(x, Vec4::sReplicate(0.4142135623730950f));
926 Vec4 x1 = (x - Vec4::sReplicate(1.0f)) / (x + Vec4::sReplicate(1.0f));
927
928 // If x > Tan(3 * PI / 8)
929 UVec4 greater2 = Vec4::sGreater(x, Vec4::sReplicate(2.414213562373095f));
930 Vec4 x2 = Vec4::sReplicate(-1.0f) / (x JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(+ Vec4::sReplicate(FLT_MIN))); // Add small epsilon to prevent div by zero, works because x is always positive
931
932 // Apply first if
933 x = Vec4::sSelect(x, x1, greater1);
934 y = Vec4::sSelect(y, Vec4::sReplicate(0.25f * JPH_PI), greater1);
935
936 // Apply second if
937 x = Vec4::sSelect(x, x2, greater2);
938 y = Vec4::sSelect(y, Vec4::sReplicate(0.5f * JPH_PI), greater2);
939
940 // Polynomial approximation
941 Vec4 z = x * x;
942 y += (((8.05374449538e-2f * z - Vec4::sReplicate(1.38776856032e-1f)) * z + Vec4::sReplicate(1.99777106478e-1f)) * z - Vec4::sReplicate(3.33329491539e-1f)) * z * x + x;
943
944 // Put the sign back
945 return Vec4::sXor(y, atan_sign.ReinterpretAsFloat());
946}
947
949{
950 UVec4 sign_mask = UVec4::sReplicate(0x80000000U);
951
952 // Determine absolute value and sign of y
953 UVec4 y_sign = UVec4::sAnd(inY.ReinterpretAsInt(), sign_mask);
954 Vec4 y_abs = Vec4::sXor(inY, y_sign.ReinterpretAsFloat());
955
956 // Determine absolute value and sign of x
957 UVec4 x_sign = UVec4::sAnd(inX.ReinterpretAsInt(), sign_mask);
958 Vec4 x_abs = Vec4::sXor(inX, x_sign.ReinterpretAsFloat());
959
960 // Always divide smallest / largest to avoid dividing by zero
961 UVec4 x_is_numerator = Vec4::sLess(x_abs, y_abs);
962 Vec4 numerator = Vec4::sSelect(y_abs, x_abs, x_is_numerator);
963 Vec4 denominator = Vec4::sSelect(x_abs, y_abs, x_is_numerator);
964 Vec4 atan = (numerator / denominator).ATan();
965
966 // If we calculated x / y instead of y / x the result is PI / 2 - result (note that this is true because we know the result is positive because the input was positive)
967 atan = Vec4::sSelect(atan, Vec4::sReplicate(0.5f * JPH_PI) - atan, x_is_numerator);
968
969 // Now we need to map to the correct quadrant
970 // x_sign y_sign result
971 // +1 +1 atan
972 // -1 +1 -atan + PI
973 // -1 -1 atan - PI
974 // +1 -1 -atan
975 // This can be written as: x_sign * y_sign * (atan - (x_sign < 0? PI : 0))
977 atan = Vec4::sXor(atan, UVec4::sXor(x_sign, y_sign).ReinterpretAsFloat());
978 return atan;
979}
980
std::uint8_t uint8
Definition Core.h:482
std::uint64_t uint64
Definition Core.h:485
#define JPH_NAMESPACE_END
Definition Core.h:414
std::uint32_t uint32
Definition Core.h:484
#define JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(...)
Definition Core.h:549
#define JPH_NAMESPACE_BEGIN
Definition Core.h:408
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_W
Use the W component.
Definition Swizzle.h:15
@ SWIZZLE_X
Use the X component.
Definition Swizzle.h:12
@ SWIZZLE_UNUSED
We always use the Z component when we don't specifically want to initialize a value,...
Definition Swizzle.h:16
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Vec4 operator*(float inV1, Vec4Arg inV2)
Multiply vector with float.
Definition Vec4.inl:407
Class that holds 4 float values. Convert to Vec4 to perform calculations.
Definition Float4.h:11
float x
Definition Float4.h:25
float y
Definition Float4.h:26
float z
Definition Float4.h:27
float w
Definition Float4.h:28
Definition UVec4.h:12
JPH_INLINE UVec4 Swizzle() const
Swizzle the elements in inV.
JPH_INLINE uint32 GetZ() const
Definition UVec4.h:104
JPH_INLINE UVec4 LogicalShiftLeft() const
Shift all components by Count bits to the left (filling with zeros from the left)
static JPH_INLINE UVec4 sSelect(UVec4Arg inNotSet, UVec4Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit ...
Definition UVec4.inl:157
JPH_INLINE uint32 GetY() const
Definition UVec4.h:103
static JPH_INLINE UVec4 sReplicate(uint32 inV)
Replicate int inV across all components.
Definition UVec4.inl:56
JPH_INLINE bool TestAllTrue() const
Test if all components are true (true is when highest bit of component is set)
Definition UVec4.inl:408
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:202
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:174
JPH_INLINE uint32 GetW() const
Definition UVec4.h:105
Type mValue
Definition UVec4.h:211
JPH_INLINE uint32 GetX() const
Get individual components.
Definition UVec4.h:102
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:188
JPH_INLINE UVec4 ArithmeticShiftRight() const
Shift all components by Count bits to the right (shifting in the value of the highest bit)
JPH_INLINE Vec4 ToFloat() const
Convert each component from an int to a float.
Definition UVec4.inl:329
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:340
uint32 mU32[4]
Definition UVec4.h:212
Definition Vec3.h:17
Type mValue
Definition Vec3.h:286
float mF32[4]
Definition Vec3.h:287
Definition Vec4.h:14
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec4.inl:558
static JPH_INLINE void sSort4(Vec4 &ioValue, UVec4 &ioIndex)
Definition Vec4.inl:304
Vec4 ATan() const
Calculate the arc tangent for each element of this vector (returns value in the range [-PI / 2,...
Definition Vec4.inl:914
static JPH_INLINE UVec4 sGreater(Vec4Arg inV1, Vec4Arg inV2)
Greater than (component wise)
Definition Vec4.inl:208
float mF32[4]
Definition Vec4.h:275
JPH_INLINE Vec4 operator-() const
Negate.
Definition Vec4.inl:498
Vec4()=default
Constructor.
static JPH_INLINE Vec4 sAnd(Vec4Arg inV1, Vec4Arg inV2)
Logical and (component wise)
Definition Vec4.inl:293
static JPH_INLINE Vec4 sLoadFloat4Aligned(const Float4 *inV)
Load 4 floats from memory, 16 bytes aligned.
Definition Vec4.inl:101
static Vec4 sATan2(Vec4Arg inY, Vec4Arg inX)
Calculate the arc tangent of y / x using the signs of the arguments to determine the correct quadrant...
Definition Vec4.inl:948
JPH_INLINE Vec4 GetSign() const
Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
Definition Vec4.inl:685
Vec4 ASin() const
Definition Vec4.inl:873
static JPH_INLINE Vec4 sXor(Vec4Arg inV1, Vec4Arg inV2)
Logical xor (component wise)
Definition Vec4.inl:282
JPH_INLINE Vec4 Abs() const
Return the absolute value of each of the components.
Definition Vec4.inl:602
JPH_INLINE Vec4 operator/(float inV2) const
Divide vector by float.
Definition Vec4.inl:421
Vec4 Tan() const
Calculate the tangent for each element of this vector (input in radians)
Definition Vec4.inl:840
JPH_INLINE UVec4 ToInt() const
Convert each component from a float to an int.
Definition Vec4.inl:730
JPH_INLINE Vec4 & operator+=(Vec4Arg inV2)
Add two float vectors (component wise)
Definition Vec4.inl:485
static JPH_INLINE UVec4 sLessOrEqual(Vec4Arg inV1, Vec4Arg inV2)
Less than or equal (component wise)
Definition Vec4.inl:194
static JPH_INLINE UVec4 sLess(Vec4Arg inV1, Vec4Arg inV2)
Less than (component wise)
Definition Vec4.inl:180
JPH_INLINE float Length() const
Length of vector.
Definition Vec4.inl:659
static JPH_INLINE void sSort4Reverse(Vec4 &ioValue, UVec4 &ioIndex)
Definition Vec4.inl:328
static JPH_INLINE Vec4 sFusedMultiplyAdd(Vec4Arg inMul1, Vec4Arg inMul2, Vec4Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition Vec4.inl:236
JPH_INLINE Vec4 Normalized() const
Normalize vector.
Definition Vec4.inl:705
static JPH_INLINE UVec4 sEquals(Vec4Arg inV1, Vec4Arg inV2)
Equals (component wise)
Definition Vec4.inl:166
JPH_INLINE float ReduceMax() const
Get the maximum of X, Y, Z and W.
Definition Vec4.inl:771
JPH_INLINE Vec4 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition Vec4.inl:615
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec4.inl:569
JPH_INLINE UVec4 ReinterpretAsInt() const
Reinterpret Vec4 as a UVec4 (doesn't change the bits)
Definition Vec4.inl:741
static JPH_INLINE UVec4 sGreaterOrEqual(Vec4Arg inV1, Vec4Arg inV2)
Greater than or equal (component wise)
Definition Vec4.inl:222
static JPH_INLINE Vec4 sMin(Vec4Arg inV1, Vec4Arg inV2)
Return the minimum value of each of the components.
Definition Vec4.inl:138
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec4.inl:580
JPH_INLINE Vec4 Sqrt() const
Component wise square root.
Definition Vec4.inl:673
JPH_INLINE Vec4 & operator*=(float inV2)
Multiply vector with float.
Definition Vec4.inl:432
static JPH_INLINE Vec4 sGatherFloat4(const float *inBase, UVec4Arg inOffsets)
Gather 4 floats from memory at inBase + inOffsets[i] * Scale.
JPH_INLINE Vec4 operator+(Vec4Arg inV2) const
Add two float vectors (component wise)
Definition Vec4.inl:471
JPH_INLINE Vec4 & operator/=(float inV2)
Divide vector by float.
Definition Vec4.inl:458
JPH_INLINE bool IsNormalized(float inTolerance=1.0e-6f) const
Test if vector is normalized.
Definition Vec4.inl:362
JPH_INLINE bool operator==(Vec4Arg inV2) const
Comparison.
Definition Vec4.inl:352
JPH_INLINE Vec4 SplatW() const
Replicate the W component to all components.
Definition Vec4.inl:591
JPH_INLINE Vec4 DotV(Vec4Arg inV2) const
Dot product, returns the dot product in X, Y and Z components.
Definition Vec4.inl:620
JPH_INLINE bool IsClose(Vec4Arg inV2, float inMaxDistSq=1.0e-12f) const
Test if two vectors are close.
Definition Vec4.inl:357
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:113
static JPH_INLINE Vec4 sLoadFloat4(const Float4 *inV)
Load 4 floats from memory.
Definition Vec4.inl:90
{ float mData[4] Type
Definition Vec4.h:24
static JPH_INLINE Vec4 sZero()
Vector with all zeros.
Definition Vec4.inl:63
JPH_INLINE Vec4 Swizzle() const
Swizzle the elements in inV.
static JPH_INLINE Vec4 sOr(Vec4Arg inV1, Vec4Arg inV2)
Logical or (component wise)
Definition Vec4.inl:271
JPH_INLINE float ReduceMin() const
Get the minimum of X, Y, Z and W.
Definition Vec4.inl:764
Type mValue
Definition Vec4.h:274
JPH_INLINE Vec4 & operator-=(Vec4Arg inV2)
Subtract two float vectors (component wise)
Definition Vec4.inl:531
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec4.inl:646
static JPH_INLINE Vec4 sMax(Vec4Arg inV1, Vec4Arg inV2)
Return the maximum of each of the components.
Definition Vec4.inl:152
JPH_INLINE float Dot(Vec4Arg inV2) const
Dot product.
Definition Vec4.inl:633
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition Vec4.inl:367
static JPH_INLINE Vec4 sNaN()
Vector with all NaN's.
Definition Vec4.inl:85
Vec4 ACos() const
Definition Vec4.inl:908
static JPH_INLINE Vec4 sSelect(Vec4Arg inNotSet, Vec4Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit ...
Definition Vec4.inl:254
JPH_INLINE int GetSignBits() const
Store if X is negative in bit 0, Y in bit 1, Z in bit 2 and W in bit 3.
Definition Vec4.inl:752
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:74
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:778
JPH_INLINE void StoreFloat4(Float4 *outV) const
Store 4 floats to memory.
Definition Vec4.inl:718
friend JPH_INLINE Vec4 operator*(float inV1, Vec4Arg inV2)
Multiply vector with float.
Definition Vec4.inl:407