Jolt Physics
A multi-core friendly Game Physics Engine
UVec4.inl
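Inline implementations of UVec4, Jolt's 4-component vector of 32-bit unsigned integers, with dedicated SSE/AVX, NEON and scalar fallback code paths.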
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT

JPH_NAMESPACE_BEGIN

UVec4::UVec4(uint32 inX, uint32 inY, uint32 inZ, uint32 inW)
{
#if defined(JPH_USE_SSE)
    mValue = _mm_set_epi32(int(inW), int(inZ), int(inY), int(inX));
#elif defined(JPH_USE_NEON)
    uint32x2_t xy = vcreate_u32(static_cast<uint64>(inX) | (static_cast<uint64>(inY) << 32));
    uint32x2_t zw = vcreate_u32(static_cast<uint64>(inZ) | (static_cast<uint64>(inW) << 32));
    mValue = vcombine_u32(xy, zw);
#else
    mU32[0] = inX;
    mU32[1] = inY;
    mU32[2] = inZ;
    mU32[3] = inW;
#endif
}
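
// Note on the SSE path above: _mm_set_epi32 takes its arguments from the highest lane to the
// lowest, so passing (inW, inZ, inY, inX) stores inX in lane 0 (the X component).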

bool UVec4::operator == (UVec4Arg inV2) const
{
    return sEquals(*this, inV2).TestAllTrue();
}

template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
UVec4 UVec4::Swizzle() const
{
    static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
    static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
    static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
    static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");

#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
#elif defined(JPH_USE_NEON)
    return JPH_NEON_SHUFFLE_U32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
#else
    return UVec4(mU32[SwizzleX], mU32[SwizzleY], mU32[SwizzleZ], mU32[SwizzleW]);
#endif
}
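
// _MM_SHUFFLE packs its four 2-bit lane indices from the highest lane to the lowest, hence the
// reversed order in the SSE path above. Illustrative use:
// UVec4(1, 2, 3, 4).Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>() yields (4, 3, 2, 1).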

UVec4 UVec4::sZero()
{
#if defined(JPH_USE_SSE)
    return _mm_setzero_si128();
#elif defined(JPH_USE_NEON)
    return vdupq_n_u32(0);
#else
    return UVec4(0, 0, 0, 0);
#endif
}

UVec4 UVec4::sReplicate(uint32 inV)
{
#if defined(JPH_USE_SSE)
    return _mm_set1_epi32(int(inV));
#elif defined(JPH_USE_NEON)
    return vdupq_n_u32(inV);
#else
    return UVec4(inV, inV, inV, inV);
#endif
}

UVec4 UVec4::sLoadInt(const uint32 *inV)
{
#if defined(JPH_USE_SSE)
    return _mm_castps_si128(_mm_load_ss(reinterpret_cast<const float*>(inV)));
#elif defined(JPH_USE_NEON)
    return vsetq_lane_u32(*inV, vdupq_n_u32(0), 0);
#else
    return UVec4(*inV, 0, 0, 0);
#endif
}
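
// _mm_load_ss loads a single 32-bit value and zeroes the upper three lanes, so the SSE path
// above produces the documented (*inV, 0, 0, 0) result in one load.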

UVec4 UVec4::sLoadInt4(const uint32 *inV)
{
#if defined(JPH_USE_SSE)
    return _mm_loadu_si128(reinterpret_cast<const __m128i *>(inV));
#elif defined(JPH_USE_NEON)
    return vld1q_u32(inV);
#else
    return UVec4(inV[0], inV[1], inV[2], inV[3]);
#endif
}

UVec4 UVec4::sLoadInt4Aligned(const uint32 *inV)
{
#if defined(JPH_USE_SSE)
    return _mm_load_si128(reinterpret_cast<const __m128i *>(inV));
#elif defined(JPH_USE_NEON)
    return vld1q_u32(inV); // ARM doesn't distinguish between aligned and unaligned loads
#else
    return UVec4(inV[0], inV[1], inV[2], inV[3]);
#endif
}

template <const int Scale>
UVec4 UVec4::sGatherInt4(const uint32 *inBase, UVec4Arg inOffsets)
{
#ifdef JPH_USE_AVX2
    return _mm_i32gather_epi32(reinterpret_cast<const int *>(inBase), inOffsets.mValue, Scale);
#else
    const uint8 *base = reinterpret_cast<const uint8 *>(inBase);
    uint32 x = *reinterpret_cast<const uint32 *>(base + inOffsets.GetX() * Scale);
    uint32 y = *reinterpret_cast<const uint32 *>(base + inOffsets.GetY() * Scale);
    uint32 z = *reinterpret_cast<const uint32 *>(base + inOffsets.GetZ() * Scale);
    uint32 w = *reinterpret_cast<const uint32 *>(base + inOffsets.GetW() * Scale);
    return UVec4(x, y, z, w);
#endif
}
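
// Scale is a byte stride (the AVX2 gather instruction accepts only 1, 2, 4 or 8), so gathering
// uint32 elements by index uses Scale = 4. Illustrative only, with a hypothetical array 'data':
// UVec4 v = UVec4::sGatherInt4<4>(data, UVec4(3, 1, 4, 1)); // (data[3], data[1], data[4], data[1])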

UVec4 UVec4::sMin(UVec4Arg inV1, UVec4Arg inV2)
{
#if defined(JPH_USE_SSE4_1)
    return _mm_min_epu32(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vminq_u32(inV1.mValue, inV2.mValue);
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = min(inV1.mU32[i], inV2.mU32[i]);
    return result;
#endif
}

UVec4 UVec4::sMax(UVec4Arg inV1, UVec4Arg inV2)
{
#if defined(JPH_USE_SSE4_1)
    return _mm_max_epu32(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vmaxq_u32(inV1.mValue, inV2.mValue);
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = max(inV1.mU32[i], inV2.mU32[i]);
    return result;
#endif
}

UVec4 UVec4::sEquals(UVec4Arg inV1, UVec4Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_cmpeq_epi32(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vceqq_u32(inV1.mValue, inV2.mValue);
#else
    return UVec4(inV1.mU32[0] == inV2.mU32[0]? 0xffffffffu : 0,
                 inV1.mU32[1] == inV2.mU32[1]? 0xffffffffu : 0,
                 inV1.mU32[2] == inV2.mU32[2]? 0xffffffffu : 0,
                 inV1.mU32[3] == inV2.mU32[3]? 0xffffffffu : 0);
#endif
}
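
// Each lane of the result is either all ones (0xffffffff) or all zeros, so the mask can be fed
// directly into sSelect or collapsed with TestAllTrue, as operator== above does.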

UVec4 UVec4::sSelect(UVec4Arg inNotSet, UVec4Arg inSet, UVec4Arg inControl)
{
#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
    return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(inNotSet.mValue), _mm_castsi128_ps(inSet.mValue), _mm_castsi128_ps(inControl.mValue)));
#elif defined(JPH_USE_SSE)
    __m128 is_set = _mm_castsi128_ps(_mm_srai_epi32(inControl.mValue, 31));
    return _mm_castps_si128(_mm_or_ps(_mm_and_ps(is_set, _mm_castsi128_ps(inSet.mValue)), _mm_andnot_ps(is_set, _mm_castsi128_ps(inNotSet.mValue))));
#elif defined(JPH_USE_NEON)
    return vbslq_u32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inSet.mValue, inNotSet.mValue);
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = (inControl.mU32[i] & 0x80000000u) ? inSet.mU32[i] : inNotSet.mU32[i];
    return result;
#endif
}
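
// Only the highest bit of each inControl lane is significant: the plain SSE and NEON paths
// replicate that sign bit across the whole lane with an arithmetic shift right by 31 before
// masking, and the scalar fallback tests 0x80000000 directly.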

UVec4 UVec4::sOr(UVec4Arg inV1, UVec4Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_or_si128(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vorrq_u32(inV1.mValue, inV2.mValue);
#else
    return UVec4(inV1.mU32[0] | inV2.mU32[0],
                 inV1.mU32[1] | inV2.mU32[1],
                 inV1.mU32[2] | inV2.mU32[2],
                 inV1.mU32[3] | inV2.mU32[3]);
#endif
}

UVec4 UVec4::sXor(UVec4Arg inV1, UVec4Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_xor_si128(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return veorq_u32(inV1.mValue, inV2.mValue);
#else
    return UVec4(inV1.mU32[0] ^ inV2.mU32[0],
                 inV1.mU32[1] ^ inV2.mU32[1],
                 inV1.mU32[2] ^ inV2.mU32[2],
                 inV1.mU32[3] ^ inV2.mU32[3]);
#endif
}

UVec4 UVec4::sAnd(UVec4Arg inV1, UVec4Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_and_si128(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vandq_u32(inV1.mValue, inV2.mValue);
#else
    return UVec4(inV1.mU32[0] & inV2.mU32[0],
                 inV1.mU32[1] & inV2.mU32[1],
                 inV1.mU32[2] & inV2.mU32[2],
                 inV1.mU32[3] & inV2.mU32[3]);
#endif
}

UVec4 UVec4::sNot(UVec4Arg inV1)
{
#if defined(JPH_USE_AVX512)
    return _mm_ternarylogic_epi32(inV1.mValue, inV1.mValue, inV1.mValue, 0b01010101);
#elif defined(JPH_USE_SSE)
    return sXor(inV1, sReplicate(0xffffffff));
#elif defined(JPH_USE_NEON)
    return vmvnq_u32(inV1.mValue);
#else
    return UVec4(~inV1.mU32[0], ~inV1.mU32[1], ~inV1.mU32[2], ~inV1.mU32[3]);
#endif
}
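
// The AVX-512 path evaluates a ternary truth table: with all three operands equal to inV1, the
// immediate 0b01010101 is 1 exactly where the source bit is 0, i.e. the bitwise complement.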

UVec4 UVec4::sSort4True(UVec4Arg inValue, UVec4Arg inIndex)
{
    // If inValue.z is false then shift W to Z
    UVec4 v = UVec4::sSelect(inIndex.Swizzle<SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W, SWIZZLE_W>(), inIndex, inValue.SplatZ());

    // If inValue.y is false then shift Z and further to Y and further
    v = UVec4::sSelect(v.Swizzle<SWIZZLE_X, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_W>(), v, inValue.SplatY());

    // If inValue.x is false then shift Y and further to X and further
    v = UVec4::sSelect(v.Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_W>(), v, inValue.SplatX());

    return v;
}
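
// In other words, sSort4True compacts inIndex: lanes whose corresponding lane in inValue is
// true (highest bit set) move to the front in their original order, and trailing lanes hold
// duplicates. E.g. inValue = (true, false, true, false) with inIndex = (0, 1, 2, 3) gives (0, 2, 3, 3).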

UVec4 UVec4::operator * (UVec4Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_mullo_epi32(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vmulq_u32(mValue, inV2.mValue);
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = mU32[i] * inV2.mU32[i];
    return result;
#endif
}

UVec4 UVec4::operator + (UVec4Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_add_epi32(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vaddq_u32(mValue, inV2.mValue);
#else
    return UVec4(mU32[0] + inV2.mU32[0],
                 mU32[1] + inV2.mU32[1],
                 mU32[2] + inV2.mU32[2],
                 mU32[3] + inV2.mU32[3]);
#endif
}

UVec4 &UVec4::operator += (UVec4Arg inV2)
{
#if defined(JPH_USE_SSE)
    mValue = _mm_add_epi32(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    mValue = vaddq_u32(mValue, inV2.mValue);
#else
    for (int i = 0; i < 4; ++i)
        mU32[i] += inV2.mU32[i];
#endif
    return *this;
}

UVec4 UVec4::operator - (UVec4Arg inV2) const
{
#if defined(JPH_USE_SSE)
    return _mm_sub_epi32(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vsubq_u32(mValue, inV2.mValue);
#else
    return UVec4(mU32[0] - inV2.mU32[0],
                 mU32[1] - inV2.mU32[1],
                 mU32[2] - inV2.mU32[2],
                 mU32[3] - inV2.mU32[3]);
#endif
}

UVec4 &UVec4::operator -= (UVec4Arg inV2)
{
#if defined(JPH_USE_SSE)
    mValue = _mm_sub_epi32(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    mValue = vsubq_u32(mValue, inV2.mValue);
#else
    for (int i = 0; i < 4; ++i)
        mU32[i] -= inV2.mU32[i];
#endif
    return *this;
}

UVec4 UVec4::SplatX() const
{
#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(0, 0, 0, 0));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_u32(mValue, 0);
#else
    return UVec4(mU32[0], mU32[0], mU32[0], mU32[0]);
#endif
}

UVec4 UVec4::SplatY() const
{
#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(1, 1, 1, 1));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_u32(mValue, 1);
#else
    return UVec4(mU32[1], mU32[1], mU32[1], mU32[1]);
#endif
}

UVec4 UVec4::SplatZ() const
{
#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(2, 2, 2, 2));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_u32(mValue, 2);
#else
    return UVec4(mU32[2], mU32[2], mU32[2], mU32[2]);
#endif
}

UVec4 UVec4::SplatW() const
{
#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(3, 3, 3, 3));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_u32(mValue, 3);
#else
    return UVec4(mU32[3], mU32[3], mU32[3], mU32[3]);
#endif
}

Vec4 UVec4::ToFloat() const
{
#if defined(JPH_USE_SSE)
    return _mm_cvtepi32_ps(mValue);
#elif defined(JPH_USE_NEON)
    return vcvtq_f32_u32(mValue);
#else
    return Vec4((float)mU32[0], (float)mU32[1], (float)mU32[2], (float)mU32[3]);
#endif
}

Vec4 UVec4::ReinterpretAsFloat() const
{
#if defined(JPH_USE_SSE)
    return Vec4(_mm_castsi128_ps(mValue));
#elif defined(JPH_USE_NEON)
    return vreinterpretq_f32_u32(mValue);
#else
    return *reinterpret_cast<const Vec4 *>(this);
#endif
}

UVec4 UVec4::DotV(UVec4Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
    __m128i mul = _mm_mullo_epi32(mValue, inV2.mValue);
    __m128i sum = _mm_add_epi32(mul, _mm_shuffle_epi32(mul, _MM_SHUFFLE(2, 3, 0, 1)));
    return _mm_add_epi32(sum, _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)));
#elif defined(JPH_USE_NEON)
    uint32x4_t mul = vmulq_u32(mValue, inV2.mValue);
    return vdupq_n_u32(vaddvq_u32(mul));
#else
    return UVec4::sReplicate(mU32[0] * inV2.mU32[0] + mU32[1] * inV2.mU32[1] + mU32[2] * inV2.mU32[2] + mU32[3] * inV2.mU32[3]);
#endif
}
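
// The SSE4.1 reduction above is a standard horizontal add: the first shuffle/add sums adjacent
// lane pairs, the second swaps the 64-bit halves, so the final add leaves the full dot product
// in every lane.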

uint32 UVec4::Dot(UVec4Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
    __m128i mul = _mm_mullo_epi32(mValue, inV2.mValue);
    __m128i sum = _mm_add_epi32(mul, _mm_shuffle_epi32(mul, _MM_SHUFFLE(2, 3, 0, 1)));
    return _mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2))));
#elif defined(JPH_USE_NEON)
    uint32x4_t mul = vmulq_u32(mValue, inV2.mValue);
    return vaddvq_u32(mul);
#else
    return mU32[0] * inV2.mU32[0] + mU32[1] * inV2.mU32[1] + mU32[2] * inV2.mU32[2] + mU32[3] * inV2.mU32[3];
#endif
}

void UVec4::StoreInt4(uint32 *outV) const
{
#if defined(JPH_USE_SSE)
    _mm_storeu_si128(reinterpret_cast<__m128i *>(outV), mValue);
#elif defined(JPH_USE_NEON)
    vst1q_u32(outV, mValue);
#else
    for (int i = 0; i < 4; ++i)
        outV[i] = mU32[i];
#endif
}

void UVec4::StoreInt4Aligned(uint32 *outV) const
{
#if defined(JPH_USE_SSE)
    _mm_store_si128(reinterpret_cast<__m128i *>(outV), mValue);
#elif defined(JPH_USE_NEON)
    vst1q_u32(outV, mValue); // ARM doesn't distinguish between aligned and unaligned stores
#else
    for (int i = 0; i < 4; ++i)
        outV[i] = mU32[i];
#endif
}

int UVec4::CountTrues() const
{
#if defined(JPH_USE_SSE)
    return CountBits(_mm_movemask_ps(_mm_castsi128_ps(mValue)));
#elif defined(JPH_USE_NEON)
    return vaddvq_u32(vshrq_n_u32(mValue, 31));
#else
    return (mU32[0] >> 31) + (mU32[1] >> 31) + (mU32[2] >> 31) + (mU32[3] >> 31);
#endif
}

int UVec4::GetTrues() const
{
#if defined(JPH_USE_SSE)
    return _mm_movemask_ps(_mm_castsi128_ps(mValue));
#elif defined(JPH_USE_NEON)
    int32x4_t shift = JPH_NEON_INT32x4(0, 1, 2, 3);
    return vaddvq_u32(vshlq_u32(vshrq_n_u32(mValue, 31), shift));
#else
    return (mU32[0] >> 31) | ((mU32[1] >> 31) << 1) | ((mU32[2] >> 31) << 2) | ((mU32[3] >> 31) << 3);
#endif
}
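
// On SSE, _mm_movemask_ps packs the sign bit of each lane into bits 0..3; the NEON path builds
// the same mask by moving each lane's top bit down to bit 0, shifting lane i left by i and
// summing across the vector. E.g. (0xffffffff, 0, 0xffffffff, 0) yields 0b0101.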

bool UVec4::TestAnyTrue() const
{
    return GetTrues() != 0;
}

bool UVec4::TestAnyXYZTrue() const
{
    return (GetTrues() & 0b111) != 0;
}

bool UVec4::TestAllTrue() const
{
    return GetTrues() == 0b1111;
}

bool UVec4::TestAllXYZTrue() const
{
    return (GetTrues() & 0b111) == 0b111;
}

template <const uint Count>
UVec4 UVec4::LogicalShiftLeft() const
{
    static_assert(Count <= 31, "Invalid shift");

#if defined(JPH_USE_SSE)
    return _mm_slli_epi32(mValue, Count);
#elif defined(JPH_USE_NEON)
    return vshlq_n_u32(mValue, Count);
#else
    return UVec4(mU32[0] << Count, mU32[1] << Count, mU32[2] << Count, mU32[3] << Count);
#endif
}

template <const uint Count>
UVec4 UVec4::LogicalShiftRight() const
{
    static_assert(Count <= 31, "Invalid shift");

#if defined(JPH_USE_SSE)
    return _mm_srli_epi32(mValue, Count);
#elif defined(JPH_USE_NEON)
    return vshrq_n_u32(mValue, Count);
#else
    return UVec4(mU32[0] >> Count, mU32[1] >> Count, mU32[2] >> Count, mU32[3] >> Count);
#endif
}

template <const uint Count>
UVec4 UVec4::ArithmeticShiftRight() const
{
    static_assert(Count <= 31, "Invalid shift");

#if defined(JPH_USE_SSE)
    return _mm_srai_epi32(mValue, Count);
#elif defined(JPH_USE_NEON)
    return vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(mValue), Count));
#else
    return UVec4(uint32(int32_t(mU32[0]) >> Count),
                 uint32(int32_t(mU32[1]) >> Count),
                 uint32(int32_t(mU32[2]) >> Count),
                 uint32(int32_t(mU32[3]) >> Count));
#endif
}
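
// Unlike LogicalShiftRight, this shifts in copies of the sign bit: e.g. a lane holding
// 0x80000000 shifted right by 1 becomes 0xc0000000.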

UVec4 UVec4::Expand4Uint16Lo() const
{
#if defined(JPH_USE_SSE)
    return _mm_unpacklo_epi16(mValue, _mm_castps_si128(_mm_setzero_ps()));
#elif defined(JPH_USE_NEON)
    uint16x4_t value = vget_low_u16(vreinterpretq_u16_u32(mValue));
    uint16x4_t zero = vdup_n_u16(0);
    return vreinterpretq_u32_u16(vcombine_u16(vzip1_u16(value, zero), vzip2_u16(value, zero)));
#else
    return UVec4(mU32[0] & 0xffff,
                 (mU32[0] >> 16) & 0xffff,
                 mU32[1] & 0xffff,
                 (mU32[1] >> 16) & 0xffff);
#endif
}
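
// Interleaving with a zero vector zero-extends each 16-bit value to 32 bits: a register holding
// the uint16s (1, 2, 3, 4, 5, 6, 7, 8) expands to the uint32s (1, 2, 3, 4).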

UVec4 UVec4::Expand4Uint16Hi() const
{
#if defined(JPH_USE_SSE)
    return _mm_unpackhi_epi16(mValue, _mm_castps_si128(_mm_setzero_ps()));
#elif defined(JPH_USE_NEON)
    uint16x4_t value = vget_high_u16(vreinterpretq_u16_u32(mValue));
    uint16x4_t zero = vdup_n_u16(0);
    return vreinterpretq_u32_u16(vcombine_u16(vzip1_u16(value, zero), vzip2_u16(value, zero)));
#else
    return UVec4(mU32[2] & 0xffff,
                 (mU32[2] >> 16) & 0xffff,
                 mU32[3] & 0xffff,
                 (mU32[3] >> 16) & 0xffff);
#endif
}

UVec4 UVec4::Expand4Byte0() const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff03), int(0xffffff02), int(0xffffff01), int(0xffffff00)));
#elif defined(JPH_USE_NEON)
    uint8x16_t idx = JPH_NEON_UINT8x16(0x00, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, 0x7f, 0x7f, 0x02, 0x7f, 0x7f, 0x7f, 0x03, 0x7f, 0x7f, 0x7f);
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = (mU32[0] >> (i * 8)) & 0xff;
    return result;
#endif
}
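
// In _mm_shuffle_epi8 a control byte with its high bit set writes zero, so the 0xffffffXX words
// above zero-extend bytes 0..3 into the four lanes; the NEON table lookup behaves the same way
// because index 0x7f is out of range for vqtbl1q_s8 and therefore produces 0.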

UVec4 UVec4::Expand4Byte4() const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff07), int(0xffffff06), int(0xffffff05), int(0xffffff04)));
#elif defined(JPH_USE_NEON)
    uint8x16_t idx = JPH_NEON_UINT8x16(0x04, 0x7f, 0x7f, 0x7f, 0x05, 0x7f, 0x7f, 0x7f, 0x06, 0x7f, 0x7f, 0x7f, 0x07, 0x7f, 0x7f, 0x7f);
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = (mU32[1] >> (i * 8)) & 0xff;
    return result;
#endif
}

UVec4 UVec4::Expand4Byte8() const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff0b), int(0xffffff0a), int(0xffffff09), int(0xffffff08)));
#elif defined(JPH_USE_NEON)
    uint8x16_t idx = JPH_NEON_UINT8x16(0x08, 0x7f, 0x7f, 0x7f, 0x09, 0x7f, 0x7f, 0x7f, 0x0a, 0x7f, 0x7f, 0x7f, 0x0b, 0x7f, 0x7f, 0x7f);
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = (mU32[2] >> (i * 8)) & 0xff;
    return result;
#endif
}

UVec4 UVec4::Expand4Byte12() const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff0f), int(0xffffff0e), int(0xffffff0d), int(0xffffff0c)));
#elif defined(JPH_USE_NEON)
    uint8x16_t idx = JPH_NEON_UINT8x16(0x0c, 0x7f, 0x7f, 0x7f, 0x0d, 0x7f, 0x7f, 0x7f, 0x0e, 0x7f, 0x7f, 0x7f, 0x0f, 0x7f, 0x7f, 0x7f);
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = (mU32[3] >> (i * 8)) & 0xff;
    return result;
#endif
}
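
// In ShiftComponents4Minus below, each row of sFourMinusXShuffle holds byte-shuffle indices that
// select the last inCount elements; the 0xffffffff entries zero a lane (high bit set for SSE,
// out of range for the NEON table lookup). E.g. with inCount = 1, (X, Y, Z, W) becomes (W, 0, 0, 0).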

UVec4 UVec4::ShiftComponents4Minus(int inCount) const
{
#if defined(JPH_USE_SSE4_1) || defined(JPH_USE_NEON)
    alignas(UVec4) static constexpr uint32 sFourMinusXShuffle[5][4] =
    {
        { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
        { 0x0f0e0d0c, 0xffffffff, 0xffffffff, 0xffffffff },
        { 0x0b0a0908, 0x0f0e0d0c, 0xffffffff, 0xffffffff },
        { 0x07060504, 0x0b0a0908, 0x0f0e0d0c, 0xffffffff },
        { 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c }
    };
#endif

#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, *reinterpret_cast<const UVec4::Type *>(sFourMinusXShuffle[inCount]));
#elif defined(JPH_USE_NEON)
    uint8x16_t idx = vreinterpretq_u8_u32(*reinterpret_cast<const UVec4::Type *>(sFourMinusXShuffle[inCount]));
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result = UVec4::sZero();
    for (int i = 0; i < inCount; i++)
        result.mU32[i] = mU32[i + 4 - inCount];
    return result;
#endif
}

JPH_NAMESPACE_END

std::uint8_t uint8
Definition Core.h:493
std::uint64_t uint64
Definition Core.h:496
#define JPH_NAMESPACE_END
Definition Core.h:419
std::uint32_t uint32
Definition Core.h:495
#define JPH_NAMESPACE_BEGIN
Definition Core.h:413
uint CountBits(uint32 inValue)
Count the number of 1 bits in a value.
Definition Math.h:164
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_W
Use the W component.
Definition Swizzle.h:15
@ SWIZZLE_X
Use the X component.
Definition Swizzle.h:12
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
UVec4
Definition UVec4.h:12
JPH_INLINE UVec4 operator-(UVec4Arg inV2) const
Subtract two integer vectors (component wise)
Definition UVec4.inl:285
JPH_INLINE UVec4 Swizzle() const
Swizzle the elements in inV.
static JPH_INLINE UVec4 sNot(UVec4Arg inV1)
Logical not (component wise)
Definition UVec4.inl:217
JPH_INLINE uint32 GetZ() const
Definition UVec4.h:104
static JPH_INLINE UVec4 sMin(UVec4Arg inV1, UVec4Arg inV2)
Return the minimum value of each of the components.
Definition UVec4.inl:115
JPH_INLINE UVec4 LogicalShiftLeft() const
Shift all components by Count bits to the left (vacated low bits are zero-filled)
JPH_INLINE int CountTrues() const
Count the number of components that are true (true is when highest bit of component is set)
Definition UVec4.inl:430
JPH_INLINE UVec4 & operator-=(UVec4Arg inV2)
Subtract two integer vectors (component wise)
Definition UVec4.inl:299
JPH_INLINE UVec4 SplatY() const
Replicate the Y component to all components.
Definition UVec4.inl:323
static JPH_INLINE UVec4 sSelect(UVec4Arg inNotSet, UVec4Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit of inControl = 1
Definition UVec4.inl:157
static JPH_INLINE UVec4 sLoadInt(const uint32 *inV)
Load 1 int from memory and place it in the X component, zeros Y, Z and W.
Definition UVec4.inl:67
JPH_INLINE UVec4 Expand4Uint16Lo() const
Takes the lower four 16-bit values and expands them to X, Y, Z and W.
Definition UVec4.inl:518
static JPH_INLINE UVec4 sSort4True(UVec4Arg inValue, UVec4Arg inIndex)
Definition UVec4.inl:230
JPH_INLINE UVec4 operator+(UVec4Arg inV2) const
Add two integer vectors (component wise)
Definition UVec4.inl:258
JPH_INLINE uint32 GetY() const
Definition UVec4.h:103
JPH_INLINE UVec4 LogicalShiftRight() const
Shift all components by Count bits to the right (vacated high bits are zero-filled)
static JPH_INLINE UVec4 sReplicate(uint32 inV)
Replicate int inV across all components.
Definition UVec4.inl:56
JPH_INLINE UVec4 SplatX() const
Replicate the X component to all components.
Definition UVec4.inl:312
JPH_INLINE UVec4 Expand4Byte4() const
Takes byte 4 .. 7 and expands them to X, Y, Z and W.
Definition UVec4.inl:565
JPH_INLINE bool TestAllTrue() const
Test if all components are true (true is when highest bit of component is set)
Definition UVec4.inl:463
JPH_INLINE UVec4 Expand4Byte0() const
Takes byte 0 .. 3 and expands them to X, Y, Z and W.
Definition UVec4.inl:550
JPH_INLINE int GetTrues() const
Store if X is true in bit 0, Y in bit 1, Z in bit 2 and W in bit 3 (true is when highest bit of component is set)
Definition UVec4.inl:441
JPH_INLINE bool TestAnyXYZTrue() const
Test if any of X, Y or Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:458
JPH_INLINE UVec4 & operator+=(UVec4Arg inV2)
Add two integer vectors (component wise)
Definition UVec4.inl:272
static JPH_INLINE UVec4 sGatherInt4(const uint32 *inBase, UVec4Arg inOffsets)
Gather 4 ints from memory at inBase + inOffsets[i] * Scale.
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:202
static JPH_INLINE UVec4 sEquals(UVec4Arg inV1, UVec4Arg inV2)
Equals (component wise)
Definition UVec4.inl:143
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:174
struct { uint32 mData[4]; } Type
Definition UVec4.h:22
JPH_INLINE uint32 GetW() const
Definition UVec4.h:105
JPH_INLINE bool TestAllXYZTrue() const
Test if X, Y and Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:468
JPH_INLINE UVec4 ShiftComponents4Minus(int inCount) const
Shift vector components by 4 - Count floats to the left, so if Count = 1 the resulting vector is (W, 0, 0, 0).
Definition UVec4.inl:610
JPH_INLINE bool operator==(UVec4Arg inV2) const
Comparison.
Definition UVec4.inl:23
static JPH_INLINE UVec4 sMax(UVec4Arg inV1, UVec4Arg inV2)
Return the maximum of each of the components.
Definition UVec4.inl:129
JPH_INLINE UVec4 SplatZ() const
Replicate the Z component to all components.
Definition UVec4.inl:334
Type mValue
Definition UVec4.h:223
JPH_INLINE UVec4 SplatW() const
Replicate the W component to all components.
Definition UVec4.inl:345
JPH_INLINE void StoreInt4(uint32 *outV) const
Store 4 ints to memory.
Definition UVec4.inl:406
JPH_INLINE uint32 GetX() const
Get individual components.
Definition UVec4.h:102
JPH_INLINE UVec4 Expand4Byte8() const
Takes byte 8 .. 11 and expands them to X, Y, Z and W.
Definition UVec4.inl:580
static JPH_INLINE UVec4 sLoadInt4Aligned(const uint32 *inV)
Load 4 ints from memory, aligned to 16 bytes.
Definition UVec4.inl:89
static JPH_INLINE UVec4 sLoadInt4(const uint32 *inV)
Load 4 ints from memory.
Definition UVec4.inl:78
JPH_INLINE UVec4 Expand4Byte12() const
Takes byte 12 .. 15 and expands them to X, Y, Z and W.
Definition UVec4.inl:595
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:188
JPH_INLINE UVec4 Expand4Uint16Hi() const
Takes the upper four 16-bit values and expands them to X, Y, Z and W.
Definition UVec4.inl:534
static JPH_INLINE UVec4 sZero()
Vector with all zeros.
Definition UVec4.inl:45
JPH_INLINE uint32 Dot(UVec4Arg inV2) const
Dot product.
Definition UVec4.inl:392
JPH_INLINE UVec4 DotV(UVec4Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition UVec4.inl:378
JPH_INLINE UVec4 ArithmeticShiftRight() const
Shift all components by Count bits to the right (shifting in the value of the highest bit)
UVec4()=default
Constructor.
JPH_INLINE UVec4 operator*(UVec4Arg inV2) const
Component wise multiplication of two integer vectors (stores low 32 bits of result only)
Definition UVec4.inl:244
JPH_INLINE Vec4 ToFloat() const
Convert each component from an int to a float.
Definition UVec4.inl:356
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:367
JPH_INLINE void StoreInt4Aligned(uint32 *outV) const
Store 4 ints to memory, aligned to 16 bytes.
Definition UVec4.inl:418
JPH_INLINE bool TestAnyTrue() const
Test if any of the components are true (true is when highest bit of component is set)
Definition UVec4.inl:453
uint32 mU32[4]
Definition UVec4.h:224
Vec4
Definition Vec4.h:14