Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
UVec4.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
6
8{
9#if defined(JPH_USE_SSE)
10 mValue = _mm_set_epi32(int(inW), int(inZ), int(inY), int(inX));
11#elif defined(JPH_USE_NEON)
12 uint32x2_t xy = vcreate_u32(static_cast<uint64>(inX) | (static_cast<uint64>(inY) << 32));
13 uint32x2_t zw = vcreate_u32(static_cast<uint64>(inZ) | (static_cast<uint64>(inW) << 32));
14 mValue = vcombine_u32(xy, zw);
15#elif defined(JPH_USE_RVV)
16 vuint32m1_t v = __riscv_vmv_v_x_u32m1(inW, 4);
17 v = __riscv_vslide1up_vx_u32m1(v, inZ, 4);
18 v = __riscv_vslide1up_vx_u32m1(v, inY, 4);
19 v = __riscv_vslide1up_vx_u32m1(v, inX, 4);
20 __riscv_vse32_v_u32m1(mU32, v, 4);
21#else
22 mU32[0] = inX;
23 mU32[1] = inY;
24 mU32[2] = inZ;
25 mU32[3] = inW;
26#endif
27}
28
30{
31 return sEquals(*this, inV2).TestAllTrue();
32}
33
34template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
36{
37 static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
38 static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
39 static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
40 static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
41
42#if defined(JPH_USE_SSE)
43 return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
44#elif defined(JPH_USE_NEON)
45 return JPH_NEON_SHUFFLE_U32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
46#elif defined(JPH_USE_RVV)
47 UVec4 v;
48 const vuint32m1_t data = __riscv_vle32_v_u32m1(mU32, 4);
49 const uint32 stored_indices[4] = { SwizzleX, SwizzleY, SwizzleZ, SwizzleW };
50 const vuint32m1_t index = __riscv_vle32_v_u32m1(stored_indices, 4);
51 const vuint32m1_t swizzled = __riscv_vrgather_vv_u32m1(data, index, 4);
52 __riscv_vse32_v_u32m1(v.mU32, swizzled, 4);
53 return v;
54#else
55 return UVec4(mU32[SwizzleX], mU32[SwizzleY], mU32[SwizzleZ], mU32[SwizzleW]);
56#endif
57}
58
60{
61#if defined(JPH_USE_SSE)
62 return _mm_setzero_si128();
63#elif defined(JPH_USE_NEON)
64 return vdupq_n_u32(0);
65#elif defined(JPH_USE_RVV)
66 UVec4 v;
67 const vuint32m1_t zero_vec = __riscv_vmv_v_x_u32m1(0, 4);
68 __riscv_vse32_v_u32m1(v.mU32, zero_vec, 4);
69 return v;
70#else
71 return UVec4(0, 0, 0, 0);
72#endif
73}
74
76{
77#if defined(JPH_USE_SSE)
78 return _mm_set1_epi32(int(inV));
79#elif defined(JPH_USE_NEON)
80 return vdupq_n_u32(inV);
81#elif defined(JPH_USE_RVV)
82 UVec4 vec;
83 const vuint32m1_t v = __riscv_vmv_v_x_u32m1(inV, 4);
84 __riscv_vse32_v_u32m1(vec.mU32, v, 4);
85 return vec;
86#else
87 return UVec4(inV, inV, inV, inV);
88#endif
89}
90
92{
93#if defined(JPH_USE_SSE)
94 return _mm_castps_si128(_mm_load_ss(reinterpret_cast<const float*>(inV)));
95#elif defined(JPH_USE_NEON)
96 return vsetq_lane_u32(*inV, vdupq_n_u32(0), 0);
97#else
98 return UVec4(*inV, 0, 0, 0);
99#endif
100}
101
103{
104#if defined(JPH_USE_SSE)
105 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(inV));
106#elif defined(JPH_USE_NEON)
107 return vld1q_u32(inV);
108#elif defined(JPH_USE_RVV)
109 UVec4 vector;
110 const vuint32m1_t v = __riscv_vle32_v_u32m1(inV, 4);
111 __riscv_vse32_v_u32m1(vector.mU32, v, 4);
112 return vector;
113#else
114 return UVec4(inV[0], inV[1], inV[2], inV[3]);
115#endif
116}
117
119{
120#if defined(JPH_USE_SSE)
121 return _mm_load_si128(reinterpret_cast<const __m128i *>(inV));
122#elif defined(JPH_USE_NEON)
123 return vld1q_u32(inV); // ARM doesn't make distinction between aligned or not
124#elif defined(JPH_USE_RVV)
125 UVec4 vector;
126 const vuint32m1_t v = __riscv_vle32_v_u32m1(inV, 4);
127 __riscv_vse32_v_u32m1(vector.mU32, v, 4);
128 return vector;
129#else
130 return UVec4(inV[0], inV[1], inV[2], inV[3]);
131#endif
132}
133
// Gather 4 uint32s from memory: lane i = *(byte_ptr(inBase) + inOffsets[i] * Scale).
// Scale is a compile-time byte multiplier applied to each offset, matching the
// semantics of the AVX2 gather instruction used below.
134template <const int Scale>
135UVec4 UVec4::sGatherInt4(const uint32 *inBase, UVec4Arg inOffsets)
136{
137#ifdef JPH_USE_AVX2
	// Hardware gather: the instruction applies Scale to each 32-bit offset itself
138 return _mm_i32gather_epi32(reinterpret_cast<const int *>(inBase), inOffsets.mValue, Scale);
139#elif defined(JPH_USE_RVV)
	// RVV indexed load (vluxei32) takes raw byte offsets, so pre-multiply by Scale first
140 UVec4 v;
141 const vuint32m1_t offsets = __riscv_vle32_v_u32m1(inOffsets.mU32, 4);
142 const vuint32m1_t scaled_offsets = __riscv_vmul_vx_u32m1(offsets, Scale, 4);
143 const vuint32m1_t gathered = __riscv_vluxei32_v_u32m1(inBase, scaled_offsets, 4);
144 __riscv_vse32_v_u32m1(v.mU32, gathered, 4);
145 return v;
146#else
	// Scalar fallback: form each byte address explicitly and load the four lanes one by one
147 const uint8 *base = reinterpret_cast<const uint8 *>(inBase);
148 uint32 x = *reinterpret_cast<const uint32 *>(base + inOffsets.GetX() * Scale);
149 uint32 y = *reinterpret_cast<const uint32 *>(base + inOffsets.GetY() * Scale);
150 uint32 z = *reinterpret_cast<const uint32 *>(base + inOffsets.GetZ() * Scale);
151 uint32 w = *reinterpret_cast<const uint32 *>(base + inOffsets.GetW() * Scale);
152 return UVec4(x, y, z, w);
153#endif
154}
155
157{
158#if defined(JPH_USE_SSE4_1)
159 return _mm_min_epu32(inV1.mValue, inV2.mValue);
160#elif defined(JPH_USE_NEON)
161 return vminq_u32(inV1.mValue, inV2.mValue);
162#elif defined(JPH_USE_RVV)
163 UVec4 res;
164 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(inV1.mU32, 4);
165 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
166 const vuint32m1_t min = __riscv_vminu_vv_u32m1(v1, v2, 4);
167 __riscv_vse32_v_u32m1(res.mU32, min, 4);
168 return res;
169#else
170 UVec4 result;
171 for (int i = 0; i < 4; i++)
172 result.mU32[i] = min(inV1.mU32[i], inV2.mU32[i]);
173 return result;
174#endif
175}
176
178{
179#if defined(JPH_USE_SSE4_1)
180 return _mm_max_epu32(inV1.mValue, inV2.mValue);
181#elif defined(JPH_USE_NEON)
182 return vmaxq_u32(inV1.mValue, inV2.mValue);
183#elif defined(JPH_USE_RVV)
184 UVec4 res;
185 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(inV1.mU32, 4);
186 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
187 const vuint32m1_t max = __riscv_vmaxu_vv_u32m1(v1, v2, 4);
188 __riscv_vse32_v_u32m1(res.mU32, max, 4);
189 return res;
190#else
191 UVec4 result;
192 for (int i = 0; i < 4; i++)
193 result.mU32[i] = max(inV1.mU32[i], inV2.mU32[i]);
194 return result;
195#endif
196}
197
199{
200#if defined(JPH_USE_SSE)
201 return _mm_cmpeq_epi32(inV1.mValue, inV2.mValue);
202#elif defined(JPH_USE_NEON)
203 return vceqq_u32(inV1.mValue, inV2.mValue);
204#elif defined(JPH_USE_RVV)
205 UVec4 res;
206 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(inV1.mU32, 4);
207 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
208 const vbool32_t mask = __riscv_vmseq_vv_u32m1_b32(v1, v2, 4);
209 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 4);
210 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 4);
211 __riscv_vse32_v_u32m1(res.mU32, merged, 4);
212 return res;
213#else
214 return UVec4(inV1.mU32[0] == inV2.mU32[0]? 0xffffffffu : 0,
215 inV1.mU32[1] == inV2.mU32[1]? 0xffffffffu : 0,
216 inV1.mU32[2] == inV2.mU32[2]? 0xffffffffu : 0,
217 inV1.mU32[3] == inV2.mU32[3]? 0xffffffffu : 0);
218#endif
219}
220
// Component-wise select: for each 32-bit lane, returns the inSet lane when the highest
// bit of the corresponding inControl lane is 1, otherwise the inNotSet lane.
// Only bit 31 of each control lane is significant on every path below.
221UVec4 UVec4::sSelect(UVec4Arg inNotSet, UVec4Arg inSet, UVec4Arg inControl)
222{
223#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM) // _mm_blendv_ps has problems on FireFox
	// blendv selects per lane on the sign (top) bit of the control operand directly
224 return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(inNotSet.mValue), _mm_castsi128_ps(inSet.mValue), _mm_castsi128_ps(inControl.mValue)));
225#elif defined(JPH_USE_SSE)
	// SSE2 fallback: arithmetic shift right by 31 smears the sign bit across the whole
	// lane (all-ones or all-zeros), then blend with and/andnot/or
226 __m128 is_set = _mm_castsi128_ps(_mm_srai_epi32(inControl.mValue, 31));
227 return _mm_castps_si128(_mm_or_ps(_mm_and_ps(is_set, _mm_castsi128_ps(inSet.mValue)), _mm_andnot_ps(is_set, _mm_castsi128_ps(inNotSet.mValue))));
228#elif defined(JPH_USE_NEON)
	// vbsl needs a full-width per-lane mask, so replicate the sign bit first via
	// a signed shift right by 31
229 return vbslq_u32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inSet.mValue, inNotSet.mValue);
230#elif defined(JPH_USE_RVV)
231 UVec4 masked;
232 const vuint32m1_t control = __riscv_vle32_v_u32m1(inControl.mU32, 4);
233 const vuint32m1_t not_set = __riscv_vle32_v_u32m1(inNotSet.mU32, 4);
234 const vuint32m1_t set = __riscv_vle32_v_u32m1(inSet.mU32, 4);
235
236 // Generate RVV bool mask from UVec4
	// Isolate bit 31 of each control lane, then turn "nonzero" into an RVV bool mask
	// and merge: mask set -> take 'set' lane, mask clear -> take 'not_set' lane
237 const vuint32m1_t r = __riscv_vand_vx_u32m1(control, 0x80000000u, 4);
238 const vbool32_t rvv_mask = __riscv_vmsne_vx_u32m1_b32(r, 0x0, 4);
239 const vuint32m1_t merged = __riscv_vmerge_vvm_u32m1(not_set, set, rvv_mask, 4);
240 __riscv_vse32_v_u32m1(masked.mU32, merged, 4);
241 return masked;
242#else
	// Scalar fallback: test the high bit of each control lane explicitly
243 UVec4 result;
244 for (int i = 0; i < 4; i++)
245 result.mU32[i] = (inControl.mU32[i] & 0x80000000u) ? inSet.mU32[i] : inNotSet.mU32[i];
246 return result;
247#endif
248}
249
251{
252#if defined(JPH_USE_SSE)
253 return _mm_or_si128(inV1.mValue, inV2.mValue);
254#elif defined(JPH_USE_NEON)
255 return vorrq_u32(inV1.mValue, inV2.mValue);
256#elif defined(JPH_USE_RVV)
257 UVec4 or_result;
258 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(inV1.mU32, 4);
259 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
260 const vuint32m1_t res = __riscv_vor_vv_u32m1(v1, v2, 4);
261 __riscv_vse32_v_u32m1(or_result.mU32, res, 4);
262 return or_result;
263#else
264 return UVec4(inV1.mU32[0] | inV2.mU32[0],
265 inV1.mU32[1] | inV2.mU32[1],
266 inV1.mU32[2] | inV2.mU32[2],
267 inV1.mU32[3] | inV2.mU32[3]);
268#endif
269}
270
272{
273#if defined(JPH_USE_SSE)
274 return _mm_xor_si128(inV1.mValue, inV2.mValue);
275#elif defined(JPH_USE_NEON)
276 return veorq_u32(inV1.mValue, inV2.mValue);
277#elif defined(JPH_USE_RVV)
278 UVec4 xor_result;
279 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(inV1.mU32, 4);
280 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
281 const vuint32m1_t res = __riscv_vxor_vv_u32m1(v1, v2, 4);
282 __riscv_vse32_v_u32m1(xor_result.mU32, res, 4);
283 return xor_result;
284#else
285 return UVec4(inV1.mU32[0] ^ inV2.mU32[0],
286 inV1.mU32[1] ^ inV2.mU32[1],
287 inV1.mU32[2] ^ inV2.mU32[2],
288 inV1.mU32[3] ^ inV2.mU32[3]);
289#endif
290}
291
293{
294#if defined(JPH_USE_SSE)
295 return _mm_and_si128(inV1.mValue, inV2.mValue);
296#elif defined(JPH_USE_NEON)
297 return vandq_u32(inV1.mValue, inV2.mValue);
298#elif defined(JPH_USE_RVV)
299 UVec4 and_result;
300 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(inV1.mU32, 4);
301 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
302 const vuint32m1_t res = __riscv_vand_vv_u32m1(v1, v2, 4);
303 __riscv_vse32_v_u32m1(and_result.mU32, res, 4);
304 return and_result;
305#else
306 return UVec4(inV1.mU32[0] & inV2.mU32[0],
307 inV1.mU32[1] & inV2.mU32[1],
308 inV1.mU32[2] & inV2.mU32[2],
309 inV1.mU32[3] & inV2.mU32[3]);
310#endif
311}
312
313
315{
316#if defined(JPH_USE_AVX512)
317 return _mm_ternarylogic_epi32(inV1.mValue, inV1.mValue, inV1.mValue, 0b01010101);
318#elif defined(JPH_USE_SSE)
319 return sXor(inV1, sReplicate(0xffffffff));
320#elif defined(JPH_USE_NEON)
321 return vmvnq_u32(inV1.mValue);
322#elif defined(JPH_USE_RVV)
323 UVec4 v;
324 const vuint32m1_t src = __riscv_vle32_v_u32m1(inV1.mU32, 4);
325 const vuint32m1_t rvv_not = __riscv_vxor_vx_u32m1(src, -1, 4);
326 __riscv_vse32_v_u32m1(v.mU32, rvv_not, 4);
327 return v;
328#else
329 return UVec4(~inV1.mU32[0], ~inV1.mU32[1], ~inV1.mU32[2], ~inV1.mU32[3]);
330#endif
331}
332
334{
335 // If inValue.z is false then shift W to Z
336 UVec4 v = UVec4::sSelect(inIndex.Swizzle<SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W, SWIZZLE_W>(), inIndex, inValue.SplatZ());
337
338 // If inValue.y is false then shift Z and further to Y and further
340
341 // If inValue.x is false then shift X and further to Y and further
343
344 return v;
345}
346
348{
349#if defined(JPH_USE_SSE4_1)
350 return _mm_mullo_epi32(mValue, inV2.mValue);
351#elif defined(JPH_USE_NEON)
352 return vmulq_u32(mValue, inV2.mValue);
353#elif defined(JPH_USE_RVV)
354 UVec4 res;
355 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(mU32, 4);
356 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
357 const vuint32m1_t mul = __riscv_vmul_vv_u32m1(v1, v2, 4);
358 __riscv_vse32_v_u32m1(res.mU32, mul, 4);
359 return res;
360#else
361 UVec4 result;
362 for (int i = 0; i < 4; i++)
363 result.mU32[i] = mU32[i] * inV2.mU32[i];
364 return result;
365#endif
366}
367
369{
370#if defined(JPH_USE_SSE)
371 return _mm_add_epi32(mValue, inV2.mValue);
372#elif defined(JPH_USE_NEON)
373 return vaddq_u32(mValue, inV2.mValue);
374#elif defined(JPH_USE_RVV)
375 UVec4 res;
376 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(mU32, 4);
377 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
378 const vuint32m1_t rvv_add = __riscv_vadd_vv_u32m1(v1, v2, 4);
379 __riscv_vse32_v_u32m1(res.mU32, rvv_add, 4);
380 return res;
381#else
382 return UVec4(mU32[0] + inV2.mU32[0],
383 mU32[1] + inV2.mU32[1],
384 mU32[2] + inV2.mU32[2],
385 mU32[3] + inV2.mU32[3]);
386#endif
387}
388
390{
391#if defined(JPH_USE_SSE)
392 mValue = _mm_add_epi32(mValue, inV2.mValue);
393#elif defined(JPH_USE_NEON)
394 mValue = vaddq_u32(mValue, inV2.mValue);
395#elif defined(JPH_USE_RVV)
396 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(mU32, 4);
397 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
398 const vuint32m1_t rvv_add = __riscv_vadd_vv_u32m1(v1, v2, 4);
399 __riscv_vse32_v_u32m1(mU32, rvv_add, 4);
400#else
401 for (int i = 0; i < 4; ++i)
402 mU32[i] += inV2.mU32[i];
403#endif
404 return *this;
405}
406
408{
409#if defined(JPH_USE_SSE)
410 return _mm_sub_epi32(mValue, inV2.mValue);
411#elif defined(JPH_USE_NEON)
412 return vsubq_u32(mValue, inV2.mValue);
413#elif defined(JPH_USE_RVV)
414 UVec4 res;
415 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(mU32, 4);
416 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
417 const vuint32m1_t rvv_add = __riscv_vsub_vv_u32m1(v1, v2, 4);
418 __riscv_vse32_v_u32m1(res.mU32, rvv_add, 4);
419 return res;
420#else
421 return UVec4(mU32[0] - inV2.mU32[0],
422 mU32[1] - inV2.mU32[1],
423 mU32[2] - inV2.mU32[2],
424 mU32[3] - inV2.mU32[3]);
425#endif
426}
427
429{
430#if defined(JPH_USE_SSE)
431 mValue = _mm_sub_epi32(mValue, inV2.mValue);
432#elif defined(JPH_USE_NEON)
433 mValue = vsubq_u32(mValue, inV2.mValue);
434#elif defined(JPH_USE_RVV)
435 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(mU32, 4);
436 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
437 const vuint32m1_t rvv_sub = __riscv_vsub_vv_u32m1(v1, v2, 4);
438 __riscv_vse32_v_u32m1(mU32, rvv_sub, 4);
439#else
440 for (int i = 0; i < 4; ++i)
441 mU32[i] -= inV2.mU32[i];
442#endif
443 return *this;
444}
445
447{
448#if defined(JPH_USE_SSE)
449 return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(0, 0, 0, 0));
450#elif defined(JPH_USE_NEON)
451 return vdupq_laneq_u32(mValue, 0);
452#elif defined(JPH_USE_RVV)
453 UVec4 vec;
454 const vuint32m1_t splat = __riscv_vmv_v_x_u32m1(mU32[0], 4);
455 __riscv_vse32_v_u32m1(vec.mU32, splat, 4);
456 return vec;
457#else
458 return UVec4(mU32[0], mU32[0], mU32[0], mU32[0]);
459#endif
460}
461
463{
464#if defined(JPH_USE_SSE)
465 return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(1, 1, 1, 1));
466#elif defined(JPH_USE_NEON)
467 return vdupq_laneq_u32(mValue, 1);
468#elif defined(JPH_USE_RVV)
469 UVec4 vec;
470 const vuint32m1_t splat = __riscv_vmv_v_x_u32m1(mU32[1], 4);
471 __riscv_vse32_v_u32m1(vec.mU32, splat, 4);
472 return vec;
473#else
474 return UVec4(mU32[1], mU32[1], mU32[1], mU32[1]);
475#endif
476}
477
479{
480#if defined(JPH_USE_SSE)
481 return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(2, 2, 2, 2));
482#elif defined(JPH_USE_NEON)
483 return vdupq_laneq_u32(mValue, 2);
484#elif defined(JPH_USE_RVV)
485 UVec4 vec;
486 const vuint32m1_t splat = __riscv_vmv_v_x_u32m1(mU32[2], 4);
487 __riscv_vse32_v_u32m1(vec.mU32, splat, 4);
488 return vec;
489#else
490 return UVec4(mU32[2], mU32[2], mU32[2], mU32[2]);
491#endif
492}
493
495{
496#if defined(JPH_USE_SSE)
497 return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(3, 3, 3, 3));
498#elif defined(JPH_USE_NEON)
499 return vdupq_laneq_u32(mValue, 3);
500#elif defined(JPH_USE_RVV)
501 UVec4 vec;
502 const vuint32m1_t splat = __riscv_vmv_v_x_u32m1(mU32[3], 4);
503 __riscv_vse32_v_u32m1(vec.mU32, splat, 4);
504 return vec;
505#else
506 return UVec4(mU32[3], mU32[3], mU32[3], mU32[3]);
507#endif
508}
509
511{
512#if defined(JPH_USE_SSE)
513 return _mm_cvtepi32_ps(mValue);
514#elif defined(JPH_USE_NEON)
515 return vcvtq_f32_u32(mValue);
516#elif defined(JPH_USE_RVV)
517 Vec4 res;
518 const vuint32m1_t v = __riscv_vle32_v_u32m1(mU32, 4);
519 const vfloat32m1_t v_float = __riscv_vfcvt_f_xu_v_f32m1(v, 4);
520 __riscv_vse32_v_f32m1(res.mF32, v_float, 4);
521 return res;
522#else
523 return Vec4((float)mU32[0], (float)mU32[1], (float)mU32[2], (float)mU32[3]);
524#endif
525}
526
528{
529#if defined(JPH_USE_SSE)
530 return Vec4(_mm_castsi128_ps(mValue));
531#elif defined(JPH_USE_NEON)
532 return vreinterpretq_f32_u32(mValue);
533#else
534 return *reinterpret_cast<const Vec4 *>(this);
535#endif
536}
537
539{
540#if defined(JPH_USE_SSE4_1)
541 __m128i mul = _mm_mullo_epi32(mValue, inV2.mValue);
542 __m128i sum = _mm_add_epi32(mul, _mm_shuffle_epi32(mul, _MM_SHUFFLE(2, 3, 0, 1)));
543 return _mm_add_epi32(sum, _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)));
544#elif defined(JPH_USE_NEON)
545 uint32x4_t mul = vmulq_u32(mValue, inV2.mValue);
546 return vdupq_n_u32(vaddvq_u32(mul));
547#elif defined(JPH_USE_RVV)
548 UVec4 res;
549 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0, 4);
550 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(mU32, 4);
551 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
552 const vuint32m1_t mul = __riscv_vmul_vv_u32m1(v1, v2, 4);
553 const vuint32m1_t sum = __riscv_vredsum_vs_u32m1_u32m1(mul, zeros, 4);
554 const vuint32m1_t splat = __riscv_vrgather_vx_u32m1(sum, 0, 4);
555 __riscv_vse32_v_u32m1(res.mU32, splat, 4);
556 return res;
557
558#else
559 return UVec4::sReplicate(mU32[0] * inV2.mU32[0] + mU32[1] * inV2.mU32[1] + mU32[2] * inV2.mU32[2] + mU32[3] * inV2.mU32[3]);
560#endif
561}
562
564{
565#if defined(JPH_USE_SSE4_1)
566 __m128i mul = _mm_mullo_epi32(mValue, inV2.mValue);
567 __m128i sum = _mm_add_epi32(mul, _mm_shuffle_epi32(mul, _MM_SHUFFLE(2, 3, 0, 1)));
568 return _mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2))));
569#elif defined(JPH_USE_NEON)
570 uint32x4_t mul = vmulq_u32(mValue, inV2.mValue);
571 return vaddvq_u32(mul);
572#elif defined(JPH_USE_RVV)
573 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0, 4);
574 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(mU32, 4);
575 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(inV2.mU32, 4);
576 const vuint32m1_t mul = __riscv_vmul_vv_u32m1(v1, v2, 4);
577 const vuint32m1_t sum = __riscv_vredsum_vs_u32m1_u32m1(mul, zeros, 4);
578 return __riscv_vmv_x_s_u32m1_u32(sum);
579#else
580 return mU32[0] * inV2.mU32[0] + mU32[1] * inV2.mU32[1] + mU32[2] * inV2.mU32[2] + mU32[3] * inV2.mU32[3];
581#endif
582}
583
584void UVec4::StoreInt4(uint32 *outV) const
585{
586#if defined(JPH_USE_SSE)
587 _mm_storeu_si128(reinterpret_cast<__m128i *>(outV), mValue);
588#elif defined(JPH_USE_NEON)
589 vst1q_u32(outV, mValue);
590#elif defined(JPH_USE_RVV)
591 const vuint32m1_t v = __riscv_vle32_v_u32m1(mU32, 4);
592 __riscv_vse32_v_u32m1(outV, v, 4);
593#else
594 for (int i = 0; i < 4; ++i)
595 outV[i] = mU32[i];
596#endif
597}
598
600{
601#if defined(JPH_USE_SSE)
602 _mm_store_si128(reinterpret_cast<__m128i *>(outV), mValue);
603#elif defined(JPH_USE_NEON)
604 vst1q_u32(outV, mValue); // ARM doesn't make distinction between aligned or not
605#elif defined(JPH_USE_RVV)
606 const vuint32m1_t v = __riscv_vle32_v_u32m1(mU32, 4);
607 __riscv_vse32_v_u32m1(outV, v, 4);
608#else
609 for (int i = 0; i < 4; ++i)
610 outV[i] = mU32[i];
611#endif
612}
613
615{
616#if defined(JPH_USE_SSE)
617 return CountBits(_mm_movemask_ps(_mm_castsi128_ps(mValue)));
618#elif defined(JPH_USE_NEON)
619 return vaddvq_u32(vshrq_n_u32(mValue, 31));
620#elif defined(JPH_USE_RVV)
621 const vuint32m1_t src = __riscv_vle32_v_u32m1(mU32, 4);
622 const vuint32m1_t filter = __riscv_vand_vx_u32m1(src, 0x80000000, 4);
623 const vbool32_t mask = __riscv_vmsne_vx_u32m1_b32(filter, 0, 4);
624 return __riscv_vcpop_m_b32(mask, 4);
625#else
626 return (mU32[0] >> 31) + (mU32[1] >> 31) + (mU32[2] >> 31) + (mU32[3] >> 31);
627#endif
628}
629
631{
632#if defined(JPH_USE_SSE)
633 return _mm_movemask_ps(_mm_castsi128_ps(mValue));
634#elif defined(JPH_USE_NEON)
635 int32x4_t shift = JPH_NEON_INT32x4(0, 1, 2, 3);
636 return vaddvq_u32(vshlq_u32(vshrq_n_u32(mValue, 31), shift));
637#elif defined(JPH_USE_RVV)
638 const vuint32m1_t src = __riscv_vle32_v_u32m1(mU32, 4);
639 const vbool32_t mask = __riscv_vmsgeu_vx_u32m1_b32(src, 0x80000000, 4);
640 const vuint32m1_t as_int = __riscv_vreinterpret_v_b32_u32m1(mask);
641 const uint32 result = __riscv_vmv_x_s_u32m1_u32(as_int) & 0xF;
642 return result;
643#else
644 return (mU32[0] >> 31) | ((mU32[1] >> 31) << 1) | ((mU32[2] >> 31) << 2) | ((mU32[3] >> 31) << 3);
645#endif
646}
647
649{
650 return GetTrues() != 0;
651}
652
654{
655 return (GetTrues() & 0b111) != 0;
656}
657
659{
660 return GetTrues() == 0b1111;
661}
662
664{
665 return (GetTrues() & 0b111) == 0b111;
666}
667
668template <const uint Count>
670{
671 static_assert(Count <= 31, "Invalid shift");
672
673#if defined(JPH_USE_SSE)
674 return _mm_slli_epi32(mValue, Count);
675#elif defined(JPH_USE_NEON)
676 return vshlq_n_u32(mValue, Count);
677#elif defined(JPH_USE_RVV)
678 const vuint32m1_t v = __riscv_vle32_v_u32m1(mU32, 4);
679 const vuint32m1_t shifted = __riscv_vsll_vx_u32m1(v, Count, 4);
680
681 UVec4 vec;
682 __riscv_vse32_v_u32m1(vec.mU32, shifted, 4);
683 return vec;
684#else
685 return UVec4(mU32[0] << Count, mU32[1] << Count, mU32[2] << Count, mU32[3] << Count);
686#endif
687}
688
689template <const uint Count>
691{
692 static_assert(Count <= 31, "Invalid shift");
693
694#if defined(JPH_USE_SSE)
695 return _mm_srli_epi32(mValue, Count);
696#elif defined(JPH_USE_NEON)
697 return vshrq_n_u32(mValue, Count);
698#elif defined(JPH_USE_RVV)
699 const vuint32m1_t v = __riscv_vle32_v_u32m1(mU32, 4);
700 const vuint32m1_t shifted = __riscv_vsrl_vx_u32m1(v, Count, 4);
701
702 UVec4 vec;
703 __riscv_vse32_v_u32m1(vec.mU32, shifted, 4);
704 return vec;
705#else
706 return UVec4(mU32[0] >> Count, mU32[1] >> Count, mU32[2] >> Count, mU32[3] >> Count);
707#endif
708}
709
710template <const uint Count>
712{
713 static_assert(Count <= 31, "Invalid shift");
714
715#if defined(JPH_USE_SSE)
716 return _mm_srai_epi32(mValue, Count);
717#elif defined(JPH_USE_NEON)
718 return vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(mValue), Count));
719#elif defined(JPH_USE_RVV)
720 const vint32m1_t v = __riscv_vle32_v_i32m1(reinterpret_cast<const int32 *>(mU32), 4);
721 const vint32m1_t shifted = __riscv_vsra_vx_i32m1(v, Count, 4);
722
723 UVec4 vec;
724 __riscv_vse32_v_i32m1(reinterpret_cast<int32 *>(vec.mU32), shifted, 4);
725 return vec;
726#else
727 return UVec4(uint32(int32(mU32[0]) >> Count),
728 uint32(int32(mU32[1]) >> Count),
729 uint32(int32(mU32[2]) >> Count),
730 uint32(int32(mU32[3]) >> Count));
731#endif
732}
733
735{
736#if defined(JPH_USE_SSE)
737 return _mm_unpacklo_epi16(mValue, _mm_castps_si128(_mm_setzero_ps()));
738#elif defined(JPH_USE_NEON)
739 uint16x4_t value = vget_low_u16(vreinterpretq_u16_u32(mValue));
740 uint16x4_t zero = vdup_n_u16(0);
741 return vreinterpretq_u32_u16(vcombine_u16(vzip1_u16(value, zero), vzip2_u16(value, zero)));
742#elif defined(JPH_USE_RVV)
743 const vuint16mf2_t v = __riscv_vle16_v_u16mf2(reinterpret_cast<const uint16 *>(mU32), 4);
744 const vuint32m1_t zext = __riscv_vzext_vf2_u32m1(v, 4);
745
746 UVec4 res;
747 __riscv_vse32_v_u32m1(res.mU32, zext, 4);
748 return res;
749#else
750 return UVec4(mU32[0] & 0xffff,
751 (mU32[0] >> 16) & 0xffff,
752 mU32[1] & 0xffff,
753 (mU32[1] >> 16) & 0xffff);
754#endif
755}
756
758{
759#if defined(JPH_USE_SSE)
760 return _mm_unpackhi_epi16(mValue, _mm_castps_si128(_mm_setzero_ps()));
761#elif defined(JPH_USE_NEON)
762 uint16x4_t value = vget_high_u16(vreinterpretq_u16_u32(mValue));
763 uint16x4_t zero = vdup_n_u16(0);
764 return vreinterpretq_u32_u16(vcombine_u16(vzip1_u16(value, zero), vzip2_u16(value, zero)));
765#elif defined(JPH_USE_RVV)
766 const vuint16mf2_t v = __riscv_vle16_v_u16mf2(reinterpret_cast<const uint16 *>(&mU32[2]), 4);
767 const vuint32m1_t zext = __riscv_vzext_vf2_u32m1(v, 4);
768
769 UVec4 res;
770 __riscv_vse32_v_u32m1(res.mU32, zext, 4);
771 return res;
772#else
773 return UVec4(mU32[2] & 0xffff,
774 (mU32[2] >> 16) & 0xffff,
775 mU32[3] & 0xffff,
776 (mU32[3] >> 16) & 0xffff);
777#endif
778}
779
781{
782#if defined(JPH_USE_SSE4_1)
783 return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff03), int(0xffffff02), int(0xffffff01), int(0xffffff00)));
784#elif defined(JPH_USE_NEON)
785 uint8x16_t idx = JPH_NEON_UINT8x16(0x00, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, 0x7f, 0x7f, 0x02, 0x7f, 0x7f, 0x7f, 0x03, 0x7f, 0x7f, 0x7f);
786 return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
787#elif defined(JPH_USE_RVV)
788 const vuint8mf4_t v = __riscv_vle8_v_u8mf4(reinterpret_cast<const uint8 *>(mU32), 4);
789 const vuint32m1_t zext = __riscv_vzext_vf4_u32m1(v, 4);
790
791 UVec4 res;
792 __riscv_vse32_v_u32m1(res.mU32, zext, 4);
793 return res;
794#else
795 UVec4 result;
796 for (int i = 0; i < 4; i++)
797 result.mU32[i] = (mU32[0] >> (i * 8)) & 0xff;
798 return result;
799#endif
800}
801
803{
804#if defined(JPH_USE_SSE4_1)
805 return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff07), int(0xffffff06), int(0xffffff05), int(0xffffff04)));
806#elif defined(JPH_USE_NEON)
807 uint8x16_t idx = JPH_NEON_UINT8x16(0x04, 0x7f, 0x7f, 0x7f, 0x05, 0x7f, 0x7f, 0x7f, 0x06, 0x7f, 0x7f, 0x7f, 0x07, 0x7f, 0x7f, 0x7f);
808 return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
809#elif defined(JPH_USE_RVV)
810 const vuint8mf4_t v = __riscv_vle8_v_u8mf4(reinterpret_cast<const uint8 *>(&mU32[1]), 4);
811 const vuint32m1_t zext = __riscv_vzext_vf4_u32m1(v, 4);
812
813 UVec4 res;
814 __riscv_vse32_v_u32m1(res.mU32, zext, 4);
815 return res;
816#else
817 UVec4 result;
818 for (int i = 0; i < 4; i++)
819 result.mU32[i] = (mU32[1] >> (i * 8)) & 0xff;
820 return result;
821#endif
822}
823
825{
826#if defined(JPH_USE_SSE4_1)
827 return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff0b), int(0xffffff0a), int(0xffffff09), int(0xffffff08)));
828#elif defined(JPH_USE_NEON)
829 uint8x16_t idx = JPH_NEON_UINT8x16(0x08, 0x7f, 0x7f, 0x7f, 0x09, 0x7f, 0x7f, 0x7f, 0x0a, 0x7f, 0x7f, 0x7f, 0x0b, 0x7f, 0x7f, 0x7f);
830 return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
831#elif defined(JPH_USE_RVV)
832 const vuint8mf4_t v = __riscv_vle8_v_u8mf4(reinterpret_cast<const uint8 *>(&mU32[2]), 4);
833 const vuint32m1_t zext = __riscv_vzext_vf4_u32m1(v, 4);
834
835 UVec4 res;
836 __riscv_vse32_v_u32m1(res.mU32, zext, 4);
837 return res;
838#else
839 UVec4 result;
840 for (int i = 0; i < 4; i++)
841 result.mU32[i] = (mU32[2] >> (i * 8)) & 0xff;
842 return result;
843#endif
844}
845
847{
848#if defined(JPH_USE_SSE4_1)
849 return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff0f), int(0xffffff0e), int(0xffffff0d), int(0xffffff0c)));
850#elif defined(JPH_USE_NEON)
851 uint8x16_t idx = JPH_NEON_UINT8x16(0x0c, 0x7f, 0x7f, 0x7f, 0x0d, 0x7f, 0x7f, 0x7f, 0x0e, 0x7f, 0x7f, 0x7f, 0x0f, 0x7f, 0x7f, 0x7f);
852 return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
853#elif defined(JPH_USE_RVV)
854 const vuint8mf4_t v = __riscv_vle8_v_u8mf4(reinterpret_cast<const uint8 *>(&mU32[3]), 4);
855 const vuint32m1_t zext = __riscv_vzext_vf4_u32m1(v, 4);
856
857 UVec4 res;
858 __riscv_vse32_v_u32m1(res.mU32, zext, 4);
859 return res;
860#else
861 UVec4 result;
862 for (int i = 0; i < 4; i++)
863 result.mU32[i] = (mU32[3] >> (i * 8)) & 0xff;
864 return result;
865#endif
866}
867
869{
870#if defined(JPH_USE_SSE4_1) || defined(JPH_USE_NEON)
871 alignas(UVec4) static constexpr uint32 sFourMinusXShuffle[5][4] =
872 {
873 { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
874 { 0x0f0e0d0c, 0xffffffff, 0xffffffff, 0xffffffff },
875 { 0x0b0a0908, 0x0f0e0d0c, 0xffffffff, 0xffffffff },
876 { 0x07060504, 0x0b0a0908, 0x0f0e0d0c, 0xffffffff },
877 { 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c }
878 };
879#endif
880
881#if defined(JPH_USE_SSE4_1)
882 return _mm_shuffle_epi8(mValue, *reinterpret_cast<const UVec4::Type *>(sFourMinusXShuffle[inCount]));
883#elif defined(JPH_USE_NEON)
884 uint8x16_t idx = vreinterpretq_u8_u32(*reinterpret_cast<const UVec4::Type *>(sFourMinusXShuffle[inCount]));
885 return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
886#elif defined(JPH_USE_RVV)
887 const uint32 *start_ptr = mU32 + (4 - inCount);
888 const vuint32m1_t v = __riscv_vle32_v_u32m1(start_ptr, inCount);
889
890 UVec4 res = sZero();
891 __riscv_vse32_v_u32m1(res.mU32, v, inCount);
892 return res;
893#else
894 UVec4 result = UVec4::sZero();
895 for (int i = 0; i < inCount; i++)
896 result.mU32[i] = mU32[i + 4 - inCount];
897 return result;
898#endif
899}
900
std::uint8_t uint8
Definition Core.h:506
std::int32_t int32
Definition Core.h:509
std::uint64_t uint64
Definition Core.h:510
#define JPH_NAMESPACE_END
Definition Core.h:428
std::uint32_t uint32
Definition Core.h:508
#define JPH_NAMESPACE_BEGIN
Definition Core.h:422
std::uint16_t uint16
Definition Core.h:507
#define xy
Definition HLSLToCPP.h:511
uint CountBits(uint32 inValue)
Count the number of 1 bits in a value.
Definition Math.h:164
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_W
Use the W component.
Definition Swizzle.h:15
@ SWIZZLE_X
Use the X component.
Definition Swizzle.h:12
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Definition UVec4.h:12
JPH_INLINE UVec4 operator-(UVec4Arg inV2) const
Subtract two integer vectors (component wise)
Definition UVec4.inl:407
JPH_INLINE UVec4 Swizzle() const
Swizzle the elements in inV.
static JPH_INLINE UVec4 sNot(UVec4Arg inV1)
Logical not (component wise)
Definition UVec4.inl:314
JPH_INLINE uint32 GetZ() const
Definition UVec4.h:104
static JPH_INLINE UVec4 sMin(UVec4Arg inV1, UVec4Arg inV2)
Return the minimum value of each of the components.
Definition UVec4.inl:156
JPH_INLINE UVec4 LogicalShiftLeft() const
Shift all components by Count bits to the left (filling with zeros from the right)
JPH_INLINE int CountTrues() const
Count the number of components that are true (true is when highest bit of component is set)
Definition UVec4.inl:614
JPH_INLINE UVec4 & operator-=(UVec4Arg inV2)
Subtract two integer vectors (component wise)
Definition UVec4.inl:428
JPH_INLINE UVec4 SplatY() const
Replicate the Y component to all components.
Definition UVec4.inl:462
static JPH_INLINE UVec4 sSelect(UVec4Arg inNotSet, UVec4Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit of inControl = 1.
Definition UVec4.inl:221
static JPH_INLINE UVec4 sLoadInt(const uint32 *inV)
Load 1 int from memory and place it in the X component, zeros Y, Z and W.
Definition UVec4.inl:91
JPH_INLINE UVec4 Expand4Uint16Lo() const
Takes the lower 4 16 bits and expands them to X, Y, Z and W.
Definition UVec4.inl:734
static JPH_INLINE UVec4 sSort4True(UVec4Arg inValue, UVec4Arg inIndex)
Definition UVec4.inl:333
JPH_INLINE UVec4 operator+(UVec4Arg inV2) const
Add two integer vectors (component wise)
Definition UVec4.inl:368
JPH_INLINE uint32 GetY() const
Definition UVec4.h:103
JPH_INLINE UVec4 LogicalShiftRight() const
Shift all components by Count bits to the right (filling with zeros from the left)
static JPH_INLINE UVec4 sReplicate(uint32 inV)
Replicate int inV across all components.
Definition UVec4.inl:75
JPH_INLINE UVec4 SplatX() const
Replicate the X component to all components.
Definition UVec4.inl:446
JPH_INLINE UVec4 Expand4Byte4() const
Takes byte 4 .. 7 and expands them to X, Y, Z and W.
Definition UVec4.inl:802
JPH_INLINE bool TestAllTrue() const
Test if all components are true (true is when highest bit of component is set)
Definition UVec4.inl:658
JPH_INLINE UVec4 Expand4Byte0() const
Takes byte 0 .. 3 and expands them to X, Y, Z and W.
Definition UVec4.inl:780
JPH_INLINE int GetTrues() const
Store if X is true in bit 0, Y in bit 1, Z in bit 2 and W in bit 3 (true is when highest bit of component is set).
Definition UVec4.inl:630
JPH_INLINE bool TestAnyXYZTrue() const
Test if any of X, Y or Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:653
JPH_INLINE UVec4 & operator+=(UVec4Arg inV2)
Add two integer vectors (component wise)
Definition UVec4.inl:389
static JPH_INLINE UVec4 sGatherInt4(const uint32 *inBase, UVec4Arg inOffsets)
Gather 4 ints from memory at inBase + inOffsets[i] * Scale.
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:292
static JPH_INLINE UVec4 sEquals(UVec4Arg inV1, UVec4Arg inV2)
Equals (component wise)
Definition UVec4.inl:198
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:250
struct { uint32 mData[4];} Type
Definition UVec4.h:22
JPH_INLINE uint32 GetW() const
Definition UVec4.h:105
JPH_INLINE bool TestAllXYZTrue() const
Test if X, Y and Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:663
JPH_INLINE UVec4 ShiftComponents4Minus(int inCount) const
Shift vector components by 4 - Count floats to the left, so if Count = 1 the resulting vector is (W, 0, 0, 0).
Definition UVec4.inl:868
JPH_INLINE bool operator==(UVec4Arg inV2) const
Comparison.
Definition UVec4.inl:29
static JPH_INLINE UVec4 sMax(UVec4Arg inV1, UVec4Arg inV2)
Return the maximum of each of the components.
Definition UVec4.inl:177
JPH_INLINE UVec4 SplatZ() const
Replicate the Z component to all components.
Definition UVec4.inl:478
Type mValue
Definition UVec4.h:223
JPH_INLINE UVec4 SplatW() const
Replicate the W component to all components.
Definition UVec4.inl:494
JPH_INLINE void StoreInt4(uint32 *outV) const
Store 4 ints to memory.
Definition UVec4.inl:584
JPH_INLINE uint32 GetX() const
Get individual components.
Definition UVec4.h:102
JPH_INLINE UVec4 Expand4Byte8() const
Takes byte 8 .. 11 and expands them to X, Y, Z and W.
Definition UVec4.inl:824
static JPH_INLINE UVec4 sLoadInt4Aligned(const uint32 *inV)
Load 4 ints from memory, aligned to 16 bytes.
Definition UVec4.inl:118
static JPH_INLINE UVec4 sLoadInt4(const uint32 *inV)
Load 4 ints from memory.
Definition UVec4.inl:102
JPH_INLINE UVec4 Expand4Byte12() const
Takes byte 12 .. 15 and expands them to X, Y, Z and W.
Definition UVec4.inl:846
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:271
JPH_INLINE UVec4 Expand4Uint16Hi() const
Takes the upper 4 16 bits and expands them to X, Y, Z and W.
Definition UVec4.inl:757
static JPH_INLINE UVec4 sZero()
Vector with all zeros.
Definition UVec4.inl:59
JPH_INLINE uint32 Dot(UVec4Arg inV2) const
Dot product.
Definition UVec4.inl:563
JPH_INLINE UVec4 DotV(UVec4Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition UVec4.inl:538
JPH_INLINE UVec4 ArithmeticShiftRight() const
Shift all components by Count bits to the right (shifting in the value of the highest bit)
UVec4()=default
Constructor.
JPH_INLINE UVec4 operator*(UVec4Arg inV2) const
Component wise multiplication of two integer vectors (stores low 32 bits of result only)
Definition UVec4.inl:347
JPH_INLINE Vec4 ToFloat() const
Convert each component from an int to a float.
Definition UVec4.inl:510
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:527
JPH_INLINE void StoreInt4Aligned(uint32 *outV) const
Store 4 ints to memory, aligned to 16 bytes.
Definition UVec4.inl:599
JPH_INLINE bool TestAnyTrue() const
Test if any of the components are true (true is when highest bit of component is set)
Definition UVec4.inl:648
uint32 mU32[4]
Definition UVec4.h:224
Definition Vec4.h:14
float mF32[4]
Definition Vec4.h:312