19#if defined(JPH_USE_SSE4_1)
20 mValue = _mm_blend_ps(inRHS.
mValue, _mm_set1_ps(inW), 8);
21#elif defined(JPH_USE_NEON)
23#elif defined(JPH_USE_RVV)
24 const vfloat32m1_t v = __riscv_vle32_v_f32m1(inRHS.
mF32, 4);
25 __riscv_vse32_v_f32m1(
mF32, v, 4);
28 for (
int i = 0; i < 3; i++)
36#if defined(JPH_USE_SSE)
37 mValue = _mm_set_ps(inW, inZ, inY, inX);
38#elif defined(JPH_USE_NEON)
41 mValue = vreinterpretq_f32_u32(vcombine_u32(
xy, zw));
42#elif defined(JPH_USE_RVV)
43 vfloat32m1_t v = __riscv_vfmv_v_f_f32m1(inW, 4);
44 v = __riscv_vfslide1up_vf_f32m1(v, inZ, 4);
45 v = __riscv_vfslide1up_vf_f32m1(v, inY, 4);
46 v = __riscv_vfslide1up_vf_f32m1(v, inX, 4);
47 __riscv_vse32_v_f32m1(
mF32, v, 4);
56template<u
int32 SwizzleX, u
int32 SwizzleY, u
int32 SwizzleZ, u
int32 SwizzleW>
59 static_assert(SwizzleX <= 3,
"SwizzleX template parameter out of range");
60 static_assert(SwizzleY <= 3,
"SwizzleY template parameter out of range");
61 static_assert(SwizzleZ <= 3,
"SwizzleZ template parameter out of range");
62 static_assert(SwizzleW <= 3,
"SwizzleW template parameter out of range");
64#if defined(JPH_USE_SSE)
65 return _mm_shuffle_ps(
mValue,
mValue, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
66#elif defined(JPH_USE_NEON)
67 return JPH_NEON_SHUFFLE_F32x4(
mValue,
mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
68#elif defined(JPH_USE_RVV)
70 const vfloat32m1_t data = __riscv_vle32_v_f32m1(
mF32, 4);
71 const uint32 stored_indices[4] = { SwizzleX, SwizzleY, SwizzleZ, SwizzleW };
72 const vuint32m1_t index = __riscv_vle32_v_u32m1(stored_indices, 4);
73 const vfloat32m1_t swizzled = __riscv_vrgather_vv_f32m1(data, index, 4);
74 __riscv_vse32_v_f32m1(v.
mF32, swizzled, 4);
83#if defined(JPH_USE_SSE)
84 return _mm_setzero_ps();
85#elif defined(JPH_USE_NEON)
86 return vdupq_n_f32(0);
87#elif defined(JPH_USE_RVV)
89 const vfloat32m1_t zero_vec = __riscv_vfmv_v_f_f32m1(0.0f, 4);
90 __riscv_vse32_v_f32m1(v.
mF32, zero_vec, 4);
93 return Vec4(0, 0, 0, 0);
99#if defined(JPH_USE_SSE)
100 return _mm_set1_ps(inV);
101#elif defined(JPH_USE_NEON)
102 return vdupq_n_f32(inV);
103#elif defined(JPH_USE_RVV)
105 const vfloat32m1_t v = __riscv_vfmv_v_f_f32m1(inV, 4);
106 __riscv_vse32_v_f32m1(vec.
mF32, v, 4);
109 return Vec4(inV, inV, inV, inV);
120 return sReplicate(numeric_limits<float>::quiet_NaN());
125#if defined(JPH_USE_SSE)
126 return _mm_loadu_ps(&inV->
x);
127#elif defined(JPH_USE_NEON)
128 return vld1q_f32(&inV->
x);
129#elif defined(JPH_USE_RVV)
131 const vfloat32m1_t v = __riscv_vle32_v_f32m1(&inV->
x, 4);
132 __riscv_vse32_v_f32m1(vector.
mF32, v, 4);
135 return Vec4(inV->
x, inV->
y, inV->
z, inV->
w);
141#if defined(JPH_USE_SSE)
142 return _mm_load_ps(&inV->
x);
143#elif defined(JPH_USE_NEON)
144 return vld1q_f32(&inV->
x);
145#elif defined(JPH_USE_RVV)
147 vfloat32m1_t v = __riscv_vle32_v_f32m1(&inV->
x, 4);
148 __riscv_vse32_v_f32m1(vector.
mF32, v, 4);
151 return Vec4(inV->
x, inV->
y, inV->
z, inV->
w);
155template <const
int Scale>
158#if defined(JPH_USE_SSE)
160 return _mm_i32gather_ps(inBase, inOffsets.
mValue, Scale);
162 const uint8 *base =
reinterpret_cast<const uint8 *
>(inBase);
163 Type x = _mm_load_ss(
reinterpret_cast<const float *
>(base + inOffsets.
GetX() * Scale));
164 Type y = _mm_load_ss(
reinterpret_cast<const float *
>(base + inOffsets.
GetY() * Scale));
165 Type xy = _mm_unpacklo_ps(x, y);
166 Type z = _mm_load_ss(
reinterpret_cast<const float *
>(base + inOffsets.
GetZ() * Scale));
167 Type w = _mm_load_ss(
reinterpret_cast<const float *
>(base + inOffsets.
GetW() * Scale));
168 Type zw = _mm_unpacklo_ps(z, w);
169 return _mm_movelh_ps(
xy, zw);
171#elif defined(JPH_USE_RVV)
173 const vuint32m1_t offsets = __riscv_vle32_v_u32m1(inOffsets.
mU32, 4);
174 const vuint32m1_t scaled_offsets = __riscv_vmul_vx_u32m1(offsets, Scale, 4);
175 const vfloat32m1_t gathered = __riscv_vluxei32_v_f32m1(inBase, scaled_offsets, 4);
176 __riscv_vse32_v_f32m1(v.
mF32, gathered, 4);
179 const uint8 *base =
reinterpret_cast<const uint8 *
>(inBase);
180 float x = *
reinterpret_cast<const float *
>(base + inOffsets.
GetX() * Scale);
181 float y = *
reinterpret_cast<const float *
>(base + inOffsets.
GetY() * Scale);
182 float z = *
reinterpret_cast<const float *
>(base + inOffsets.
GetZ() * Scale);
183 float w = *
reinterpret_cast<const float *
>(base + inOffsets.
GetW() * Scale);
184 return Vec4(x, y, z, w);
190#if defined(JPH_USE_SSE)
192#elif defined(JPH_USE_NEON)
194#elif defined(JPH_USE_RVV)
196 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.
mF32, 4);
197 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
198 const vfloat32m1_t min = __riscv_vfmin_vv_f32m1(v1, v2, 4);
199 __riscv_vse32_v_f32m1(res.
mF32, min, 4);
211#if defined(JPH_USE_SSE)
213#elif defined(JPH_USE_NEON)
215#elif defined(JPH_USE_RVV)
217 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.
mF32, 4);
218 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
219 const vfloat32m1_t max = __riscv_vfmax_vv_f32m1(v1, v2, 4);
220 __riscv_vse32_v_f32m1(res.
mF32, max, 4);
232 return sMax(
sMin(inV, inMax), inMin);
237#if defined(JPH_USE_SSE)
238 return _mm_castps_si128(_mm_cmpeq_ps(inV1.
mValue, inV2.
mValue));
239#elif defined(JPH_USE_NEON)
241#elif defined(JPH_USE_RVV)
243 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.
mF32, 4);
244 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
245 const vbool32_t mask = __riscv_vmfeq_vv_f32m1_b32(v1, v2, 4);
246 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 4);
247 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 4);
248 __riscv_vse32_v_u32m1(res.
mU32, merged, 4);
252 inV1.
mF32[1] == inV2.
mF32[1]? 0xffffffffu : 0,
253 inV1.
mF32[2] == inV2.
mF32[2]? 0xffffffffu : 0,
254 inV1.
mF32[3] == inV2.
mF32[3]? 0xffffffffu : 0);
260#if defined(JPH_USE_SSE)
261 return _mm_castps_si128(_mm_cmplt_ps(inV1.
mValue, inV2.
mValue));
262#elif defined(JPH_USE_NEON)
264#elif defined(JPH_USE_RVV)
266 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.
mF32, 4);
267 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
268 const vbool32_t mask = __riscv_vmflt_vv_f32m1_b32(v1, v2, 4);
269 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 4);
270 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 4);
271 __riscv_vse32_v_u32m1(res.
mU32, merged, 4);
275 inV1.
mF32[1] < inV2.
mF32[1]? 0xffffffffu : 0,
276 inV1.
mF32[2] < inV2.
mF32[2]? 0xffffffffu : 0,
277 inV1.
mF32[3] < inV2.
mF32[3]? 0xffffffffu : 0);
283#if defined(JPH_USE_SSE)
284 return _mm_castps_si128(_mm_cmple_ps(inV1.
mValue, inV2.
mValue));
285#elif defined(JPH_USE_NEON)
287#elif defined(JPH_USE_RVV)
289 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.
mF32, 4);
290 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
291 const vbool32_t mask = __riscv_vmfle_vv_f32m1_b32(v1, v2, 4);
292 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 4);
293 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 4);
294 __riscv_vse32_v_u32m1(res.
mU32, merged, 4);
298 inV1.
mF32[1] <= inV2.
mF32[1]? 0xffffffffu : 0,
299 inV1.
mF32[2] <= inV2.
mF32[2]? 0xffffffffu : 0,
300 inV1.
mF32[3] <= inV2.
mF32[3]? 0xffffffffu : 0);
306#if defined(JPH_USE_SSE)
307 return _mm_castps_si128(_mm_cmpgt_ps(inV1.
mValue, inV2.
mValue));
308#elif defined(JPH_USE_NEON)
310#elif defined(JPH_USE_RVV)
312 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.
mF32, 4);
313 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
314 const vbool32_t mask = __riscv_vmfgt_vv_f32m1_b32(v1, v2, 4);
315 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 4);
316 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 4);
317 __riscv_vse32_v_u32m1(res.
mU32, merged, 4);
321 inV1.
mF32[1] > inV2.
mF32[1]? 0xffffffffu : 0,
322 inV1.
mF32[2] > inV2.
mF32[2]? 0xffffffffu : 0,
323 inV1.
mF32[3] > inV2.
mF32[3]? 0xffffffffu : 0);
329#if defined(JPH_USE_SSE)
330 return _mm_castps_si128(_mm_cmpge_ps(inV1.
mValue, inV2.
mValue));
331#elif defined(JPH_USE_NEON)
333#elif defined(JPH_USE_RVV)
335 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV1.
mF32, 4);
336 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
337 const vbool32_t mask = __riscv_vmfge_vv_f32m1_b32(v1, v2, 4);
338 const vuint32m1_t zeros = __riscv_vmv_v_x_u32m1(0x0, 4);
339 const vuint32m1_t merged = __riscv_vmerge_vxm_u32m1(zeros, 0xFFFFFFFF, mask, 4);
340 __riscv_vse32_v_u32m1(res.
mU32, merged, 4);
344 inV1.
mF32[1] >= inV2.
mF32[1]? 0xffffffffu : 0,
345 inV1.
mF32[2] >= inV2.
mF32[2]? 0xffffffffu : 0,
346 inV1.
mF32[3] >= inV2.
mF32[3]? 0xffffffffu : 0);
352#if defined(JPH_USE_SSE)
358#elif defined(JPH_USE_NEON)
360#elif defined(JPH_USE_RVV)
362 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inMul1.
mF32, 4);
363 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inMul2.
mF32, 4);
364 const vfloat32m1_t rvv_add = __riscv_vle32_v_f32m1(inAdd.
mF32, 4);
365 const vfloat32m1_t mul = __riscv_vfmul_vv_f32m1(v1, v2, 4);
366 const vfloat32m1_t fmadd = __riscv_vfadd_vv_f32m1(rvv_add, mul, 4);
367 __riscv_vse32_v_f32m1(res.
mF32, fmadd, 4);
379#if defined(JPH_USE_SSE4_1) && !defined(JPH_PLATFORM_WASM)
380 return _mm_blendv_ps(inNotSet.
mValue, inSet.
mValue, _mm_castsi128_ps(inControl.
mValue));
381#elif defined(JPH_USE_SSE)
382 __m128 is_set = _mm_castsi128_ps(_mm_srai_epi32(inControl.
mValue, 31));
383 return _mm_or_ps(_mm_and_ps(is_set, inSet.
mValue), _mm_andnot_ps(is_set, inNotSet.
mValue));
384#elif defined(JPH_USE_NEON)
385 return vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.
mValue), 31)), inSet.
mValue, inNotSet.
mValue);
386#elif defined(JPH_USE_RVV)
388 const vuint32m1_t control = __riscv_vle32_v_u32m1(inControl.
mU32, 4);
389 const vfloat32m1_t not_set = __riscv_vle32_v_f32m1(inNotSet.
mF32, 4);
390 const vfloat32m1_t set = __riscv_vle32_v_f32m1(inSet.
mF32, 4);
393 const vuint32m1_t r = __riscv_vand_vx_u32m1(control, 0x80000000u, 4);
394 const vbool32_t rvv_mask = __riscv_vmsne_vx_u32m1_b32(r, 0x0, 4);
395 const vfloat32m1_t merged = __riscv_vmerge_vvm_f32m1(not_set, set, rvv_mask, 4);
396 __riscv_vse32_v_f32m1(masked.
mF32, merged, 4);
400 for (
int i = 0; i < 4; i++)
401 result.
mF32[i] = (inControl.
mU32[i] & 0x80000000u) ? inSet.
mF32[i] : inNotSet.
mF32[i];
408#if defined(JPH_USE_SSE)
410#elif defined(JPH_USE_NEON)
411 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(inV1.
mValue), vreinterpretq_u32_f32(inV2.
mValue)));
412#elif defined(JPH_USE_RVV)
414 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(
reinterpret_cast<const uint32 *
>(inV1.
mF32), 4);
415 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(
reinterpret_cast<const uint32 *
>(inV2.
mF32), 4);
416 const vuint32m1_t res = __riscv_vor_vv_u32m1(v1, v2, 4);
417 __riscv_vse32_v_u32m1(
reinterpret_cast<uint32 *
>(or_result.
mF32), res, 4);
426#if defined(JPH_USE_SSE)
428#elif defined(JPH_USE_NEON)
429 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(inV1.
mValue), vreinterpretq_u32_f32(inV2.
mValue)));
430#elif defined(JPH_USE_RVV)
432 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(
reinterpret_cast<const uint32 *
>(inV1.
mF32), 4);
433 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(
reinterpret_cast<const uint32 *
>(inV2.
mF32), 4);
434 const vuint32m1_t res = __riscv_vxor_vv_u32m1(v1, v2, 4);
435 __riscv_vse32_v_u32m1(
reinterpret_cast<uint32 *
>(xor_result.
mF32), res, 4);
444#if defined(JPH_USE_SSE)
446#elif defined(JPH_USE_NEON)
447 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(inV1.
mValue), vreinterpretq_u32_f32(inV2.
mValue)));
448#elif defined(JPH_USE_RVV)
450 const vuint32m1_t v1 = __riscv_vle32_v_u32m1(
reinterpret_cast<const uint32 *
>(inV1.
mF32), 4);
451 const vuint32m1_t v2 = __riscv_vle32_v_u32m1(
reinterpret_cast<const uint32 *
>(inV2.
mF32), 4);
452 const vuint32m1_t res = __riscv_vand_vv_u32m1(v1, v2, 4);
453 __riscv_vse32_v_u32m1(
reinterpret_cast<uint32 *
>(and_result.
mF32), res, 4);
466 ioValue =
sSelect(ioValue, v1, c1);
473 ioValue =
sSelect(ioValue, v2, c2);
480 ioValue =
sSelect(ioValue, v3, c3);
490 ioValue =
sSelect(ioValue, v1, c1);
497 ioValue =
sSelect(ioValue, v2, c2);
504 ioValue =
sSelect(ioValue, v3, c3);
515 return (inV2 - *
this).LengthSq() <= inMaxDistSq;
525 return abs(
LengthSq() - 1.0f) <= inTolerance;
530#if defined(JPH_USE_AVX512)
531 return _mm_fpclass_ps_mask(
mValue, 0b10000001) != 0;
532#elif defined(JPH_USE_SSE)
533 return _mm_movemask_ps(_mm_cmpunord_ps(
mValue,
mValue)) != 0;
534#elif defined(JPH_USE_NEON)
536 return vaddvq_u32(vshrq_n_u32(is_equal, 31)) != 4;
537#elif defined(JPH_USE_RVV)
538 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
539 const vbool32_t mask = __riscv_vmfeq_vv_f32m1_b32(v, v, 4);
540 const uint32 eq = __riscv_vcpop_m_b32(mask, 4);
543 return isnan(
mF32[0]) || isnan(
mF32[1]) || isnan(
mF32[2]) || isnan(
mF32[3]);
549#if defined(JPH_USE_SSE)
551#elif defined(JPH_USE_NEON)
553#elif defined(JPH_USE_RVV)
555 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
556 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
557 const vfloat32m1_t mul = __riscv_vfmul_vv_f32m1(v1, v2, 4);
558 __riscv_vse32_v_f32m1(res.
mF32, mul, 4);
570#if defined(JPH_USE_SSE)
571 return _mm_mul_ps(
mValue, _mm_set1_ps(inV2));
572#elif defined(JPH_USE_NEON)
573 return vmulq_n_f32(
mValue, inV2);
574#elif defined(JPH_USE_RVV)
576 const vfloat32m1_t src = __riscv_vle32_v_f32m1(
mF32, 4);
577 const vfloat32m1_t mul = __riscv_vfmul_vf_f32m1(src, inV2, 4);
578 __riscv_vse32_v_f32m1(res.
mF32, mul, 4);
588#if defined(JPH_USE_SSE)
589 return _mm_mul_ps(_mm_set1_ps(inV1), inV2.
mValue);
590#elif defined(JPH_USE_NEON)
591 return vmulq_n_f32(inV2.
mValue, inV1);
592#elif defined(JPH_USE_RVV)
594 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
595 const vfloat32m1_t mul = __riscv_vfmul_vf_f32m1(v1, inV1, 4);
596 __riscv_vse32_v_f32m1(res.
mF32, mul, 4);
602 inV1 * inV2.
mF32[3]);
608#if defined(JPH_USE_SSE)
609 return _mm_div_ps(
mValue, _mm_set1_ps(inV2));
610#elif defined(JPH_USE_NEON)
611 return vdivq_f32(
mValue, vdupq_n_f32(inV2));
612#elif defined(JPH_USE_RVV)
614 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
615 const vfloat32m1_t div = __riscv_vfdiv_vf_f32m1(v1, inV2, 4);
616 __riscv_vse32_v_f32m1(res.
mF32, div, 4);
625#if defined(JPH_USE_SSE)
627#elif defined(JPH_USE_NEON)
629#elif defined(JPH_USE_RVV)
630 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
631 const vfloat32m1_t res = __riscv_vfmul_vf_f32m1(v1, inV2, 4);
632 __riscv_vse32_v_f32m1(
mF32, res, 4);
634 for (
int i = 0; i < 4; ++i)
642#if defined(JPH_USE_SSE)
644#elif defined(JPH_USE_NEON)
646#elif defined(JPH_USE_RVV)
647 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
648 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
649 const vfloat32m1_t rvv_res = __riscv_vfmul_vv_f32m1(v1, v2, 4);
650 __riscv_vse32_v_f32m1(
mF32, rvv_res, 4);
652 for (
int i = 0; i < 4; ++i)
660#if defined(JPH_USE_SSE)
662#elif defined(JPH_USE_NEON)
664#elif defined(JPH_USE_RVV)
665 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
666 const vfloat32m1_t res = __riscv_vfdiv_vf_f32m1(v, inV2, 4);
667 __riscv_vse32_v_f32m1(
mF32, res, 4);
669 for (
int i = 0; i < 4; ++i)
677#if defined(JPH_USE_SSE)
679#elif defined(JPH_USE_NEON)
681#elif defined(JPH_USE_RVV)
683 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
684 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
685 const vfloat32m1_t rvv_add = __riscv_vfadd_vv_f32m1(v1, v2, 4);
686 __riscv_vse32_v_f32m1(res.
mF32, rvv_add, 4);
698#if defined(JPH_USE_SSE)
700#elif defined(JPH_USE_NEON)
702#elif defined(JPH_USE_RVV)
703 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
704 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
705 const vfloat32m1_t rvv_add = __riscv_vfadd_vv_f32m1(v1, v2, 4);
706 __riscv_vse32_v_f32m1(
mF32, rvv_add, 4);
708 for (
int i = 0; i < 4; ++i)
716#if defined(JPH_USE_SSE)
717 return _mm_sub_ps(_mm_setzero_ps(),
mValue);
718#elif defined(JPH_USE_NEON)
719 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
720 return vsubq_f32(vdupq_n_f32(0),
mValue);
724#elif defined(JPH_USE_RVV)
725 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
727 const vfloat32m1_t rvv_zero = __riscv_vfmv_v_f_f32m1(0.0f, 4);
728 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
729 const vfloat32m1_t rvv_neg = __riscv_vfsub_vv_f32m1(rvv_zero, v, 4);
730 __riscv_vse32_v_f32m1(res.
mF32, rvv_neg, 4);
734 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
735 const vfloat32m1_t rvv_neg = __riscv_vfsgnjn_vv_f32m1(v, v, 4);
736 __riscv_vse32_v_f32m1(res.
mF32, rvv_neg, 4);
740 #ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
750#if defined(JPH_USE_SSE)
752#elif defined(JPH_USE_NEON)
754#elif defined(JPH_USE_RVV)
756 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
757 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
758 const vfloat32m1_t rvv_sub = __riscv_vfsub_vv_f32m1(v1, v2, 4);
759 __riscv_vse32_v_f32m1(res.
mF32, rvv_sub, 4);
771#if defined(JPH_USE_SSE)
773#elif defined(JPH_USE_NEON)
775#elif defined(JPH_USE_RVV)
776 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
777 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
778 const vfloat32m1_t rvv_sub = __riscv_vfsub_vv_f32m1(v1, v2, 4);
779 __riscv_vse32_v_f32m1(
mF32, rvv_sub, 4);
781 for (
int i = 0; i < 4; ++i)
789#if defined(JPH_USE_SSE)
791#elif defined(JPH_USE_NEON)
793#elif defined(JPH_USE_RVV)
795 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
796 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
797 const vfloat32m1_t rvv_div = __riscv_vfdiv_vv_f32m1(v1, v2, 4);
798 __riscv_vse32_v_f32m1(res.
mF32, rvv_div, 4);
810#if defined(JPH_USE_SSE)
811 return _mm_shuffle_ps(
mValue,
mValue, _MM_SHUFFLE(0, 0, 0, 0));
812#elif defined(JPH_USE_NEON)
813 return vdupq_laneq_f32(
mValue, 0);
814#elif defined(JPH_USE_RVV)
816 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(
mF32[0], 4);
817 __riscv_vse32_v_f32m1(vec.
mF32, splat, 4);
826#if defined(JPH_USE_SSE)
827 return _mm_shuffle_ps(
mValue,
mValue, _MM_SHUFFLE(1, 1, 1, 1));
828#elif defined(JPH_USE_NEON)
829 return vdupq_laneq_f32(
mValue, 1);
830#elif defined(JPH_USE_RVV)
832 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(
mF32[1], 4);
833 __riscv_vse32_v_f32m1(vec.
mF32, splat, 4);
842#if defined(JPH_USE_SSE)
843 return _mm_shuffle_ps(
mValue,
mValue, _MM_SHUFFLE(2, 2, 2, 2));
844#elif defined(JPH_USE_NEON)
845 return vdupq_laneq_f32(
mValue, 2);
846#elif defined(JPH_USE_RVV)
848 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(
mF32[2], 4);
849 __riscv_vse32_v_f32m1(vec.
mF32, splat, 4);
858#if defined(JPH_USE_SSE)
859 return _mm_shuffle_ps(
mValue,
mValue, _MM_SHUFFLE(3, 3, 3, 3));
860#elif defined(JPH_USE_NEON)
861 return vdupq_laneq_f32(
mValue, 3);
862#elif defined(JPH_USE_RVV)
864 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(
mF32[3], 4);
865 __riscv_vse32_v_f32m1(vec.
mF32, splat, 4);
874#if defined(JPH_USE_SSE)
875 return _mm_shuffle_ps(
mValue,
mValue, _MM_SHUFFLE(0, 0, 0, 0));
876#elif defined(JPH_USE_NEON)
877 return vdupq_laneq_f32(
mValue, 0);
878#elif defined(JPH_USE_RVV)
880 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(
mF32[0], 3);
881 __riscv_vse32_v_f32m1(vec.
mF32, splat, 3);
890#if defined(JPH_USE_SSE)
891 return _mm_shuffle_ps(
mValue,
mValue, _MM_SHUFFLE(1, 1, 1, 1));
892#elif defined(JPH_USE_NEON)
893 return vdupq_laneq_f32(
mValue, 1);
894#elif defined(JPH_USE_RVV)
896 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(
mF32[1], 3);
897 __riscv_vse32_v_f32m1(vec.
mF32, splat, 3);
906#if defined(JPH_USE_SSE)
907 return _mm_shuffle_ps(
mValue,
mValue, _MM_SHUFFLE(2, 2, 2, 2));
908#elif defined(JPH_USE_NEON)
909 return vdupq_laneq_f32(
mValue, 2);
910#elif defined(JPH_USE_RVV)
912 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(
mF32[2], 3);
913 __riscv_vse32_v_f32m1(vec.
mF32, splat, 3);
922#if defined(JPH_USE_SSE)
923 return _mm_shuffle_ps(
mValue,
mValue, _MM_SHUFFLE(3, 3, 3, 3));
924#elif defined(JPH_USE_NEON)
925 return vdupq_laneq_f32(
mValue, 3);
926#elif defined(JPH_USE_RVV)
928 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(
mF32[3], 3);
929 __riscv_vse32_v_f32m1(vec.
mF32, splat, 3);
958#if defined(JPH_USE_AVX512)
960#elif defined(JPH_USE_SSE)
961 return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(),
mValue),
mValue);
962#elif defined(JPH_USE_NEON)
964#elif defined(JPH_USE_RVV)
966 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
967 const vfloat32m1_t rvv_abs = __riscv_vfsgnj_vf_f32m1(v, 1.0, 4);
968 __riscv_vse32_v_f32m1(res.
mF32, rvv_abs, 4);
982#if defined(JPH_USE_SSE4_1)
984#elif defined(JPH_USE_NEON)
986 return vdupq_n_f32(vaddvq_f32(mul));
987#elif defined(JPH_USE_RVV)
989 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
990 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
991 const vfloat32m1_t mul = __riscv_vfmul_vv_f32m1(v1, v2, 4);
992 float dot = RVVSumElementsFloat32x4(mul);
993 const vfloat32m1_t splat = __riscv_vfmv_v_f_f32m1(dot, 4);
994 __riscv_vse32_v_f32m1(res.
mF32, splat, 4);
1004#if defined(JPH_USE_SSE4_1)
1005 return _mm_cvtss_f32(_mm_dp_ps(
mValue, inV2.
mValue, 0xff));
1006#elif defined(JPH_USE_NEON)
1008 return vaddvq_f32(mul);
1009#elif defined(JPH_USE_RVV)
1010 const vfloat32m1_t v1 = __riscv_vle32_v_f32m1(
mF32, 4);
1011 const vfloat32m1_t v2 = __riscv_vle32_v_f32m1(inV2.
mF32, 4);
1012 const vfloat32m1_t mul = __riscv_vfmul_vv_f32m1(v1, v2, 4);
1013 return RVVSumElementsFloat32x4(mul);
1022#if defined(JPH_USE_SSE4_1)
1024#elif defined(JPH_USE_NEON)
1026 return vaddvq_f32(mul);
1027#elif defined(JPH_USE_RVV)
1028 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
1029 const vfloat32m1_t mul = __riscv_vfmul_vv_f32m1(v, v, 4);
1030 return RVVSumElementsFloat32x4(mul);
1039#if defined(JPH_USE_SSE4_1)
1040 return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(
mValue,
mValue, 0xff)));
1041#elif defined(JPH_USE_NEON)
1043 float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
1044 return vget_lane_f32(vsqrt_f32(sum), 0);
1045#elif defined(JPH_USE_RVV)
1046 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
1047 const vfloat32m1_t mul = __riscv_vfmul_vv_f32m1(v, v, 4);
1048 return sqrt(RVVSumElementsFloat32x4(mul));
1057#if defined(JPH_USE_SSE)
1058 return _mm_sqrt_ps(
mValue);
1059#elif defined(JPH_USE_NEON)
1060 return vsqrtq_f32(
mValue);
1061#elif defined(JPH_USE_RVV)
1063 const vfloat32m1_t rvv_v = __riscv_vle32_v_f32m1(
mF32, 4);
1064 const vfloat32m1_t rvv_sqrt = __riscv_vfsqrt_v_f32m1(rvv_v, 4);
1065 __riscv_vse32_v_f32m1(res.
mF32, rvv_sqrt, 4);
1075#if defined(JPH_USE_AVX512)
1076 return _mm_fixupimm_ps(
mValue,
mValue, _mm_set1_epi32(0xA9A90A00), 0);
1077#elif defined(JPH_USE_SSE)
1078 Type minus_one = _mm_set1_ps(-1.0f);
1079 Type one = _mm_set1_ps(1.0f);
1080 return _mm_or_ps(_mm_and_ps(
mValue, minus_one), one);
1081#elif defined(JPH_USE_NEON)
1082 Type minus_one = vdupq_n_f32(-1.0f);
1083 Type one = vdupq_n_f32(1.0f);
1084 return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(
mValue), vreinterpretq_u32_f32(minus_one)), vreinterpretq_u32_f32(one)));
1085#elif defined(JPH_USE_RVV)
1087 const vfloat32m1_t rvv_in = __riscv_vle32_v_f32m1(
mF32, 4);
1088 const vfloat32m1_t rvv_one = __riscv_vfmv_v_f_f32m1(1.0, 4);
1089 const vfloat32m1_t rvv_signs = __riscv_vfsgnj_vv_f32m1(rvv_one, rvv_in, 4);
1090 __riscv_vse32_v_f32m1(res.
mF32, rvv_signs, 4);
1093 return Vec4(std::signbit(
mF32[0])? -1.0f : 1.0f,
1094 std::signbit(
mF32[1])? -1.0f : 1.0f,
1095 std::signbit(
mF32[2])? -1.0f : 1.0f,
1096 std::signbit(
mF32[3])? -1.0f : 1.0f);
1100template <
int X,
int Y,
int Z,
int W>
1103 static_assert(X == 1 || X == -1,
"X must be 1 or -1");
1104 static_assert(Y == 1 || Y == -1,
"Y must be 1 or -1");
1105 static_assert(Z == 1 || Z == -1,
"Z must be 1 or -1");
1106 static_assert(W == 1 || W == -1,
"W must be 1 or -1");
1107 return Vec4::sXor(*
this,
Vec4(X > 0? 0.0f : -0.0f, Y > 0? 0.0f : -0.0f, Z > 0? 0.0f : -0.0f, W > 0? 0.0f : -0.0f));
1112#if defined(JPH_USE_SSE4_1)
1114#elif defined(JPH_USE_NEON)
1116 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
1117 return vdivq_f32(
mValue, vsqrtq_f32(sum));
1118#elif defined(JPH_USE_RVV)
1119 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
1120 const vfloat32m1_t mul = __riscv_vfmul_vv_f32m1(v, v, 4);
1121 const float length = sqrt(RVVSumElementsFloat32x4(mul));
1122 const vfloat32m1_t norm_v = __riscv_vfdiv_vf_f32m1(v, length, 4);
1125 __riscv_vse32_v_f32m1(vec.
mF32, norm_v, 4);
1134#if defined(JPH_USE_SSE)
1135 _mm_storeu_ps(&outV->
x,
mValue);
1136#elif defined(JPH_USE_NEON)
1138#elif defined(JPH_USE_RVV)
1139 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
1140 __riscv_vse32_v_f32m1(&outV->
x, v, 4);
1142 for (
int i = 0; i < 4; ++i)
1143 (&outV->
x)[i] =
mF32[i];
1149#if defined(JPH_USE_SSE)
1150 return _mm_cvttps_epi32(
mValue);
1151#elif defined(JPH_USE_NEON)
1152 return vcvtq_u32_f32(
mValue);
1153#elif defined(JPH_USE_RVV)
1155 const vfloat32m1_t v = __riscv_vle32_v_f32m1(
mF32, 4);
1156 const vuint32m1_t cast = __riscv_vfcvt_rtz_xu_f_v_u32m1(v, 4);
1157 __riscv_vse32_v_u32m1(res.
mU32, cast, 4);
1166#if defined(JPH_USE_SSE)
1168#elif defined(JPH_USE_NEON)
1169 return vreinterpretq_u32_f32(
mValue);
1171 return *
reinterpret_cast<const UVec4 *
>(
this);
1177#if defined(JPH_USE_SSE)
1178 return _mm_movemask_ps(
mValue);
1179#elif defined(JPH_USE_NEON)
1180 int32x4_t shift = JPH_NEON_INT32x4(0, 1, 2, 3);
1181 return vaddvq_u32(vshlq_u32(vshrq_n_u32(vreinterpretq_u32_f32(
mValue), 31), shift));
1182#elif defined(JPH_USE_RVV)
1183 const vuint32m1_t v = __riscv_vle32_v_u32m1(
reinterpret_cast<const uint32 *
>(
mF32), 4);
1184 const vuint32m1_t shifted = __riscv_vsrl_vx_u32m1(v, 31, 4);
1185 const vbool32_t mask = __riscv_vmsne_vx_u32m1_b32(shifted, 0x0, 4);
1186 const vuint32m1_t as_int = __riscv_vreinterpret_v_b32_u32m1(mask);
1187 const uint32 result = __riscv_vmv_x_s_u32m1_u32(as_int) & 0xF;
1190 return (std::signbit(
mF32[0])? 1 : 0) | (std::signbit(
mF32[1])? 2 : 0) | (std::signbit(
mF32[2])? 4 : 0) | (std::signbit(
mF32[3])? 8 : 0);
1231 x = ((x - float_quadrant * 1.5703125f) - float_quadrant * 0.0004837512969970703125f) - float_quadrant * 7.549789948768648e-8f;
1284 x = ((x - float_quadrant * 1.5703125f) - float_quadrant * 0.0004837512969970703125f) - float_quadrant * 7.549789948768648e-8f;
1394 Vec4 atan = (numerator / denominator).
ATan();
1413 constexpr float cOneOverSqrt2 = 0.70710678f;
1414 constexpr uint cNumBits = 9;
1415 constexpr uint cMask = (1 << cNumBits) - 1;
1416 constexpr uint cMaxValue = cMask - 1;
1417 constexpr float cScale = float(cMaxValue) / (2.0f * cOneOverSqrt2);
1423 if (v[max_element] < 0.0f)
1425 value = 0x80000000u;
1430 value |= max_element << 29;
1434 switch (max_element)
1449 value |= compressed.
GetX();
1450 value |= compressed.
GetY() << cNumBits;
1451 value |= compressed.
GetZ() << 2 * cNumBits;
1457 constexpr float cOneOverSqrt2 = 0.70710678f;
1458 constexpr uint cNumBits = 9;
1459 constexpr uint cMask = (1u << cNumBits) - 1;
1460 constexpr uint cMaxValue = cMask - 1;
1461 constexpr float cScale = 2.0f * cOneOverSqrt2 / float(cMaxValue);
1464 Vec4 v =
Vec4(
UVec4(inValue & cMask, (inValue >> cNumBits) & cMask, (inValue >> (2 * cNumBits)) & cMask, 0).ToFloat()) * cScale -
Vec4(cOneOverSqrt2, cOneOverSqrt2, cOneOverSqrt2, 0.0f);
1471 if ((inValue & 0x80000000u) != 0)
1475 switch ((inValue >> 29) & 3)
std::uint8_t uint8
Definition Core.h:506
std::uint64_t uint64
Definition Core.h:510
unsigned int uint
Definition Core.h:505
#define JPH_NAMESPACE_END
Definition Core.h:428
std::uint32_t uint32
Definition Core.h:508
#define JPH_IF_FLOATING_POINT_EXCEPTIONS_ENABLED(...)
Definition Core.h:580
#define JPH_NAMESPACE_BEGIN
Definition Core.h:422
#define xy
Definition HLSLToCPP.h:511
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
uint CountTrailingZeros(uint32 inValue)
Compute number of trailing zero bits (how many low bits are zero)
Definition Math.h:98
JPH_INLINE To BitCast(const From &inValue)
Definition Math.h:192
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_W
Use the W component.
Definition Swizzle.h:15
@ SWIZZLE_X
Use the X component.
Definition Swizzle.h:12
@ SWIZZLE_UNUSED
We always use the Z component when we don't specifically want to initialize a value,...
Definition Swizzle.h:16
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Vec4 operator*(float inV1, Vec4Arg inV2)
Multiply vector with float.
Definition Vec4.inl:586
Class that holds 4 float values. Convert to Vec4 to perform calculations.
Definition Float4.h:11
float x
Definition Float4.h:36
float y
Definition Float4.h:37
float z
Definition Float4.h:38
float w
Definition Float4.h:39
JPH_INLINE UVec4 Swizzle() const
Swizzle the elements of this vector.
JPH_INLINE uint32 GetZ() const
Definition UVec4.h:104
JPH_INLINE UVec4 LogicalShiftLeft() const
Shift all components by Count bits to the left (filling with zeros from the right)
static JPH_INLINE UVec4 sSelect(UVec4Arg inNotSet, UVec4Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit ...
Definition UVec4.inl:221
JPH_INLINE uint32 GetY() const
Definition UVec4.h:103
static JPH_INLINE UVec4 sReplicate(uint32 inV)
Replicate int inV across all components.
Definition UVec4.inl:75
JPH_INLINE bool TestAllTrue() const
Test if all components are true (true is when highest bit of component is set)
Definition UVec4.inl:658
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:292
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:250
JPH_INLINE uint32 GetW() const
Definition UVec4.h:105
Type mValue
Definition UVec4.h:223
JPH_INLINE uint32 GetX() const
Get individual components.
Definition UVec4.h:102
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:271
JPH_INLINE UVec4 ArithmeticShiftRight() const
Shift all components by Count bits to the right (shifting in the value of the highest bit)
JPH_INLINE Vec4 ToFloat() const
Convert each component from an int to a float.
Definition UVec4.inl:510
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:527
uint32 mU32[4]
Definition UVec4.h:224
Type mValue
Definition Vec3.h:299
float mF32[4]
Definition Vec3.h:300
JPH_INLINE bool IsNearZero(float inMaxDistSq=1.0e-12f) const
Test if vector is near zero.
Definition Vec4.inl:518
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec4.inl:808
static JPH_INLINE void sSort4(Vec4 &ioValue, UVec4 &ioIndex)
Definition Vec4.inl:460
Vec4 ATan() const
Calculate the arc tangent for each element of this vector (returns value in the range [-PI / 2,...
Definition Vec4.inl:1344
static JPH_INLINE UVec4 sGreater(Vec4Arg inV1, Vec4Arg inV2)
Greater than (component wise)
Definition Vec4.inl:304
float mF32[4]
Definition Vec4.h:312
JPH_INLINE Vec3 SplatW3() const
Replicate the W component to all components.
Definition Vec4.inl:920
JPH_INLINE Vec4 operator-() const
Negate.
Definition Vec4.inl:714
Vec4()=default
Constructor.
static JPH_INLINE Vec4 sAnd(Vec4Arg inV1, Vec4Arg inV2)
Logical and (component wise)
Definition Vec4.inl:442
static JPH_INLINE Vec4 sLoadFloat4Aligned(const Float4 *inV)
Load 4 floats from memory, 16 bytes aligned.
Definition Vec4.inl:139
static Vec4 sATan2(Vec4Arg inY, Vec4Arg inX)
Calculate the arc tangent of y / x using the signs of the arguments to determine the correct quadrant...
Definition Vec4.inl:1378
JPH_INLINE void SetW(float inW)
Definition Vec4.h:129
JPH_INLINE Vec4 GetSign() const
Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
Definition Vec4.inl:1073
Vec4 ASin() const
Definition Vec4.inl:1303
JPH_INLINE Vec4 FlipSign() const
Flips the signs of the components, e.g. FlipSign<-1, 1, -1, 1>() will flip the signs of the X and Z c...
Definition Vec4.inl:1101
static JPH_INLINE Vec4 sXor(Vec4Arg inV1, Vec4Arg inV2)
Logical xor (component wise)
Definition Vec4.inl:424
JPH_INLINE Vec4 Abs() const
Return the absolute value of each of the components.
Definition Vec4.inl:956
JPH_INLINE Vec4 operator/(float inV2) const
Divide vector by float.
Definition Vec4.inl:606
Vec4 Tan() const
Calculate the tangent for each element of this vector (input in radians)
Definition Vec4.inl:1270
JPH_INLINE float GetW() const
Definition Vec4.h:122
JPH_INLINE UVec4 ToInt() const
Convert each component from a float to an int.
Definition Vec4.inl:1147
JPH_INLINE Vec4 & operator+=(Vec4Arg inV2)
Add two float vectors (component wise)
Definition Vec4.inl:696
static JPH_INLINE UVec4 sLessOrEqual(Vec4Arg inV1, Vec4Arg inV2)
Less than or equal (component wise)
Definition Vec4.inl:281
static JPH_INLINE UVec4 sLess(Vec4Arg inV1, Vec4Arg inV2)
Less than (component wise)
Definition Vec4.inl:258
JPH_INLINE int GetLowestComponentIndex() const
Get index of component with lowest value.
Definition Vec4.inl:936
JPH_INLINE float Length() const
Length of vector.
Definition Vec4.inl:1037
static JPH_INLINE void sSort4Reverse(Vec4 &ioValue, UVec4 &ioIndex)
Definition Vec4.inl:484
static JPH_INLINE Vec4 sOne()
Vector with all ones.
Definition Vec4.inl:113
static JPH_INLINE Vec4 sFusedMultiplyAdd(Vec4Arg inMul1, Vec4Arg inMul2, Vec4Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition Vec4.inl:350
JPH_INLINE Vec4 Normalized() const
Normalize vector.
Definition Vec4.inl:1110
static JPH_INLINE UVec4 sEquals(Vec4Arg inV1, Vec4Arg inV2)
Equals (component wise)
Definition Vec4.inl:235
JPH_INLINE float ReduceMax() const
Get the maximum of X, Y, Z and W.
Definition Vec4.inl:1201
JPH_INLINE Vec4 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition Vec4.inl:975
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec4.inl:824
JPH_INLINE UVec4 ReinterpretAsInt() const
Reinterpret Vec4 as a UVec4 (doesn't change the bits)
Definition Vec4.inl:1164
static JPH_INLINE UVec4 sGreaterOrEqual(Vec4Arg inV1, Vec4Arg inV2)
Greater than or equal (component wise)
Definition Vec4.inl:327
static JPH_INLINE Vec4 sMin(Vec4Arg inV1, Vec4Arg inV2)
Return the minimum value of each of the components.
Definition Vec4.inl:188
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec4.inl:840
JPH_INLINE Vec4 Sqrt() const
Component wise square root.
Definition Vec4.inl:1055
JPH_INLINE Vec4 & operator*=(float inV2)
Multiply vector with float.
Definition Vec4.inl:623
static JPH_INLINE Vec4 sGatherFloat4(const float *inBase, UVec4Arg inOffsets)
Gather 4 floats from memory at inBase + inOffsets[i] * Scale.
JPH_INLINE Vec4 operator+(Vec4Arg inV2) const
Add two float vectors (component wise)
Definition Vec4.inl:675
JPH_INLINE Vec4 & operator/=(float inV2)
Divide vector by float.
Definition Vec4.inl:658
JPH_INLINE bool IsNormalized(float inTolerance=1.0e-6f) const
Test if vector is normalized.
Definition Vec4.inl:523
JPH_INLINE bool operator==(Vec4Arg inV2) const
Comparison.
Definition Vec4.inl:508
JPH_INLINE Vec4 SplatW() const
Replicate the W component to all components.
Definition Vec4.inl:856
JPH_INLINE Vec4 DotV(Vec4Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition Vec4.inl:980
JPH_INLINE bool IsClose(Vec4Arg inV2, float inMaxDistSq=1.0e-12f) const
Test if two vectors are close.
Definition Vec4.inl:513
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:119
static JPH_INLINE Vec4 sLoadFloat4(const Float4 *inV)
Load 4 floats from memory.
Definition Vec4.inl:123
static JPH_INLINE Vec4 sZero()
Vector with all zeros.
Definition Vec4.inl:81
JPH_INLINE Vec4 Swizzle() const
Swizzle the elements of this vector.
struct { float mData[4];} Type
Definition Vec4.h:24
static JPH_INLINE Vec4 sOr(Vec4Arg inV1, Vec4Arg inV2)
Logical or (component wise)
Definition Vec4.inl:406
JPH_INLINE float ReduceMin() const
Get the minimum of X, Y, Z and W.
Definition Vec4.inl:1194
Type mValue
Definition Vec4.h:311
static JPH_INLINE Vec4 sDecompressUnitVector(uint32 inValue)
Decompress a unit vector from a 32 bit value.
Definition Vec4.inl:1455
JPH_INLINE uint32 CompressUnitVector() const
Compress a unit vector to a 32 bit value, precision is around 0.5 * 10^-3.
Definition Vec4.inl:1411
JPH_INLINE Vec4 & operator-=(Vec4Arg inV2)
Subtract two float vectors (component wise)
Definition Vec4.inl:769
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec4.inl:1020
static JPH_INLINE Vec4 sMax(Vec4Arg inV1, Vec4Arg inV2)
Return the maximum of each of the components.
Definition Vec4.inl:209
JPH_INLINE float Dot(Vec4Arg inV2) const
Dot product.
Definition Vec4.inl:1002
JPH_INLINE Vec3 SplatZ3() const
Replicate the Z component to all components.
Definition Vec4.inl:904
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition Vec4.inl:528
JPH_INLINE Vec3 SplatX3() const
Replicate the X component to all components.
Definition Vec4.inl:872
static JPH_INLINE Vec4 sNaN()
Vector with all NaN's.
Definition Vec4.inl:118
Vec4 ACos() const
Definition Vec4.inl:1338
static JPH_INLINE Vec4 sSelect(Vec4Arg inNotSet, Vec4Arg inSet, UVec4Arg inControl)
Component wise select, returns inNotSet when highest bit of inControl = 0 and inSet when highest bit of inControl = 1.
Definition Vec4.inl:377
JPH_INLINE int GetSignBits() const
Store if X is negative in bit 0, Y in bit 1, Z in bit 2 and W in bit 3.
Definition Vec4.inl:1175
JPH_INLINE int GetHighestComponentIndex() const
Get index of component with highest value.
Definition Vec4.inl:946
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:97
JPH_INLINE Vec3 SplatY3() const
Replicate the Y component to all components.
Definition Vec4.inl:888
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:1208
JPH_INLINE void StoreFloat4(Float4 *outV) const
Store 4 floats to memory.
Definition Vec4.inl:1132
static JPH_INLINE Vec4 sClamp(Vec4Arg inV, Vec4Arg inMin, Vec4Arg inMax)
Clamp a vector between min and max (component wise)
Definition Vec4.inl:230
friend JPH_INLINE Vec4 operator*(float inV1, Vec4Arg inV2)
Multiply vector with float.
Definition Vec4.inl:586