Jolt Physics
A multi core friendly Game Physics Engine
Mat44.inl
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
5#pragma once
6
7#include <Jolt/Math/Vec3.h>
8#include <Jolt/Math/Vec4.h>
9#include <Jolt/Math/Quat.h>
10
11JPH_NAMESPACE_BEGIN
12
13#define JPH_EL(r, c) mCol[c].mF32[r]
14
15Mat44::Mat44(Vec4Arg inC1, Vec4Arg inC2, Vec4Arg inC3, Vec4Arg inC4) :
16 mCol { inC1, inC2, inC3, inC4 }
17{
18}
19
20Mat44::Mat44(Vec4Arg inC1, Vec4Arg inC2, Vec4Arg inC3, Vec3Arg inC4) :
21 mCol { inC1, inC2, inC3, Vec4(inC4, 1.0f) }
22{
23}
24
25Mat44::Mat44(Type inC1, Type inC2, Type inC3, Type inC4) :
26 mCol { inC1, inC2, inC3, inC4 }
27{
28}
29
29
30Mat44 Mat44::sZero()
31{
32 return Mat44(Vec4::sZero(), Vec4::sZero(), Vec4::sZero(), Vec4::sZero());
33}
34
35Mat44 Mat44::sIdentity()
36{
37 return Mat44(Vec4(1, 0, 0, 0), Vec4(0, 1, 0, 0), Vec4(0, 0, 1, 0), Vec4(0, 0, 0, 1));
38}
39
40Mat44 Mat44::sNaN()
41{
42 return Mat44(Vec4::sNaN(), Vec4::sNaN(), Vec4::sNaN(), Vec4::sNaN());
43}
44
45Mat44 Mat44::sLoadFloat4x4(const Float4 *inV)
46{
47 Mat44 result;
48 for (int c = 0; c < 4; ++c)
49 result.mCol[c] = Vec4::sLoadFloat4(inV + c);
50 return result;
51}
52
53Mat44 Mat44::sLoadFloat4x4Aligned(const Float4 *inV)
54{
55 Mat44 result;
56 for (int c = 0; c < 4; ++c)
57 result.mCol[c] = Vec4::sLoadFloat4Aligned(inV + c);
58 return result;
59}
60
61Mat44 Mat44::sRotationX(float inX)
62{
63 Vec4 sv, cv;
64 Vec4::sReplicate(inX).SinCos(sv, cv);
65 float s = sv.GetX(), c = cv.GetX();
66 return Mat44(Vec4(1, 0, 0, 0), Vec4(0, c, s, 0), Vec4(0, -s, c, 0), Vec4(0, 0, 0, 1));
67}
68
69Mat44 Mat44::sRotationY(float inY)
70{
71 Vec4 sv, cv;
72 Vec4::sReplicate(inY).SinCos(sv, cv);
73 float s = sv.GetX(), c = cv.GetX();
74 return Mat44(Vec4(c, 0, -s, 0), Vec4(0, 1, 0, 0), Vec4(s, 0, c, 0), Vec4(0, 0, 0, 1));
75}
76
77Mat44 Mat44::sRotationZ(float inZ)
78{
79 Vec4 sv, cv;
80 Vec4::sReplicate(inZ).SinCos(sv, cv);
81 float s = sv.GetX(), c = cv.GetX();
82 return Mat44(Vec4(c, s, 0, 0), Vec4(-s, c, 0, 0), Vec4(0, 0, 1, 0), Vec4(0, 0, 0, 1));
83}
84
85Mat44 Mat44::sRotation(QuatArg inQuat)
86{
87 JPH_ASSERT(inQuat.IsNormalized());
88
89 // See: https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation section 'Quaternion-derived rotation matrix'
90#ifdef JPH_USE_SSE4_1
91 __m128 xyzw = inQuat.mValue.mValue;
92 __m128 two_xyzw = _mm_add_ps(xyzw, xyzw);
93 __m128 yzxw = _mm_shuffle_ps(xyzw, xyzw, _MM_SHUFFLE(3, 0, 2, 1));
94 __m128 two_yzxw = _mm_add_ps(yzxw, yzxw);
95 __m128 zxyw = _mm_shuffle_ps(xyzw, xyzw, _MM_SHUFFLE(3, 1, 0, 2));
96 __m128 two_zxyw = _mm_add_ps(zxyw, zxyw);
97 __m128 wwww = _mm_shuffle_ps(xyzw, xyzw, _MM_SHUFFLE(3, 3, 3, 3));
98 __m128 diagonal = _mm_sub_ps(_mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(two_yzxw, yzxw)), _mm_mul_ps(two_zxyw, zxyw)); // (1 - 2 y^2 - 2 z^2, 1 - 2 x^2 - 2 z^2, 1 - 2 x^2 - 2 y^2, 1 - 4 w^2)
99 __m128 plus = _mm_add_ps(_mm_mul_ps(two_xyzw, zxyw), _mm_mul_ps(two_yzxw, wwww)); // 2 * (xz + yw, xy + zw, yz + xw, ww)
100 __m128 minus = _mm_sub_ps(_mm_mul_ps(two_yzxw, xyzw), _mm_mul_ps(two_zxyw, wwww)); // 2 * (xy - zw, yz - xw, xz - yw, 0)
101
102 // Workaround for compiler changing _mm_sub_ps(_mm_mul_ps(...), ...) into a fused multiply sub instruction, resulting in w not being 0
103 // There doesn't appear to be a reliable way to turn this off in Clang
104 minus = _mm_insert_ps(minus, minus, 0b1000);
105
106 __m128 col0 = _mm_blend_ps(_mm_blend_ps(plus, diagonal, 0b0001), minus, 0b1100); // (1 - 2 y^2 - 2 z^2, 2 xy + 2 zw, 2 xz - 2 yw, 0)
107 __m128 col1 = _mm_blend_ps(_mm_blend_ps(diagonal, minus, 0b1001), plus, 0b0100); // (2 xy - 2 zw, 1 - 2 x^2 - 2 z^2, 2 yz + 2 xw, 0)
108 __m128 col2 = _mm_blend_ps(_mm_blend_ps(minus, plus, 0b0001), diagonal, 0b0100); // (2 xz + 2 yw, 2 yz - 2 xw, 1 - 2 x^2 - 2 y^2, 0)
109 __m128 col3 = _mm_set_ps(1, 0, 0, 0);
110
111 return Mat44(col0, col1, col2, col3);
112#else
113 float x = inQuat.GetX();
114 float y = inQuat.GetY();
115 float z = inQuat.GetZ();
116 float w = inQuat.GetW();
117
118 float tx = x + x; // Note: Using x + x instead of 2.0f * x to force this function to return the same value as the SSE4.1 version across platforms.
119 float ty = y + y;
120 float tz = z + z;
121
122 float xx = tx * x;
123 float yy = ty * y;
124 float zz = tz * z;
125 float xy = tx * y;
126 float xz = tx * z;
127 float xw = tx * w;
128 float yz = ty * z;
129 float yw = ty * w;
130 float zw = tz * w;
131
132 return Mat44(Vec4((1.0f - yy) - zz, xy + zw, xz - yw, 0.0f), // Note: Added extra brackets to force this function to return the same value as the SSE4.1 version across platforms.
133 Vec4(xy - zw, (1.0f - zz) - xx, yz + xw, 0.0f),
134 Vec4(xz + yw, yz - xw, (1.0f - xx) - yy, 0.0f),
135 Vec4(0.0f, 0.0f, 0.0f, 1.0f));
136#endif
137}
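// Both paths build the same quaternion-derived rotation matrix; for a normalized quaternion q,
// sRotation(q) * v rotates v exactly as q * v does. Illustrative example (editorial, not part of
// the original source): Mat44::sRotation(Quat::sRotation(Vec3::sAxisY(), 0.5f * JPH_PI)) * Vec3::sAxisX()
// is approximately (0, 0, -1), i.e. a quarter turn of the X axis around Y.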
138
139Mat44 Mat44::sRotation(Vec3Arg inAxis, float inAngle)
140{
141 return sRotation(Quat::sRotation(inAxis, inAngle));
142}
143
144Mat44 Mat44::sTranslation(Vec3Arg inV)
145{
146 return Mat44(Vec4(1, 0, 0, 0), Vec4(0, 1, 0, 0), Vec4(0, 0, 1, 0), Vec4(inV, 1));
147}
148
149Mat44 Mat44::sRotationTranslation(QuatArg inR, Vec3Arg inT)
150{
151 Mat44 m = sRotation(inR);
152 m.SetTranslation(inT);
153 return m;
154}
155
156Mat44 Mat44::sInverseRotationTranslation(QuatArg inR, Vec3Arg inT)
157{
158 Mat44 m = sRotation(inR.Conjugated());
159 m.SetTranslation(-m.Multiply3x3(inT));
160 return m;
161}
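// Equivalent to sRotationTranslation(inR, inT).Inversed() but cheaper: the inverse of the rigid
// transform [R | T] is [R^T | -R^T T], computed here through the conjugate quaternion.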
162
163Mat44 Mat44::sScale(float inScale)
164{
165 return Mat44(Vec4(inScale, 0, 0, 0), Vec4(0, inScale, 0, 0), Vec4(0, 0, inScale, 0), Vec4(0, 0, 0, 1));
166}
167
168Mat44 Mat44::sScale(Vec3Arg inV)
169{
170 return Mat44(Vec4(inV.GetX(), 0, 0, 0), Vec4(0, inV.GetY(), 0, 0), Vec4(0, 0, inV.GetZ(), 0), Vec4(0, 0, 0, 1));
171}
172
173Mat44 Mat44::sOuterProduct(Vec3Arg inV1, Vec3Arg inV2)
174{
175 Vec4 v1(inV1, 0);
176 return Mat44(v1 * inV2.SplatX(), v1 * inV2.SplatY(), v1 * inV2.SplatZ(), Vec4(0, 0, 0, 1));
177}
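// Since (v1 v2^T) v == v1 (v2 . v), the result satisfies
// sOuterProduct(a, b).Multiply3x3(c) == a * b.Dot(c) for any Vec3 c.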
178
179Mat44 Mat44::sCrossProduct(Vec3Arg inV)
180{
181#ifdef JPH_USE_SSE4_1
182 // Zero out the W component
183 __m128 zero = _mm_setzero_ps();
184 __m128 v = _mm_blend_ps(inV.mValue, zero, 0b1000);
185
186 // Negate
187 __m128 min_v = _mm_sub_ps(zero, v);
188
189 return Mat44(
190 _mm_shuffle_ps(v, min_v, _MM_SHUFFLE(3, 1, 2, 3)), // [0, z, -y, 0]
191 _mm_shuffle_ps(min_v, v, _MM_SHUFFLE(3, 0, 3, 2)), // [-z, 0, x, 0]
192 _mm_blend_ps(_mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 1)), _mm_shuffle_ps(min_v, min_v, _MM_SHUFFLE(3, 3, 0, 3)), 0b0010), // [y, -x, 0, 0]
193 Vec4(0, 0, 0, 1));
194#else
195 float x = inV.GetX();
196 float y = inV.GetY();
197 float z = inV.GetZ();
198
199 return Mat44(
200 Vec4(0, z, -y, 0),
201 Vec4(-z, 0, x, 0),
202 Vec4(y, -x, 0, 0),
203 Vec4(0, 0, 0, 1));
204#endif
205}
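// The 3x3 part is the usual skew-symmetric cross product matrix, so
// sCrossProduct(a).Multiply3x3(b) == a.Cross(b).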
206
207Mat44 Mat44::sLookAt(Vec3Arg inPos, Vec3Arg inTarget, Vec3Arg inUp)
208{
209 Vec3 direction = (inTarget - inPos).NormalizedOr(-Vec3::sAxisZ());
210 Vec3 right = direction.Cross(inUp).NormalizedOr(Vec3::sAxisX());
211 Vec3 up = right.Cross(direction);
212
213 return Mat44(Vec4(right, 0), Vec4(up, 0), Vec4(-direction, 0), Vec4(inPos, 1)).InversedRotationTranslation();
214}
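// Builds a right-handed view matrix: the camera at inPos looks down its local -Z axis towards
// inTarget, with +X to the right and +Y (re-orthogonalized from inUp) up in view space.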
215
216Mat44 Mat44::sPerspective(float inFovY, float inAspect, float inNear, float inFar)
217{
218 float height = 1.0f / Tan(0.5f * inFovY);
219 float width = height / inAspect;
220 float range = inFar / (inNear - inFar);
221
222 return Mat44(Vec4(width, 0.0f, 0.0f, 0.0f), Vec4(0.0f, height, 0.0f, 0.0f), Vec4(0.0f, 0.0f, range, -1.0f), Vec4(0.0f, 0.0f, range * inNear, 0.0f));
223}
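// Depth range note: with clip-space w = -z, a view-space point at z = -inNear maps to depth 0
// and one at z = -inFar maps to depth 1 after the perspective divide, i.e. a [0, 1] depth range.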
224
225bool Mat44::operator == (Mat44Arg inM2) const
226{
227 return UVec4::sAnd(
228 UVec4::sAnd(Vec4::sEquals(mCol[0], inM2.mCol[0]), Vec4::sEquals(mCol[1], inM2.mCol[1])),
229 UVec4::sAnd(Vec4::sEquals(mCol[2], inM2.mCol[2]), Vec4::sEquals(mCol[3], inM2.mCol[3]))
230 ).TestAllTrue();
231}
232
233bool Mat44::IsClose(Mat44Arg inM2, float inMaxDistSq) const
234{
235 for (int i = 0; i < 4; ++i)
236 if (!mCol[i].IsClose(inM2.mCol[i], inMaxDistSq))
237 return false;
238 return true;
239}
240
241Mat44 Mat44::operator * (Mat44Arg inM) const
242{
243 Mat44 result;
244#if defined(JPH_USE_SSE)
245 for (int i = 0; i < 4; ++i)
246 {
247 __m128 c = inM.mCol[i].mValue;
248 __m128 t = _mm_mul_ps(mCol[0].mValue, _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)));
249 t = _mm_add_ps(t, _mm_mul_ps(mCol[1].mValue, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1))));
250 t = _mm_add_ps(t, _mm_mul_ps(mCol[2].mValue, _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2))));
251 t = _mm_add_ps(t, _mm_mul_ps(mCol[3].mValue, _mm_shuffle_ps(c, c, _MM_SHUFFLE(3, 3, 3, 3))));
252 result.mCol[i].mValue = t;
253 }
254#elif defined(JPH_USE_NEON)
255 for (int i = 0; i < 4; ++i)
256 {
257 Type c = inM.mCol[i].mValue;
258 Type t = vmulq_f32(mCol[0].mValue, vdupq_laneq_f32(c, 0));
259 t = vmlaq_f32(t, mCol[1].mValue, vdupq_laneq_f32(c, 1));
260 t = vmlaq_f32(t, mCol[2].mValue, vdupq_laneq_f32(c, 2));
261 t = vmlaq_f32(t, mCol[3].mValue, vdupq_laneq_f32(c, 3));
262 result.mCol[i].mValue = t;
263 }
264#elif defined(JPH_USE_RVV)
265 for (int i = 0; i < 4; ++i)
266 {
267 const float *c = inM.mCol[i].mF32;
268 const vfloat32m1_t rep_0 = __riscv_vfmv_v_f_f32m1(c[0], 4);
269 const vfloat32m1_t rep_1 = __riscv_vfmv_v_f_f32m1(c[1], 4);
270 const vfloat32m1_t rep_2 = __riscv_vfmv_v_f_f32m1(c[2], 4);
271 const vfloat32m1_t rep_3 = __riscv_vfmv_v_f_f32m1(c[3], 4);
272
273 const vfloat32m1_t col0 = __riscv_vle32_v_f32m1(mCol[0].mF32, 4);
274 const vfloat32m1_t col1 = __riscv_vle32_v_f32m1(mCol[1].mF32, 4);
275 const vfloat32m1_t col2 = __riscv_vle32_v_f32m1(mCol[2].mF32, 4);
276 const vfloat32m1_t col3 = __riscv_vle32_v_f32m1(mCol[3].mF32, 4);
277
278 const vfloat32m1_t mul1 = __riscv_vfmul_vv_f32m1(col1, rep_1, 4);
279 const vfloat32m1_t mul2 = __riscv_vfmul_vv_f32m1(col2, rep_2, 4);
280 const vfloat32m1_t mul3 = __riscv_vfmul_vv_f32m1(col3, rep_3, 4);
281
282 vfloat32m1_t t = __riscv_vfmul_vv_f32m1(col0, rep_0, 4);
283 t = __riscv_vfadd_vv_f32m1(t, mul1, 4);
284 t = __riscv_vfadd_vv_f32m1(t, mul2, 4);
285 t = __riscv_vfadd_vv_f32m1(t, mul3, 4);
286 __riscv_vse32_v_f32m1(result.mCol[i].mF32, t, 4);
287 }
288#else
289 for (int i = 0; i < 4; ++i)
290 result.mCol[i] = mCol[0] * inM.mCol[i].mF32[0] + mCol[1] * inM.mCol[i].mF32[1] + mCol[2] * inM.mCol[i].mF32[2] + mCol[3] * inM.mCol[i].mF32[3];
291#endif
292 return result;
293}
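// Matrices are stored column-major: column i of the product is this matrix applied to column i
// of inM, so transforms compose as (A * B) * v == A * (B * v).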
294
295Vec3 Mat44::operator * (Vec3Arg inV) const
296{
297#if defined(JPH_USE_SSE)
298 __m128 t = _mm_mul_ps(mCol[0].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(0, 0, 0, 0)));
299 t = _mm_add_ps(t, _mm_mul_ps(mCol[1].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(1, 1, 1, 1))));
300 t = _mm_add_ps(t, _mm_mul_ps(mCol[2].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
301 t = _mm_add_ps(t, mCol[3].mValue);
302 return Vec3::sFixW(t);
303#elif defined(JPH_USE_NEON)
304 Type t = vmulq_f32(mCol[0].mValue, vdupq_laneq_f32(inV.mValue, 0));
305 t = vmlaq_f32(t, mCol[1].mValue, vdupq_laneq_f32(inV.mValue, 1));
306 t = vmlaq_f32(t, mCol[2].mValue, vdupq_laneq_f32(inV.mValue, 2));
307 t = vaddq_f32(t, mCol[3].mValue); // Don't combine this with the first mul into a fused multiply add, causes precision issues
308 return Vec3::sFixW(t);
309#elif defined(JPH_USE_RVV)
310 const vfloat32m1_t v0 = __riscv_vfmv_v_f_f32m1(inV.mF32[0], 4);
311 const vfloat32m1_t v1 = __riscv_vfmv_v_f_f32m1(inV.mF32[1], 4);
312 const vfloat32m1_t v2 = __riscv_vfmv_v_f_f32m1(inV.mF32[2], 4);
313
314 const vfloat32m1_t col0 = __riscv_vle32_v_f32m1(mCol[0].mF32, 4);
315 const vfloat32m1_t col1 = __riscv_vle32_v_f32m1(mCol[1].mF32, 4);
316 const vfloat32m1_t col2 = __riscv_vle32_v_f32m1(mCol[2].mF32, 4);
317 const vfloat32m1_t col3 = __riscv_vle32_v_f32m1(mCol[3].mF32, 4);
318
319 const vfloat32m1_t mul1 = __riscv_vfmul_vv_f32m1(col1, v1, 4);
320 const vfloat32m1_t mul2 = __riscv_vfmul_vv_f32m1(col2, v2, 4);
321
322 vfloat32m1_t t = __riscv_vfmul_vv_f32m1(col0, v0, 4);
323 t = __riscv_vfadd_vv_f32m1(t, mul1, 4);
324 t = __riscv_vfadd_vv_f32m1(t, mul2, 4);
325 t = __riscv_vfadd_vv_f32m1(t, col3, 4);
326
327 Type v;
328 __riscv_vse32_v_f32m1(v.mData, t, 4);
329 return Vec3::sFixW(v);
330#else
331 return Vec3(
332 mCol[0].mF32[0] * inV.mF32[0] + mCol[1].mF32[0] * inV.mF32[1] + mCol[2].mF32[0] * inV.mF32[2] + mCol[3].mF32[0],
333 mCol[0].mF32[1] * inV.mF32[0] + mCol[1].mF32[1] * inV.mF32[1] + mCol[2].mF32[1] * inV.mF32[2] + mCol[3].mF32[1],
334 mCol[0].mF32[2] * inV.mF32[0] + mCol[1].mF32[2] * inV.mF32[1] + mCol[2].mF32[2] * inV.mF32[2] + mCol[3].mF32[2]);
335#endif
336}
337
338Vec4 Mat44::operator * (Vec4Arg inV) const
339{
340#if defined(JPH_USE_SSE)
341 __m128 t = _mm_mul_ps(mCol[0].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(0, 0, 0, 0)));
342 t = _mm_add_ps(t, _mm_mul_ps(mCol[1].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(1, 1, 1, 1))));
343 t = _mm_add_ps(t, _mm_mul_ps(mCol[2].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
344 t = _mm_add_ps(t, _mm_mul_ps(mCol[3].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(3, 3, 3, 3))));
345 return t;
346#elif defined(JPH_USE_NEON)
347 Type t = vmulq_f32(mCol[0].mValue, vdupq_laneq_f32(inV.mValue, 0));
348 t = vmlaq_f32(t, mCol[1].mValue, vdupq_laneq_f32(inV.mValue, 1));
349 t = vmlaq_f32(t, mCol[2].mValue, vdupq_laneq_f32(inV.mValue, 2));
350 t = vmlaq_f32(t, mCol[3].mValue, vdupq_laneq_f32(inV.mValue, 3));
351 return t;
352#elif defined(JPH_USE_RVV)
353 const vfloat32m1_t v0 = __riscv_vfmv_v_f_f32m1(inV.mF32[0], 4);
354 const vfloat32m1_t v1 = __riscv_vfmv_v_f_f32m1(inV.mF32[1], 4);
355 const vfloat32m1_t v2 = __riscv_vfmv_v_f_f32m1(inV.mF32[2], 4);
356 const vfloat32m1_t v3 = __riscv_vfmv_v_f_f32m1(inV.mF32[3], 4);
357
358 const vfloat32m1_t col0 = __riscv_vle32_v_f32m1(mCol[0].mF32, 4);
359 const vfloat32m1_t col1 = __riscv_vle32_v_f32m1(mCol[1].mF32, 4);
360 const vfloat32m1_t col2 = __riscv_vle32_v_f32m1(mCol[2].mF32, 4);
361 const vfloat32m1_t col3 = __riscv_vle32_v_f32m1(mCol[3].mF32, 4);
362
363 const vfloat32m1_t mul1 = __riscv_vfmul_vv_f32m1(col1, v1, 4);
364 const vfloat32m1_t mul2 = __riscv_vfmul_vv_f32m1(col2, v2, 4);
365 const vfloat32m1_t mul3 = __riscv_vfmul_vv_f32m1(col3, v3, 4);
366
367 vfloat32m1_t t = __riscv_vfmul_vv_f32m1(col0, v0, 4);
368 t = __riscv_vfadd_vv_f32m1(t, mul1, 4);
369 t = __riscv_vfadd_vv_f32m1(t, mul2, 4);
370 t = __riscv_vfadd_vv_f32m1(t, mul3, 4);
371
372 Vec4 v;
373 __riscv_vse32_v_f32m1(v.mF32, t, 4);
374 return v;
375#else
376 return Vec4(
377 mCol[0].mF32[0] * inV.mF32[0] + mCol[1].mF32[0] * inV.mF32[1] + mCol[2].mF32[0] * inV.mF32[2] + mCol[3].mF32[0] * inV.mF32[3],
378 mCol[0].mF32[1] * inV.mF32[0] + mCol[1].mF32[1] * inV.mF32[1] + mCol[2].mF32[1] * inV.mF32[2] + mCol[3].mF32[1] * inV.mF32[3],
379 mCol[0].mF32[2] * inV.mF32[0] + mCol[1].mF32[2] * inV.mF32[1] + mCol[2].mF32[2] * inV.mF32[2] + mCol[3].mF32[2] * inV.mF32[3],
380 mCol[0].mF32[3] * inV.mF32[0] + mCol[1].mF32[3] * inV.mF32[1] + mCol[2].mF32[3] * inV.mF32[2] + mCol[3].mF32[3] * inV.mF32[3]);
381#endif
382}
383
384Vec3 Mat44::Multiply3x3(Vec3Arg inV) const
385{
386#if defined(JPH_USE_SSE)
387 __m128 t = _mm_mul_ps(mCol[0].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(0, 0, 0, 0)));
388 t = _mm_add_ps(t, _mm_mul_ps(mCol[1].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(1, 1, 1, 1))));
389 t = _mm_add_ps(t, _mm_mul_ps(mCol[2].mValue, _mm_shuffle_ps(inV.mValue, inV.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
390 return Vec3::sFixW(t);
391#elif defined(JPH_USE_NEON)
392 Type t = vmulq_f32(mCol[0].mValue, vdupq_laneq_f32(inV.mValue, 0));
393 t = vmlaq_f32(t, mCol[1].mValue, vdupq_laneq_f32(inV.mValue, 1));
394 t = vmlaq_f32(t, mCol[2].mValue, vdupq_laneq_f32(inV.mValue, 2));
395 return Vec3::sFixW(t);
396#elif defined(JPH_USE_RVV)
397 const vfloat32m1_t v0 = __riscv_vfmv_v_f_f32m1(inV.mF32[0], 4);
398 const vfloat32m1_t v1 = __riscv_vfmv_v_f_f32m1(inV.mF32[1], 4);
399 const vfloat32m1_t v2 = __riscv_vfmv_v_f_f32m1(inV.mF32[2], 4);
400
401 const vfloat32m1_t col0 = __riscv_vle32_v_f32m1(mCol[0].mF32, 4);
402 const vfloat32m1_t col1 = __riscv_vle32_v_f32m1(mCol[1].mF32, 4);
403 const vfloat32m1_t col2 = __riscv_vle32_v_f32m1(mCol[2].mF32, 4);
404
405 const vfloat32m1_t mul1 = __riscv_vfmul_vv_f32m1(v1, col1, 4);
406 const vfloat32m1_t mul2 = __riscv_vfmul_vv_f32m1(v2, col2, 4);
407
408 vfloat32m1_t t = __riscv_vfmul_vv_f32m1(v0, col0, 4);
409 t = __riscv_vfadd_vv_f32m1(t, mul1, 4);
410 t = __riscv_vfadd_vv_f32m1(t, mul2, 4);
411
412 Type v;
413 __riscv_vse32_v_f32m1(v.mData, t, 4);
414 return Vec3::sFixW(v);
415#else
416 return Vec3(
417 mCol[0].mF32[0] * inV.mF32[0] + mCol[1].mF32[0] * inV.mF32[1] + mCol[2].mF32[0] * inV.mF32[2],
418 mCol[0].mF32[1] * inV.mF32[0] + mCol[1].mF32[1] * inV.mF32[1] + mCol[2].mF32[1] * inV.mF32[2],
419 mCol[0].mF32[2] * inV.mF32[0] + mCol[1].mF32[2] * inV.mF32[1] + mCol[2].mF32[2] * inV.mF32[2]);
420#endif
421}
422
423Vec3 Mat44::Multiply3x3Transposed(Vec3Arg inV) const
424{
425#if defined(JPH_USE_SSE4_1)
426 __m128 x = _mm_dp_ps(mCol[0].mValue, inV.mValue, 0x7f);
427 __m128 y = _mm_dp_ps(mCol[1].mValue, inV.mValue, 0x7f);
428 __m128 xy = _mm_blend_ps(x, y, 0b0010);
429 __m128 z = _mm_dp_ps(mCol[2].mValue, inV.mValue, 0x7f);
430 __m128 xyzz = _mm_blend_ps(xy, z, 0b1100);
431 return xyzz;
432#else
433 return Transposed3x3().Multiply3x3(inV);
434#endif
435}
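// The 0x7f mask of _mm_dp_ps multiplies only the x, y and z lanes and broadcasts each dot
// product to all four lanes, so the two blends can gather the three results into one register.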
436
437Mat44 Mat44::Multiply3x3(Mat44Arg inM) const
438{
439 JPH_ASSERT(mCol[0][3] == 0.0f);
440 JPH_ASSERT(mCol[1][3] == 0.0f);
441 JPH_ASSERT(mCol[2][3] == 0.0f);
442
443 Mat44 result;
444#if defined(JPH_USE_SSE)
445 for (int i = 0; i < 3; ++i)
446 {
447 __m128 c = inM.mCol[i].mValue;
448 __m128 t = _mm_mul_ps(mCol[0].mValue, _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)));
449 t = _mm_add_ps(t, _mm_mul_ps(mCol[1].mValue, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1))));
450 t = _mm_add_ps(t, _mm_mul_ps(mCol[2].mValue, _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2))));
451 result.mCol[i].mValue = t;
452 }
453#elif defined(JPH_USE_NEON)
454 for (int i = 0; i < 3; ++i)
455 {
456 Type c = inM.mCol[i].mValue;
457 Type t = vmulq_f32(mCol[0].mValue, vdupq_laneq_f32(c, 0));
458 t = vmlaq_f32(t, mCol[1].mValue, vdupq_laneq_f32(c, 1));
459 t = vmlaq_f32(t, mCol[2].mValue, vdupq_laneq_f32(c, 2));
460 result.mCol[i].mValue = t;
461 }
462#elif defined(JPH_USE_RVV)
463 for (int i = 0; i < 3; ++i)
464 {
465 const float* col_i = inM.mCol[i].mF32;
466 const vfloat32m1_t v0 = __riscv_vfmv_v_f_f32m1(col_i[0], 4);
467 const vfloat32m1_t v1 = __riscv_vfmv_v_f_f32m1(col_i[1], 4);
468 const vfloat32m1_t v2 = __riscv_vfmv_v_f_f32m1(col_i[2], 4);
469
470 const vfloat32m1_t col0 = __riscv_vle32_v_f32m1(mCol[0].mF32, 4);
471 const vfloat32m1_t col1 = __riscv_vle32_v_f32m1(mCol[1].mF32, 4);
472 const vfloat32m1_t col2 = __riscv_vle32_v_f32m1(mCol[2].mF32, 4);
473
474 const vfloat32m1_t mul1 = __riscv_vfmul_vv_f32m1(v1, col1, 4);
475 const vfloat32m1_t mul2 = __riscv_vfmul_vv_f32m1(v2, col2, 4);
476
477 vfloat32m1_t t = __riscv_vfmul_vv_f32m1(v0, col0, 4);
478 t = __riscv_vfadd_vv_f32m1(t, mul1, 4);
479 t = __riscv_vfadd_vv_f32m1(t, mul2, 4);
480 __riscv_vse32_v_f32m1(result.mCol[i].mF32, t, 4);
481 }
482#else
483 for (int i = 0; i < 3; ++i)
484 result.mCol[i] = mCol[0] * inM.mCol[i].mF32[0] + mCol[1] * inM.mCol[i].mF32[1] + mCol[2] * inM.mCol[i].mF32[2];
485#endif
486 result.mCol[3] = Vec4(0, 0, 0, 1);
487 return result;
488}
489
490Mat44 Mat44::Multiply3x3LeftTransposed(Mat44Arg inM) const
491{
492 // Transpose left hand side
493 Mat44 trans = Transposed3x3();
494
495 // Do 3x3 matrix multiply
496 Mat44 result;
497 result.mCol[0] = trans.mCol[0] * inM.mCol[0].SplatX() + trans.mCol[1] * inM.mCol[0].SplatY() + trans.mCol[2] * inM.mCol[0].SplatZ();
498 result.mCol[1] = trans.mCol[0] * inM.mCol[1].SplatX() + trans.mCol[1] * inM.mCol[1].SplatY() + trans.mCol[2] * inM.mCol[1].SplatZ();
499 result.mCol[2] = trans.mCol[0] * inM.mCol[2].SplatX() + trans.mCol[1] * inM.mCol[2].SplatY() + trans.mCol[2] * inM.mCol[2].SplatZ();
500 result.mCol[3] = Vec4(0, 0, 0, 1);
501 return result;
502}
503
504Mat44 Mat44::Multiply3x3RightTransposed(Mat44Arg inM) const
505{
506 JPH_ASSERT(mCol[0][3] == 0.0f);
507 JPH_ASSERT(mCol[1][3] == 0.0f);
508 JPH_ASSERT(mCol[2][3] == 0.0f);
509
510 Mat44 result;
511 result.mCol[0] = mCol[0] * inM.mCol[0].SplatX() + mCol[1] * inM.mCol[1].SplatX() + mCol[2] * inM.mCol[2].SplatX();
512 result.mCol[1] = mCol[0] * inM.mCol[0].SplatY() + mCol[1] * inM.mCol[1].SplatY() + mCol[2] * inM.mCol[2].SplatY();
513 result.mCol[2] = mCol[0] * inM.mCol[0].SplatZ() + mCol[1] * inM.mCol[1].SplatZ() + mCol[2] * inM.mCol[2].SplatZ();
514 result.mCol[3] = Vec4(0, 0, 0, 1);
515 return result;
516}
517
518Mat44 Mat44::operator * (float inV) const
519{
520 Vec4 multiplier = Vec4::sReplicate(inV);
521
522 Mat44 result;
523 for (int c = 0; c < 4; ++c)
524 result.mCol[c] = mCol[c] * multiplier;
525 return result;
526}
527
528Mat44 &Mat44::operator *= (float inV)
529{
530 for (int c = 0; c < 4; ++c)
531 mCol[c] *= inV;
532
533 return *this;
534}
535
536Mat44 Mat44::operator + (Mat44Arg inM) const
537{
538 Mat44 result;
539 for (int i = 0; i < 4; ++i)
540 result.mCol[i] = mCol[i] + inM.mCol[i];
541 return result;
542}
543
544Mat44 Mat44::operator - () const
545{
546 Mat44 result;
547 for (int i = 0; i < 4; ++i)
548 result.mCol[i] = -mCol[i];
549 return result;
550}
551
552Mat44 Mat44::operator - (Mat44Arg inM) const
553{
554 Mat44 result;
555 for (int i = 0; i < 4; ++i)
556 result.mCol[i] = mCol[i] - inM.mCol[i];
557 return result;
558}
559
560Mat44 &Mat44::operator += (Mat44Arg inM)
561{
562 for (int c = 0; c < 4; ++c)
563 mCol[c] += inM.mCol[c];
564
565 return *this;
566}
567
568void Mat44::StoreFloat4x4(Float4 *outV) const
569{
570 for (int c = 0; c < 4; ++c)
571 mCol[c].StoreFloat4(outV + c);
572}
573
574Mat44 Mat44::Transposed() const
575{
576#if defined(JPH_USE_SSE)
577 __m128 tmp1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(1, 0, 1, 0));
578 __m128 tmp3 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(3, 2, 3, 2));
579 __m128 tmp2 = _mm_shuffle_ps(mCol[2].mValue, mCol[3].mValue, _MM_SHUFFLE(1, 0, 1, 0));
580 __m128 tmp4 = _mm_shuffle_ps(mCol[2].mValue, mCol[3].mValue, _MM_SHUFFLE(3, 2, 3, 2));
581
582 Mat44 result;
583 result.mCol[0].mValue = _mm_shuffle_ps(tmp1, tmp2, _MM_SHUFFLE(2, 0, 2, 0));
584 result.mCol[1].mValue = _mm_shuffle_ps(tmp1, tmp2, _MM_SHUFFLE(3, 1, 3, 1));
585 result.mCol[2].mValue = _mm_shuffle_ps(tmp3, tmp4, _MM_SHUFFLE(2, 0, 2, 0));
586 result.mCol[3].mValue = _mm_shuffle_ps(tmp3, tmp4, _MM_SHUFFLE(3, 1, 3, 1));
587 return result;
588#elif defined(JPH_USE_NEON)
589 float32x4x2_t tmp1 = vzipq_f32(mCol[0].mValue, mCol[2].mValue);
590 float32x4x2_t tmp2 = vzipq_f32(mCol[1].mValue, mCol[3].mValue);
591 float32x4x2_t tmp3 = vzipq_f32(tmp1.val[0], tmp2.val[0]);
592 float32x4x2_t tmp4 = vzipq_f32(tmp1.val[1], tmp2.val[1]);
593
594 Mat44 result;
595 result.mCol[0].mValue = tmp3.val[0];
596 result.mCol[1].mValue = tmp3.val[1];
597 result.mCol[2].mValue = tmp4.val[0];
598 result.mCol[3].mValue = tmp4.val[1];
599 return result;
600#elif defined(JPH_USE_RVV)
601 const vfloat32m1_t row0 = __riscv_vlse32_v_f32m1(&mCol[0].mF32[0], sizeof(Vec4), 4);
602 const vfloat32m1_t row1 = __riscv_vlse32_v_f32m1(&mCol[0].mF32[1], sizeof(Vec4), 4);
603 const vfloat32m1_t row2 = __riscv_vlse32_v_f32m1(&mCol[0].mF32[2], sizeof(Vec4), 4);
604 const vfloat32m1_t row3 = __riscv_vlse32_v_f32m1(&mCol[0].mF32[3], sizeof(Vec4), 4);
605
606 Mat44 result;
607 __riscv_vse32_v_f32m1(result.mCol[0].mF32, row0, 4);
608 __riscv_vse32_v_f32m1(result.mCol[1].mF32, row1, 4);
609 __riscv_vse32_v_f32m1(result.mCol[2].mF32, row2, 4);
610 __riscv_vse32_v_f32m1(result.mCol[3].mF32, row3, 4);
611 return result;
612#else
613 Mat44 result;
614 for (int c = 0; c < 4; ++c)
615 for (int r = 0; r < 4; ++r)
616 result.mCol[r].mF32[c] = mCol[c].mF32[r];
617 return result;
618#endif
619}
620
621Mat44 Mat44::Transposed3x3() const
622{
623#if defined(JPH_USE_SSE)
624 __m128 zero = _mm_setzero_ps();
625 __m128 tmp1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(1, 0, 1, 0));
626 __m128 tmp3 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(3, 2, 3, 2));
627 __m128 tmp2 = _mm_shuffle_ps(mCol[2].mValue, zero, _MM_SHUFFLE(1, 0, 1, 0));
628 __m128 tmp4 = _mm_shuffle_ps(mCol[2].mValue, zero, _MM_SHUFFLE(3, 2, 3, 2));
629
630 Mat44 result;
631 result.mCol[0].mValue = _mm_shuffle_ps(tmp1, tmp2, _MM_SHUFFLE(2, 0, 2, 0));
632 result.mCol[1].mValue = _mm_shuffle_ps(tmp1, tmp2, _MM_SHUFFLE(3, 1, 3, 1));
633 result.mCol[2].mValue = _mm_shuffle_ps(tmp3, tmp4, _MM_SHUFFLE(2, 0, 2, 0));
634#elif defined(JPH_USE_NEON)
635 float32x4x2_t tmp1 = vzipq_f32(mCol[0].mValue, mCol[2].mValue);
636 float32x4x2_t tmp2 = vzipq_f32(mCol[1].mValue, vdupq_n_f32(0));
637 float32x4x2_t tmp3 = vzipq_f32(tmp1.val[0], tmp2.val[0]);
638 float32x4x2_t tmp4 = vzipq_f32(tmp1.val[1], tmp2.val[1]);
639
640 Mat44 result;
641 result.mCol[0].mValue = tmp3.val[0];
642 result.mCol[1].mValue = tmp3.val[1];
643 result.mCol[2].mValue = tmp4.val[0];
644#elif defined(JPH_USE_RVV)
645 const float end_col[4] = { 0, 0, 0, 1 };
646 const vfloat32m1_t rvv_end_col = __riscv_vle32_v_f32m1(end_col, 4);
647 const vfloat32m1_t rvv_end_row = __riscv_vfmv_v_f_f32m1(0.0f, 3);
648
649 const vfloat32m1_t row0 = __riscv_vlse32_v_f32m1(&mCol[0].mF32[0], sizeof(Vec4), 3);
650 const vfloat32m1_t row1 = __riscv_vlse32_v_f32m1(&mCol[0].mF32[1], sizeof(Vec4), 3);
651 const vfloat32m1_t row2 = __riscv_vlse32_v_f32m1(&mCol[0].mF32[2], sizeof(Vec4), 3);
652
653 Mat44 result;
654 __riscv_vse32_v_f32m1(result.mCol[0].mF32, row0, 3);
655 __riscv_vse32_v_f32m1(result.mCol[1].mF32, row1, 3);
656 __riscv_vse32_v_f32m1(result.mCol[2].mF32, row2, 3);
657 __riscv_vse32_v_f32m1(result.mCol[3].mF32, rvv_end_col, 4);
658 __riscv_vsse32_v_f32m1(&result.mCol[0].mF32[3], sizeof(Vec4), rvv_end_row, 3);
659 return result;
660#else
661 Mat44 result;
662 for (int c = 0; c < 3; ++c)
663 {
664 for (int r = 0; r < 3; ++r)
665 result.mCol[c].mF32[r] = mCol[r].mF32[c];
666 result.mCol[c].mF32[3] = 0;
667 }
668#endif
669 result.mCol[3] = Vec4(0, 0, 0, 1);
670 return result;
671}
672
673Mat44 Mat44::Inversed() const
674{
675#if defined(JPH_USE_SSE)
676 // Algorithm from: http://download.intel.com/design/PentiumIII/sml/24504301.pdf
677 // Streaming SIMD Extensions - Inverse of 4x4 Matrix
678 // Adapted to load data using _mm_shuffle_ps instead of loading from memory
679 // Replaced _mm_rcp_ps with _mm_div_ps for better accuracy
680
681 __m128 tmp1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(1, 0, 1, 0));
682 __m128 row1 = _mm_shuffle_ps(mCol[2].mValue, mCol[3].mValue, _MM_SHUFFLE(1, 0, 1, 0));
683 __m128 row0 = _mm_shuffle_ps(tmp1, row1, _MM_SHUFFLE(2, 0, 2, 0));
684 row1 = _mm_shuffle_ps(row1, tmp1, _MM_SHUFFLE(3, 1, 3, 1));
685 tmp1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(3, 2, 3, 2));
686 __m128 row3 = _mm_shuffle_ps(mCol[2].mValue, mCol[3].mValue, _MM_SHUFFLE(3, 2, 3, 2));
687 __m128 row2 = _mm_shuffle_ps(tmp1, row3, _MM_SHUFFLE(2, 0, 2, 0));
688 row3 = _mm_shuffle_ps(row3, tmp1, _MM_SHUFFLE(3, 1, 3, 1));
689
690 tmp1 = _mm_mul_ps(row2, row3);
691 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
692 __m128 minor0 = _mm_mul_ps(row1, tmp1);
693 __m128 minor1 = _mm_mul_ps(row0, tmp1);
694 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
695 minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
696 minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
697 minor1 = _mm_shuffle_ps(minor1, minor1, _MM_SHUFFLE(1, 0, 3, 2));
698
699 tmp1 = _mm_mul_ps(row1, row2);
700 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
701 minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
702 __m128 minor3 = _mm_mul_ps(row0, tmp1);
703 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
704 minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
705 minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3);
706 minor3 = _mm_shuffle_ps(minor3, minor3, _MM_SHUFFLE(1, 0, 3, 2));
707
708 tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, _MM_SHUFFLE(1, 0, 3, 2)), row3);
709 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
710 row2 = _mm_shuffle_ps(row2, row2, _MM_SHUFFLE(1, 0, 3, 2));
711 minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
712 __m128 minor2 = _mm_mul_ps(row0, tmp1);
713 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
714 minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
715 minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
716 minor2 = _mm_shuffle_ps(minor2, minor2, _MM_SHUFFLE(1, 0, 3, 2));
717
718 tmp1 = _mm_mul_ps(row0, row1);
719 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
720 minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
721 minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3);
722 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
723 minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
724 minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1));
725
726 tmp1 = _mm_mul_ps(row0, row3);
727 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
728 minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
729 minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
730 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
731 minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
732 minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
733
734 tmp1 = _mm_mul_ps(row0, row2);
735 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
736 minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
737 minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1));
738 tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
739 minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
740 minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3);
741
742 __m128 det = _mm_mul_ps(row0, minor0);
743 det = _mm_add_ps(_mm_shuffle_ps(det, det, _MM_SHUFFLE(2, 3, 0, 1)), det); // Original code did (x + z) + (y + w), changed to (x + y) + (z + w) to match the ARM code below and make the result cross platform deterministic
744 det = _mm_add_ss(_mm_shuffle_ps(det, det, _MM_SHUFFLE(1, 0, 3, 2)), det);
745 det = _mm_div_ss(_mm_set_ss(1.0f), det);
746 det = _mm_shuffle_ps(det, det, _MM_SHUFFLE(0, 0, 0, 0));
747
748 Mat44 result;
749 result.mCol[0].mValue = _mm_mul_ps(det, minor0);
750 result.mCol[1].mValue = _mm_mul_ps(det, minor1);
751 result.mCol[2].mValue = _mm_mul_ps(det, minor2);
752 result.mCol[3].mValue = _mm_mul_ps(det, minor3);
753 return result;
754#elif defined(JPH_USE_NEON)
755 // Adapted from the SSE version; there are surprisingly few articles about efficient ways of calculating an inverse for ARM on the internet
756 Type tmp1 = JPH_NEON_SHUFFLE_F32x4(mCol[0].mValue, mCol[1].mValue, 0, 1, 4, 5);
757 Type row1 = JPH_NEON_SHUFFLE_F32x4(mCol[2].mValue, mCol[3].mValue, 0, 1, 4, 5);
758 Type row0 = JPH_NEON_SHUFFLE_F32x4(tmp1, row1, 0, 2, 4, 6);
759 row1 = JPH_NEON_SHUFFLE_F32x4(row1, tmp1, 1, 3, 5, 7);
760 tmp1 = JPH_NEON_SHUFFLE_F32x4(mCol[0].mValue, mCol[1].mValue, 2, 3, 6, 7);
761 Type row3 = JPH_NEON_SHUFFLE_F32x4(mCol[2].mValue, mCol[3].mValue, 2, 3, 6, 7);
762 Type row2 = JPH_NEON_SHUFFLE_F32x4(tmp1, row3, 0, 2, 4, 6);
763 row3 = JPH_NEON_SHUFFLE_F32x4(row3, tmp1, 1, 3, 5, 7);
764
765 tmp1 = vmulq_f32(row2, row3);
766 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
767 Type minor0 = vmulq_f32(row1, tmp1);
768 Type minor1 = vmulq_f32(row0, tmp1);
769 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
770 minor0 = vsubq_f32(vmulq_f32(row1, tmp1), minor0);
771 minor1 = vsubq_f32(vmulq_f32(row0, tmp1), minor1);
772 minor1 = JPH_NEON_SHUFFLE_F32x4(minor1, minor1, 2, 3, 0, 1);
773
774 tmp1 = vmulq_f32(row1, row2);
775 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
776 minor0 = vaddq_f32(vmulq_f32(row3, tmp1), minor0);
777 Type minor3 = vmulq_f32(row0, tmp1);
778 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
779 minor0 = vsubq_f32(minor0, vmulq_f32(row3, tmp1));
780 minor3 = vsubq_f32(vmulq_f32(row0, tmp1), minor3);
781 minor3 = JPH_NEON_SHUFFLE_F32x4(minor3, minor3, 2, 3, 0, 1);
782
783 tmp1 = JPH_NEON_SHUFFLE_F32x4(row1, row1, 2, 3, 0, 1);
784 tmp1 = vmulq_f32(tmp1, row3);
785 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
786 row2 = JPH_NEON_SHUFFLE_F32x4(row2, row2, 2, 3, 0, 1);
787 minor0 = vaddq_f32(vmulq_f32(row2, tmp1), minor0);
788 Type minor2 = vmulq_f32(row0, tmp1);
789 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
790 minor0 = vsubq_f32(minor0, vmulq_f32(row2, tmp1));
791 minor2 = vsubq_f32(vmulq_f32(row0, tmp1), minor2);
792 minor2 = JPH_NEON_SHUFFLE_F32x4(minor2, minor2, 2, 3, 0, 1);
793
794 tmp1 = vmulq_f32(row0, row1);
795 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
796 minor2 = vaddq_f32(vmulq_f32(row3, tmp1), minor2);
797 minor3 = vsubq_f32(vmulq_f32(row2, tmp1), minor3);
798 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
799 minor2 = vsubq_f32(vmulq_f32(row3, tmp1), minor2);
800 minor3 = vsubq_f32(minor3, vmulq_f32(row2, tmp1));
801
802 tmp1 = vmulq_f32(row0, row3);
803 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
804 minor1 = vsubq_f32(minor1, vmulq_f32(row2, tmp1));
805 minor2 = vaddq_f32(vmulq_f32(row1, tmp1), minor2);
806 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
807 minor1 = vaddq_f32(vmulq_f32(row2, tmp1), minor1);
808 minor2 = vsubq_f32(minor2, vmulq_f32(row1, tmp1));
809
810 tmp1 = vmulq_f32(row0, row2);
811 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
812 minor1 = vaddq_f32(vmulq_f32(row3, tmp1), minor1);
813 minor3 = vsubq_f32(minor3, vmulq_f32(row1, tmp1));
814 tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
815 minor1 = vsubq_f32(minor1, vmulq_f32(row3, tmp1));
816 minor3 = vaddq_f32(vmulq_f32(row1, tmp1), minor3);
817
818 Type det = vmulq_f32(row0, minor0);
819 det = vdupq_n_f32(vaddvq_f32(det));
820 det = vdivq_f32(vdupq_n_f32(1.0f), det);
821
822 Mat44 result;
823 result.mCol[0].mValue = vmulq_f32(det, minor0);
824 result.mCol[1].mValue = vmulq_f32(det, minor1);
825 result.mCol[2].mValue = vmulq_f32(det, minor2);
826 result.mCol[3].mValue = vmulq_f32(det, minor3);
827 return result;
828#elif defined(JPH_USE_RVV)
829 // Implementation mirrored from SSE and NEON implementations
830 const vfloat32m1_t zeros = __riscv_vfmv_v_f_f32m1(0.0f, 1);
831
832 const vfloat32m1_t c0 = __riscv_vle32_v_f32m1(mCol[0].mF32, 4);
833 const vfloat32m1_t c1 = __riscv_vle32_v_f32m1(mCol[1].mF32, 4);
834 const vfloat32m1_t c2 = __riscv_vle32_v_f32m1(mCol[2].mF32, 4);
835 const vfloat32m1_t c3 = __riscv_vle32_v_f32m1(mCol[3].mF32, 4);
836
837 vfloat32m1_t minor0, minor1, minor2, minor3;
838 vfloat32m1_t tmp1;
839 vfloat32m1_t row0, row1, row2, row3;
840
841 tmp1 = RVVShuffleFloat32x4<0, 1, 4, 5>(c0, c1);
842 row1 = RVVShuffleFloat32x4<0, 1, 4, 5>(c2, c3);
843 row0 = RVVShuffleFloat32x4<0, 2, 4, 6>(tmp1, row1);
844 row1 = RVVShuffleFloat32x4<1, 3, 5, 7>(row1, tmp1);
845 tmp1 = RVVShuffleFloat32x4<2, 3, 6, 7>(c0, c1);
846 row3 = RVVShuffleFloat32x4<2, 3, 6, 7>(c2, c3);
847 row2 = RVVShuffleFloat32x4<0, 2, 4, 6>(tmp1, row3);
848 row3 = RVVShuffleFloat32x4<1, 3, 5, 7>(row3, tmp1);
849
850 tmp1 = __riscv_vfmul_vv_f32m1(row2, row3, 4);
851 tmp1 = RVVShuffleFloat32x4<1, 0, 3, 2>(tmp1, tmp1);
852 minor0 = __riscv_vfmul_vv_f32m1(row1, tmp1, 4);
853 minor1 = __riscv_vfmul_vv_f32m1(row0, tmp1, 4);
854 tmp1 = RVVShuffleFloat32x4<2, 3, 0, 1>(tmp1, tmp1);
855 minor0 = __riscv_vfsub_vv_f32m1(__riscv_vfmul_vv_f32m1(row1, tmp1, 4), minor0, 4);
856 minor1 = __riscv_vfsub_vv_f32m1(__riscv_vfmul_vv_f32m1(row0, tmp1, 4), minor1, 4);
857 minor1 = RVVShuffleFloat32x4<2, 3, 0, 1>(minor1, minor1);
858
859 tmp1 = __riscv_vfmul_vv_f32m1(row1, row2, 4);
860 tmp1 = RVVShuffleFloat32x4<1, 0, 3, 2>(tmp1, tmp1);
861 minor0 = __riscv_vfadd_vv_f32m1(__riscv_vfmul_vv_f32m1(row3, tmp1, 4), minor0, 4);
862 minor3 = __riscv_vfmul_vv_f32m1(row0, tmp1, 4);
863 tmp1 = RVVShuffleFloat32x4<2, 3, 0, 1>(tmp1, tmp1);
864 minor0 = __riscv_vfsub_vv_f32m1(minor0, __riscv_vfmul_vv_f32m1(row3, tmp1, 4), 4);
865 minor3 = __riscv_vfsub_vv_f32m1(__riscv_vfmul_vv_f32m1(row0, tmp1, 4), minor3, 4);
866 minor3 = RVVShuffleFloat32x4<2, 3, 0, 1>(minor3, minor3);
867
868 tmp1 = RVVShuffleFloat32x4<2, 3, 0, 1>(row1, row1);
869 tmp1 = __riscv_vfmul_vv_f32m1(tmp1, row3, 4);
870 tmp1 = RVVShuffleFloat32x4<1, 0, 3, 2>(tmp1, tmp1);
871 row2 = RVVShuffleFloat32x4<2, 3, 0, 1>(row2, row2);
872 minor0 = __riscv_vfadd_vv_f32m1(__riscv_vfmul_vv_f32m1(row2, tmp1, 4), minor0, 4);
873 minor2 = __riscv_vfmul_vv_f32m1(row0, tmp1, 4);
874 tmp1 = RVVShuffleFloat32x4<2, 3, 0, 1>(tmp1, tmp1);
875 minor0 = __riscv_vfsub_vv_f32m1(minor0, __riscv_vfmul_vv_f32m1(row2, tmp1, 4), 4);
876 minor2 = __riscv_vfsub_vv_f32m1(__riscv_vfmul_vv_f32m1(row0, tmp1, 4), minor2, 4);
877 minor2 = RVVShuffleFloat32x4<2, 3, 0, 1>(minor2, minor2);
878
879 tmp1 = __riscv_vfmul_vv_f32m1(row0, row1, 4);
880 tmp1 = RVVShuffleFloat32x4<1, 0, 3, 2>(tmp1, tmp1);
881 minor2 = __riscv_vfadd_vv_f32m1(__riscv_vfmul_vv_f32m1(row3, tmp1, 4), minor2, 4);
882 minor3 = __riscv_vfsub_vv_f32m1(__riscv_vfmul_vv_f32m1(row2, tmp1, 4), minor3, 4);
883 tmp1 = RVVShuffleFloat32x4<2, 3, 0, 1>(tmp1, tmp1);
884 minor2 = __riscv_vfsub_vv_f32m1(__riscv_vfmul_vv_f32m1(row3, tmp1, 4), minor2, 4);
885 minor3 = __riscv_vfsub_vv_f32m1(minor3, __riscv_vfmul_vv_f32m1(row2, tmp1, 4), 4);
886
887 tmp1 = __riscv_vfmul_vv_f32m1(row0, row3, 4);
888 tmp1 = RVVShuffleFloat32x4<1, 0, 3, 2>(tmp1, tmp1);
889 minor1 = __riscv_vfsub_vv_f32m1(minor1, __riscv_vfmul_vv_f32m1(row2, tmp1, 4), 4);
890 minor2 = __riscv_vfadd_vv_f32m1(__riscv_vfmul_vv_f32m1(row1, tmp1, 4), minor2, 4);
891 tmp1 = RVVShuffleFloat32x4<2, 3, 0, 1>(tmp1, tmp1);
892 minor1 = __riscv_vfadd_vv_f32m1(__riscv_vfmul_vv_f32m1(row2, tmp1, 4), minor1, 4);
893 minor2 = __riscv_vfsub_vv_f32m1(minor2, __riscv_vfmul_vv_f32m1(row1, tmp1, 4), 4);
894
895 tmp1 = __riscv_vfmul_vv_f32m1(row0, row2, 4);
896 tmp1 = RVVShuffleFloat32x4<1, 0, 3, 2>(tmp1, tmp1);
897 minor1 = __riscv_vfadd_vv_f32m1(__riscv_vfmul_vv_f32m1(row3, tmp1, 4), minor1, 4);
898 minor3 = __riscv_vfsub_vv_f32m1(minor3, __riscv_vfmul_vv_f32m1(row1, tmp1, 4), 4);
899 tmp1 = RVVShuffleFloat32x4<2, 3, 0, 1>(tmp1, tmp1);
900 minor1 = __riscv_vfsub_vv_f32m1(minor1, __riscv_vfmul_vv_f32m1(row3, tmp1, 4), 4);
901 minor3 = __riscv_vfadd_vv_f32m1(__riscv_vfmul_vv_f32m1(row1, tmp1, 4), minor3, 4);
902
903 const vfloat32m1_t v_det = __riscv_vfmul_vv_f32m1(row0, minor0, 4);
904 const vfloat32m1_t sum_vec = __riscv_vfredusum_vs_f32m1_f32m1(v_det, zeros, 4);
905 const float s_det = __riscv_vfmv_f_s_f32m1_f32(sum_vec);
906 const vfloat32m1_t det_inv = __riscv_vfmv_v_f_f32m1(1.0f / s_det, 4);
907
908 minor0 = __riscv_vfmul_vv_f32m1(det_inv, minor0, 4);
909 minor1 = __riscv_vfmul_vv_f32m1(det_inv, minor1, 4);
910 minor2 = __riscv_vfmul_vv_f32m1(det_inv, minor2, 4);
911 minor3 = __riscv_vfmul_vv_f32m1(det_inv, minor3, 4);
912
913 Mat44 result;
914 __riscv_vse32_v_f32m1(result.mCol[0].mF32, minor0, 4);
915 __riscv_vse32_v_f32m1(result.mCol[1].mF32, minor1, 4);
916 __riscv_vse32_v_f32m1(result.mCol[2].mF32, minor2, 4);
917 __riscv_vse32_v_f32m1(result.mCol[3].mF32, minor3, 4);
918 return result;
919#else
920 float m00 = JPH_EL(0, 0), m10 = JPH_EL(1, 0), m20 = JPH_EL(2, 0), m30 = JPH_EL(3, 0);
921 float m01 = JPH_EL(0, 1), m11 = JPH_EL(1, 1), m21 = JPH_EL(2, 1), m31 = JPH_EL(3, 1);
922 float m02 = JPH_EL(0, 2), m12 = JPH_EL(1, 2), m22 = JPH_EL(2, 2), m32 = JPH_EL(3, 2);
923 float m03 = JPH_EL(0, 3), m13 = JPH_EL(1, 3), m23 = JPH_EL(2, 3), m33 = JPH_EL(3, 3);
924
925 float m10211120 = m10 * m21 - m11 * m20;
926 float m10221220 = m10 * m22 - m12 * m20;
927 float m10231320 = m10 * m23 - m13 * m20;
928 float m10311130 = m10 * m31 - m11 * m30;
929 float m10321230 = m10 * m32 - m12 * m30;
930 float m10331330 = m10 * m33 - m13 * m30;
931 float m11221221 = m11 * m22 - m12 * m21;
932 float m11231321 = m11 * m23 - m13 * m21;
933 float m11321231 = m11 * m32 - m12 * m31;
934 float m11331331 = m11 * m33 - m13 * m31;
935 float m12231322 = m12 * m23 - m13 * m22;
936 float m12331332 = m12 * m33 - m13 * m32;
937 float m20312130 = m20 * m31 - m21 * m30;
938 float m20322230 = m20 * m32 - m22 * m30;
939 float m20332330 = m20 * m33 - m23 * m30;
940 float m21322231 = m21 * m32 - m22 * m31;
941 float m21332331 = m21 * m33 - m23 * m31;
942 float m22332332 = m22 * m33 - m23 * m32;
943
944 Vec4 col0(m11 * m22332332 - m12 * m21332331 + m13 * m21322231, -m10 * m22332332 + m12 * m20332330 - m13 * m20322230, m10 * m21332331 - m11 * m20332330 + m13 * m20312130, -m10 * m21322231 + m11 * m20322230 - m12 * m20312130);
945 Vec4 col1(-m01 * m22332332 + m02 * m21332331 - m03 * m21322231, m00 * m22332332 - m02 * m20332330 + m03 * m20322230, -m00 * m21332331 + m01 * m20332330 - m03 * m20312130, m00 * m21322231 - m01 * m20322230 + m02 * m20312130);
946 Vec4 col2(m01 * m12331332 - m02 * m11331331 + m03 * m11321231, -m00 * m12331332 + m02 * m10331330 - m03 * m10321230, m00 * m11331331 - m01 * m10331330 + m03 * m10311130, -m00 * m11321231 + m01 * m10321230 - m02 * m10311130);
947 Vec4 col3(-m01 * m12231322 + m02 * m11231321 - m03 * m11221221, m00 * m12231322 - m02 * m10231320 + m03 * m10221220, -m00 * m11231321 + m01 * m10231320 - m03 * m10211120, m00 * m11221221 - m01 * m10221220 + m02 * m10211120);
948
949 float det = m00 * col0.mF32[0] + m01 * col0.mF32[1] + m02 * col0.mF32[2] + m03 * col0.mF32[3];
950
951 return Mat44(col0 / det, col1 / det, col2 / det, col3 / det);
952#endif
953}
954
954
955Mat44 Mat44::InversedRotationTranslation() const
956{
957 Mat44 m = Transposed3x3();
958 m.SetTranslation(-m.Multiply3x3(GetTranslation()));
959 return m;
960}
961
962float Mat44::GetDeterminant3x3() const
963{
964 return GetAxisX().Dot(GetAxisY().Cross(GetAxisZ()));
965}
966
967Mat44 Mat44::Adjointed3x3() const
968{
969 return Mat44(
970 Vec4(JPH_EL(1, 1), JPH_EL(1, 2), JPH_EL(1, 0), 0) * Vec4(JPH_EL(2, 2), JPH_EL(2, 0), JPH_EL(2, 1), 0)
971 - Vec4(JPH_EL(1, 2), JPH_EL(1, 0), JPH_EL(1, 1), 0) * Vec4(JPH_EL(2, 1), JPH_EL(2, 2), JPH_EL(2, 0), 0),
972 Vec4(JPH_EL(0, 2), JPH_EL(0, 0), JPH_EL(0, 1), 0) * Vec4(JPH_EL(2, 1), JPH_EL(2, 2), JPH_EL(2, 0), 0)
973 - Vec4(JPH_EL(0, 1), JPH_EL(0, 2), JPH_EL(0, 0), 0) * Vec4(JPH_EL(2, 2), JPH_EL(2, 0), JPH_EL(2, 1), 0),
974 Vec4(JPH_EL(0, 1), JPH_EL(0, 2), JPH_EL(0, 0), 0) * Vec4(JPH_EL(1, 2), JPH_EL(1, 0), JPH_EL(1, 1), 0)
975 - Vec4(JPH_EL(0, 2), JPH_EL(0, 0), JPH_EL(0, 1), 0) * Vec4(JPH_EL(1, 1), JPH_EL(1, 2), JPH_EL(1, 0), 0),
976 Vec4(0, 0, 0, 1));
977}
978
979Mat44 Mat44::Inversed3x3() const
980{
981 float det = GetDeterminant3x3();
982
983 return Mat44(
984 (Vec4(JPH_EL(1, 1), JPH_EL(1, 2), JPH_EL(1, 0), 0) * Vec4(JPH_EL(2, 2), JPH_EL(2, 0), JPH_EL(2, 1), 0)
985 - Vec4(JPH_EL(1, 2), JPH_EL(1, 0), JPH_EL(1, 1), 0) * Vec4(JPH_EL(2, 1), JPH_EL(2, 2), JPH_EL(2, 0), 0)) / det,
986 (Vec4(JPH_EL(0, 2), JPH_EL(0, 0), JPH_EL(0, 1), 0) * Vec4(JPH_EL(2, 1), JPH_EL(2, 2), JPH_EL(2, 0), 0)
987 - Vec4(JPH_EL(0, 1), JPH_EL(0, 2), JPH_EL(0, 0), 0) * Vec4(JPH_EL(2, 2), JPH_EL(2, 0), JPH_EL(2, 1), 0)) / det,
988 (Vec4(JPH_EL(0, 1), JPH_EL(0, 2), JPH_EL(0, 0), 0) * Vec4(JPH_EL(1, 2), JPH_EL(1, 0), JPH_EL(1, 1), 0)
989 - Vec4(JPH_EL(0, 2), JPH_EL(0, 0), JPH_EL(0, 1), 0) * Vec4(JPH_EL(1, 1), JPH_EL(1, 2), JPH_EL(1, 0), 0)) / det,
990 Vec4(0, 0, 0, 1));
991}
992
993bool Mat44::SetInversed3x3(Mat44Arg inM)
994{
995 float det = inM.GetDeterminant3x3();
996
997 // If the determinant is zero the matrix is singular and we return false
998 if (det == 0.0f)
999 return false;
1000
1001 // Finish calculating the inverse
1002 *this = inM.Adjointed3x3();
1003 mCol[0] /= det;
1004 mCol[1] /= det;
1005 mCol[2] /= det;
1006 return true;
1007}
1008
1009Quat Mat44::GetQuaternion() const
1010{
1011 float tr = mCol[0].mF32[0] + mCol[1].mF32[1] + mCol[2].mF32[2];
1012
1013 if (tr >= 0.0f)
1014 {
1015 float s = sqrt(tr + 1.0f);
1016 float is = 0.5f / s;
1017 return Quat(
1018 (mCol[1].mF32[2] - mCol[2].mF32[1]) * is,
1019 (mCol[2].mF32[0] - mCol[0].mF32[2]) * is,
1020 (mCol[0].mF32[1] - mCol[1].mF32[0]) * is,
1021 0.5f * s);
1022 }
1023 else
1024 {
1025 int i = 0;
1026 if (mCol[1].mF32[1] > mCol[0].mF32[0]) i = 1;
1027 if (mCol[2].mF32[2] > mCol[i].mF32[i]) i = 2;
1028
1029 if (i == 0)
1030 {
1031 float s = sqrt(mCol[0].mF32[0] - (mCol[1].mF32[1] + mCol[2].mF32[2]) + 1);
1032 float is = 0.5f / s;
1033 return Quat(
1034 0.5f * s,
1035 (mCol[1].mF32[0] + mCol[0].mF32[1]) * is,
1036 (mCol[0].mF32[2] + mCol[2].mF32[0]) * is,
1037 (mCol[1].mF32[2] - mCol[2].mF32[1]) * is);
1038 }
1039 else if (i == 1)
1040 {
1041 float s = sqrt(mCol[1].mF32[1] - (mCol[2].mF32[2] + mCol[0].mF32[0]) + 1);
1042 float is = 0.5f / s;
1043 return Quat(
1044 (mCol[1].mF32[0] + mCol[0].mF32[1]) * is,
1045 0.5f * s,
1046 (mCol[2].mF32[1] + mCol[1].mF32[2]) * is,
1047 (mCol[2].mF32[0] - mCol[0].mF32[2]) * is);
1048 }
1049 else
1050 {
1051 JPH_ASSERT(i == 2);
1052
1053 float s = sqrt(mCol[2].mF32[2] - (mCol[0].mF32[0] + mCol[1].mF32[1]) + 1);
1054 float is = 0.5f / s;
1055 return Quat(
1056 (mCol[0].mF32[2] + mCol[2].mF32[0]) * is,
1057 (mCol[2].mF32[1] + mCol[1].mF32[2]) * is,
1058 0.5f * s,
1059 (mCol[0].mF32[1] - mCol[1].mF32[0]) * is);
1060 }
1061 }
1062}
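// The branch selects the largest of the trace and the diagonal elements, which maximizes the
// argument of sqrt and keeps the 0.5f / s divisions numerically stable for rotations near 180 degrees.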
1063
1064Mat44 Mat44::sQuatLeftMultiply(QuatArg inQ)
1065{
1066 return Mat44(
1067 inQ.mValue.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>().FlipSign<1, 1, -1, -1>(),
1068 inQ.mValue.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y>().FlipSign<-1, 1, 1, -1>(),
1069 inQ.mValue.Swizzle<SWIZZLE_Y, SWIZZLE_X, SWIZZLE_W, SWIZZLE_Z>().FlipSign<1, -1, 1, -1>(),
1070 inQ.mValue);
1071}
1072
1073Mat44 Mat44::sQuatRightMultiply(QuatArg inQ)
1074{
1075 return Mat44(
1076 inQ.mValue.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>().FlipSign<1, -1, 1, -1>(),
1077 inQ.mValue.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y>().FlipSign<1, 1, -1, -1>(),
1078 inQ.mValue.Swizzle<SWIZZLE_Y, SWIZZLE_X, SWIZZLE_W, SWIZZLE_Z>().FlipSign<-1, 1, 1, -1>(),
1079 inQ.mValue);
1080}
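// Together these turn quaternion products into matrix products on the [x, y, z, w] vectors:
// sQuatLeftMultiply(q) * p.mValue == (q * p).mValue and sQuatRightMultiply(q) * p.mValue == (p * q).mValue.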
1081
1082Mat44 Mat44::GetRotation() const
1083{
1084 JPH_ASSERT(mCol[0][3] == 0.0f);
1085 JPH_ASSERT(mCol[1][3] == 0.0f);
1086 JPH_ASSERT(mCol[2][3] == 0.0f);
1087
1088 return Mat44(mCol[0], mCol[1], mCol[2], Vec4(0, 0, 0, 1));
1089}
1090
1091Mat44 Mat44::GetRotationSafe() const
1092{
1093#if defined(JPH_USE_AVX512)
1094 return Mat44(_mm_maskz_mov_ps(0b0111, mCol[0].mValue),
1095 _mm_maskz_mov_ps(0b0111, mCol[1].mValue),
1096 _mm_maskz_mov_ps(0b0111, mCol[2].mValue),
1097 Vec4(0, 0, 0, 1));
1098#elif defined(JPH_USE_SSE4_1)
1099 __m128 zero = _mm_setzero_ps();
1100 return Mat44(_mm_blend_ps(mCol[0].mValue, zero, 8),
1101 _mm_blend_ps(mCol[1].mValue, zero, 8),
1102 _mm_blend_ps(mCol[2].mValue, zero, 8),
1103 Vec4(0, 0, 0, 1));
1104#elif defined(JPH_USE_NEON)
1105 return Mat44(vsetq_lane_f32(0, mCol[0].mValue, 3),
1106 vsetq_lane_f32(0, mCol[1].mValue, 3),
1107 vsetq_lane_f32(0, mCol[2].mValue, 3),
1108 Vec4(0, 0, 0, 1));
1109#elif defined(JPH_USE_RVV)
1110 const float end_col[4] = { 0, 0, 0, 1 };
1111 const vfloat32m1_t rvv_end_col = __riscv_vle32_v_f32m1(end_col, 4);
1112 const vfloat32m1_t rvv_end_row = __riscv_vfmv_v_f_f32m1(0.0f, 3);
1113
1114 Mat44 result(*this);
1115 __riscv_vse32_v_f32m1(result.mCol[3].mF32, rvv_end_col, 4);
1116 __riscv_vsse32_v_f32m1(&result.mCol[0].mF32[3], sizeof(Vec4), rvv_end_row, 3);
1117 return result;
1118#else
1119 return Mat44(Vec4(mCol[0].mF32[0], mCol[0].mF32[1], mCol[0].mF32[2], 0),
1120 Vec4(mCol[1].mF32[0], mCol[1].mF32[1], mCol[1].mF32[2], 0),
1121 Vec4(mCol[2].mF32[0], mCol[2].mF32[1], mCol[2].mF32[2], 0),
1122 Vec4(0, 0, 0, 1));
1123#endif
1124}
1125
1126void Mat44::SetRotation(Mat44Arg inRotation)
1127{
1128 mCol[0] = inRotation.mCol[0];
1129 mCol[1] = inRotation.mCol[1];
1130 mCol[2] = inRotation.mCol[2];
1131}
1132
1133Mat44 Mat44::PreTranslated(Vec3Arg inTranslation) const
1134{
1135 return Mat44(mCol[0], mCol[1], mCol[2], Vec4(GetTranslation() + Multiply3x3(inTranslation), 1));
1136}
1137
1138Mat44 Mat44::PostTranslated(Vec3Arg inTranslation) const
1139{
1140 return Mat44(mCol[0], mCol[1], mCol[2], Vec4(GetTranslation() + inTranslation, 1));
1141}
1142
1143Mat44 Mat44::PreScaled(Vec3Arg inScale) const
1144{
1145 return Mat44(inScale.GetX() * mCol[0], inScale.GetY() * mCol[1], inScale.GetZ() * mCol[2], mCol[3]);
1146}
1147
1148Mat44 Mat44::PostScaled(Vec3Arg inScale) const
1149{
1150 Vec4 scale(inScale, 1);
1151 return Mat44(scale * mCol[0], scale * mCol[1], scale * mCol[2], scale * mCol[3]);
1152}
1153
1154Mat44 Mat44::Decompose(Vec3 &outScale) const
1155{
1156 // Start the modified Gram-Schmidt algorithm
1157 // X axis will just be normalized
1158 Vec3 x = GetAxisX();
1159
1160 // Make Y axis perpendicular to X
1161 Vec3 y = GetAxisY();
1162 float x_dot_x = x.LengthSq();
1163 y -= (x.Dot(y) / x_dot_x) * x;
1164
1165 // Make Z axis perpendicular to X
1166 Vec3 z = GetAxisZ();
1167 z -= (x.Dot(z) / x_dot_x) * x;
1168
1169 // Make Z axis perpendicular to Y
1170 float y_dot_y = y.LengthSq();
1171 z -= (y.Dot(z) / y_dot_y) * y;
1172
1173 // Determine the scale
1174 float z_dot_z = z.LengthSq();
1175 outScale = Vec3(x_dot_x, y_dot_y, z_dot_z).Sqrt();
1176
1177 // If the resulting x, y and z vectors don't form a right handed matrix, flip the z axis.
1178 if (x.Cross(y).Dot(z) < 0.0f)
1179 outScale.SetZ(-outScale.GetZ());
1180
1181 // Determine the rotation and translation
1182 return Mat44(Vec4(x / outScale.GetX(), 0), Vec4(y / outScale.GetY(), 0), Vec4(z / outScale.GetZ(), 0), GetColumn4(3));
1183}
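// Illustrative example (editorial): for m = Mat44::sRotationTranslation(q, t) * Mat44::sScale(s)
// with positive scale and no shear, m.Decompose(scale) returns the rotation-translation part
// and scale receives approximately s.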
1184
1185#undef JPH_EL
1186
1187JPH_NAMESPACE_END
#define JPH_NAMESPACE_END
Definition Core.h:428
#define JPH_NAMESPACE_BEGIN
Definition Core.h:422
#define xy
Definition HLSLToCPP.h:511
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
#define JPH_EL(r, c)
Definition Mat44.inl:13
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_W
Use the W component.
Definition Swizzle.h:15
@ SWIZZLE_X
Use the X component.
Definition Swizzle.h:12
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
JPH_INLINE float Tan(float inX)
Tangent of x (input in radians)
Definition Trigonometry.h:28
Class that holds 4 float values. Convert to Vec4 to perform calculations.
Definition Float4.h:11
Holds a 4x4 matrix of floats, but also supports operations on the 3x3 upper-left part of the matrix.
Definition Mat44.h:13
JPH_INLINE Vec3 GetAxisY() const
Definition Mat44.h:148
JPH_INLINE Mat44 PostTranslated(Vec3Arg inTranslation) const
Post multiply by translation matrix: result = Mat44::sTranslation(inTranslation) * this (i.e. inTranslation is added to the translation part of the matrix)
Definition Mat44.inl:1138
JPH_INLINE Mat44 PreTranslated(Vec3Arg inTranslation) const
Pre multiply by translation matrix: result = this * Mat44::sTranslation(inTranslation)
Definition Mat44.inl:1133
JPH_INLINE Vec3 GetAxisZ() const
Definition Mat44.h:150
static JPH_INLINE Mat44 sIdentity()
Identity matrix.
Definition Mat44.inl:35
JPH_INLINE Mat44 Multiply3x3LeftTransposed(Mat44Arg inM) const
Multiply transpose of 3x3 matrix by 3x3 matrix (result = this^T * inM)
Definition Mat44.inl:490
static JPH_INLINE Mat44 sZero()
Zero matrix.
Definition Mat44.inl:30
JPH_INLINE Quat GetQuaternion() const
Convert to quaternion.
Definition Mat44.inl:1009
JPH_INLINE void StoreFloat4x4(Float4 *outV) const
Store matrix to memory.
Definition Mat44.inl:568
JPH_INLINE Mat44 operator-() const
Negate.
Definition Mat44.inl:544
static JPH_INLINE Mat44 sCrossProduct(Vec3Arg inV)
Get matrix that represents a cross product (sCrossProduct(inV) * inB == inV.Cross(inB)).
Definition Mat44.inl:179
JPH_INLINE Mat44 Transposed3x3() const
Transpose 3x3 subpart of matrix.
Definition Mat44.inl:621
JPH_INLINE Mat44 & operator*=(float inV)
Multiply matrix with float.
Definition Mat44.inl:528
JPH_INLINE float GetDeterminant3x3() const
Get the determinant of a 3x3 matrix.
Definition Mat44.inl:962
static JPH_INLINE Mat44 sQuatRightMultiply(QuatArg inQ)
Returns matrix MR so that MR(q) * p = p * q (where p and q are quaternions)
Definition Mat44.inl:1073
JPH_INLINE Mat44 Transposed() const
Transpose matrix.
Definition Mat44.inl:574
Vec4::Type Type
Definition Mat44.h:18
JPH_INLINE Mat44 Adjointed3x3() const
Get the adjoint of a 3x3 matrix.
Definition Mat44.inl:967
JPH_INLINE bool operator==(Mat44Arg inM2) const
Comparison.
Definition Mat44.inl:225
static JPH_INLINE Mat44 sRotationZ(float inZ)
Definition Mat44.inl:77
static JPH_INLINE Mat44 sLoadFloat4x4(const Float4 *inV)
Load 16 floats from memory.
Definition Mat44.inl:45
JPH_INLINE Vec3 Multiply3x3Transposed(Vec3Arg inV) const
Multiply vector by only 3x3 part of the transpose of the matrix (result = this^T * inV)
Definition Mat44.inl:423
static JPH_INLINE Mat44 sOuterProduct(Vec3Arg inV1, Vec3Arg inV2)
Get outer product of inV1 and inV2 (equivalent to inV1 * inV2^T)
Definition Mat44.inl:173
static JPH_INLINE Mat44 sLookAt(Vec3Arg inPos, Vec3Arg inTarget, Vec3Arg inUp)
Definition Mat44.inl:207
JPH_INLINE Mat44 GetRotation() const
Get rotation part only (note: retains the first 3 values from the bottom row)
Definition Mat44.inl:1082
JPH_INLINE Mat44 GetRotationSafe() const
Get rotation part only (note: also clears the bottom row)
Definition Mat44.inl:1091
JPH_INLINE Mat44 Multiply3x3RightTransposed(Mat44Arg inM) const
Multiply 3x3 matrix by the transpose of a 3x3 matrix (result = this * inM^T)
Definition Mat44.inl:504
static JPH_INLINE Mat44 sNaN()
Matrix filled with NaN's.
Definition Mat44.inl:40
JPH_INLINE Mat44 & operator+=(Mat44Arg inM)
Per element addition of matrix.
Definition Mat44.inl:560
JPH_INLINE Mat44 PostScaled(Vec3Arg inScale) const
Scale a matrix: result = Mat44::sScale(inScale) * this.
Definition Mat44.inl:1148
JPH_INLINE bool IsClose(Mat44Arg inM2, float inMaxDistSq=1.0e-12f) const
Test if two matrices are close.
Definition Mat44.inl:233
JPH_INLINE bool SetInversed3x3(Mat44Arg inM)
*this = inM.Inversed3x3(), returns false if the matrix is singular in which case *this is unchanged
Definition Mat44.inl:993
static JPH_INLINE Mat44 sScale(float inScale)
Get matrix that scales uniformly.
Definition Mat44.inl:163
static JPH_INLINE Mat44 sLoadFloat4x4Aligned(const Float4 *inV)
Load 16 floats from memory, 16 bytes aligned.
Definition Mat44.inl:53
static JPH_INLINE Mat44 sTranslation(Vec3Arg inV)
Get matrix that translates.
Definition Mat44.inl:144
JPH_INLINE Vec4 GetColumn4(uint inCol) const
Definition Mat44.h:160
JPH_INLINE Mat44 Decompose(Vec3 &outScale) const
Definition Mat44.inl:1154
JPH_INLINE Vec3 GetAxisX() const
Access to the columns.
Definition Mat44.h:146
JPH_INLINE Mat44 Inversed() const
Inverse 4x4 matrix.
Definition Mat44.inl:673
JPH_INLINE Vec3 Multiply3x3(Vec3Arg inV) const
Multiply vector by only 3x3 part of the matrix.
Definition Mat44.inl:384
static JPH_INLINE Mat44 sRotationTranslation(QuatArg inR, Vec3Arg inT)
Get matrix that rotates and translates.
Definition Mat44.inl:149
JPH_INLINE void SetRotation(Mat44Arg inRotation)
Updates the rotation part of this matrix (the first 3 columns)
Definition Mat44.inl:1126
JPH_INLINE Vec3 GetTranslation() const
Definition Mat44.h:152
JPH_INLINE Mat44 operator+(Mat44Arg inM) const
Per element addition of matrix.
Definition Mat44.inl:536
static JPH_INLINE Mat44 sRotation(Vec3Arg inAxis, float inAngle)
Rotate around arbitrary axis.
Definition Mat44.inl:139
static JPH_INLINE Mat44 sInverseRotationTranslation(QuatArg inR, Vec3Arg inT)
Get inverse matrix of sRotationTranslation.
Definition Mat44.inl:156
Mat44()=default
Constructor.
JPH_INLINE Mat44 Inversed3x3() const
Inverse 3x3 matrix.
Definition Mat44.inl:979
static JPH_INLINE Mat44 sQuatLeftMultiply(QuatArg inQ)
Returns matrix ML so that ML(q) * p = q * p (where p and q are quaternions)
Definition Mat44.inl:1064
JPH_INLINE void SetTranslation(Vec3Arg inV)
Definition Mat44.h:153
static JPH_INLINE Mat44 sPerspective(float inFovY, float inAspect, float inNear, float inFar)
Returns a right-handed perspective projection matrix.
Definition Mat44.inl:216
static JPH_INLINE Mat44 sRotationX(float inX)
Rotate around X, Y or Z axis (angle in radians)
Definition Mat44.inl:61
JPH_INLINE Mat44 InversedRotationTranslation() const
Inverse 4x4 matrix when it only contains rotation and translation.
Definition Mat44.inl:955
JPH_INLINE Mat44 PreScaled(Vec3Arg inScale) const
Scale a matrix: result = this * Mat44::sScale(inScale)
Definition Mat44.inl:1143
static JPH_INLINE Mat44 sRotationY(float inY)
Definition Mat44.inl:69
friend JPH_INLINE Mat44 operator*(float inV, Mat44Arg inM)
Definition Mat44.h:128
Definition Quat.h:33
JPH_INLINE float GetW() const
Get W component (real part)
Definition Quat.h:79
JPH_INLINE float GetY() const
Get Y component (imaginary part j)
Definition Quat.h:73
JPH_INLINE float GetZ() const
Get Z component (imaginary part k)
Definition Quat.h:76
static JPH_INLINE Quat sRotation(Vec3Arg inAxis, float inAngle)
Rotation from axis and angle.
Definition Quat.inl:128
JPH_INLINE float GetX() const
Get X component (imaginary part i)
Definition Quat.h:70
JPH_INLINE Quat Conjugated() const
The conjugate [w, -x, -y, -z] is the same as the inverse for unit quaternions.
Definition Quat.h:185
bool IsNormalized(float inTolerance=1.0e-5f) const
If the length of this quaternion is 1 +/- inTolerance.
Definition Quat.h:60
Vec4 mValue
4 vector that stores [x, y, z, w] parts of the quaternion
Definition Quat.h:264
JPH_INLINE bool TestAllTrue() const
Test if all components are true (true is when highest bit of component is set)
Definition UVec4.inl:659
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:292
Definition Vec3.h:17
JPH_INLINE float Dot(Vec3Arg inV2) const
Dot product.
Definition Vec3.inl:945
static JPH_INLINE Type sFixW(Type inValue)
Internal helper function that ensures that the Z component is replicated to the W component to prevent divisions by zero.
static JPH_INLINE Vec3 sAxisX()
Vectors with the principal axis.
Definition Vec3.h:56
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec3.inl:759
JPH_INLINE Vec3 Cross(Vec3Arg inV2) const
Cross product.
Definition Vec3.inl:841
JPH_INLINE float GetX() const
Get individual components.
Definition Vec3.h:127
JPH_INLINE Vec3 NormalizedOr(Vec3Arg inZeroValue) const
Normalize vector or return inZeroValue if the length of the vector is zero.
Definition Vec3.inl:1078
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec3.inl:791
JPH_INLINE void SetZ(float inZ)
Definition Vec3.h:135
static JPH_INLINE Vec3 sAxisZ()
Definition Vec3.h:58
Type mValue
Definition Vec3.h:299
JPH_INLINE float GetY() const
Definition Vec3.h:128
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec3.inl:775
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec3.inl:974
float mF32[4]
Definition Vec3.h:300
JPH_INLINE Vec3 Sqrt() const
Component wise square root.
Definition Vec3.inl:1029
JPH_INLINE float GetZ() const
Definition Vec3.h:129
Definition Vec4.h:14
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec4.inl:808
float mF32[4]
Definition Vec4.h:312
static JPH_INLINE Vec4 sLoadFloat4Aligned(const Float4 *inV)
Load 4 floats from memory, 16 bytes aligned.
Definition Vec4.inl:139
JPH_INLINE Vec4 FlipSign() const
Flips the signs of the components, e.g. FlipSign<-1, 1, -1, 1>() will flip the signs of the X and Z components.
Definition Vec4.inl:1113
static JPH_INLINE UVec4 sEquals(Vec4Arg inV1, Vec4Arg inV2)
Equals (component wise)
Definition Vec4.inl:235
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec4.inl:824
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec4.inl:840
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:119
static JPH_INLINE Vec4 sLoadFloat4(const Float4 *inV)
Load 4 floats from memory.
Definition Vec4.inl:123
static JPH_INLINE Vec4 sZero()
Vector with all zeros.
Definition Vec4.inl:81
JPH_INLINE Vec4 Swizzle() const
Swizzle the elements in inV.
Type mValue
Definition Vec4.h:311
static JPH_INLINE Vec4 sNaN()
Vector with all NaN's.
Definition Vec4.inl:118
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:97
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:1225