Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
RISCVVector.h
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2026 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
5#pragma once
6
8
9#ifdef JPH_USE_RVV
10
11template <unsigned IndexX, unsigned IndexY, unsigned IndexZ, unsigned IndexW>
12JPH_INLINE vfloat32m1_t RVVShuffleFloat32x4(vfloat32m1_t inV0, vfloat32m1_t inV1)
13{
14 vfloat32m2_t combined = __riscv_vlmul_ext_v_f32m1_f32m2(inV0);
15 combined = __riscv_vslideup_vx_f32m2(combined, __riscv_vlmul_ext_v_f32m1_f32m2(inV1), 4, 8);
16
17 const uint32 indices_raw[4] = { IndexX, IndexY, IndexZ, IndexW };
18 const vuint32m1_t v_indices_m1 = __riscv_vle32_v_u32m1(indices_raw, 4);
19 const vuint32m2_t v_indices_m2 = __riscv_vlmul_ext_v_u32m1_u32m2(v_indices_m1);
20
21 const vfloat32m2_t gathered_m2 = __riscv_vrgather_vv_f32m2(combined, v_indices_m2, 4);
22 return __riscv_vlmul_trunc_v_f32m2_f32m1(gathered_m2);
23}
24
25template <>
26JPH_INLINE vfloat32m1_t RVVShuffleFloat32x4<0, 1, 2, 3>(vfloat32m1_t inV0, vfloat32m1_t inV1)
27{
28 return inV0;
29}
30
31template <>
32JPH_INLINE vfloat32m1_t RVVShuffleFloat32x4<0, 1, 4, 5>(vfloat32m1_t inV0, vfloat32m1_t inV1)
33{
34 vfloat32m1_t result = inV0;
35 return __riscv_vslideup_vx_f32m1(result, inV1, 2, 4);
36}
37
38template <>
39JPH_INLINE vfloat32m1_t RVVShuffleFloat32x4<1, 0, 3, 2>(vfloat32m1_t inV0, vfloat32m1_t inV1)
40{
41 // Avoids m2 extension overhead that the default implementation of RVVShuffleFloat32x4 has
42 const uint32 indices_raw[4] = { 1, 0, 3, 2 };
43 const vuint32m1_t indices = __riscv_vle32_v_u32m1(indices_raw, 4);
44 return __riscv_vrgather_vv_f32m1(inV0, indices, 4);
45}
46
47template <>
48JPH_INLINE vfloat32m1_t RVVShuffleFloat32x4<2, 3, 0, 1>(vfloat32m1_t inV0, vfloat32m1_t inV1)
49{
50 vfloat32m1_t upper = __riscv_vslidedown_vx_f32m1(inV0, 2, 4);
51 return __riscv_vslideup_vx_f32m1(upper, inV0, 2, 4);
52}
53
54template <>
55JPH_INLINE vfloat32m1_t RVVShuffleFloat32x4<2, 3, 6, 7>(vfloat32m1_t inV0, vfloat32m1_t inV1)
56{
57 return __riscv_vslidedown_vx_f32m1_tu(inV1, inV0, 2, 2);
58}
59
60template <>
61JPH_INLINE vfloat32m1_t RVVShuffleFloat32x4<4, 5, 6, 7>(vfloat32m1_t inV0, vfloat32m1_t inV1)
62{
63 return inV1;
64}
65
66template <>
67JPH_INLINE vfloat32m1_t RVVShuffleFloat32x4<0, 2, 4, 6>(vfloat32m1_t inV0, vfloat32m1_t inV1)
68{
69 vfloat32m2_t combined = __riscv_vlmul_ext_v_f32m1_f32m2(inV0);
70 combined = __riscv_vslideup_vx_f32m2(combined, __riscv_vlmul_ext_v_f32m1_f32m2(inV1), 4, 8);
71
72 vuint64m2_t combined_u64 = __riscv_vreinterpret_v_u32m2_u64m2(__riscv_vreinterpret_v_f32m2_u32m2(combined));
73
74 // vnsrl extracts lower 32 bits from all 4 u64 elements -> [0, 2, 4, 6]
75 vuint32m1_t result = __riscv_vnsrl_wx_u32m1(combined_u64, 0, 4);
76
77 return __riscv_vreinterpret_v_u32m1_f32m1(result);
78}
79
80template <>
81JPH_INLINE vfloat32m1_t RVVShuffleFloat32x4<1, 3, 5, 7>(vfloat32m1_t inV0, vfloat32m1_t inV1)
82{
83 vfloat32m2_t combined = __riscv_vlmul_ext_v_f32m1_f32m2(inV0);
84 combined = __riscv_vslideup_vx_f32m2(combined, __riscv_vlmul_ext_v_f32m1_f32m2(inV1), 4, 8);
85
86 vuint64m2_t combined_u64 = __riscv_vreinterpret_v_u32m2_u64m2(__riscv_vreinterpret_v_f32m2_u32m2(combined));
87
88 // vnsrl with shift=32 extracts upper 32 bits from all 4 u64 elements -> [1, 3, 5, 7]
89 vuint32m1_t result = __riscv_vnsrl_wx_u32m1(combined_u64, 32, 4);
90
91 return __riscv_vreinterpret_v_u32m1_f32m1(result);
92}
93
95JPH_INLINE float RVVSumElementsFloat32x4(vfloat32m1_t inV)
96{
97#ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
98 const vfloat32m1_t shift1 = __riscv_vslidedown_vx_f32m1(inV, 1, 4);
99 const vfloat32m1_t sum_pairs = __riscv_vfadd_vv_f32m1(inV, shift1, 4);
100 const vfloat32m1_t shift2 = __riscv_vslidedown_vx_f32m1(sum_pairs, 2, 4);
101 const vfloat32m1_t sum = __riscv_vfadd_vv_f32m1(sum_pairs, shift2, 4);
102#else
103 const vfloat32m1_t zeros = __riscv_vfmv_v_f_f32m1(0.0f, 4);
104 const vfloat32m1_t sum = __riscv_vfredusum_vs_f32m1_f32m1(inV, zeros, 4);
105#endif
106 return __riscv_vfmv_f_s_f32m1_f32(sum);
107}
108
109#endif // JPH_USE_RVV
110
#define JPH_NAMESPACE_END
Definition Core.h:428
std::uint32_t uint32
Definition Core.h:508
#define JPH_NAMESPACE_BEGIN
Definition Core.h:422