Skip to content

Commit

Permalink
feat: Implement simd_f64x2_arith for pulley
Browse files Browse the repository at this point in the history
  • Loading branch information
takaebato committed Dec 31, 2024
1 parent f2ab725 commit 4a00bc8
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 1 deletion.
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1157,17 +1157,20 @@

;; Each supported result type of `fsub` is lowered to its own Pulley
;; constructor: the scalar `$F32`/`$F64` forms, and the two-lane `$F64X2`
;; vector form via `pulley_vsubf64x2`.
(rule (lower (has_type $F32 (fsub a b))) (pulley_fsub32 a b))
(rule (lower (has_type $F64 (fsub a b))) (pulley_fsub64 a b))
(rule (lower (has_type $F64X2 (fsub a b))) (pulley_vsubf64x2 a b))

;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Each supported result type of `fmul` is lowered to its own Pulley
;; constructor: the scalar `$F32`/`$F64` forms, and the two-lane `$F64X2`
;; vector form via `pulley_vmulf64x2`.
(rule (lower (has_type $F32 (fmul a b))) (pulley_fmul32 a b))
(rule (lower (has_type $F64 (fmul a b))) (pulley_fmul64 a b))
(rule (lower (has_type $F64X2 (fmul a b))) (pulley_vmulf64x2 a b))

;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Each supported result type of `fdiv` is lowered to its own Pulley
;; constructor: the scalar `$F32`/`$F64` forms, plus the `$F32X4` and `$F64X2`
;; vector forms via `pulley_vdivf32x4` / `pulley_vdivf64x2`.
(rule (lower (has_type $F32 (fdiv a b))) (pulley_fdiv32 a b))
(rule (lower (has_type $F64 (fdiv a b))) (pulley_fdiv64 a b))
(rule (lower (has_type $F32X4 (fdiv a b))) (pulley_vdivf32x4 a b))
(rule (lower (has_type $F64X2 (fdiv a b))) (pulley_vdivf64x2 a b))

;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1230,6 +1233,7 @@

;; Each supported result type of the unary `fneg` is lowered to its own Pulley
;; constructor: the scalar `$F32`/`$F64` forms, and the two-lane `$F64X2`
;; vector form via `pulley_vnegf64x2`.
(rule (lower (has_type $F32 (fneg a))) (pulley_fneg32 a))
(rule (lower (has_type $F64 (fneg a))) (pulley_fneg64 a))
(rule (lower (has_type $F64X2 (fneg a))) (pulley_vnegf64x2 a))

;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
1 change: 0 additions & 1 deletion crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,6 @@ impl WastTest {
"spec_testsuite/simd_f32x4_arith.wast",
"spec_testsuite/simd_f32x4_cmp.wast",
"spec_testsuite/simd_f32x4_pmin_pmax.wast",
"spec_testsuite/simd_f64x2_arith.wast",
"spec_testsuite/simd_f64x2_cmp.wast",
"spec_testsuite/simd_f64x2_pmin_pmax.wast",
"spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
Expand Down
39 changes: 39 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3007,6 +3007,19 @@ impl ExtendedOpVisitor for Interpreter<'_> {
ControlFlow::Continue(())
}

/// Lane-wise `f64x2` division: `dst[i] = src1[i] / src2[i]` for both lanes.
///
/// Both source vectors are read before the destination is written, so `dst`
/// may name the same register as either operand. The quotient is computed
/// with the ordinary `f64` `/` operator, which never panics (a zero divisor
/// produces an infinity or NaN). Always returns `ControlFlow::Continue(())`.
fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut a = self.state[operands.src1].get_f64x2();
    let b = self.state[operands.src2].get_f64x2();
    // Update `a` in place, pairing lanes with `zip`, to match the sibling
    // `vsubf64x2` / `vmulf64x2` handlers and avoid an index loop.
    for (a, b) in a.iter_mut().zip(b) {
        *a /= b;
    }
    self.state[operands.dst].set_f64x2(a);
    ControlFlow::Continue(())
}

fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
let a = self.state[operands.src1].get_f32();
let b = self.state[operands.src2].get_f32();
Expand Down Expand Up @@ -3900,6 +3913,16 @@ impl ExtendedOpVisitor for Interpreter<'_> {
ControlFlow::Continue(())
}

/// Lane-wise `f64x2` subtraction: `dst[i] = src1[i] - src2[i]` for both lanes.
///
/// Both operands are read before the destination register is written.
/// Always returns `ControlFlow::Continue(())`.
fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let [lhs_lo, lhs_hi] = self.state[operands.src1].get_f64x2();
    let [rhs_lo, rhs_hi] = self.state[operands.src2].get_f64x2();
    let difference = [lhs_lo - rhs_lo, lhs_hi - rhs_hi];
    self.state[operands.dst].set_f64x2(difference);
    ControlFlow::Continue(())
}

fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
let mut a = self.state[operands.src1].get_i8x16();
let b = self.state[operands.src2].get_i8x16();
Expand Down Expand Up @@ -3940,6 +3963,16 @@ impl ExtendedOpVisitor for Interpreter<'_> {
ControlFlow::Continue(())
}

/// Lane-wise `f64x2` multiplication: `dst[i] = src1[i] * src2[i]` for both
/// lanes.
///
/// Both operands are read before the destination register is written.
/// Always returns `ControlFlow::Continue(())`.
fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    let mut product = self.state[operands.src1].get_f64x2();
    let factors = self.state[operands.src2].get_f64x2();
    product
        .iter_mut()
        .zip(factors)
        .for_each(|(lane, factor)| *lane *= factor);
    self.state[operands.dst].set_f64x2(product);
    ControlFlow::Continue(())
}

fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
let mut a = self.state[operands.src1].get_i16x8();
let b = self.state[operands.src2].get_i16x8();
Expand Down Expand Up @@ -4367,6 +4400,12 @@ impl ExtendedOpVisitor for Interpreter<'_> {
ControlFlow::Continue(())
}

/// Lane-wise `f64x2` negation: `dst[i] = -src[i]` for both lanes.
///
/// The source vector is read in full before `dst` is written.
/// Always returns `ControlFlow::Continue(())`.
fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
    let mut lanes = self.state[src].get_f64x2();
    for lane in lanes.iter_mut() {
        *lane = -*lane;
    }
    self.state[dst].set_f64x2(lanes);
    ControlFlow::Continue(())
}

fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
let mut a = self.state[operands.src1].get_i8x16();
let b = self.state[operands.src2].get_i8x16();
Expand Down
8 changes: 8 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,8 @@ macro_rules! for_each_extended_op {
fmul64 = Fmul64 { operands: BinaryOperands<FReg> };
/// `dst = src1 / src2`
fdiv64 = Fdiv64 { operands: BinaryOperands<FReg> };
/// `dst = src1 / src2`
vdivf64x2 = VDivF64x2 { operands: BinaryOperands<VReg> };
/// `dst = ieee_maximum(src1, src2)`
fmaximum64 = Fmaximum64 { operands: BinaryOperands<FReg> };
/// `dst = ieee_minimum(src1, src2)`
Expand Down Expand Up @@ -1075,6 +1077,8 @@ macro_rules! for_each_extended_op {
vsubi32x4 = VSubI32x4 { operands: BinaryOperands<VReg> };
/// `dst = src1 - src2`
vsubi64x2 = VSubI64x2 { operands: BinaryOperands<VReg> };
/// `dst = src1 - src2`
vsubf64x2 = VSubF64x2 { operands: BinaryOperands<VReg> };

/// `dst = saturating_sub(src1, src2)`
vsubi8x16_sat = VSubI8x16Sat { operands: BinaryOperands<VReg> };
Expand All @@ -1093,6 +1097,8 @@ macro_rules! for_each_extended_op {
vmuli32x4 = VMulI32x4 { operands: BinaryOperands<VReg> };
/// `dst = src1 * src2`
vmuli64x2 = VMulI64x2 { operands: BinaryOperands<VReg> };
/// `dst = src1 * src2`
vmulf64x2 = VMulF64x2 { operands: BinaryOperands<VReg> };

/// `dst = signed_saturate(src1 * src2 + (1 << (Q - 1)) >> Q)`
vqmulrsi16x8 = VQmulrsI16x8 { operands: BinaryOperands<VReg> };
Expand Down Expand Up @@ -1183,6 +1189,8 @@ macro_rules! for_each_extended_op {
vneg32x4 = Vneg32x4 { dst: VReg, src: VReg };
/// `dst = -src`
vneg64x2 = Vneg64x2 { dst: VReg, src: VReg };
/// `dst = -src`
vnegf64x2 = VnegF64x2 { dst: VReg, src: VReg };

/// `dst = min(src1, src2)` (signed)
vmin8x16_s = Vmin8x16S { operands: BinaryOperands<VReg> };
Expand Down

0 comments on commit 4a00bc8

Please sign in to comment.