From 4a00bc8bc067551b7ce7f7c5c561dd2fa5953324 Mon Sep 17 00:00:00 2001
From: Takahiro Ebato
Date: Tue, 31 Dec 2024 16:24:41 +0900
Subject: [PATCH] feat: Implement simd_f64x2_arith for pulley

---
Review notes (this area is ignored by `git am`; it is not part of the commit):

* lower.isle: adds `$F64X2` lowering rules for `fsub`, `fmul`, `fdiv` and
  `fneg`, each emitting the matching `pulley_v*f64x2` constructor.
* pulley/src/lib.rs: declares the extended ops `vdivf64x2`, `vsubf64x2`,
  `vmulf64x2` (taking `operands`) and `vnegf64x2` (taking `dst`/`src`
  vector registers).
* pulley/src/interp.rs: implements those four ops element by element on the
  arrays returned by `get_f64x2()` and stores the result with `set_f64x2()`.
* crates/wast-util: drops `spec_testsuite/simd_f64x2_arith.wast` from the
  list of test names touched inside `impl WastTest`; presumably that list
  names tests not yet passing on Pulley -- confirm against the file.
* NOTE(review): this patch was recovered from a copy whose line breaks and
  indentation had been collapsed. Indentation depth and the position of
  blank context lines below are reconstructed and must be checked against
  the target tree before applying. The author's e-mail address, and
  possibly generic arguments on `BinaryOperands` / `ControlFlow`, appear to
  have been stripped by the same extraction -- confirm against upstream.

 .../codegen/src/isa/pulley_shared/lower.isle |  4 ++
 crates/wast-util/src/lib.rs                  |  1 -
 pulley/src/interp.rs                         | 39 +++++++++++++++++++
 pulley/src/lib.rs                            |  8 ++++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle
index 2d815ae4c502..b62d44810eb1 100644
--- a/cranelift/codegen/src/isa/pulley_shared/lower.isle
+++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle
@@ -1157,17 +1157,20 @@
 
 (rule (lower (has_type $F32 (fsub a b))) (pulley_fsub32 a b))
 (rule (lower (has_type $F64 (fsub a b))) (pulley_fsub64 a b))
+(rule (lower (has_type $F64X2 (fsub a b))) (pulley_vsubf64x2 a b))
 
 ;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (fmul a b))) (pulley_fmul32 a b))
 (rule (lower (has_type $F64 (fmul a b))) (pulley_fmul64 a b))
+(rule (lower (has_type $F64X2 (fmul a b))) (pulley_vmulf64x2 a b))
 
 ;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (fdiv a b))) (pulley_fdiv32 a b))
 (rule (lower (has_type $F64 (fdiv a b))) (pulley_fdiv64 a b))
 (rule (lower (has_type $F32X4 (fdiv a b))) (pulley_vdivf32x4 a b))
+(rule (lower (has_type $F64X2 (fdiv a b))) (pulley_vdivf64x2 a b))
 
 ;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -1230,6 +1233,7 @@
 
 (rule (lower (has_type $F32 (fneg a))) (pulley_fneg32 a))
 (rule (lower (has_type $F64 (fneg a))) (pulley_fneg64 a))
+(rule (lower (has_type $F64X2 (fneg a))) (pulley_vnegf64x2 a))
 
 ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs
index 35959d99ac07..f8e15777a481 100644
--- a/crates/wast-util/src/lib.rs
+++ b/crates/wast-util/src/lib.rs
@@ -411,7 +411,6 @@ impl WastTest {
                 "spec_testsuite/simd_f32x4_arith.wast",
                 "spec_testsuite/simd_f32x4_cmp.wast",
                 "spec_testsuite/simd_f32x4_pmin_pmax.wast",
-                "spec_testsuite/simd_f64x2_arith.wast",
                 "spec_testsuite/simd_f64x2_cmp.wast",
                 "spec_testsuite/simd_f64x2_pmin_pmax.wast",
                 "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs
index 07f70cbf7596..31cb072c55c6 100644
--- a/pulley/src/interp.rs
+++ b/pulley/src/interp.rs
@@ -3007,6 +3007,19 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vdivf64x2(&mut self, operands: BinaryOperands) -> ControlFlow {
+        let a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        let mut result = [0.0f64; 2];
+
+        for i in 0..2 {
+            result[i] = a[i] / b[i];
+        }
+
+        self.state[operands.dst].set_f64x2(result);
+        ControlFlow::Continue(())
+    }
+
     fn fmaximum32(&mut self, operands: BinaryOperands) -> ControlFlow {
         let a = self.state[operands.src1].get_f32();
         let b = self.state[operands.src2].get_f32();
@@ -3900,6 +3913,16 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vsubf64x2(&mut self, operands: BinaryOperands) -> ControlFlow {
+        let mut a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        for (a, b) in a.iter_mut().zip(b) {
+            *a = *a - b;
+        }
+        self.state[operands.dst].set_f64x2(a);
+        ControlFlow::Continue(())
+    }
+
     fn vmuli8x16(&mut self, operands: BinaryOperands) -> ControlFlow {
         let mut a = self.state[operands.src1].get_i8x16();
         let b = self.state[operands.src2].get_i8x16();
@@ -3940,6 +3963,16 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vmulf64x2(&mut self, operands: BinaryOperands) -> ControlFlow {
+        let mut a = self.state[operands.src1].get_f64x2();
+        let b = self.state[operands.src2].get_f64x2();
+        for (a, b) in a.iter_mut().zip(b) {
+            *a = *a * b;
+        }
+        self.state[operands.dst].set_f64x2(a);
+        ControlFlow::Continue(())
+    }
+
     fn vqmulrsi16x8(&mut self, operands: BinaryOperands) -> ControlFlow {
         let mut a = self.state[operands.src1].get_i16x8();
         let b = self.state[operands.src2].get_i16x8();
@@ -4367,6 +4400,12 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let a = self.state[src].get_f64x2();
+        self.state[dst].set_f64x2(a.map(|i| -i));
+        ControlFlow::Continue(())
+    }
+
     fn vmin8x16_s(&mut self, operands: BinaryOperands) -> ControlFlow {
         let mut a = self.state[operands.src1].get_i8x16();
         let b = self.state[operands.src2].get_i8x16();
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index 1a37bd970e53..447264b6a37f 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -860,6 +860,8 @@ macro_rules! for_each_extended_op {
             fmul64 = Fmul64 { operands: BinaryOperands };
             /// `dst = src1 / src2`
             fdiv64 = Fdiv64 { operands: BinaryOperands };
+            /// `dst = src1 / src2`
+            vdivf64x2 = VDivF64x2 { operands: BinaryOperands };
             /// `dst = ieee_maximum(src1, src2)`
             fmaximum64 = Fmaximum64 { operands: BinaryOperands };
             /// `dst = ieee_minimum(src1, src2)`
@@ -1075,6 +1077,8 @@ macro_rules! for_each_extended_op {
             vsubi32x4 = VSubI32x4 { operands: BinaryOperands };
             /// `dst = src1 - src2`
             vsubi64x2 = VSubI64x2 { operands: BinaryOperands };
+            /// `dst = src1 - src2`
+            vsubf64x2 = VSubF64x2 { operands: BinaryOperands };
 
             /// `dst = saturating_sub(src1, src2)`
             vsubi8x16_sat = VSubI8x16Sat { operands: BinaryOperands };
@@ -1093,6 +1097,8 @@ macro_rules! for_each_extended_op {
             vmuli32x4 = VMulI32x4 { operands: BinaryOperands };
             /// `dst = src1 * src2`
             vmuli64x2 = VMulI64x2 { operands: BinaryOperands };
+            /// `dst = src1 * src2`
+            vmulf64x2 = VMulF64x2 { operands: BinaryOperands };
 
             /// `dst = signed_saturate(src1 * src2 + (1 << (Q - 1)) >> Q)`
             vqmulrsi16x8 = VQmulrsI16x8 { operands: BinaryOperands };
@@ -1183,6 +1189,8 @@ macro_rules! for_each_extended_op {
             vneg32x4 = Vneg32x4 { dst: VReg, src: VReg };
             /// `dst = -src`
             vneg64x2 = Vneg64x2 { dst: VReg, src: VReg };
+            /// `dst = -src`
+            vnegf64x2 = VnegF64x2 { dst: VReg, src: VReg };
 
             /// `dst = min(src1, src2)` (signed)
             vmin8x16_s = Vmin8x16S { operands: BinaryOperands };