From 09cbccb40be311246079e2c3d3a0c1630f51b3bc Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Thu, 10 Oct 2024 09:40:19 -0700 Subject: [PATCH] shader_recompiler: Implement V_SUBB_U32 and V_SUBBREV_U32. (#1331) --- .../frontend/translate/translate.h | 6 +- .../frontend/translate/vector_alu.cpp | 111 +++++++++++++----- 2 files changed, 88 insertions(+), 29 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index c7758828..b70d4b82 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -155,6 +155,8 @@ public: void V_SUB_I32(const GcnInst& inst); void V_SUBREV_I32(const GcnInst& inst); void V_ADDC_U32(const GcnInst& inst); + void V_SUBB_U32(const GcnInst& inst); + void V_SUBBREV_U32(const GcnInst& inst); void V_LDEXP_F32(const GcnInst& inst); void V_CVT_PKNORM_U16_F32(const GcnInst& inst); void V_CVT_PKRTZ_F16_F32(const GcnInst& inst); @@ -273,7 +275,9 @@ private: void SetDst(const InstOperand& operand, const IR::U32F32& value); void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); - // Vector ALU Helprers + // Vector ALU Helpers + IR::U32 GetCarryIn(const GcnInst& inst); + void SetCarryOut(const GcnInst& inst, const IR::U1& carry); IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0); void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index b061d3b7..27969546 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -87,6 +87,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_SUBREV_I32(inst); case Opcode::V_ADDC_U32: return V_ADDC_U32(inst); + case Opcode::V_SUBB_U32: + return V_SUBB_U32(inst); + case Opcode::V_SUBBREV_U32: + return V_SUBBREV_U32(inst); case Opcode::V_LDEXP_F32: return V_LDEXP_F32(inst); case Opcode::V_CVT_PKNORM_U16_F32: @@ -546,51 +550,71 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { } void Translator::V_ADD_I32(const GcnInst& inst) { + // Signed or unsigned components const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; - SetDst(inst.dst[0], ir.IAdd(src0, src1)); - // TODO: Carry + const IR::U32 result{ir.IAdd(src0, src1)}; + SetDst(inst.dst[0], result); + + // TODO: Carry-out with signed or unsigned components } void Translator::V_SUB_I32(const GcnInst& inst) { + // Unsigned components const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ISub(src0, src1)); + const IR::U32 result{ir.ISub(src0, src1)}; + SetDst(inst.dst[0], result); + + const IR::U1 did_underflow{ir.IGreaterThan(src1, src0, false)}; + SetCarryOut(inst, did_underflow); } void Translator::V_SUBREV_I32(const GcnInst& inst) { + // Unsigned components const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.ISub(src1, src0)); - // TODO: Carry-out + const IR::U32 result{ir.ISub(src1, src0)}; + SetDst(inst.dst[0], result); + + const IR::U1 did_underflow{ir.IGreaterThan(src0, src1, false)}; + SetCarryOut(inst, did_underflow); } void Translator::V_ADDC_U32(const GcnInst& inst) { - const auto src0 = GetSrc(inst.src[0]); - const auto src1 = GetSrc(inst.src[1]); - - IR::U1 carry; - if (inst.src_count == 3) { // VOP3 - if (inst.src[2].field == OperandField::VccLo) { - carry = ir.GetVcc(); - } else if (inst.src[2].field == OperandField::ScalarGPR) { - carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code)); - } else { - UNREACHABLE(); - } - } else { // VOP2 - carry = ir.GetVcc(); - } - - const IR::U32 scarry = IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))}; - const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry); - + // Unsigned components + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 carry{GetCarryIn(inst)}; + const IR::U32 result{ir.IAdd(ir.IAdd(src0, src1), carry)}; SetDst(inst.dst[0], result); - const IR::U1 less_src0 = ir.ILessThan(result, src0, false); - const IR::U1 less_src1 = ir.ILessThan(result, src1, false); - const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1); - ir.SetVcc(did_overflow); + const IR::U1 less_src0{ir.ILessThan(result, src0, false)}; + const IR::U1 less_src1{ir.ILessThan(result, src1, false)}; + const IR::U1 did_overflow{ir.LogicalOr(less_src0, less_src1)}; + SetCarryOut(inst, did_overflow); +} + +void Translator::V_SUBB_U32(const GcnInst& inst) { + // Signed or unsigned components + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 carry{GetCarryIn(inst)}; + const IR::U32 result{ir.ISub(ir.ISub(src0, src1), carry)}; + SetDst(inst.dst[0], result); + + // TODO: Carry-out with signed or unsigned components +} + +void Translator::V_SUBBREV_U32(const GcnInst& inst) { + // Signed or unsigned components + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 carry{GetCarryIn(inst)}; + const IR::U32 result{ir.ISub(ir.ISub(src1, src0), carry)}; + SetDst(inst.dst[0], result); + + // TODO: Carry-out with signed or unsigned components } void Translator::V_LDEXP_F32(const GcnInst& inst) { @@ -1152,6 +1176,37 @@ void Translator::V_MAD_U64_U32(const GcnInst& inst) { ir.SetVcc(did_overflow); } +IR::U32 Translator::GetCarryIn(const GcnInst& inst) { + IR::U1 carry; + if (inst.src_count == 3) { // VOP3 + if (inst.src[2].field == OperandField::VccLo) { + carry = ir.GetVcc(); + } else if (inst.src[2].field == OperandField::ScalarGPR) { + carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code)); + } else { + UNREACHABLE(); + } + } else { // VOP2 + carry = ir.GetVcc(); + } + + return IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))}; +} + +void Translator::SetCarryOut(const GcnInst& inst, const IR::U1& carry) { + if (inst.dst_count == 2) { // VOP3 + if (inst.dst[1].field == OperandField::VccLo) { + ir.SetVcc(carry); + } else if (inst.dst[1].field == OperandField::ScalarGPR) { + ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), carry); + } else { + UNREACHABLE(); + } + } else { // VOP2 + ir.SetVcc(carry); + } +} + // TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of // [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to