From fdd699a725439b91a548eea5bd4e80254efc8fb3 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 20 Sep 2024 01:11:51 +0300 Subject: [PATCH] shader: Add some instructions --- .../frontend/translate/scalar_alu.cpp | 5 +++++ src/shader_recompiler/frontend/translate/translate.h | 1 + .../frontend/translate/vector_alu.cpp | 11 +++++++++++ 3 files changed, 17 insertions(+) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 0c9efdc48..97e1f4113 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -506,6 +506,8 @@ void Translator::S_NOT_B64(const GcnInst& inst) { return ir.GetExec(); case OperandField::ScalarGPR: return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); + case OperandField::ConstZero: + return ir.Imm1(false); default: UNREACHABLE(); } @@ -520,6 +522,9 @@ void Translator::S_NOT_B64(const GcnInst& inst) { case OperandField::ScalarGPR: ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result); break; + case OperandField::ExecLo: + ir.SetExec(result); + break; default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index aeeb8e130..7559b8533 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -155,6 +155,7 @@ public: void V_SUBREV_I32(const GcnInst& inst); void V_ADDC_U32(const GcnInst& inst); void V_LDEXP_F32(const GcnInst& inst); + void V_CVT_PKNORM_U16_F32(const GcnInst& inst); void V_CVT_PKRTZ_F16_F32(const GcnInst& inst); // VOP1 diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index abe5de835..c62a31ec4 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -89,6 +89,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_ADDC_U32(inst); case Opcode::V_LDEXP_F32: return V_LDEXP_F32(inst); + case Opcode::V_CVT_PKNORM_U16_F32: + return V_CVT_PKNORM_U16_F32(inst); case Opcode::V_CVT_PKRTZ_F16_F32: return V_CVT_PKRTZ_F16_F32(inst); @@ -585,6 +587,15 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPLdexp(src0, src1)); } +void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::U32 dst0 = ir.ConvertFToU(32, ir.FPMul(src0, ir.Imm32(65535.f))); + const IR::U32 dst1 = ir.ConvertFToU(32, ir.FPMul(src1, ir.Imm32(65535.f))); + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.BitFieldInsert(dst0, dst1, ir.Imm32(16), ir.Imm32(16))); +} + void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1]));