diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index e56eb916..f526aaba 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -122,6 +122,10 @@ Id EmitFPSqrt(EmitContext& ctx, Id value) { return ctx.OpSqrt(ctx.F32[1], value); } +Id EmitFPInvSqrt(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F32[1], value); +} + Id EmitFPSaturate16(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 2192b054..8972ff4a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -174,6 +174,7 @@ Id EmitFPRecip64(EmitContext& ctx, Id value); Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPInvSqrt(EmitContext& ctx, Id value); Id EmitFPSaturate16(EmitContext& ctx, Id value); Id EmitFPSaturate32(EmitContext& ctx, Id value); Id EmitFPSaturate64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 1e9925fc..6136b46a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -321,9 +321,13 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_MAX_F32: translator.V_MAX_F32(inst); break; + case Opcode::V_RSQ_F32: + translator.V_RSQ_F32(inst); + break; case Opcode::S_ANDN2_B64: translator.S_ANDN2_B64(inst); break; + case Opcode::S_NOP: case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_SCC0: case Opcode::S_MOV_B64: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 8a027e9f..6e50e8fb 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -67,6 +67,7 @@ public: void V_FMA_F32(const GcnInst& inst); void V_CMP_F32(ConditionOp op, const GcnInst& inst); void V_MAX_F32(const GcnInst& inst); + void V_RSQ_F32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7bb97f01..bdb69672 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -193,4 +193,9 @@ void Translator::V_MAX_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMax(src0, src1)); } +void Translator::V_RSQ_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + SetDst(inst.dst[0], ir.FPInvSqrt(src0)); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 43e8e439..beacc2fe 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -609,6 +609,10 @@ F32 IREmitter::FPSqrt(const F32& value) { return Inst(Opcode::FPSqrt, value); } +F32 IREmitter::FPInvSqrt(const F32& value) { + return Inst(Opcode::FPInvSqrt, value); +} + F32F64 IREmitter::FPSaturate(const F32F64& value) { switch (value.Type()) { case Type::F32: diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index a52437a9..771e9b13 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -123,6 +123,7 @@ public: [[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32 FPSqrt(const F32& value); + [[nodiscard]] F32 FPInvSqrt(const F32& value); [[nodiscard]] F32F64 FPSaturate(const F32F64& value); [[nodiscard]] F32F64 FPClamp(const F32F64& value, const F32F64& min_value, const F32F64& max_value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 5fb4dd0f..5a30142f 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -142,6 +142,7 @@ OPCODE(FPRecip64, F64, F64, OPCODE(FPRecipSqrt32, F32, F32, ) OPCODE(FPRecipSqrt64, F64, F64, ) OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPInvSqrt, F32, F32, ) OPCODE(FPSin, F32, F32, ) OPCODE(FPExp2, F32, F32, ) OPCODE(FPCos, F32, F32, )