diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index c681be97..b0298cbb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -327,6 +327,10 @@ void EmitGetVccHi(EmitContext& ctx) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
 
+void EmitGetM0(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
 void EmitSetScc(EmitContext& ctx) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
@@ -351,4 +355,8 @@ void EmitSetVccHi(EmitContext& ctx) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
 
+void EmitSetM0(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index ce4d3f13..0cd59175 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -36,12 +36,14 @@ void EmitGetVcc(EmitContext& ctx);
 void EmitGetSccLo(EmitContext& ctx);
 void EmitGetVccLo(EmitContext& ctx);
 void EmitGetVccHi(EmitContext& ctx);
+void EmitGetM0(EmitContext& ctx);
 void EmitSetScc(EmitContext& ctx);
 void EmitSetExec(EmitContext& ctx);
 void EmitSetVcc(EmitContext& ctx);
 void EmitSetSccLo(EmitContext& ctx);
 void EmitSetVccLo(EmitContext& ctx);
 void EmitSetVccHi(EmitContext& ctx);
+void EmitSetM0(EmitContext& ctx);
 void EmitFPCmpClass32(EmitContext& ctx);
 void EmitPrologue(EmitContext& ctx);
 void EmitEpilogue(EmitContext& ctx);
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index b33746c7..4e0c110c 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -153,10 +153,11 @@ T Translator::GetSrc(const InstOperand& operand) {
         break;
     case OperandField::M0:
         if constexpr (is_float) {
-            UNREACHABLE();
+            value = ir.BitCast<IR::F32>(ir.GetM0());
         } else {
-            return m0_value;
+            value = ir.GetM0();
         }
+        break;
     default:
         UNREACHABLE();
     }
@@ -296,8 +297,7 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
     case OperandField::VccHi:
         return ir.SetVccHi(result);
     case OperandField::M0:
-        m0_value = result;
-        break;
+        return ir.SetM0(result);
     default:
         UNREACHABLE();
     }
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 0c1f3a58..d6887818 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -192,6 +192,9 @@ public:
     void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
     void V_BFM_B32(const GcnInst& inst);
    void V_FFBH_U32(const GcnInst& inst);
+    void V_MOVRELS_B32(const GcnInst& inst);
+    void V_MOVRELD_B32(const GcnInst& inst);
+    void V_MOVRELSD_B32(const GcnInst& inst);
 
     // Vector Memory
     void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst);
@@ -233,6 +236,9 @@ private:
     void SetDst(const InstOperand& operand, const IR::U32F32& value);
     void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
 
+    IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
+    void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);
+
     void LogMissingOpcode(const GcnInst& inst);
 
 private:
@@ -240,7 +246,6 @@ private:
     Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;
-    IR::U32 m0_value;
     bool opcode_missing = false;
 };
 
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index a07e7078..2024b706 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "shader_recompiler/frontend/opcodes.h"
 #include "shader_recompiler/frontend/translate/translate.h"
 
 namespace Shader::Gcn {
@@ -309,6 +310,12 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_MBCNT_U32_B32(true, inst);
     case Opcode::V_MBCNT_HI_U32_B32:
         return V_MBCNT_U32_B32(false, inst);
+    case Opcode::V_MOVRELS_B32:
+        return V_MOVRELS_B32(inst);
+    case Opcode::V_MOVRELD_B32:
+        return V_MOVRELD_B32(inst);
+    case Opcode::V_MOVRELSD_B32:
+        return V_MOVRELSD_B32(inst);
     case Opcode::V_NOP:
         return;
 
@@ -990,4 +997,52 @@ void Translator::V_FFBH_U32(const GcnInst& inst) {
     SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
 }
 
+// TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of
+// [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to
+
+IR::U32 Translator::VMovRelSHelper(u32 src_vgprno, const IR::U32 m0) {
+    // Read from VGPR0 by default when src_vgprno + m0 > num_allocated_vgprs
+    IR::U32 src_val = ir.GetVectorReg(IR::VectorReg::V0);
+    for (u32 i = src_vgprno; i < runtime_info.num_allocated_vgprs; i++) {
+        const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - src_vgprno));
+        src_val =
+            IR::U32{ir.Select(cond, ir.GetVectorReg(IR::VectorReg::V0 + i), src_val)};
+    }
+    return src_val;
+}
+
+void Translator::VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0) {
+    for (u32 i = dst_vgprno; i < runtime_info.num_allocated_vgprs; i++) {
+        const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - dst_vgprno));
+        const IR::U32 dst_val =
+            IR::U32{ir.Select(cond, src_val, ir.GetVectorReg(IR::VectorReg::V0 + i))};
+        ir.SetVectorReg(IR::VectorReg::V0 + i, dst_val);
+    }
+}
+
+void Translator::V_MOVRELS_B32(const GcnInst& inst) {
+    u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0);
+    const IR::U32 m0 = ir.GetM0();
+
+    const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0);
+    SetDst(inst.dst[0], src_val);
+}
+
+void Translator::V_MOVRELD_B32(const GcnInst& inst) {
+    const IR::U32 src_val{GetSrc(inst.src[0])};
+    u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
+    IR::U32 m0 = ir.GetM0();
+
+    VMovRelDHelper(dst_vgprno, src_val, m0);
+}
+
+void Translator::V_MOVRELSD_B32(const GcnInst& inst) {
+    u32 src_vgprno = inst.src[0].code - static_cast<u32>(IR::VectorReg::V0);
+    u32 dst_vgprno = inst.dst[0].code - static_cast<u32>(IR::VectorReg::V0);
+    IR::U32 m0 = ir.GetM0();
+
+    const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0);
+    VMovRelDHelper(dst_vgprno, src_val, m0);
+}
+
 } // namespace Shader::Gcn
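The two helpers above lower GCN's relative VGPR addressing (the index lives in M0) into a chain of IR Select operations, since after translation the VGPRs are individual SSA values rather than an indexable array. As a rough reference model only, not emulator code, the sketch below shows what that chain computes for one thread; the names `MovRelSModel`/`MovRelDModel` are hypothetical and `vgpr` stands in for the thread's allocated register file bounded by `num_allocated_vgprs`:

#include <cstdint>
#include <vector>

// Hypothetical reference model, not part of the patch: the value VMovRelSHelper's
// select chain evaluates to for a single thread.
uint32_t MovRelSModel(const std::vector<uint32_t>& vgpr, uint32_t src_vgprno, uint32_t m0) {
    uint32_t result = vgpr[0]; // default when src_vgprno + m0 lands past the allocated VGPRs
    for (uint32_t i = src_vgprno; i < vgpr.size(); i++) {
        if (m0 == i - src_vgprno) { // same condition each ir.Select in the chain tests
            result = vgpr[i];
        }
    }
    return result;
}

// Mirror image of VMovRelDHelper: every VGPR from dst_vgprno upward is rewritten,
// keeping its previous value unless m0 selects it.
void MovRelDModel(std::vector<uint32_t>& vgpr, uint32_t dst_vgprno, uint32_t src_val, uint32_t m0) {
    for (uint32_t i = dst_vgprno; i < vgpr.size(); i++) {
        vgpr[i] = (m0 == i - dst_vgprno) ? src_val : vgpr[i];
    }
}

The TODO in the patch points at the cost of this expansion: without an upper bound on M0, the chain has to cover every allocated VGPR from the base register upward.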
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 473ae4f6..2be0c1ac 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -217,6 +217,10 @@ U32 IREmitter::GetVccHi() {
     return Inst<U32>(Opcode::GetVccHi);
 }
 
+U32 IREmitter::GetM0() {
+    return Inst<U32>(Opcode::GetM0);
+}
+
 void IREmitter::SetScc(const U1& value) {
     Inst(Opcode::SetScc, value);
 }
@@ -241,6 +245,10 @@ void IREmitter::SetVccHi(const U32& value) {
     Inst(Opcode::SetVccHi, value);
 }
 
+void IREmitter::SetM0(const U32& value) {
+    Inst(Opcode::SetM0, value);
+}
+
 F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
     return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
 }
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index de8fe450..22d524fb 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -67,12 +67,14 @@ public:
     [[nodiscard]] U1 GetVcc();
     [[nodiscard]] U32 GetVccLo();
     [[nodiscard]] U32 GetVccHi();
+    [[nodiscard]] U32 GetM0();
     void SetScc(const U1& value);
     void SetExec(const U1& value);
     void SetVcc(const U1& value);
     void SetSccLo(const U32& value);
     void SetVccLo(const U32& value);
     void SetVccHi(const U32& value);
+    void SetM0(const U32& value);
 
     [[nodiscard]] U1 Condition(IR::Condition cond);
 
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 40dcfa44..4df8d13d 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -60,12 +60,14 @@ OPCODE(GetExec, U1, Void,
 OPCODE(GetVcc, U1, Void, )
 OPCODE(GetVccLo, U32, Void, )
 OPCODE(GetVccHi, U32, Void, )
+OPCODE(GetM0, U32, Void, )
 OPCODE(SetScc, Void, U1, )
 OPCODE(SetExec, Void, U1, )
 OPCODE(SetVcc, Void, U1, )
 OPCODE(SetSccLo, Void, U32, )
 OPCODE(SetVccLo, Void, U32, )
 OPCODE(SetVccHi, Void, U32, )
+OPCODE(SetM0, Void, U32, )
 
 // Undefined
 OPCODE(UndefU1, U1, )
diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
index 9edb157d..ea27c64f 100644
--- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
@@ -33,6 +33,7 @@ struct ExecFlagTag : FlagTag {};
 struct VccFlagTag : FlagTag {};
 struct VccLoTag : FlagTag {};
 struct VccHiTag : FlagTag {};
+struct M0Tag : FlagTag {};
 
 struct GotoVariable : FlagTag {
     GotoVariable() = default;
@@ -44,7 +45,7 @@
 };
 
 using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
-                             VccFlagTag, VccLoTag, VccHiTag>;
+                             VccFlagTag, VccLoTag, VccHiTag, M0Tag>;
 using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
 
 struct DefTable {
@@ -103,6 +104,12 @@ struct DefTable {
     void SetDef(IR::Block* block, VccFlagTag, const IR::Value& value) {
         vcc_flag.insert_or_assign(block, value);
     }
+    const IR::Value& Def(IR::Block* block, M0Tag) {
+        return m0_flag[block];
+    }
+    void SetDef(IR::Block* block, M0Tag, const IR::Value& value) {
+        m0_flag.insert_or_assign(block, value);
+    }
 
     std::unordered_map<u32, ValueMap> goto_vars;
     ValueMap scc_flag;
@@ -111,6 +118,7 @@ struct DefTable {
     ValueMap scc_lo_flag;
     ValueMap vcc_lo_flag;
     ValueMap vcc_hi_flag;
+    ValueMap m0_flag;
 };
 
 IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
@@ -129,6 +137,10 @@ IR::Opcode UndefOpcode(const VccHiTag) noexcept {
     return IR::Opcode::UndefU32;
 }
 
+IR::Opcode UndefOpcode(const M0Tag) noexcept {
+    return IR::Opcode::UndefU32;
+}
+
 IR::Opcode UndefOpcode(const FlagTag) noexcept {
     return IR::Opcode::UndefU1;
 }
@@ -330,6 +342,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
     case IR::Opcode::SetVccHi:
         pass.WriteVariable(VccHiTag{}, block, inst.Arg(0));
         break;
+    case IR::Opcode::SetM0:
+        pass.WriteVariable(M0Tag{}, block, inst.Arg(0));
+        break;
     case IR::Opcode::GetThreadBitScalarReg:
     case IR::Opcode::GetScalarRegister: {
         const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
@@ -362,6 +377,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
     case IR::Opcode::GetVccHi:
         inst.ReplaceUsesWith(pass.ReadVariable(VccHiTag{}, block));
         break;
+    case IR::Opcode::GetM0:
+        inst.ReplaceUsesWith(pass.ReadVariable(M0Tag{}, block));
+        break;
     default:
         break;
     }
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 776fd90a..1bb06554 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -107,6 +107,7 @@ struct RuntimeInfo {
     Stage stage;
     u32 num_user_data;
     u32 num_input_vgprs;
+    u32 num_allocated_vgprs;
     VertexRuntimeInfo vs_info;
     FragmentRuntimeInfo fs_info;
     ComputeRuntimeInfo cs_info;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index b5435af1..4419b0f8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -76,6 +76,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
     case Shader::Stage::Vertex: {
         info.num_user_data = regs.vs_program.settings.num_user_regs;
         info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
+        info.num_allocated_vgprs = regs.vs_program.settings.num_vgprs * 4;
         GatherVertexOutputs(info.vs_info, regs.vs_output_control);
         info.vs_info.emulate_depth_negative_one_to_one =
             !instance.IsDepthClipControlSupported() &&
@@ -84,6 +85,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
     }
     case Shader::Stage::Fragment: {
         info.num_user_data = regs.ps_program.settings.num_user_regs;
+        info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4;
         std::ranges::transform(graphics_key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(),
                                [](Liverpool::ColorBuffer::SwapMode mode) {
                                    return static_cast<Shader::MrtSwizzle>(mode);
@@ -102,6 +104,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
     case Shader::Stage::Compute: {
         const auto& cs_pgm = regs.cs_program;
         info.num_user_data = cs_pgm.settings.num_user_regs;
+        info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4;
         info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
                                        cs_pgm.num_thread_z.full};
         info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),