Merge pull request #203 from shadps4-emu/video_core/more_functionality

More instructions support and trivial additions
This commit is contained in:
georgemoralis 2024-06-17 08:59:36 +03:00 committed by GitHub
commit d9f2758850
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 111 additions and 34 deletions

2
.gitmodules vendored
View file

@ -51,7 +51,7 @@
url = https://github.com/zyantific/zydis.git
[submodule "externals/sirit"]
path = externals/sirit
url = https://github.com/raphaelthegreat/sirit.git
url = https://github.com/shadps4-emu/sirit
[submodule "externals/xxhash"]
path = externals/xxhash
url = https://github.com/Cyan4973/xxHash.git

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit fc65ebb5b56b849b1205d5baa2ca38440096652d
Subproject commit 505cc66a2be70b268c1700fef4d5327a5fe46494

View file

@ -110,6 +110,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
CLS(Frontend) \
CLS(Render) \
SUB(Render, Vulkan) \
SUB(Render, Recompiler) \
CLS(Input) \
CLS(Tty) \
CLS(Loader)

View file

@ -77,6 +77,7 @@ enum class Class : u8 {
Frontend, ///< Emulator UI
Render, ///< Video Core
Render_Vulkan, ///< Vulkan backend
Render_Recompiler, ///< Shader recompiler
Loader, ///< ROM loader
Input, ///< Input emulation
Tty, ///< Debug output from emu

View file

@ -185,7 +185,7 @@ Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address)
}
/// Emits a single-component float buffer store.
/// The work is delegated to EmitStoreBufferU32 with the same context, instruction, buffer
/// handle, address and value; no float-specific handling happens here.
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    // The forwarding call used to sit behind an UNREACHABLE() and so could never execute.
    EmitStoreBufferU32(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {

View file

@ -240,6 +240,8 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
Id EmitISub32(EmitContext& ctx, Id a, Id b);
Id EmitISub64(EmitContext& ctx, Id a, Id b);
Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
Id EmitUMulExt(EmitContext& ctx, Id a, Id b);
Id EmitIMul32(EmitContext& ctx, Id a, Id b);
Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
Id EmitUDiv32(EmitContext& ctx, Id a, Id b);

View file

@ -68,6 +68,14 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b) {
return ctx.OpISub(ctx.U64, a, b);
}
/// Emits OpSMulExtended for operands `a` and `b`, using the context's
/// full_result_i32x2 struct as the result type, and returns the resulting id.
/// NOTE(review): SPIR-V expects both operands to have the same type as the struct members;
/// confirm the ids passed in here are typed accordingly.
Id EmitSMulExt(EmitContext& ctx, Id a, Id b) {
    const Id result_type{ctx.full_result_i32x2};
    return ctx.OpSMulExtended(result_type, a, b);
}
/// Emits OpUMulExtended for operands `a` and `b`, using the context's
/// full_result_u32x2 struct as the result type, and returns the resulting id.
Id EmitUMulExt(EmitContext& ctx, Id a, Id b) {
    const Id result_type{ctx.full_result_u32x2};
    return ctx.OpUMulExtended(result_type, a, b);
}
/// Emits OpIMul for operands `a` and `b` with the context's scalar U32 type as the
/// result type, and returns the resulting id.
Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpIMul(result_type, a, b);
}

View file

@ -104,6 +104,9 @@ void EmitContext::DefineArithmeticTypes() {
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
}
void EmitContext::DefineInterfaces(const IR::Program& program) {

View file

@ -138,6 +138,9 @@ public:
VectorIds U32{};
VectorIds U1{};
Id full_result_i32x2;
Id full_result_u32x2;
Id true_value{};
Id false_value{};
Id u32_one_value{};

View file

@ -823,6 +823,7 @@ IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
ASSERT_MSG(!info.translation_failed, "Shader translation has failed");
return syntax_list;
}

View file

@ -5,6 +5,15 @@
namespace Shader::Gcn {
void Translator::S_MOVK(const GcnInst& inst) {
const auto simm16 = inst.control.sopk.simm.Value();
if (simm16 & (1 << 15)) {
// TODO: need to verify the case of imm sign extension
UNREACHABLE();
}
SetDst(inst.dst[0], ir.Imm32(simm16));
}
/// Translates a scalar move: reads the first source operand and writes it, unchanged,
/// to the first destination operand.
void Translator::S_MOV(const GcnInst& inst) {
    const auto moved_value = GetSrc(inst.src[0]);
    SetDst(inst.dst[0], moved_value);
}

View file

@ -7,6 +7,10 @@
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#define MAGIC_ENUM_RANGE_MIN 0
#define MAGIC_ENUM_RANGE_MAX 1515
#include "magic_enum.hpp"
namespace Shader::Gcn {
std::array<bool, IR::NumScalarRegs> Translator::exec_contexts{};
@ -210,6 +214,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
Translator translator{block, info};
for (const auto& inst : inst_list) {
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
translator.S_MOVK(inst);
break;
case Opcode::S_MOV_B32:
translator.S_MOV(inst);
break;
@ -421,6 +428,12 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_MAX_F32:
translator.V_MAX_F32(inst);
break;
case Opcode::V_MAX_I32:
translator.V_MAX_U32(true, inst);
break;
case Opcode::V_MAX_U32:
translator.V_MAX_U32(false, inst);
break;
case Opcode::V_RSQ_F32:
translator.V_RSQ_F32(inst);
break;
@ -581,8 +594,11 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_ADD_I32:
translator.S_ADD_I32(inst);
break;
case Opcode::V_MUL_HI_U32:
translator.V_MUL_HI_U32(false, inst);
break;
case Opcode::V_MUL_LO_I32:
translator.V_MUL_LO_I32(inst);
translator.V_MUL_LO_U32(inst);
break;
case Opcode::V_SAD_U32:
translator.V_SAD_U32(inst);
@ -641,6 +657,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_BFM_B32:
translator.S_BFM_B32(inst);
break;
case Opcode::V_TRUNC_F32:
translator.V_TRUNC_F32(inst);
break;
case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:
@ -654,7 +673,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
break;
default:
const u32 opcode = u32(inst.opcode);
UNREACHABLE_MSG("Unknown opcode {}", opcode);
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({})",
magic_enum::enum_name(inst.opcode), opcode);
info.translation_failed = true;
}
}
}

View file

@ -34,6 +34,7 @@ public:
void EmitFetch(const GcnInst& inst);
// Scalar ALU
void S_MOVK(const GcnInst& inst);
void S_MOV(const GcnInst& inst);
void S_MUL_I32(const GcnInst& inst);
void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
@ -79,6 +80,7 @@ public:
void V_FMA_F32(const GcnInst& inst);
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
void V_MAX_F32(const GcnInst& inst);
void V_MAX_U32(bool is_signed, const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst);
void V_SIN_F32(const GcnInst& inst);
void V_LOG_F32(const GcnInst& inst);
@ -96,7 +98,7 @@ public:
void V_SUBREV_I32(const GcnInst& inst);
void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
void V_LSHRREV_B32(const GcnInst& inst);
void V_MUL_LO_I32(const GcnInst& inst);
void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
void V_SAD_U32(const GcnInst& inst);
void V_BFE_U32(const GcnInst& inst);
void V_MAD_I32_I24(const GcnInst& inst);
@ -112,6 +114,7 @@ public:
void V_CVT_I32_F32(const GcnInst& inst);
void V_MIN_I32(const GcnInst& inst);
void V_MUL_LO_U32(const GcnInst& inst);
void V_TRUNC_F32(const GcnInst& inst);
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);

View file

@ -197,6 +197,12 @@ void Translator::V_MAX_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMax(src0, src1));
}
void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IMax(src0, src1, is_signed));
}
void Translator::V_RSQ_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
@ -320,10 +326,11 @@ void Translator::V_LSHRREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.ShiftRightLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
}
void Translator::V_MUL_LO_I32(const GcnInst& inst) {
void Translator::V_MUL_HI_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IMul(src0, src1));
const IR::U32 hi{ir.CompositeExtract(ir.IMulExt(src0, src1, is_signed), 1)};
SetDst(inst.dst[0], hi);
}
void Translator::V_SAD_U32(const GcnInst& inst) {
@ -418,4 +425,9 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.IMul(src0, src1));
}
void Translator::V_TRUNC_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPTrunc(src0));
}
} // namespace Shader::Gcn

View file

@ -216,18 +216,22 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
const IR::VectorReg src_reg{inst.src[1].code};
switch (num_dwords) {
case 1:
value = ir.GetVectorReg(src_reg);
value = ir.GetVectorReg<Shader::IR::F32>(src_reg);
break;
case 2:
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1));
value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 1));
break;
case 3:
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
ir.GetVectorReg(src_reg + 2));
value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 1),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 2));
break;
case 4:
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 1),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 2),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
break;
}
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);

View file

@ -880,6 +880,10 @@ U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
}
}
/// Emits an extended multiply of `a` and `b`.
/// @param is_signed Selects Opcode::SMulExt when true and Opcode::UMulExt otherwise.
/// @return The untyped IR value produced by the emitted instruction.
IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) {
    const Opcode mul_opcode{is_signed ? Opcode::SMulExt : Opcode::UMulExt};
    return Inst(mul_opcode, a, b);
}
/// Emits an Opcode::IMul32 instruction on `a` and `b` and returns its U32-typed result.
U32 IREmitter::IMul(const U32& a, const U32& b) {
    const Opcode mul_opcode{Opcode::IMul32};
    return Inst<U32>(mul_opcode, a, b);
}

View file

@ -146,6 +146,7 @@ public:
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
[[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32U64 INeg(const U32U64& value);

View file

@ -197,6 +197,8 @@ OPCODE(IAdd64, U64, U64,
OPCODE(ISub32, U32, U32, U32, )
OPCODE(ISub64, U64, U64, U64, )
OPCODE(IMul32, U32, U32, U32, )
OPCODE(SMulExt, U32x2, U32, U32, )
OPCODE(UMulExt, U32x2, U32, U32, )
OPCODE(SDiv32, U32, U32, U32, )
OPCODE(UDiv32, U32, U32, U32, )
OPCODE(INeg32, U32, U32, )

View file

@ -127,6 +127,7 @@ struct Info {
Stage stage;
bool uses_group_quad{};
bool translation_failed{}; // indicates that shader has unsupported instructions
template <typename T>
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {

View file

@ -312,6 +312,12 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eBc3SrgbBlock;
}
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc3UnormBlock;
}
if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc4UnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR16G16B16A16Sint;
@ -322,9 +328,6 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc1RgbaUnormBlock;
}
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc3UnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR8G8B8A8Uint;
@ -361,22 +364,19 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
"Unsupported component swap mode {}", static_cast<u32>(comp_swap));
const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate;
switch (base_format) {
case vk::Format::eR8G8B8A8Unorm:
return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm : base_format;
case vk::Format::eB8G8R8A8Unorm:
return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm : base_format;
case vk::Format::eR8G8B8A8Srgb:
return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm
: is_vo_surface ? vk::Format::eR8G8B8A8Unorm
: base_format;
case vk::Format::eB8G8R8A8Srgb:
return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm
: is_vo_surface ? vk::Format::eB8G8R8A8Unorm
: base_format;
if (comp_swap_alt) {
switch (base_format) {
case vk::Format::eR8G8B8A8Unorm:
return vk::Format::eB8G8R8A8Unorm;
case vk::Format::eB8G8R8A8Unorm:
return vk::Format::eR8G8B8A8Unorm;
case vk::Format::eR8G8B8A8Srgb:
return is_vo_surface ? vk::Format::eB8G8R8A8Unorm : vk::Format::eB8G8R8A8Srgb;
case vk::Format::eB8G8R8A8Srgb:
return is_vo_surface ? vk::Format::eR8G8B8A8Unorm : vk::Format::eR8G8B8A8Srgb;
}
}
UNREACHABLE_MSG("Unsupported base format {}", vk::to_string(base_format));
return base_format;
}
vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) {

View file

@ -189,7 +189,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
inst_pool.ReleaseContents();
// Recompile shader to IR.
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#X}", stage, hash);
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
@ -224,6 +224,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
inst_pool.ReleaseContents();
// Recompile shader to IR.
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
const Shader::Info info =
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

View file

@ -85,7 +85,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
}
const vk::ImageViewCreateInfo image_view_ci = {
.pNext = nullptr,
.pNext = usage_override ? &usage_ci : nullptr,
.image = image.image,
.viewType = info.type,
.format = format,