From 725814ce012f5246e03a4df23b8f24a9426cb100 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 10 Jan 2025 00:48:12 -0800 Subject: [PATCH] shader_recompiler: Improvements to array and cube handling. (#2083) * shader_recompiler: Account for instruction array flag in image type. * shader_recompiler: Check da flag for all mimg instructions. * shader_recompiler: Convert cube images into 2D arrays. * shader_recompiler: Move image resource functions into sharp type. * shader_recompiler: Use native AMD cube instructions when possible. * specialization: Fix buffer storage mistake. --- externals/sirit | 2 +- .../backend/spirv/emit_spirv_image.cpp | 23 +++++- .../backend/spirv/emit_spirv_instructions.h | 2 + .../backend/spirv/spirv_emit_context.cpp | 10 +-- .../backend/spirv/spirv_emit_context.h | 1 + .../frontend/translate/translate.h | 3 + .../frontend/translate/vector_alu.cpp | 81 ++++++++++++++++++- .../frontend/translate/vector_memory.cpp | 37 ++++++--- src/shader_recompiler/info.h | 6 -- src/shader_recompiler/ir/ir_emitter.cpp | 13 +-- src/shader_recompiler/ir/ir_emitter.h | 5 +- src/shader_recompiler/ir/opcodes.inc | 4 + .../ir/passes/resource_tracking_pass.cpp | 46 +---------- .../ir/passes/shader_info_collection_pass.cpp | 2 +- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/specialization.h | 4 +- src/video_core/amdgpu/resource.h | 56 +++++++++---- .../renderer_vulkan/vk_compute_pipeline.cpp | 5 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 5 +- .../renderer_vulkan/vk_instance.cpp | 1 + src/video_core/renderer_vulkan/vk_instance.h | 6 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + .../renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/texture_cache/image.cpp | 8 +- src/video_core/texture_cache/image_info.cpp | 4 +- src/video_core/texture_cache/image_info.h | 1 - src/video_core/texture_cache/image_view.cpp | 30 ++----- src/video_core/texture_cache/texture_cache.h | 2 +- 28 files changed, 217 insertions(+), 144 deletions(-) diff --git a/externals/sirit b/externals/sirit index 1e74f4ef8..26ad5a9d0 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35 +Subproject commit 26ad5a9d0fe13260b0d7d6c64419d01a196b2e32 diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index c3d937fe7..182437b63 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -172,20 +172,19 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info); - const auto type = sharp.GetBoundType(); const Id zero = ctx.u32_zero_value; const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }}; - const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage}; + const bool uses_lod{texture.bound_type != AmdGpu::ImageType::Color2DMsaa && + !texture.is_storage}; const auto query{[&](Id type) { return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod) : ctx.OpImageQuerySize(type, image); }}; - switch (type) { + switch (texture.bound_type) { case AmdGpu::ImageType::Color1D: return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips()); case AmdGpu::ImageType::Color1DArray: case AmdGpu::ImageType::Color2D: - case AmdGpu::ImageType::Cube: case AmdGpu::ImageType::Color2DMsaa: return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips()); case AmdGpu::ImageType::Color2DArray: @@ -257,4 +256,20 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands); } +Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords) { + if (ctx.profile.supports_native_cube_calc) { + return ctx.OpCubeFaceCoordAMD(ctx.F32[2], cube_coords); + } else { + UNREACHABLE_MSG("SPIR-V Instruction"); + } +} + +Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords) { + if (ctx.profile.supports_native_cube_calc) { + return ctx.OpCubeFaceIndexAMD(ctx.F32[1], cube_coords); + } else { + UNREACHABLE_MSG("SPIR-V Instruction"); + } +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 0d9fcff46..37b6f7786 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -439,6 +439,8 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); +Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords); +Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords); Id EmitLaneId(EmitContext& ctx); Id EmitWarpId(EmitContext& ctx); Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 575bf91f7..6151c5c65 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -773,8 +773,8 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { const auto image = desc.GetSharp(ctx.info); const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown; - const auto type = image.GetBoundType(); - const u32 sampled = desc.IsStorage(image) ? 2 : 1; + const auto type = image.GetBoundType(desc.is_array); + const u32 sampled = desc.is_written ? 2 : 1; switch (type) { case AmdGpu::ImageType::Color1D: return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format); @@ -788,9 +788,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format); case AmdGpu::ImageType::Color3D: return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format); - case AmdGpu::ImageType::Cube: - return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled, - format); default: break; } @@ -802,7 +799,7 @@ void EmitContext::DefineImagesAndSamplers() { const auto sharp = image_desc.GetSharp(info); const auto nfmt = sharp.GetNumberFmt(); const bool is_integer = AmdGpu::IsInteger(nfmt); - const bool is_storage = image_desc.IsStorage(sharp); + const bool is_storage = image_desc.is_written; const VectorIds& data_types = GetAttributeType(*this, nfmt); const Id sampled_type = data_types[1]; const Id image_type{ImageType(*this, image_desc, sampled_type)}; @@ -817,6 +814,7 @@ void EmitContext::DefineImagesAndSamplers() { .sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type), .pointer_type = pointer_type, .image_type = image_type, + .bound_type = sharp.GetBoundType(image_desc.is_array), .is_integer = is_integer, .is_storage = is_storage, }); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 583d96b99..80d0d4d9f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -222,6 +222,7 @@ public: Id sampled_type; Id pointer_type; Id image_type; + AmdGpu::ImageType bound_type; bool is_integer = false; bool is_storage = false; }; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 7a0b736d4..bef61f997 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -301,6 +301,9 @@ private: IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0); void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0); + IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z, + const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res); + void LogMissingOpcode(const GcnInst& inst); private: diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7fa83eebb..375c5f078 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -3,6 +3,7 @@ #include "shader_recompiler/frontend/opcodes.h" #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/profile.h" namespace Shader::Gcn { @@ -1042,20 +1043,92 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) { V_MAD_I32_I24(inst, false); } +IR::F32 Translator::SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z, + const IR::F32& x_res, const IR::F32& y_res, + const IR::F32& z_res) { + const auto abs_x = ir.FPAbs(x); + const auto abs_y = ir.FPAbs(y); + const auto abs_z = ir.FPAbs(z); + + const auto z_face_cond{ + ir.LogicalAnd(ir.FPGreaterThanEqual(abs_z, abs_x), ir.FPGreaterThanEqual(abs_z, abs_y))}; + const auto y_face_cond{ir.FPGreaterThanEqual(abs_y, abs_x)}; + + return IR::F32{ir.Select(z_face_cond, z_res, ir.Select(y_face_cond, y_res, x_res))}; +} + void Translator::V_CUBEID_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[2])); + const auto x = GetSrc(inst.src[0]); + const auto y = GetSrc(inst.src[1]); + const auto z = GetSrc(inst.src[2]); + + IR::F32 result; + if (profile.supports_native_cube_calc) { + result = ir.CubeFaceIndex(ir.CompositeConstruct(x, y, z)); + } else { + const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))}; + const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))}; + const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))}; + const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))}; + const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))}; + const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))}; + + result = SelectCubeResult(x, y, z, x_face, y_face, z_face); + } + SetDst(inst.dst[0], result); } void Translator::V_CUBESC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0])); + const auto x = GetSrc(inst.src[0]); + const auto y = GetSrc(inst.src[1]); + const auto z = GetSrc(inst.src[2]); + + IR::F32 result; + if (profile.supports_native_cube_calc) { + const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))}; + result = IR::F32{ir.CompositeExtract(coords, 0)}; + } else { + const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))}; + const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))}; + const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)}; + const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))}; + + result = SelectCubeResult(x, y, z, x_sc, x, z_sc); + } + SetDst(inst.dst[0], result); } void Translator::V_CUBETC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[1])); + const auto x = GetSrc(inst.src[0]); + const auto y = GetSrc(inst.src[1]); + const auto z = GetSrc(inst.src[2]); + + IR::F32 result; + if (profile.supports_native_cube_calc) { + const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))}; + result = IR::F32{ir.CompositeExtract(coords, 1)}; + } else { + const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))}; + const IR::F32 x_z_sc{ir.FPNeg(y)}; + const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)}; + + result = SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc); + } + SetDst(inst.dst[0], result); } void Translator::V_CUBEMA_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.Imm32(1.f)); + const auto x = GetSrc(inst.src[0]); + const auto y = GetSrc(inst.src[1]); + const auto z = GetSrc(inst.src[2]); + + const auto two{ir.Imm32(4.f)}; + const IR::F32 x_major_axis{ir.FPMul(x, two)}; + const IR::F32 y_major_axis{ir.FPMul(y, two)}; + const IR::F32 z_major_axis{ir.FPMul(z, two)}; + + const auto result{SelectCubeResult(x, y, z, x_major_axis, y_major_axis, z_major_axis)}; + SetDst(inst.dst[0], result); } void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index c5be08b7d..a5b54dff7 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -418,6 +418,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { IR::TextureInstInfo info{}; info.has_lod.Assign(has_mip); + info.is_array.Assign(mimg.da); const IR::Value texel = ir.ImageRead(handle, body, {}, {}, info); for (u32 i = 0; i < 4; i++) { @@ -442,6 +443,7 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) { IR::TextureInstInfo info{}; info.has_lod.Assign(has_mip); + info.is_array.Assign(mimg.da); boost::container::static_vector comps; for (u32 i = 0; i < 4; i++) { @@ -456,13 +458,18 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) { } void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { + const auto& mimg = inst.control.mimg; IR::VectorReg dst_reg{inst.dst[0].code}; const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; const auto flags = ImageResFlags(inst.control.mimg.dmask); const bool has_mips = flags.test(ImageResComponent::MipCount); const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code)); const IR::Value tsharp = ir.GetScalarReg(tsharp_reg); - const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips)); + + IR::TextureInstInfo info{}; + info.is_array.Assign(mimg.da); + + const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips), info); if (flags.test(ImageResComponent::Width)) { ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 0)}); @@ -484,6 +491,9 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) { IR::VectorReg addr_reg{inst.src[0].code}; const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; + IR::TextureInstInfo info{}; + info.is_array.Assign(mimg.da); + const IR::Value value = ir.GetVectorReg(val_reg); const IR::Value handle = ir.GetScalarReg(tsharp_reg); const IR::Value body = @@ -494,25 +504,25 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) { case AtomicOp::Swap: return ir.ImageAtomicExchange(handle, body, value, {}); case AtomicOp::Add: - return ir.ImageAtomicIAdd(handle, body, value, {}); + return ir.ImageAtomicIAdd(handle, body, value, info); case AtomicOp::Smin: - return ir.ImageAtomicIMin(handle, body, value, true, {}); + return ir.ImageAtomicIMin(handle, body, value, true, info); case AtomicOp::Umin: - return ir.ImageAtomicUMin(handle, body, value, {}); + return ir.ImageAtomicUMin(handle, body, value, info); case AtomicOp::Smax: - return ir.ImageAtomicIMax(handle, body, value, true, {}); + return ir.ImageAtomicIMax(handle, body, value, true, info); case AtomicOp::Umax: - return ir.ImageAtomicUMax(handle, body, value, {}); + return ir.ImageAtomicUMax(handle, body, value, info); case AtomicOp::And: - return ir.ImageAtomicAnd(handle, body, value, {}); + return ir.ImageAtomicAnd(handle, body, value, info); case AtomicOp::Or: - return ir.ImageAtomicOr(handle, body, value, {}); + return ir.ImageAtomicOr(handle, body, value, info); case AtomicOp::Xor: - return ir.ImageAtomicXor(handle, body, value, {}); + return ir.ImageAtomicXor(handle, body, value, info); case AtomicOp::Inc: - return ir.ImageAtomicInc(handle, body, value, {}); + return ir.ImageAtomicInc(handle, body, value, info); case AtomicOp::Dec: - return ir.ImageAtomicDec(handle, body, value, {}); + return ir.ImageAtomicDec(handle, body, value, info); default: UNREACHABLE(); } @@ -643,11 +653,14 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) { IR::VectorReg addr_reg{inst.src[0].code}; const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; + IR::TextureInstInfo info{}; + info.is_array.Assign(mimg.da); + const IR::Value handle = ir.GetScalarReg(tsharp_reg); const IR::Value body = ir.CompositeConstruct( ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1), ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); - const IR::Value lod = ir.ImageQueryLod(handle, body, {}); + const IR::Value lod = ir.ImageQueryLod(handle, body, info); ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 0)}); ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)}); } diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index b6ac12785..aeff346fa 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -70,14 +70,8 @@ struct ImageResource { bool is_depth{}; bool is_atomic{}; bool is_array{}; - bool is_read{}; bool is_written{}; - [[nodiscard]] bool IsStorage(const AmdGpu::Image& image) const noexcept { - // Need cube as storage when used with ImageRead. - return is_written || (is_read && image.GetBoundType() == AmdGpu::ImageType::Cube); - } - [[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; }; using ImageResourceList = boost::container::small_vector; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 823f9bdcd..8626bdfd1 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1732,11 +1732,6 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref); } -Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, - const IR::U1& skip_mips) { - return Inst(Opcode::ImageQueryDimensions, handle, lod, skip_mips); -} - Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, const IR::U1& skip_mips, TextureInstInfo info) { return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips); @@ -1763,6 +1758,14 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, multisampling, color); } +[[nodiscard]] Value IREmitter::CubeFaceCoord(const Value& cube_coords) { + return Inst(Opcode::CubeFaceCoord, cube_coords); +} + +[[nodiscard]] F32 IREmitter::CubeFaceIndex(const Value& cube_coords) { + return Inst(Opcode::CubeFaceIndex, cube_coords); +} + // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction // Renderdoc will hook in its own implementation of the SPIRV instruction // Renderdoc accepts format specifiers, e.g. %u, listed here: diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 9aab9459b..783709775 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -324,8 +324,6 @@ public: const F32& dref, const F32& lod, const Value& offset, TextureInstInfo info); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, - const U1& skip_mips); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, const U1& skip_mips, TextureInstInfo info); @@ -344,6 +342,9 @@ public: void ImageWrite(const Value& handle, const Value& coords, const U32& lod, const U32& multisampling, const Value& color, TextureInstInfo info); + [[nodiscard]] Value CubeFaceCoord(const Value& cube_coords); + [[nodiscard]] F32 CubeFaceIndex(const Value& cube_coords); + void EmitVertex(); void EmitPrimitive(); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 6242a230e..19f45418f 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -374,6 +374,10 @@ OPCODE(ImageAtomicOr32, U32, Opaq OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) +// Cube operations - optional, usable if profile.supports_native_cube_calc +OPCODE(CubeFaceCoord, F32x2, F32x3, ) +OPCODE(CubeFaceIndex, F32, F32x3, ) + // Warp operations OPCODE(LaneId, U32, ) OPCODE(WarpId, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index f7040ad75..c00b8e6bb 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -161,10 +161,9 @@ public: u32 Add(const ImageResource& desc) { const u32 index{Add(image_resources, desc, [&desc](const auto& existing) { - return desc.sharp_idx == existing.sharp_idx; + return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array; })}; auto& image = image_resources[index]; - image.is_read |= desc.is_read; image.is_written |= desc.is_written; return index; } @@ -361,7 +360,6 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& image = AmdGpu::Image::Null(); } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); - const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead; const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite; // Patch image instruction if image is FMask. @@ -402,7 +400,6 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& .is_depth = bool(inst_info.is_depth), .is_atomic = IsImageAtomicInstruction(inst), .is_array = bool(inst_info.is_array), - .is_read = is_read, .is_written = is_written, }); @@ -560,32 +557,6 @@ void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) { } } -IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, - const IR::Value& z, bool is_written, bool is_array) { - // When cubemap is written with imageStore it is treated like 2DArray. - if (is_written) { - return ir.CompositeConstruct(s, t, z); - } - - ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below - - // We need to fix x and y coordinate, - // because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32. - // We already force the scale value to be 1.0 when handling v_cubema_f32, - // here we subtract 1.5 to recover the original value. - const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f)); - const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f)); - if (is_array) { - const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z}); - const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u)); - const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u)); - return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id), - ir.ConvertIToF(32, 32, false, slice_id)); - } else { - return ir.CompositeConstruct(x, y, z); - } -} - void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, const AmdGpu::Image& image) { const auto handle = inst.Arg(0); @@ -649,7 +620,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, case AmdGpu::ImageType::Color2DMsaa: return ir.CompositeConstruct(read(0), read(8)); case AmdGpu::ImageType::Color3D: - case AmdGpu::ImageType::Cube: return ir.CompositeConstruct(read(0), read(8), read(16)); default: UNREACHABLE(); @@ -675,7 +645,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)), ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))}; case AmdGpu::ImageType::Color3D: - case AmdGpu::ImageType::Cube: // (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy) addr_reg = addr_reg + 6; return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5), @@ -691,7 +660,8 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, // Query dimensions of image if needed for normalization. // We can't use the image sharp because it could be bound to a different image later. const auto dimensions = - unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false)) : IR::Value{}; + unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false), inst_info) + : IR::Value{}; const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value { const auto coord = get_addr_reg(coord_idx); if (unnormalized) { @@ -724,10 +694,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, addr_reg = addr_reg + 3; return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), get_coord(addr_reg - 1, 2)); - case AmdGpu::ImageType::Cube: // x, y, face - addr_reg = addr_reg + 3; - return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), - get_addr_reg(addr_reg - 1), false, inst_info.is_array); default: UNREACHABLE(); } @@ -805,10 +771,6 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { [[fallthrough]]; case AmdGpu::ImageType::Color3D: // x, y, z, [lod] return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; - case AmdGpu::ImageType::Cube: // x, y, face, [lod] - return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), - inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array), - body->Arg(3)}; default: UNREACHABLE_MSG("Unknown image type {}", image.GetType()); } @@ -820,7 +782,7 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{}; const auto ms = has_ms ? IR::U32{arg} : IR::U32{}; - const auto is_storage = image_res.IsStorage(image); + const auto is_storage = image_res.is_written; if (inst.GetOpcode() == IR::Opcode::ImageRead) { auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info); if (is_storage) { diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index c34b59b88..7fd5b75ff 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -5,7 +5,7 @@ namespace Shader::Optimization { -void Visit(Info& info, IR::Inst& inst) { +void Visit(Info& info, const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::GetAttribute: case IR::Opcode::GetAttributeU32: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index fc8c5956e..f8878d442 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -24,6 +24,7 @@ struct Profile { bool support_explicit_workgroup_layout{}; bool support_legacy_vertex_attributes{}; bool supports_image_load_store_lod{}; + bool supports_native_cube_calc{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index f58d2e2d3..c03621c50 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -113,9 +113,9 @@ struct StageSpecialization { }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { - spec.type = sharp.GetBoundType(); + spec.type = sharp.GetBoundType(desc.is_array); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); - spec.is_storage = desc.IsStorage(sharp); + spec.is_storage = desc.is_written; if (spec.is_storage) { spec.dst_select = sharp.DstSelect(); } diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index ffee7964a..3dc1eadde 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -226,15 +226,13 @@ struct Image { return pitch + 1; } - u32 NumLayers(bool is_array) const { - u32 slices = GetType() == ImageType::Color3D ? 1 : depth + 1; - if (GetType() == ImageType::Cube) { - if (is_array) { - slices = last_array + 1; - ASSERT(slices % 6 == 0); - } else { - slices = 6; - } + [[nodiscard]] u32 NumLayers() const noexcept { + u32 slices = depth + 1; + const auto img_type = static_cast(type); + if (img_type == ImageType::Color3D) { + slices = 1; + } else if (img_type == ImageType::Cube) { + slices *= 6; } if (pow2pad) { slices = std::bit_ceil(slices); @@ -257,7 +255,8 @@ struct Image { } ImageType GetType() const noexcept { - return static_cast(type); + const auto img_type = static_cast(type); + return img_type == ImageType::Cube ? ImageType::Color2DArray : img_type; } DataFormat GetDataFmt() const noexcept { @@ -289,13 +288,40 @@ struct Image { GetDataFmt() <= DataFormat::FormatFmask64_8; } - bool IsPartialCubemap() const { - const auto viewed_slice = last_array - base_array + 1; - return GetType() == ImageType::Cube && viewed_slice < 6; + [[nodiscard]] ImageType GetBoundType(const bool is_array) const noexcept { + const auto base_type = GetType(); + if (base_type == ImageType::Color1DArray && !is_array) { + return ImageType::Color1D; + } + if (base_type == ImageType::Color2DArray && !is_array) { + return ImageType::Color2D; + } + if (base_type == ImageType::Color2DMsaaArray && !is_array) { + return ImageType::Color2DMsaa; + } + return base_type; } - ImageType GetBoundType() const noexcept { - return IsPartialCubemap() ? ImageType::Color2DArray : GetType(); + [[nodiscard]] u32 NumViewLevels(const bool is_array) const noexcept { + switch (GetBoundType(is_array)) { + case ImageType::Color2DMsaa: + case ImageType::Color2DMsaaArray: + return 1; + default: + return last_level - base_level + 1; + } + } + + [[nodiscard]] u32 NumViewLayers(const bool is_array) const noexcept { + switch (GetBoundType(is_array)) { + case ImageType::Color1D: + case ImageType::Color2D: + case ImageType::Color2DMsaa: + case ImageType::Color3D: + return 1; + default: + return last_array - base_array + 1; + } } }; static_assert(sizeof(Image) == 32); // 256bits diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index b24767e8a..23faacfc2 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -59,9 +59,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler for (const auto& image : info->images) { bindings.push_back({ .binding = binding++, - .descriptorType = image.IsStorage(image.GetSharp(*info)) - ? vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, + .descriptorType = image.is_written ? vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eCompute, }); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0ca1bed8b..0154172d9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -367,9 +367,8 @@ void GraphicsPipeline::BuildDescSetLayout() { for (const auto& image : stage->images) { bindings.push_back({ .binding = binding++, - .descriptorType = image.IsStorage(image.GetSharp(*stage)) - ? vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, + .descriptorType = image.is_written ? vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, .descriptorCount = 1, .stageFlags = gp_stage_flags, }); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 9bc627830..6c3e066c6 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -271,6 +271,7 @@ bool Instance::CreateDevice() { maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); + amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME); // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 4e091824d..8928b4267 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -159,6 +159,11 @@ public: return image_load_store_lod; } + /// Returns true when VK_AMD_gcn_shader is supported. + bool IsAmdGcnShaderSupported() const { + return amd_gcn_shader; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -334,6 +339,7 @@ private: bool list_restart{}; bool legacy_vertex_attributes{}; bool image_load_store_lod{}; + bool amd_gcn_shader{}; u64 min_imported_host_pointer_alignment{}; u32 subgroup_size{}; bool tooling_info{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9cfc7c277..37137bd07 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -204,6 +204,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_explicit_workgroup_layout = true, .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), + .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e8616550b..ceb1d094d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -661,7 +661,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin if (image->binding.is_bound) { // The image is already bound. In case if it is about to be used as storage we need // to force general layout on it. - image->binding.force_general |= image_desc.IsStorage(tsharp); + image->binding.force_general |= image_desc.is_written; } if (image->binding.is_target) { // The image is already bound as target. Since we read and output to it need to force diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 23249bf21..96881c564 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -153,13 +153,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; - const bool can_be_cube = - (info.type == vk::ImageType::e2D) && - ((info.props.is_pow2 ? (info.resources.layers % 8) : (info.resources.layers % 6)) == 0) && - (info.size.width == info.size.height); - if (info.props.is_cube || can_be_cube) { - flags |= vk::ImageCreateFlagBits::eCubeCompatible; - } else if (info.props.is_volume) { + if (info.props.is_volume) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; } diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index bdbaecda6..58c2a8e23 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -37,7 +37,6 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { return vk::ImageType::e1D; case AmdGpu::ImageType::Color2D: case AmdGpu::ImageType::Color2DMsaa: - case AmdGpu::ImageType::Cube: case AmdGpu::ImageType::Color2DArray: return vk::ImageType::e2D; case AmdGpu::ImageType::Color3D: @@ -130,7 +129,6 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de } type = ConvertImageType(image.GetType()); props.is_tiled = image.IsTiled(); - props.is_cube = image.GetType() == AmdGpu::ImageType::Cube; props.is_volume = image.GetType() == AmdGpu::ImageType::Color3D; props.is_pow2 = image.pow2pad; props.is_block = IsBlockCoded(); @@ -139,7 +137,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de size.depth = props.is_volume ? image.depth + 1 : 1; pitch = image.Pitch(); resources.levels = image.NumLevels(); - resources.layers = image.NumLayers(desc.is_array); + resources.layers = image.NumLayers(); num_samples = image.NumSamples(); num_bits = NumBits(image.GetDataFmt()); diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 6faca49c5..123540c1e 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -61,7 +61,6 @@ struct ImageInfo { } meta_info{}; struct { - u32 is_cube : 1; u32 is_volume : 1; u32 is_tiled : 1; u32 is_pow2 : 1; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 68b116558..569238168 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -20,8 +20,6 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) { case AmdGpu::ImageType::Color2D: case AmdGpu::ImageType::Color2DMsaa: return vk::ImageViewType::e2D; - case AmdGpu::ImageType::Cube: - return vk::ImageViewType::eCube; case AmdGpu::ImageType::Color2DArray: return vk::ImageViewType::e2DArray; case AmdGpu::ImageType::Color3D: @@ -32,7 +30,7 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) { } ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept - : is_storage{desc.IsStorage(image)} { + : is_storage{desc.is_written} { const auto dfmt = image.GetDataFmt(); auto nfmt = image.GetNumberFmt(); if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) { @@ -42,30 +40,12 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso if (desc.is_depth) { format = Vulkan::LiverpoolToVK::PromoteFormatToDepth(format); } + range.base.level = image.base_level; range.base.layer = image.base_array; - if (image.GetType() == AmdGpu::ImageType::Color2DMsaa || - image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) { - range.extent.levels = 1; - } else { - range.extent.levels = image.last_level - image.base_level + 1; - } - range.extent.layers = image.last_array - image.base_array + 1; - type = ConvertImageViewType(image.GetBoundType()); - - // Adjust view type for arrays - if (type == vk::ImageViewType::eCube) { - if (desc.is_array) { - type = vk::ImageViewType::eCubeArray; - } else { - // Some games try to bind an array of cubemaps while shader reads only single one. - range.extent.layers = std::min(range.extent.layers, 6u); - } - } - if (type == vk::ImageViewType::e3D && range.extent.layers > 1) { - // Some games pass incorrect layer count for 3D textures so we need to fixup it. - range.extent.layers = 1; - } + range.extent.levels = image.NumViewLevels(desc.is_array); + range.extent.layers = image.NumViewLayers(desc.is_array); + type = ConvertImageViewType(image.GetBoundType(desc.is_array)); if (!is_storage) { mapping = Vulkan::LiverpoolToVK::ComponentMapping(image.DstSelect()); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 944f021df..69907f000 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -65,7 +65,7 @@ public: struct TextureDesc : public BaseDesc { TextureDesc() = default; TextureDesc(const AmdGpu::Image& image, const Shader::ImageResource& desc) - : BaseDesc{desc.IsStorage(image) ? BindingType::Storage : BindingType::Texture, + : BaseDesc{desc.is_written ? BindingType::Storage : BindingType::Texture, ImageInfo{image, desc}, ImageViewInfo{image, desc}} {} };