From b0d7feb2929a03b7bccd2adb799009487b4fd59b Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 7 Jan 2025 02:21:49 -0800 Subject: [PATCH] video_core: Implement conversion for uncommon/unsupported number formats. (#2047) * video_core: Implement conversion for uncommon/unsupported number formats. * shader_recompiler: Reinterpret image sample output as well. * liverpool_to_vk: Remove mappings for remapped number formats. These were poorly supported by drivers anyway. * resource_tracking_pass: Fix image write swizzle mistake. * amdgpu: Add missing specialization and move format mapping data to types * reinterpret: Fix U/SToF input type. --- .../frontend/translate/export.cpp | 9 +- .../ir/passes/resource_tracking_pass.cpp | 522 +++++++++--------- src/shader_recompiler/ir/reinterpret.h | 64 ++- src/shader_recompiler/runtime_info.h | 1 + src/shader_recompiler/specialization.h | 4 + src/video_core/amdgpu/liverpool.h | 8 +- src/video_core/amdgpu/pixel_format.cpp | 2 +- src/video_core/amdgpu/resource.h | 102 +--- src/video_core/amdgpu/types.h | 122 +++- .../renderer_vulkan/liverpool_to_vk.cpp | 33 +- .../renderer_vulkan/vk_graphics_pipeline.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 3 + 12 files changed, 486 insertions(+), 385 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 83240e17..38ff9ae1 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -31,14 +32,16 @@ void Translator::EmitExport(const GcnInst& inst) { return; } const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); - const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle; + const auto col_buf = runtime_info.fs_info.color_buffers[index]; + const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion); + const auto [r, g, b, a] = col_buf.swizzle; const std::array swizzle_array = {r, g, b, a}; const auto swizzled_comp = swizzle_array[comp]; if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) { - ir.SetAttribute(attrib, value, comp); + ir.SetAttribute(attrib, converted, comp); return; } - ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red)); + ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red)); }; const auto unpack = [&](u32 idx) { diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 63675291..f7040ad7 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -301,8 +301,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, }); } -void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, - Descriptors& descriptors) { +void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { s32 binding{}; AmdGpu::Buffer buffer; if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) { @@ -317,19 +316,191 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, }); } - // Update buffer 
descriptor format. - const auto inst_info = inst.Flags(); - // Replace handle with binding index in buffer resource list. IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; inst.SetArg(0, ir.Imm32(binding)); +} + +void PatchTextureBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, + Descriptors& descriptors) { + const IR::Inst* handle = inst.Arg(0).InstRecursive(); + const IR::Inst* producer = handle->Arg(0).InstRecursive(); + const auto sharp = TrackSharp(producer, info); + const s32 binding = descriptors.Add(TextureBufferResource{ + .sharp_idx = sharp, + .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32, + }); + + // Replace handle with binding index in texture buffer resource list. + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.SetArg(0, ir.Imm32(binding)); +} + +void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { + const auto pred = [](const IR::Inst* inst) -> std::optional { + const auto opcode = inst->GetOpcode(); + if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler) + opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only) + opcode == IR::Opcode::GetUserData) { + return inst; + } + return std::nullopt; + }; + const auto result = IR::BreadthFirstSearch(&inst, pred); + ASSERT_MSG(result, "Unable to find image sharp source"); + const IR::Inst* producer = result.value(); + const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2; + const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer; + + // Read image sharp. + const auto tsharp = TrackSharp(tsharp_handle, info); + const auto inst_info = inst.Flags(); + auto image = info.ReadUdSharp(tsharp); + if (!image.Valid()) { + LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); + image = AmdGpu::Image::Null(); + } + ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); + const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead; + const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite; + + // Patch image instruction if image is FMask. + if (image.IsFmask()) { + ASSERT_MSG(!is_written, "FMask storage instructions are not supported"); + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + switch (inst.GetOpcode()) { + case IR::Opcode::ImageRead: + case IR::Opcode::ImageSampleRaw: { + IR::F32 fmaskx = ir.BitCast(ir.Imm32(0x76543210)); + IR::F32 fmasky = ir.BitCast(ir.Imm32(0xfedcba98)); + inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky)); + return; + } + case IR::Opcode::ImageQueryLod: + inst.ReplaceUsesWith(ir.Imm32(1)); + return; + case IR::Opcode::ImageQueryDimensions: { + IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast(image.width)), // x + ir.Imm32(static_cast(image.width)), // y + ir.Imm32(1), ir.Imm32(1)); // depth, mip + inst.ReplaceUsesWith(dims); + + // Track FMask resource to do specialization. 
+ descriptors.Add(FMaskResource{ + .sharp_idx = tsharp, + }); + return; + } + default: + UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode()); + } + } + + u32 image_binding = descriptors.Add(ImageResource{ + .sharp_idx = tsharp, + .is_depth = bool(inst_info.is_depth), + .is_atomic = IsImageAtomicInstruction(inst), + .is_array = bool(inst_info.is_array), + .is_read = is_read, + .is_written = is_written, + }); + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + + if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { + // Read sampler sharp. + const auto [sampler_binding, sampler] = [&] -> std::pair { + ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2); + const IR::Value& handle = producer->Arg(1); + // Inline sampler resource. + if (handle.IsImmediate()) { + LOG_WARNING(Render_Vulkan, "Inline sampler detected"); + const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}; + const auto binding = descriptors.Add(SamplerResource{ + .sharp_idx = std::numeric_limits::max(), + .inline_sampler = inline_sampler, + }); + return {binding, inline_sampler}; + } + // Normal sampler resource. + const auto ssharp_handle = handle.InstRecursive(); + const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); + const auto ssharp = TrackSharp(ssharp_ud, info); + const auto binding = descriptors.Add(SamplerResource{ + .sharp_idx = ssharp, + .associated_image = image_binding, + .disable_aniso = disable_aniso, + }); + return {binding, info.ReadUdSharp(ssharp)}; + }(); + // Patch image and sampler handle. + inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16)); + } else { + // Patch image handle. + inst.SetArg(0, ir.Imm32(image_binding)); + } +} + +void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { + // Insert gds binding in the shader if it doesn't exist already. + // The buffer is used for append/consume counters. + constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1}; + const u32 binding = descriptors.Add(BufferResource{ + .used_types = IR::Type::U32, + .inline_cbuf = GdsSharp, + .is_gds_buffer = true, + .is_written = true, + }); + + const auto pred = [](const IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetUserData) { + return inst; + } + return std::nullopt; + }; + + // Attempt to deduce the GDS address of counter at compile time. + const u32 gds_addr = [&] { + const IR::Value& gds_offset = inst.Arg(0); + if (gds_offset.IsImmediate()) { + // Nothing to do, offset is known. + return gds_offset.U32() & 0xFFFF; + } + const auto result = IR::BreadthFirstSearch(&inst, pred); + ASSERT_MSG(result, "Unable to track M0 source"); + + // M0 must be set by some user data register. + const IR::Inst* prod = gds_offset.InstRecursive(); + const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg()); + u32 m0_val = info.user_data[ud_reg] >> 16; + if (prod->GetOpcode() == IR::Opcode::IAdd32) { + m0_val += prod->Arg(1).U32(); + } + return m0_val & 0xFFFF; + }(); + + // Patch instruction. 
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.SetArg(0, ir.Imm32(gds_addr >> 2)); + inst.SetArg(1, ir.Imm32(binding)); +} + +void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) { + const auto handle = inst.Arg(0); + const auto buffer_res = info.buffers[handle.U32()]; + const auto buffer = buffer_res.GetSharp(info); + ASSERT(!buffer.add_tid_enable); - // Address of constant buffer reads can be calculated at IR emittion time. + // Address of constant buffer reads can be calculated at IR emission time. if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { return; } + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto inst_info = inst.Flags(); + const IR::U32 index_stride = ir.Imm32(buffer.index_stride); const IR::U32 element_size = ir.Imm32(buffer.element_size); @@ -366,21 +537,27 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, inst.SetArg(1, address); } -void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, - Descriptors& descriptors) { - const IR::Inst* handle = inst.Arg(0).InstRecursive(); - const IR::Inst* producer = handle->Arg(0).InstRecursive(); - const auto sharp = TrackSharp(producer, info); - const auto buffer = info.ReadUdSharp(sharp); - const s32 binding = descriptors.Add(TextureBufferResource{ - .sharp_idx = sharp, - .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32, - }); +void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) { + const auto handle = inst.Arg(0); + const auto buffer_res = info.texture_buffers[handle.U32()]; + const auto buffer = buffer_res.GetSharp(info); - // Replace handle with binding index in texture buffer resource list. - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.SetArg(0, ir.Imm32(binding)); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + + if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { + const auto swizzled = ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect()); + const auto converted = + ApplyWriteNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion()); + inst.SetArg(2, converted); + } else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) { + const auto inst_info = inst.Flags(); + const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); + const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect()); + const auto converted = + ApplyReadNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion()); + inst.ReplaceUsesWith(converted); + } } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, @@ -409,39 +586,14 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& } } -void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, - Descriptors& descriptors, const IR::Inst* producer, - const u32 image_binding, const AmdGpu::Image& image) { - // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions - const auto [sampler_binding, sampler] = [&] -> std::pair { - ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2); - const IR::Value& handle = producer->Arg(1); - // Inline sampler resource. 
- if (handle.IsImmediate()) { - LOG_WARNING(Render_Vulkan, "Inline sampler detected"); - const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}; - const auto binding = descriptors.Add(SamplerResource{ - .sharp_idx = std::numeric_limits::max(), - .inline_sampler = inline_sampler, - }); - return {binding, inline_sampler}; - } - // Normal sampler resource. - const auto ssharp_handle = handle.InstRecursive(); - const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); - const auto ssharp = TrackSharp(ssharp_ud, info); - const auto binding = descriptors.Add(SamplerResource{ - .sharp_idx = ssharp, - .associated_image = image_binding, - .disable_aniso = disable_aniso, - }); - return {binding, info.ReadUdSharp(ssharp)}; - }(); +void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, + const AmdGpu::Image& image) { + const auto handle = inst.Arg(0); + const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF]; + auto sampler = sampler_res.GetSharp(info); IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - const auto inst_info = inst.Flags(); - const IR::U32 handle = ir.Imm32(image_binding | sampler_binding << 16); IR::Inst* body1 = inst.Arg(1).InstRecursive(); IR::Inst* body2 = inst.Arg(2).InstRecursive(); @@ -539,8 +691,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, // Query dimensions of image if needed for normalization. // We can't use the image sharp because it could be bound to a different image later. const auto dimensions = - unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false)) - : IR::Value{}; + unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false)) : IR::Value{}; const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value { const auto coord = get_addr_reg(coord_idx); if (unnormalized) { @@ -589,7 +740,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, : IR::F32{}; const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{}; - auto new_inst = [&] -> IR::Value { + auto texel = [&] -> IR::Value { if (inst_info.is_gather) { if (inst_info.is_depth) { return ir.ImageGatherDref(handle, coords, offset, dref, inst_info); @@ -611,94 +762,30 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, } return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info); }(); - inst.ReplaceUsesWithAndRemove(new_inst); + + const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion()); + inst.ReplaceUsesWith(converted); } -void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { - const auto pred = [](const IR::Inst* inst) -> std::optional { - const auto opcode = inst->GetOpcode(); - if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler) - opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only) - opcode == IR::Opcode::GetUserData) { - return inst; - } - return std::nullopt; - }; - const auto result = IR::BreadthFirstSearch(&inst, pred); - ASSERT_MSG(result, "Unable to find image sharp source"); - const IR::Inst* producer = result.value(); - const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2; - const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer; - - // Read image sharp. 
- const auto tsharp = TrackSharp(tsharp_handle, info); - const auto inst_info = inst.Flags(); - auto image = info.ReadUdSharp(tsharp); - if (!image.Valid()) { - LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); - image = AmdGpu::Image::Null(); - } - ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); - const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead; - const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite; - - // Patch image instruction if image is FMask. - if (image.IsFmask()) { - ASSERT_MSG(!is_written, "FMask storage instructions are not supported"); - - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - switch (inst.GetOpcode()) { - case IR::Opcode::ImageRead: - case IR::Opcode::ImageSampleRaw: { - IR::F32 fmaskx = ir.BitCast(ir.Imm32(0x76543210)); - IR::F32 fmasky = ir.BitCast(ir.Imm32(0xfedcba98)); - inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky)); - return; - } - case IR::Opcode::ImageQueryLod: - inst.ReplaceUsesWith(ir.Imm32(1)); - return; - case IR::Opcode::ImageQueryDimensions: { - IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast(image.width)), // x - ir.Imm32(static_cast(image.width)), // y - ir.Imm32(1), ir.Imm32(1)); // depth, mip - inst.ReplaceUsesWith(dims); - - // Track FMask resource to do specialization. - descriptors.Add(FMaskResource{ - .sharp_idx = tsharp, - }); - return; - } - default: - UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode()); - } - } - - u32 image_binding = descriptors.Add(ImageResource{ - .sharp_idx = tsharp, - .is_depth = bool(inst_info.is_depth), - .is_atomic = IsImageAtomicInstruction(inst), - .is_array = bool(inst_info.is_array), - .is_read = is_read, - .is_written = is_written, - }); - - // Sample instructions must be resolved into a new instruction using address register data. - if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { - PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image); - return; - } - - // Patch image handle - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.SetArg(0, ir.Imm32(image_binding)); - - // No need to patch coordinates if we are just querying. +void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { + // Nothing to patch for dimension query. if (inst.GetOpcode() == IR::Opcode::ImageQueryDimensions) { return; } + const auto handle = inst.Arg(0); + const auto image_res = info.images[handle.U32() & 0xFFFF]; + auto image = image_res.GetSharp(info); + + // Sample instructions must be handled separately using address register data. + if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { + PatchImageSampleArgs(block, inst, info, image); + return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto inst_info = inst.Flags(); + // Now that we know the image type, adjust texture coordinate vector. 
IR::Inst* body = inst.Arg(1).InstRecursive(); const auto [coords, arg] = [&] -> std::pair { @@ -719,152 +806,77 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip case AmdGpu::ImageType::Color3D: // x, y, z, [lod] return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; case AmdGpu::ImageType::Cube: // x, y, face, [lod] - return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_written, - inst_info.is_array), + return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), + inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array), body->Arg(3)}; default: UNREACHABLE_MSG("Unknown image type {}", image.GetType()); } }(); - inst.SetArg(1, coords); - if (inst_info.has_lod) { - ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead || - inst.GetOpcode() == IR::Opcode::ImageWrite); - ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa && - image.GetType() != AmdGpu::ImageType::Color2DMsaaArray); - inst.SetArg(2, arg); - } else if ((image.GetType() == AmdGpu::ImageType::Color2DMsaa || - image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) && - (inst.GetOpcode() == IR::Opcode::ImageRead || - inst.GetOpcode() == IR::Opcode::ImageWrite)) { - inst.SetArg(3, arg); - } -} + const auto has_ms = image.GetType() == AmdGpu::ImageType::Color2DMsaa || + image.GetType() == AmdGpu::ImageType::Color2DMsaaArray; + ASSERT(!inst_info.has_lod || !has_ms); + const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{}; + const auto ms = has_ms ? IR::U32{arg} : IR::U32{}; -void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { - const auto binding = inst.Arg(0).U32(); - const auto buffer_res = info.texture_buffers[binding]; - const auto buffer = buffer_res.GetSharp(info); - if (!buffer.Valid()) { - // Don't need to swizzle invalid buffer. - return; - } - - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { - inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect())); - } else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) { - const auto inst_info = inst.Flags(); - const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); - const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect()); - inst.ReplaceUsesWith(swizzled); - } -} - -void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { - const auto binding = inst.Arg(0).U32(); - const auto image_res = info.images[binding & 0xFFFF]; - const auto image = image_res.GetSharp(info); - if (!image.Valid() || !image_res.IsStorage(image)) { - // Don't need to swizzle invalid or non-storage image. - return; - } - - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - if (inst.GetOpcode() == IR::Opcode::ImageWrite) { - inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect())); - } else if (inst.GetOpcode() == IR::Opcode::ImageRead) { - const auto inst_info = inst.Flags(); - const auto lod = inst.Arg(2); - const auto ms = inst.Arg(3); - const auto texel = - ir.ImageRead(inst.Arg(0), inst.Arg(1), lod.IsEmpty() ? IR::U32{} : IR::U32{lod}, - ms.IsEmpty() ? IR::U32{} : IR::U32{ms}, inst_info); - const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect()); - inst.ReplaceUsesWith(swizzled); - } -} - -void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, - Descriptors& descriptors) { - // Insert gds binding in the shader if it doesn't exist already. 
- // The buffer is used for append/consume counters. - constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1}; - const u32 binding = descriptors.Add(BufferResource{ - .used_types = IR::Type::U32, - .inline_cbuf = GdsSharp, - .is_gds_buffer = true, - .is_written = true, - }); - - const auto pred = [](const IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetUserData) { - return inst; + const auto is_storage = image_res.IsStorage(image); + if (inst.GetOpcode() == IR::Opcode::ImageRead) { + auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info); + if (is_storage) { + // Storage image requires shader swizzle. + texel = ApplySwizzle(ir, texel, image.DstSelect()); } - return std::nullopt; - }; + const auto converted = + ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion()); + inst.ReplaceUsesWith(converted); + } else { + inst.SetArg(1, coords); + if (inst.GetOpcode() == IR::Opcode::ImageWrite) { + inst.SetArg(2, lod); + inst.SetArg(3, ms); - // Attempt to deduce the GDS address of counter at compile time. - const u32 gds_addr = [&] { - const IR::Value& gds_offset = inst.Arg(0); - if (gds_offset.IsImmediate()) { - // Nothing to do, offset is known. - return gds_offset.U32() & 0xFFFF; + auto texel = inst.Arg(4); + if (is_storage) { + // Storage image requires shader swizzle. + texel = ApplySwizzle(ir, texel, image.DstSelect()); + } + const auto converted = + ApplyWriteNumberConversionVec4(ir, texel, image.GetNumberConversion()); + inst.SetArg(4, converted); } - const auto result = IR::BreadthFirstSearch(&inst, pred); - ASSERT_MSG(result, "Unable to track M0 source"); - - // M0 must be set by some user data register. - const IR::Inst* prod = gds_offset.InstRecursive(); - const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg()); - u32 m0_val = info.user_data[ud_reg] >> 16; - if (prod->GetOpcode() == IR::Opcode::IAdd32) { - m0_val += prod->Arg(1).U32(); - } - return m0_val & 0xFFFF; - }(); - - // Patch instruction. - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.SetArg(0, ir.Imm32(gds_addr >> 2)); - inst.SetArg(1, ir.Imm32(binding)); + } } void ResourceTrackingPass(IR::Program& program) { // Iterate resource instructions and patch them after finding the sharp. auto& info = program.info; + // Pass 1: Track resource sharps Descriptors descriptors{info}; for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { if (IsBufferInstruction(inst)) { - PatchBufferInstruction(*block, inst, info, descriptors); - continue; - } - if (IsTextureBufferInstruction(inst)) { - PatchTextureBufferInstruction(*block, inst, info, descriptors); - continue; - } - if (IsImageInstruction(inst)) { - PatchImageInstruction(*block, inst, info, descriptors); - continue; - } - if (IsDataRingInstruction(inst)) { - PatchDataRingInstruction(*block, inst, info, descriptors); + PatchBufferSharp(*block, inst, info, descriptors); + } else if (IsTextureBufferInstruction(inst)) { + PatchTextureBufferSharp(*block, inst, info, descriptors); + } else if (IsImageInstruction(inst)) { + PatchImageSharp(*block, inst, info, descriptors); + } else if (IsDataRingInstruction(inst)) { + PatchDataRingAccess(*block, inst, info, descriptors); } } } - // Second pass to reinterpret format read/write where needed, since we now know - // the bindings and their properties. 
+ + // Pass 2: Patch instruction args for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { - if (IsTextureBufferInstruction(inst)) { - PatchTextureBufferInterpretation(*block, inst, info); - continue; - } - if (IsImageInstruction(inst)) { - PatchImageInterpretation(*block, inst, info); + if (IsBufferInstruction(inst)) { + PatchBufferArgs(*block, inst, info); + } else if (IsTextureBufferInstruction(inst)) { + PatchTextureBufferArgs(*block, inst, info); + } else if (IsImageInstruction(inst)) { + PatchImageArgs(*block, inst, info); } } } diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h index 73d587a5..b65b1992 100644 --- a/src/shader_recompiler/ir/reinterpret.h +++ b/src/shader_recompiler/ir/reinterpret.h @@ -4,7 +4,7 @@ #pragma once #include "shader_recompiler/ir/ir_emitter.h" -#include "video_core/amdgpu/resource.h" +#include "video_core/amdgpu/types.h" namespace Shader::IR { @@ -21,4 +21,66 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp return swizzled; } +/// Applies a number conversion in the read direction. +inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value, + const AmdGpu::NumberConversion& conversion) { + switch (conversion) { + case AmdGpu::NumberConversion::None: + return value; + case AmdGpu::NumberConversion::UintToUscaled: + return ir.ConvertUToF(32, 32, ir.BitCast(value)); + case AmdGpu::NumberConversion::SintToSscaled: + return ir.ConvertSToF(32, 32, ir.BitCast(value)); + case AmdGpu::NumberConversion::UnormToUbnorm: + // Convert 0...1 to -1...1 + return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f)); + default: + UNREACHABLE(); + } +} + +inline Value ApplyReadNumberConversionVec4(IREmitter& ir, const Value& value, + const AmdGpu::NumberConversion& conversion) { + if (conversion == AmdGpu::NumberConversion::None) { + return value; + } + const auto x = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 0)}, conversion); + const auto y = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 1)}, conversion); + const auto z = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 2)}, conversion); + const auto w = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 3)}, conversion); + return ir.CompositeConstruct(x, y, z, w); +} + +/// Applies a number conversion in the write direction. +inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value, + const AmdGpu::NumberConversion& conversion) { + switch (conversion) { + case AmdGpu::NumberConversion::None: + return value; + case AmdGpu::NumberConversion::UintToUscaled: + // Need to return float type to maintain IR semantics. + return ir.BitCast(U32{ir.ConvertFToU(32, value)}); + case AmdGpu::NumberConversion::SintToSscaled: + // Need to return float type to maintain IR semantics. 
+ return ir.BitCast(U32{ir.ConvertFToS(32, value)}); + case AmdGpu::NumberConversion::UnormToUbnorm: + // Convert -1...1 to 0...1 + return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f)); + default: + UNREACHABLE(); + } +} + +inline Value ApplyWriteNumberConversionVec4(IREmitter& ir, const Value& value, + const AmdGpu::NumberConversion& conversion) { + if (conversion == AmdGpu::NumberConversion::None) { + return value; + } + const auto x = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 0)}, conversion); + const auto y = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 1)}, conversion); + const auto z = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 2)}, conversion); + const auto w = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 3)}, conversion); + return ir.CompositeConstruct(x, y, z, w); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 781a0b14..cf49b087 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -180,6 +180,7 @@ struct FragmentRuntimeInfo { std::array inputs; struct PsColorBuffer { AmdGpu::NumberFormat num_format; + AmdGpu::NumberConversion num_conversion; AmdGpu::CompMapping swizzle; auto operator<=>(const PsColorBuffer&) const noexcept = default; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index f8a86c63..f58d2e2d 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -32,6 +32,7 @@ struct BufferSpecialization { struct TextureBufferSpecialization { bool is_integer = false; AmdGpu::CompMapping dst_select{}; + AmdGpu::NumberConversion num_conversion{}; auto operator<=>(const TextureBufferSpecialization&) const = default; }; @@ -41,6 +42,7 @@ struct ImageSpecialization { bool is_integer = false; bool is_storage = false; AmdGpu::CompMapping dst_select{}; + AmdGpu::NumberConversion num_conversion{}; auto operator<=>(const ImageSpecialization&) const = default; }; @@ -107,6 +109,7 @@ struct StageSpecialization { [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); spec.dst_select = sharp.DstSelect(); + spec.num_conversion = sharp.GetNumberConversion(); }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { @@ -116,6 +119,7 @@ struct StageSpecialization { if (spec.is_storage) { spec.dst_select = sharp.DstSelect(); } + spec.num_conversion = sharp.GetNumberConversion(); }); ForEachSharp(binding, fmasks, info->fmasks, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 0f178305..837b73d8 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -20,9 +20,9 @@ #include "common/types.h" #include "common/unique_function.h" #include "shader_recompiler/params.h" -#include "types.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" +#include "video_core/amdgpu/types.h" namespace Vulkan { class Rasterizer; @@ -902,6 +902,10 @@ struct Liverpool { : info.number_type.Value()); } + [[nodiscard]] NumberConversion GetNumberConversion() const { + return MapNumberConversion(info.number_type); + } + [[nodiscard]] CompMapping Swizzle() const { // clang-format off static constexpr std::array, 4> mrt_swizzles{{ @@ -938,7 +942,7 @@ struct Liverpool { const auto 
swap_idx = static_cast(info.comp_swap.Value()); const auto components_idx = NumComponents(info.format) - 1; const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; - return RemapComponents(info.format, mrt_swizzle); + return RemapSwizzle(info.format, mrt_swizzle); } }; diff --git a/src/video_core/amdgpu/pixel_format.cpp b/src/video_core/amdgpu/pixel_format.cpp index b13fc2d1..881c33e4 100644 --- a/src/video_core/amdgpu/pixel_format.cpp +++ b/src/video_core/amdgpu/pixel_format.cpp @@ -100,7 +100,7 @@ std::string_view NameOf(NumberFormat fmt) { return "Srgb"; case NumberFormat::Ubnorm: return "Ubnorm"; - case NumberFormat::UbnromNz: + case NumberFormat::UbnormNz: return "UbnormNz"; case NumberFormat::Ubint: return "Ubint"; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 1d967385..ffee7964 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -11,96 +11,6 @@ namespace AmdGpu { -enum class CompSwizzle : u32 { - Zero = 0, - One = 1, - Red = 4, - Green = 5, - Blue = 6, - Alpha = 7, -}; - -struct CompMapping { - CompSwizzle r : 3; - CompSwizzle g : 3; - CompSwizzle b : 3; - CompSwizzle a : 3; - - auto operator<=>(const CompMapping& other) const = default; - - template - [[nodiscard]] std::array Apply(const std::array& data) const { - return { - ApplySingle(data, r), - ApplySingle(data, g), - ApplySingle(data, b), - ApplySingle(data, a), - }; - } - -private: - template - T ApplySingle(const std::array& data, const CompSwizzle swizzle) const { - switch (swizzle) { - case CompSwizzle::Zero: - return T(0); - case CompSwizzle::One: - return T(1); - case CompSwizzle::Red: - return data[0]; - case CompSwizzle::Green: - return data[1]; - case CompSwizzle::Blue: - return data[2]; - case CompSwizzle::Alpha: - return data[3]; - default: - UNREACHABLE(); - } - } -}; - -inline DataFormat RemapDataFormat(const DataFormat format) { - switch (format) { - case DataFormat::Format11_11_10: - return DataFormat::Format10_11_11; - case DataFormat::Format10_10_10_2: - return DataFormat::Format2_10_10_10; - case DataFormat::Format5_5_5_1: - return DataFormat::Format1_5_5_5; - default: - return format; - } -} - -inline NumberFormat RemapNumberFormat(const NumberFormat format) { - return format; -} - -inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) { - switch (format) { - case DataFormat::Format11_11_10: { - CompMapping result; - result.r = components.b; - result.g = components.g; - result.b = components.r; - result.a = components.a; - return result; - } - case DataFormat::Format10_10_10_2: - case DataFormat::Format5_5_5_1: { - CompMapping result; - result.r = components.a; - result.g = components.b; - result.b = components.g; - result.a = components.r; - return result; - } - default: - return components; - } -} - // Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture] struct Buffer { u64 base_address : 44; @@ -140,7 +50,7 @@ struct Buffer { .b = CompSwizzle(dst_sel_z), .a = CompSwizzle(dst_sel_w), }; - return RemapComponents(DataFormat(data_format), dst_sel); + return RemapSwizzle(DataFormat(data_format), dst_sel); } NumberFormat GetNumberFmt() const noexcept { @@ -151,6 +61,10 @@ struct Buffer { return RemapDataFormat(DataFormat(data_format)); } + NumberConversion GetNumberConversion() const noexcept { + return MapNumberConversion(NumberFormat(num_format)); + } + u32 GetStride() const noexcept { return stride; } @@ -305,7 +219,7 @@ struct Image { .b = 
CompSwizzle(dst_sel_z), .a = CompSwizzle(dst_sel_w), }; - return RemapComponents(DataFormat(data_format), dst_sel); + return RemapSwizzle(DataFormat(data_format), dst_sel); } u32 Pitch() const { @@ -354,6 +268,10 @@ struct Image { return RemapNumberFormat(NumberFormat(num_format)); } + NumberConversion GetNumberConversion() const noexcept { + return MapNumberConversion(NumberFormat(num_format)); + } + TilingMode GetTilingMode() const { if (tiling_index >= 0 && tiling_index <= 7) { return tiling_index == 5 ? TilingMode::Texture_MicroTiled diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index fa849166..a19e5325 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -5,6 +5,7 @@ #include #include +#include "common/assert.h" #include "common/types.h" namespace AmdGpu { @@ -177,11 +178,130 @@ enum class NumberFormat : u32 { Float = 7, Srgb = 9, Ubnorm = 10, - UbnromNz = 11, + UbnormNz = 11, Ubint = 12, Ubscaled = 13, }; +enum class CompSwizzle : u32 { + Zero = 0, + One = 1, + Red = 4, + Green = 5, + Blue = 6, + Alpha = 7, +}; + +enum class NumberConversion : u32 { + None, + UintToUscaled, + SintToSscaled, + UnormToUbnorm, +}; + +struct CompMapping { + CompSwizzle r : 3; + CompSwizzle g : 3; + CompSwizzle b : 3; + CompSwizzle a : 3; + + auto operator<=>(const CompMapping& other) const = default; + + template + [[nodiscard]] std::array Apply(const std::array& data) const { + return { + ApplySingle(data, r), + ApplySingle(data, g), + ApplySingle(data, b), + ApplySingle(data, a), + }; + } + +private: + template + T ApplySingle(const std::array& data, const CompSwizzle swizzle) const { + switch (swizzle) { + case CompSwizzle::Zero: + return T(0); + case CompSwizzle::One: + return T(1); + case CompSwizzle::Red: + return data[0]; + case CompSwizzle::Green: + return data[1]; + case CompSwizzle::Blue: + return data[2]; + case CompSwizzle::Alpha: + return data[3]; + default: + UNREACHABLE(); + } + } +}; + +inline DataFormat RemapDataFormat(const DataFormat format) { + switch (format) { + case DataFormat::Format11_11_10: + return DataFormat::Format10_11_11; + case DataFormat::Format10_10_10_2: + return DataFormat::Format2_10_10_10; + case DataFormat::Format5_5_5_1: + return DataFormat::Format1_5_5_5; + default: + return format; + } +} + +inline NumberFormat RemapNumberFormat(const NumberFormat format) { + switch (format) { + case NumberFormat::Uscaled: + return NumberFormat::Uint; + case NumberFormat::Sscaled: + return NumberFormat::Sint; + case NumberFormat::Ubnorm: + return NumberFormat::Unorm; + default: + return format; + } +} + +inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) { + switch (format) { + case DataFormat::Format11_11_10: { + CompMapping result; + result.r = swizzle.b; + result.g = swizzle.g; + result.b = swizzle.r; + result.a = swizzle.a; + return result; + } + case DataFormat::Format10_10_10_2: + case DataFormat::Format5_5_5_1: { + CompMapping result; + result.r = swizzle.a; + result.g = swizzle.b; + result.b = swizzle.g; + result.a = swizzle.r; + return result; + } + default: + return swizzle; + } +} + +inline NumberConversion MapNumberConversion(const NumberFormat format) { + switch (format) { + case NumberFormat::Uscaled: + return NumberConversion::UintToUscaled; + case NumberFormat::Sscaled: + return NumberConversion::SintToSscaled; + case NumberFormat::Ubnorm: + return NumberConversion::UnormToUbnorm; + default: + return NumberConversion::None; + } +} + } // namespace AmdGpu template <> diff 
--git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 690d26cf..9695e127 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -447,7 +447,7 @@ static constexpr vk::FormatFeatureFlags2 GetNumberFormatFeatureFlags( case AmdGpu::NumberFormat::Srgb: return ImageRead | Mrt; case AmdGpu::NumberFormat::Ubnorm: - case AmdGpu::NumberFormat::UbnromNz: + case AmdGpu::NumberFormat::UbnormNz: case AmdGpu::NumberFormat::Ubint: case AmdGpu::NumberFormat::Ubscaled: return ImageRead; @@ -468,6 +468,7 @@ static constexpr SurfaceFormatInfo CreateSurfaceFormatInfo(const AmdGpu::DataFor } std::span SurfaceFormats() { + // Uscaled, Sscaled, and Ubnorm formats are automatically remapped and handled in shader. static constexpr std::array formats{ // Invalid CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Unorm, @@ -490,7 +491,7 @@ std::span SurfaceFormats() { vk::Format::eUndefined), CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubnorm, vk::Format::eUndefined), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnromNz, + CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnormNz, vk::Format::eUndefined), CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubint, vk::Format::eUndefined), @@ -501,10 +502,6 @@ std::span SurfaceFormats() { vk::Format::eR8Unorm), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Snorm, vk::Format::eR8Snorm), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uscaled, - vk::Format::eR8Uscaled), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sscaled, - vk::Format::eR8Sscaled), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uint, vk::Format::eR8Uint), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sint, @@ -516,10 +513,6 @@ std::span SurfaceFormats() { vk::Format::eR16Unorm), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Snorm, vk::Format::eR16Snorm), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uscaled, - vk::Format::eR16Uscaled), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sscaled, - vk::Format::eR16Sscaled), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uint, vk::Format::eR16Uint), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sint, @@ -531,10 +524,6 @@ std::span SurfaceFormats() { vk::Format::eR8G8Unorm), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Snorm, vk::Format::eR8G8Snorm), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uscaled, - vk::Format::eR8G8Uscaled), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sscaled, - vk::Format::eR8G8Sscaled), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uint, vk::Format::eR8G8Uint), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sint, @@ -553,10 +542,6 @@ std::span SurfaceFormats() { vk::Format::eR16G16Unorm), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Snorm, vk::Format::eR16G16Snorm), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uscaled, - 
vk::Format::eR16G16Uscaled), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sscaled, - vk::Format::eR16G16Sscaled), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uint, vk::Format::eR16G16Uint), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sint, @@ -573,10 +558,6 @@ std::span SurfaceFormats() { vk::Format::eA2B10G10R10UnormPack32), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Snorm, vk::Format::eA2B10G10R10SnormPack32), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uscaled, - vk::Format::eA2B10G10R10UscaledPack32), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sscaled, - vk::Format::eA2B10G10R10SscaledPack32), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uint, vk::Format::eA2B10G10R10UintPack32), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sint, @@ -586,10 +567,6 @@ std::span SurfaceFormats() { vk::Format::eR8G8B8A8Unorm), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Snorm, vk::Format::eR8G8B8A8Snorm), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uscaled, - vk::Format::eR8G8B8A8Uscaled), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sscaled, - vk::Format::eR8G8B8A8Sscaled), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uint, vk::Format::eR8G8B8A8Uint), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sint, @@ -608,10 +585,6 @@ std::span SurfaceFormats() { vk::Format::eR16G16B16A16Unorm), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm, vk::Format::eR16G16B16A16Snorm), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, - AmdGpu::NumberFormat::Uscaled, vk::Format::eR16G16B16A16Uscaled), - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, - AmdGpu::NumberFormat::Sscaled, vk::Format::eR16G16B16A16Sscaled), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Uint, vk::Format::eR16G16B16A16Uint), CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Sint, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index c8f4999b..fa10831a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -32,6 +32,7 @@ struct GraphicsPipelineKey { u32 num_color_attachments; std::array color_formats; std::array color_num_formats; + std::array color_num_conversions; std::array color_swizzles; vk::Format depth_format; vk::Format stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ba069dae..9cfc7c27 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -168,6 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS for (u32 i = 0; i < Shader::MaxColorBuffers; i++) { info.fs_info.color_buffers[i] = { .num_format = graphics_key.color_num_formats[i], + .num_conversion = graphics_key.color_num_conversions[i], .swizzle = graphics_key.color_swizzles[i], }; } @@ -302,6 +303,7 @@ bool 
PipelineCache::RefreshGraphicsKey() { key.num_color_attachments = 0; key.color_formats.fill(vk::Format::eUndefined); key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm); + key.color_num_conversions.fill(AmdGpu::NumberConversion::None); key.blend_controls.fill({}); key.write_masks.fill({}); key.color_swizzles.fill({}); @@ -330,6 +332,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.color_formats[remapped_cb] = LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt()); key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt(); + key.color_num_conversions[remapped_cb] = col_buf.GetNumberConversion(); key.color_swizzles[remapped_cb] = col_buf.Swizzle(); }
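
To make the numeric effect of the remapping concrete, below is a minimal, standalone host-side sketch (an illustration written for this note, not code from the repository). It models a single 8-bit channel and checks that reading through the remapped Vulkan format (Uint/Sint/Unorm) and then applying the corresponding read-direction NumberConversion (UintToUscaled, SintToSscaled, UnormToUbnorm) reproduces what a native Uscaled/Sscaled/Ubnorm fetch would have returned. All helper names here are local to the sketch; they only mirror the idea of RemapNumberFormat / MapNumberConversion from the patch.

#include <cstdint>
#include <cstdio>
#include <initializer_list>

enum class NumberFormat { Unorm, Uint, Sint, Uscaled, Sscaled, Ubnorm };

// Reference: what native hardware support for the original format would return
// for one 8-bit channel.
static float NativeDecode8(uint8_t raw, NumberFormat fmt) {
    switch (fmt) {
    case NumberFormat::Uscaled:
        return static_cast<float>(raw);                        // 0..255
    case NumberFormat::Sscaled:
        return static_cast<float>(static_cast<int8_t>(raw));   // -128..127
    case NumberFormat::Ubnorm:
        return raw / 255.0f * 2.0f - 1.0f;                     // -1..1
    default:
        return raw / 255.0f;                                   // plain Unorm
    }
}

// Step 1 of the patch's scheme: the format actually bound in Vulkan
// (mirrors the idea of RemapNumberFormat).
static NumberFormat Remap(NumberFormat fmt) {
    switch (fmt) {
    case NumberFormat::Uscaled: return NumberFormat::Uint;
    case NumberFormat::Sscaled: return NumberFormat::Sint;
    case NumberFormat::Ubnorm:  return NumberFormat::Unorm;
    default:                    return fmt;
    }
}

// Step 2: read through the remapped format, then apply the read-direction
// conversion the recompiler would emit (int-to-float cast, or 2x - 1).
static float RemappedRead8(uint8_t raw, NumberFormat original) {
    switch (Remap(original)) {
    case NumberFormat::Uint:
        return static_cast<float>(raw);                        // UintToUscaled
    case NumberFormat::Sint:
        return static_cast<float>(static_cast<int8_t>(raw));   // SintToSscaled
    case NumberFormat::Unorm: {
        const float unorm = raw / 255.0f;                      // driver decodes Unorm
        return original == NumberFormat::Ubnorm ? unorm * 2.0f - 1.0f // UnormToUbnorm
                                                : unorm;
    }
    default:
        return raw / 255.0f;
    }
}

int main() {
    const uint8_t sample = 0x90; // arbitrary channel value
    for (NumberFormat fmt : {NumberFormat::Uscaled, NumberFormat::Sscaled, NumberFormat::Ubnorm}) {
        std::printf("format %d: native=%f remapped+converted=%f\n",
                    static_cast<int>(fmt), NativeDecode8(sample, fmt), RemappedRead8(sample, fmt));
    }
    return 0;
}

The write direction is the mirror image of this: the shader converts the float back with a float-to-int conversion (bit-cast back into a float-typed value to keep IR semantics consistent) or maps -1..1 back to 0..1 before the store, as the ApplyWriteNumberConversion helpers in the patch do.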