From 7d4f0da40ef3ed6a2cb3084a419f1e201afa02f3 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 1 Jul 2024 18:20:19 +0300 Subject: [PATCH 1/5] video_core: Fix some regressions --- src/core/address_space.cpp | 4 ++-- src/shader_recompiler/frontend/translate/translate.cpp | 4 ++-- .../ir/passes/resource_tracking_pass.cpp | 5 +++-- src/video_core/amdgpu/liverpool.h | 4 ++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 9 +++++---- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 289556165..7749ec2dd 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -221,8 +221,8 @@ struct AddressSpace::Impl { void* hint_address = reinterpret_cast(SYSTEM_MANAGED_MIN); virtual_size = SystemSize + UserSize; virtual_base = reinterpret_cast( - mmap(reinterpret_cast(hint_address), virtual_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0)); + mmap(hint_address, virtual_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, -1, 0)); if (virtual_base == MAP_FAILED) { LOG_CRITICAL(Kernel_Vmm, "mmap failed: {}", strerror(errno)); throw std::bad_alloc{}; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 236c97b54..27d4691d2 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -197,8 +197,7 @@ void Translator::EmitFetch(const GcnInst& inst) { // Read the V# of the attribute to figure out component number and type. const auto buffer = info.ReadUd(attrib.sgpr_base, attrib.dword_offset); - const u32 num_components = AmdGpu::NumComponents(buffer.data_format); - for (u32 i = 0; i < num_components; i++) { + for (u32 i = 0; i < 4; i++) { const IR::F32 comp = [&] { switch (buffer.GetSwizzle(i)) { case AmdGpu::CompSwizzle::One: @@ -225,6 +224,7 @@ void Translator::EmitFetch(const GcnInst& inst) { attrib.instance_data); } + const u32 num_components = AmdGpu::NumComponents(buffer.data_format); info.vs_inputs.push_back({ .fmt = buffer.num_format, .binding = attrib.semantic, diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 21f168da4..7f91a63c3 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -348,8 +348,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } if (inst_info.explicit_lod) { ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch || - inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod); - const u32 pos = inst.GetOpcode() == IR::Opcode::ImageFetch ? 3 : 2; + inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod || + inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod); + const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3; inst.SetArg(pos, arg); } } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 5261857ab..2233fa0cb 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -683,8 +683,8 @@ struct Liverpool { BitField<0, 5, TilingMode> tile_mode_index; BitField<5, 5, u32> fmask_tile_mode_index; BitField<12, 3, u32> num_samples_log2; - BitField<15, 3, u32> num_fragments_log2; - BitField<18, 1, u32> force_dst_alpha_1; + BitField<15, 2, u32> num_fragments_log2; + BitField<17, 1, u32> force_dst_alpha_1; } attrib; INSERT_PADDING_WORDS(1); u32 cmask_base_address; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 826b64524..cabec162e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -93,8 +93,9 @@ void Rasterizer::BeginRendering() { const auto& hint = liverpool->last_cb_extent[col_buf_id]; const auto& image_view = texture_cache.RenderTarget(col_buf, hint); - state.width = std::min(state.width, hint.width); - state.height = std::min(state.height, hint.height); + const auto& image = texture_cache.GetImage(image_view.image_id); + state.width = std::min(state.width, image.info.size.width); + state.height = std::min(state.height, image.info.size.height); const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress()); state.color_attachments[state.num_color_attachments++] = { @@ -117,8 +118,8 @@ void Rasterizer::BeginRendering() { const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, htile_address, hint, regs.depth_control.depth_write_enable); const auto& image = texture_cache.GetImage(image_view.image_id); - state.width = std::min(state.width, hint.width); - state.height = std::min(state.height, hint.height); + state.width = std::min(state.width, image.info.size.width); + state.height = std::min(state.height, image.info.size.height); state.depth_attachment = { .imageView = *image_view.image_view, .imageLayout = image.layout, From 2fe897eedaa2f85555cbe2cc74e8a7c241e1e618 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 1 Jul 2024 20:25:43 +0300 Subject: [PATCH 2/5] host_shaders: Fix R8G8 detiler --- src/video_core/host_shaders/detile_m8x2.comp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/host_shaders/detile_m8x2.comp b/src/video_core/host_shaders/detile_m8x2.comp index d93f9a7f3..1cebc12b3 100644 --- a/src/video_core/host_shaders/detile_m8x2.comp +++ b/src/video_core/host_shaders/detile_m8x2.comp @@ -56,6 +56,6 @@ void main() { for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) { uint p0 = (p[ofs] >> 8) & 0xff; uint p1 = p[ofs] & 0xff; - imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p0, p1, 0, 0)); + imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p1, p0, 0, 0)); } } From a603bc7d88e663d8ffd140a5916cd0612001c42c Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 1 Jul 2024 22:42:45 +0300 Subject: [PATCH 3/5] shader_recompiler: More instructions --- .../libraries/kernel/thread_management.cpp | 1 + .../backend/spirv/emit_spirv_image.cpp | 9 ++++-- .../backend/spirv/emit_spirv_instructions.h | 2 +- src/shader_recompiler/frontend/format.cpp | 16 +++++----- .../frontend/translate/translate.cpp | 31 +++++++++++++++++-- .../frontend/translate/translate.h | 8 +++-- .../frontend/translate/vector_alu.cpp | 24 +++++++++++--- .../frontend/translate/vector_memory.cpp | 15 +++++++++ src/shader_recompiler/ir/ir_emitter.cpp | 4 ++- src/shader_recompiler/ir/ir_emitter.h | 2 +- src/shader_recompiler/ir/opcodes.inc | 1 + src/shader_recompiler/ir/value.h | 1 + 12 files changed, 93 insertions(+), 21 deletions(-) diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index c5db12a71..2b526eed9 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -1357,6 +1357,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock); LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock); LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy); + LIB_FUNCTION("Op8TBGY5KHg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_cond_wait); LIB_FUNCTION("Op8TBGY5KHg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_wait); LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast); LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 3e4cf0191..7a54f31c2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -113,8 +113,13 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod } } -Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { - UNREACHABLE_MSG("SPIR-V Instruction"); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords) { + const auto& texture = ctx.images[handle & 0xFFFF]; + const Id image = ctx.OpLoad(texture.image_type, texture.id); + const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); + const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); + const Id zero{ctx.f32_zero_value}; + return ctx.OpImageQueryLod(ctx.F32[2], sampled_image, coords); } Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 316c17cb2..246d7c441 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -349,7 +349,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); -Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivatives, const IR::Value& offset, Id lod_clamp); Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp index 379ed85fd..46a40a6e0 100644 --- a/src/shader_recompiler/frontend/format.cpp +++ b/src/shader_recompiler/frontend/format.cpp @@ -1826,17 +1826,17 @@ constexpr std::array InstructionFormatVOP1 = {{ {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, ScalarType::Float64}, // 17 = V_CVT_F32_UBYTE0 - {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Float32}, // 18 = V_CVT_F32_UBYTE1 - {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Float32}, // 19 = V_CVT_F32_UBYTE2 - {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Float32}, // 20 = V_CVT_F32_UBYTE3 - {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Float32}, // 21 = V_CVT_U32_F64 {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Uint32}, // 22 = V_CVT_F64_U32 diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 27d4691d2..407ee3994 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -268,7 +268,10 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) translator.V_AND_B32(inst); break; case Opcode::V_OR_B32: - translator.V_OR_B32(inst); + translator.V_OR_B32(false, inst); + break; + case Opcode::V_XOR_B32: + translator.V_OR_B32(true, inst); break; case Opcode::V_LSHLREV_B32: translator.V_LSHLREV_B32(inst); @@ -324,6 +327,24 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_CVT_PKRTZ_F16_F32: translator.V_CVT_PKRTZ_F16_F32(inst); break; + case Opcode::V_CVT_F32_F16: + translator.V_CVT_F32_F16(inst); + break; + case Opcode::V_CVT_F32_UBYTE0: + translator.V_CVT_F32_UBYTE(0, inst); + break; + case Opcode::V_CVT_F32_UBYTE1: + translator.V_CVT_F32_UBYTE(1, inst); + break; + case Opcode::V_CVT_F32_UBYTE2: + translator.V_CVT_F32_UBYTE(2, inst); + break; + case Opcode::V_CVT_F32_UBYTE3: + translator.V_CVT_F32_UBYTE(3, inst); + break; + case Opcode::V_BFREV_B32: + translator.V_BFREV_B32(inst); + break; case Opcode::V_FRACT_F32: translator.V_FRACT_F32(inst); break; @@ -355,6 +376,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::IMAGE_SAMPLE_L: translator.IMAGE_SAMPLE(inst); break; + case Opcode::IMAGE_GET_LOD: + translator.IMAGE_GET_LOD(inst); + break; case Opcode::IMAGE_GATHER4_C: translator.IMAGE_GATHER(inst); break; @@ -682,7 +706,10 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) translator.V_SAD_U32(inst); break; case Opcode::V_BFE_U32: - translator.V_BFE_U32(inst); + translator.V_BFE_U32(false, inst); + break; + case Opcode::V_BFE_I32: + translator.V_BFE_U32(true, inst); break; case Opcode::V_MAD_I32_I24: translator.V_MAD_I32_I24(inst); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index ef5ff8b7d..1145de59a 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -71,9 +71,10 @@ public: void V_SAD(const GcnInst& inst); void V_MAC_F32(const GcnInst& inst); void V_CVT_PKRTZ_F16_F32(const GcnInst& inst); + void V_CVT_F32_F16(const GcnInst& inst); void V_MUL_F32(const GcnInst& inst); void V_CNDMASK_B32(const GcnInst& inst); - void V_OR_B32(const GcnInst& inst); + void V_OR_B32(bool is_xor, const GcnInst& inst); void V_AND_B32(const GcnInst& inst); void V_LSHLREV_B32(const GcnInst& inst); void V_ADD_I32(const GcnInst& inst); @@ -110,7 +111,7 @@ public: void V_LSHRREV_B32(const GcnInst& inst); void V_MUL_HI_U32(bool is_signed, const GcnInst& inst); void V_SAD_U32(const GcnInst& inst); - void V_BFE_U32(const GcnInst& inst); + void V_BFE_U32(bool is_signed, const GcnInst& inst); void V_MAD_I32_I24(const GcnInst& inst); void V_MUL_I32_I24(const GcnInst& inst); void V_SUB_I32(const GcnInst& inst); @@ -130,6 +131,8 @@ public: void V_CMP_NE_U64(const GcnInst& inst); void V_BFI_B32(const GcnInst& inst); void V_NOT_B32(const GcnInst& inst); + void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst); + void V_BFREV_B32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); @@ -149,6 +152,7 @@ public: void IMAGE_GATHER(const GcnInst& inst); void IMAGE_STORE(const GcnInst& inst); void IMAGE_LOAD(bool has_mip, const GcnInst& inst); + void IMAGE_GET_LOD(const GcnInst& inst); // Export void EXP(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 1dbb9062b..72b2d76a8 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -26,6 +26,11 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); } +void Translator::V_CVT_F32_F16(const GcnInst& inst) { + const IR::U32 src0 = GetSrc(inst.src[0]); + SetDst(inst.dst[0], ir.ConvertUToF(32, 16, src0)); +} + void Translator::V_MUL_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true))); } @@ -54,11 +59,11 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, IR::U32F32{result}); } -void Translator::V_OR_B32(const GcnInst& inst) { +void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.BitwiseOr(src0, src1)); + ir.SetVectorReg(dst_reg, is_xor ? ir.BitwiseXor(src0, src1) : ir.BitwiseOr(src0, src1)); } void Translator::V_AND_B32(const GcnInst& inst) { @@ -345,11 +350,11 @@ void Translator::V_SAD_U32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2)); } -void Translator::V_BFE_U32(const GcnInst& inst) { +void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))}; const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))}; - SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2)); + SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed)); } void Translator::V_MAD_I32_I24(const GcnInst& inst) { @@ -486,4 +491,15 @@ void Translator::V_NOT_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.BitwiseNot(src0)); } +void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 byte = ir.BitFieldExtract(src0, ir.Imm32(8 * index), ir.Imm32(8)); + SetDst(inst.dst[0], ir.ConvertUToF(32, 32, byte)); +} + +void Translator::V_BFREV_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], ir.BitReverse(src0)); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 71ca7c2ed..f12b4e2f2 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -307,4 +307,19 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info); } +void Translator::IMAGE_GET_LOD(const GcnInst& inst) { + const auto& mimg = inst.control.mimg; + IR::VectorReg dst_reg{inst.dst[0].code}; + IR::VectorReg addr_reg{inst.src[0].code}; + const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; + + const IR::Value handle = ir.GetScalarReg(tsharp_reg); + const IR::Value body = ir.CompositeConstruct( + ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1), + ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); + const IR::Value lod = ir.ImageQueryLod(handle, body, {}); + ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 0)}); + ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)}); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index aa95f239d..09bb3580f 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1119,6 +1119,8 @@ F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Val switch (dest_bitsize) { case 32: switch (src_bitsize) { + case 16: + return Inst(Opcode::ConvertF32U16, value); case 32: return Inst(Opcode::ConvertF32U32, value); } @@ -1139,7 +1141,7 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s : ConvertUToF(dest_bitsize, src_bitsize, value); } -U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { +U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) { throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 917de458d..cf74afc04 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -196,7 +196,7 @@ public: [[nodiscard]] F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, const Value& value); - [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); + [[nodiscard]] U16U32U64 UConvert(size_t result_bitsize, const U16U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 18c0ce0bf..a9b895d24 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -257,6 +257,7 @@ OPCODE(ConvertF32S32, F32, U32, OPCODE(ConvertF32U32, F32, U32, ) OPCODE(ConvertF64S32, F64, U32, ) OPCODE(ConvertF64U32, F64, U32, ) +OPCODE(ConvertF32U16, F32, U16, ) // Image operations OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index 8c97f4950..a43c17f5b 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -221,6 +221,7 @@ using F32 = TypedValue; using F64 = TypedValue; using U32F32 = TypedValue; using U32U64 = TypedValue; +using U16U32U64 = TypedValue; using F32F64 = TypedValue; using F16F32F64 = TypedValue; using UAny = TypedValue; From fe5bfa9d61ea25fc4b6f55255cf309f6ec6dbe46 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 1 Jul 2024 22:52:07 +0300 Subject: [PATCH 4/5] texture_cache: Always validate for now --- src/video_core/texture_cache/texture_cache.cpp | 2 +- src/video_core/texture_cache/tile_manager.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 752d01118..02811735c 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -140,7 +140,7 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r RegisterMeta(info, image_id); Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) { + if (True(image.flags & ImageFlagBits::CpuModified)) { RefreshImage(image); TrackImage(image, image_id); } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 3a431231d..e9818d75a 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -194,6 +194,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { [[fallthrough]]; case vk::Format::eBc3UnormBlock: case vk::Format::eBc7SrgbBlock: + case vk::Format::eBc7UnormBlock: return vk::Format::eR32G32B32A32Uint; default: break; From afba6dbd6671e57dbf2b2807d5f39747dcc8c0e1 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Mon, 1 Jul 2024 23:48:30 +0300 Subject: [PATCH 5/5] clang format fix --- src/shader_recompiler/frontend/format.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp index 46a40a6e0..91417d5b3 100644 --- a/src/shader_recompiler/frontend/format.cpp +++ b/src/shader_recompiler/frontend/format.cpp @@ -1826,17 +1826,13 @@ constexpr std::array InstructionFormatVOP1 = {{ {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, ScalarType::Float64}, // 17 = V_CVT_F32_UBYTE0 - {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, - ScalarType::Float32}, + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float32}, // 18 = V_CVT_F32_UBYTE1 - {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, - ScalarType::Float32}, + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float32}, // 19 = V_CVT_F32_UBYTE2 - {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, - ScalarType::Float32}, + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float32}, // 20 = V_CVT_F32_UBYTE3 - {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, - ScalarType::Float32}, + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float32}, // 21 = V_CVT_U32_F64 {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Uint32}, // 22 = V_CVT_F64_U32