From 9e618c0e0c14d9fd7daf040509d71392356054be Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Fri, 6 Dec 2024 19:54:59 +0200 Subject: [PATCH] video_core: Add multiplier to handle special cases of texture buffer stride mismatch (#1640) * page_manager: Enable userfaultfd by default * Much faster than page faults and causes fewer problems * shader_recompiler: Add texel buffer multiplier * Fixes format mismatch assert when vsharp stride is a multiple of format stride * shader_recompiler: Specialize UBOs on size * Some games can perform manual vertex pulling and thus bind read-only buffers of varying size. We only recompile when the vsharp size is larger than the size in the shader; in the opposite case it's not needed * clang format --- CMakeLists.txt | 4 ++++ .../backend/spirv/emit_spirv_context_get_set.cpp | 8 ++++++-- .../backend/spirv/spirv_emit_context.cpp | 2 ++ .../backend/spirv/spirv_emit_context.h | 1 + src/shader_recompiler/info.h | 5 +++++ src/shader_recompiler/specialization.h | 10 ++++++++-- src/video_core/page_manager.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++-- 8 files changed, 30 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 378b8f78..ae6d1d74 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -875,6 +875,10 @@ target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAlloca target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") +if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + target_compile_definitions(shadps4 PRIVATE ENABLE_USERFAULTFD) +endif() + if (APPLE) option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." 
OFF) if (USE_SYSTEM_VULKAN_LOADER) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d8c0a17b..b578f0c5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -326,7 +326,9 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { const auto& buffer = ctx.texture_buffers[handle]; const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); - const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); + const Id coord = + ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift), + buffer.coord_offset); Id texel = buffer.is_storage ? ctx.OpImageRead(buffer.result_type, tex_buffer, coord) : ctx.OpImageFetch(buffer.result_type, tex_buffer, coord); if (buffer.is_integer) { @@ -372,7 +374,9 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { const auto& buffer = ctx.texture_buffers[handle]; const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); - const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); + const Id coord = + ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift), + buffer.coord_offset); if (buffer.is_integer) { value = ctx.OpBitcast(buffer.result_type, value); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4ce9f422..5c7278c6 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -207,6 +207,8 @@ void EmitContext::DefineBufferOffsets() { 
push_data_block, ConstU32(half), ConstU32(comp))}; const Id value{OpLoad(U32[1], ptr)}; tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(6U)); + tex_buffer.coord_shift = + OpBitFieldUExtract(U32[1], value, ConstU32(offset + 6U), ConstU32(2U)); Name(tex_buffer.coord_offset, fmt::format("texbuf{}_off", binding)); } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 1c5da946..4e5e7dd3 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -223,6 +223,7 @@ public: struct TextureBufferDefinition { Id id; Id coord_offset; + Id coord_shift; u32 binding; Id image_type; Id result_type; diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index d382d0e7..494bbb4b 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -105,6 +105,11 @@ struct PushData { ASSERT(offset < 256 && binding < buf_offsets.size()); buf_offsets[binding] = offset; } + + void AddTexelOffset(u32 binding, u32 multiplier, u32 texel_offset) { + ASSERT(texel_offset < 64 && multiplier < 16 && binding < buf_offsets.size()); + buf_offsets[binding] = texel_offset | ((std::bit_width(multiplier) - 1) << 6); + } }; static_assert(sizeof(PushData) <= 128, "PushData size is greater than minimum size guaranteed by Vulkan spec"); diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 82c06464..2a3bd62f 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -9,7 +9,6 @@ #include "frontend/fetch_shader.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" -#include "shader_recompiler/ir/passes/srt.h" namespace Shader { @@ -22,8 +21,12 @@ struct VsAttribSpecialization { struct BufferSpecialization { u16 stride : 14; u16 is_storage : 1; + u32 size = 0; - auto operator<=>(const 
BufferSpecialization&) const = default; + bool operator==(const BufferSpecialization& other) const { + return stride == other.stride && is_storage == other.is_storage && + (size >= other.size || is_storage); + } }; struct TextureBufferSpecialization { @@ -86,6 +89,9 @@ struct StageSpecialization { [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.stride = sharp.GetStride(); spec.is_storage = desc.IsStorage(sharp); + if (!spec.is_storage) { + spec.size = sharp.GetSize(); + } }); ForEachSharp(binding, tex_buffers, info->texture_buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index d26a7067..80b91b82 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -29,7 +29,7 @@ namespace VideoCore { constexpr size_t PAGESIZE = 4_KB; constexpr size_t PAGEBITS = 12; -#if ENABLE_USERFAULTFD +#ifdef ENABLE_USERFAULTFD struct PageManager::Impl { Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} { uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e2b6d974..4e858c0d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -548,12 +548,13 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id); const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; - ASSERT_MSG(fmt_stride == vsharp.GetStride(), + const u32 buf_stride = vsharp.GetStride(); + ASSERT_MSG(buf_stride % fmt_stride == 0, "Texel buffer stride must match format stride"); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; ASSERT(adjust % fmt_stride == 0); - 
push_data.AddOffset(binding.buffer, adjust / fmt_stride); + push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride); buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written, vsharp.GetDataFmt(), vsharp.GetNumberFmt());