video_core: Add multipler to handle special cases of texture buffer stride mismatch (#1640)

* page_manager: Enable userfaultfd by default

* Much faster than page faults and causes less problems

* shader_recompiler: Add texel buffer multiplier

* Fixes format mismatch assert when vsharp stride is multiple of format stride

* shader_recompiler: Specialize UBOs on size

* Some games can perform manual vertex pulling and thus bind read only buffers of varying size. We only recompile when the vsharp size is larger than size in shader, in opposite case its not needed

* clang format
This commit is contained in:
TheTurtle 2024-12-06 19:54:59 +02:00 committed by GitHub
parent 1bfa1499ae
commit 43d5d22500
8 changed files with 30 additions and 7 deletions

View file

@ -875,6 +875,10 @@ target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAlloca
target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h")
target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h")
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
target_compile_definitions(shadps4 PRIVATE ENABLE_USERFAULTFD)
endif()
if (APPLE)
option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." OFF)
if (USE_SYSTEM_VULKAN_LOADER)

View file

@ -326,7 +326,9 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& buffer = ctx.texture_buffers[handle];
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
const Id coord =
ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift),
buffer.coord_offset);
Id texel = buffer.is_storage ? ctx.OpImageRead(buffer.result_type, tex_buffer, coord)
: ctx.OpImageFetch(buffer.result_type, tex_buffer, coord);
if (buffer.is_integer) {
@ -372,7 +374,9 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
const auto& buffer = ctx.texture_buffers[handle];
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
const Id coord =
ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift),
buffer.coord_offset);
if (buffer.is_integer) {
value = ctx.OpBitcast(buffer.result_type, value);
}

View file

@ -207,6 +207,8 @@ void EmitContext::DefineBufferOffsets() {
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(6U));
tex_buffer.coord_shift =
OpBitFieldUExtract(U32[1], value, ConstU32(offset + 6U), ConstU32(2U));
Name(tex_buffer.coord_offset, fmt::format("texbuf{}_off", binding));
}
}

View file

@ -223,6 +223,7 @@ public:
struct TextureBufferDefinition {
Id id;
Id coord_offset;
Id coord_shift;
u32 binding;
Id image_type;
Id result_type;

View file

@ -105,6 +105,11 @@ struct PushData {
ASSERT(offset < 256 && binding < buf_offsets.size());
buf_offsets[binding] = offset;
}
void AddTexelOffset(u32 binding, u32 multiplier, u32 texel_offset) {
ASSERT(texel_offset < 64 && multiplier < 16);
buf_offsets[binding] = texel_offset | ((std::bit_width(multiplier) - 1) << 6);
}
};
static_assert(sizeof(PushData) <= 128,
"PushData size is greater than minimum size guaranteed by Vulkan spec");

View file

@ -9,7 +9,6 @@
#include "frontend/fetch_shader.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/passes/srt.h"
namespace Shader {
@ -22,8 +21,12 @@ struct VsAttribSpecialization {
struct BufferSpecialization {
u16 stride : 14;
u16 is_storage : 1;
u32 size = 0;
auto operator<=>(const BufferSpecialization&) const = default;
bool operator==(const BufferSpecialization& other) const {
return stride == other.stride && is_storage == other.is_storage &&
(size >= other.is_storage || is_storage);
}
};
struct TextureBufferSpecialization {
@ -86,6 +89,9 @@ struct StageSpecialization {
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride();
spec.is_storage = desc.IsStorage(sharp);
if (!spec.is_storage) {
spec.size = sharp.GetSize();
}
});
ForEachSharp(binding, tex_buffers, info->texture_buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {

View file

@ -29,7 +29,7 @@ namespace VideoCore {
constexpr size_t PAGESIZE = 4_KB;
constexpr size_t PAGEBITS = 12;
#if ENABLE_USERFAULTFD
#ifdef ENABLE_USERFAULTFD
struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} {
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

View file

@ -548,12 +548,13 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
const u32 buf_stride = vsharp.GetStride();
ASSERT_MSG(buf_stride % fmt_stride == 0,
"Texel buffer stride must match format stride");
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
ASSERT(adjust % fmt_stride == 0);
push_data.AddOffset(binding.buffer, adjust / fmt_stride);
push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride);
buffer_view =
vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written,
vsharp.GetDataFmt(), vsharp.GetNumberFmt());