mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-27 08:51:47 +00:00
video_core: Add multipler to handle special cases of texture buffer stride mismatch (#1640)
* page_manager: Enable userfaultfd by default * Much faster than page faults and causes less problems * shader_recompiler: Add texel buffer multiplier * Fixes format mismatch assert when vsharp stride is multiple of format stride * shader_recompiler: Specialize UBOs on size * Some games can perform manual vertex pulling and thus bind read only buffers of varying size. We only recompile when the vsharp size is larger than size in shader, in opposite case its not needed * clang format
This commit is contained in:
parent
d05846a327
commit
9e618c0e0c
|
@ -875,6 +875,10 @@ target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAlloca
|
|||
target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h")
|
||||
target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h")
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
target_compile_definitions(shadps4 PRIVATE ENABLE_USERFAULTFD)
|
||||
endif()
|
||||
|
||||
if (APPLE)
|
||||
option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." OFF)
|
||||
if (USE_SYSTEM_VULKAN_LOADER)
|
||||
|
|
|
@ -326,7 +326,9 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
|
|||
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
const auto& buffer = ctx.texture_buffers[handle];
|
||||
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
|
||||
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
|
||||
const Id coord =
|
||||
ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift),
|
||||
buffer.coord_offset);
|
||||
Id texel = buffer.is_storage ? ctx.OpImageRead(buffer.result_type, tex_buffer, coord)
|
||||
: ctx.OpImageFetch(buffer.result_type, tex_buffer, coord);
|
||||
if (buffer.is_integer) {
|
||||
|
@ -372,7 +374,9 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
|||
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
const auto& buffer = ctx.texture_buffers[handle];
|
||||
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
|
||||
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
|
||||
const Id coord =
|
||||
ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift),
|
||||
buffer.coord_offset);
|
||||
if (buffer.is_integer) {
|
||||
value = ctx.OpBitcast(buffer.result_type, value);
|
||||
}
|
||||
|
|
|
@ -207,6 +207,8 @@ void EmitContext::DefineBufferOffsets() {
|
|||
push_data_block, ConstU32(half), ConstU32(comp))};
|
||||
const Id value{OpLoad(U32[1], ptr)};
|
||||
tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(6U));
|
||||
tex_buffer.coord_shift =
|
||||
OpBitFieldUExtract(U32[1], value, ConstU32(offset + 6U), ConstU32(2U));
|
||||
Name(tex_buffer.coord_offset, fmt::format("texbuf{}_off", binding));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -223,6 +223,7 @@ public:
|
|||
struct TextureBufferDefinition {
|
||||
Id id;
|
||||
Id coord_offset;
|
||||
Id coord_shift;
|
||||
u32 binding;
|
||||
Id image_type;
|
||||
Id result_type;
|
||||
|
|
|
@ -105,6 +105,11 @@ struct PushData {
|
|||
ASSERT(offset < 256 && binding < buf_offsets.size());
|
||||
buf_offsets[binding] = offset;
|
||||
}
|
||||
|
||||
void AddTexelOffset(u32 binding, u32 multiplier, u32 texel_offset) {
|
||||
ASSERT(texel_offset < 64 && multiplier < 16);
|
||||
buf_offsets[binding] = texel_offset | ((std::bit_width(multiplier) - 1) << 6);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(PushData) <= 128,
|
||||
"PushData size is greater than minimum size guaranteed by Vulkan spec");
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include "frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/backend/bindings.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/passes/srt.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
|
@ -22,8 +21,12 @@ struct VsAttribSpecialization {
|
|||
struct BufferSpecialization {
|
||||
u16 stride : 14;
|
||||
u16 is_storage : 1;
|
||||
u32 size = 0;
|
||||
|
||||
auto operator<=>(const BufferSpecialization&) const = default;
|
||||
bool operator==(const BufferSpecialization& other) const {
|
||||
return stride == other.stride && is_storage == other.is_storage &&
|
||||
(size >= other.is_storage || is_storage);
|
||||
}
|
||||
};
|
||||
|
||||
struct TextureBufferSpecialization {
|
||||
|
@ -86,6 +89,9 @@ struct StageSpecialization {
|
|||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.stride = sharp.GetStride();
|
||||
spec.is_storage = desc.IsStorage(sharp);
|
||||
if (!spec.is_storage) {
|
||||
spec.size = sharp.GetSize();
|
||||
}
|
||||
});
|
||||
ForEachSharp(binding, tex_buffers, info->texture_buffers,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
|
|
|
@ -29,7 +29,7 @@ namespace VideoCore {
|
|||
constexpr size_t PAGESIZE = 4_KB;
|
||||
constexpr size_t PAGEBITS = 12;
|
||||
|
||||
#if ENABLE_USERFAULTFD
|
||||
#ifdef ENABLE_USERFAULTFD
|
||||
struct PageManager::Impl {
|
||||
Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} {
|
||||
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
|
||||
|
|
|
@ -548,12 +548,13 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
|
||||
vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
|
||||
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
|
||||
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
|
||||
const u32 buf_stride = vsharp.GetStride();
|
||||
ASSERT_MSG(buf_stride % fmt_stride == 0,
|
||||
"Texel buffer stride must match format stride");
|
||||
const u32 offset_aligned = Common::AlignDown(offset, alignment);
|
||||
const u32 adjust = offset - offset_aligned;
|
||||
ASSERT(adjust % fmt_stride == 0);
|
||||
push_data.AddOffset(binding.buffer, adjust / fmt_stride);
|
||||
push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride);
|
||||
buffer_view =
|
||||
vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written,
|
||||
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
|
||||
|
|
Loading…
Reference in a new issue