diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index 57ea476f1..6ab213864 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -9,18 +9,33 @@ namespace Shader::Backend::SPIRV { Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { const Id shift_id{ctx.ConstU32(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; - return ctx.OpLoad(ctx.U32[1], pointer); + if (ctx.info.has_emulated_shared_memory) { + const Id pointer = + ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index); + return ctx.OpLoad(ctx.U32[1], pointer); + } else { + const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); + return ctx.OpLoad(ctx.U32[1], pointer); + } } Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { const Id shift_id{ctx.ConstU32(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))}; - const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; - const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; - return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), - ctx.OpLoad(ctx.U32[1], rhs_pointer)); + if (ctx.info.has_emulated_shared_memory) { + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, + ctx.u32_zero_value, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, + ctx.u32_zero_value, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); + } else { + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); + } } Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { @@ -29,8 +44,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { std::array values{}; for (u32 i = 0; i < 4; ++i) { const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))}; - const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; - values[i] = ctx.OpLoad(ctx.U32[1], pointer); + if (ctx.info.has_emulated_shared_memory) { + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, + ctx.u32_zero_value, index)}; + values[i] = ctx.OpLoad(ctx.U32[1], pointer); + } else { + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + values[i] = ctx.OpLoad(ctx.U32[1], pointer); + } } return ctx.OpCompositeConstruct(ctx.U32[4], values); } @@ -38,18 +59,33 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { const Id shift{ctx.ConstU32(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; - const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); - ctx.OpStore(pointer, value); + if (ctx.info.has_emulated_shared_memory) { + const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, + ctx.u32_zero_value, word_offset); + ctx.OpStore(pointer, value); + } else { + const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); + ctx.OpStore(pointer, value); + } } void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { const Id shift{ctx.ConstU32(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))}; - const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; - const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; - ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); - ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); + if (ctx.info.has_emulated_shared_memory) { + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, + ctx.u32_zero_value, word_offset)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, + ctx.u32_zero_value, next_offset)}; + ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); + } else { + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; + ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); + } } void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { @@ -57,8 +93,14 @@ void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; for (u32 i = 0; i < 4; ++i) { const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))}; - const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; - ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); + if (ctx.info.has_emulated_shared_memory) { + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, + ctx.u32_zero_value, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); + } else { + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); + } } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index b0bf5aa0a..4c169823f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -65,17 +65,17 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar } // Anonymous namespace -EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, - const Info& info_, Bindings& binding_) +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, Info& info_, + Bindings& binding_) : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(); + DefineSharedMemory(); DefineBuffers(); DefineTextureBuffers(); DefineImagesAndSamplers(); - DefineSharedMemory(); } EmitContext::~EmitContext() = default; @@ -852,20 +852,45 @@ void EmitContext::DefineSharedMemory() { if (!info.uses_shared) { return; } + const u32 max_shared_memory_size = runtime_info.cs_info.max_shared_memory_size; u32 shared_memory_size = runtime_info.cs_info.shared_memory_size; if (shared_memory_size == 0) { shared_memory_size = DefaultSharedMemSize; } - const u32 max_shared_memory_size = runtime_info.cs_info.max_shared_memory_size; - ASSERT(shared_memory_size <= max_shared_memory_size); - const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], ConstU32(num_elements))}; - shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); - shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); - shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); - interfaces.push_back(shared_memory_u32); + + if (shared_memory_size <= max_shared_memory_size) { + shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); + shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); + Name(shared_memory_u32, "shared_mem"); + interfaces.push_back(shared_memory_u32); + } else { + shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type); + shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); + + Decorate(type, spv::Decoration::ArrayStride, 4); + + const Id struct_type{TypeStruct(type)}; + Name(struct_type, "shared_memory_buf"); + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "data"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + const Id ssbo_id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::StorageBuffer)}; + Decorate(ssbo_id, spv::Decoration::Binding, binding.unified++); + Decorate(ssbo_id, spv::Decoration::DescriptorSet, 0U); + Name(ssbo_id, "shared_mem_ssbo"); + + shared_memory_u32 = ssbo_id; + + info.has_emulated_shared_memory = true; + info.shared_memory_size = shared_memory_size; + interfaces.push_back(ssbo_id); + } } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index f552055c0..ab42ecc5b 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -37,7 +37,7 @@ struct VectorIds { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, const Info& info, + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, Info& info, Bindings& binding); ~EmitContext(); @@ -132,7 +132,7 @@ public: return ConstantComposite(type, constituents); } - const Info& info; + Info& info; const RuntimeInfo& runtime_info; const Profile& profile; Stage stage; diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp index a5187aebd..20b78e869 100644 --- a/src/shader_recompiler/frontend/decode.cpp +++ b/src/shader_recompiler/frontend/decode.cpp @@ -259,9 +259,9 @@ void GcnDecodeContext::updateInstructionMeta(InstEncoding encoding) { ASSERT_MSG(instFormat.src_type != ScalarType::Undefined && instFormat.dst_type != ScalarType::Undefined, - "Instruction format table incomplete for opcode {} ({}, encoding = {})", + "Instruction format table incomplete for opcode {} ({}, encoding = 0x{:x})", magic_enum::enum_name(m_instruction.opcode), u32(m_instruction.opcode), - magic_enum::enum_name(encoding)); + u32(encoding)); m_instruction.inst_class = instFormat.inst_class; m_instruction.category = instFormat.inst_category; diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 2cde30629..9469eaad7 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -207,7 +207,9 @@ struct Info { bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; bool translation_failed{}; // indicates that shader has unsupported instructions + bool has_emulated_shared_memory{}; bool has_readconst{}; + u32 shared_memory_size{}; u8 mrt_mask{0u}; bool has_fetch_shader{false}; u32 fetch_shader_sgpr_base{0u}; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 523e63497..456000cf0 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -100,6 +100,9 @@ struct StageSpecialization { }); } u32 binding{}; + if (info->has_emulated_shared_memory) { + binding++; + } if (info->has_readconst) { binding++; } @@ -195,9 +198,15 @@ struct StageSpecialization { } } u32 binding{}; + if (info->has_emulated_shared_memory != other.info->has_emulated_shared_memory) { + return false; + } if (info->has_readconst != other.info->has_readconst) { return false; } + if (info->has_emulated_shared_memory) { + binding++; + } if (info->has_readconst) { binding++; } diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 11ad0e96f..c779c1c45 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -17,7 +17,7 @@ namespace VideoCore { -static constexpr size_t GdsBufferSize = 64_KB; +static constexpr size_t DataShareBufferSize = 64_KB; static constexpr size_t StagingBufferSize = 1_GB; static constexpr size_t UboStreamBufferSize = 64_MB; @@ -28,9 +28,11 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s texture_cache{texture_cache_}, tracker{tracker_}, staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, - gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, GdsBufferSize}, + gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize}, + lds_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, DataShareBufferSize}, memory_tracker{&tracker} { Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); + Vulkan::SetObjectName(instance.GetDevice(), lds_buffer.Handle(), "LDS Buffer"); // Ensure the first slot is used for the null buffer const auto null_id = diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 575ee2c60..088c22c12 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -68,6 +68,11 @@ public: return &gds_buffer; } + /// Returns a pointer to LDS device local buffer. + [[nodiscard]] const Buffer* GetLdsBuffer() const noexcept { + return &lds_buffer; + } + /// Retrieves the buffer with the specified id. [[nodiscard]] Buffer& GetBuffer(BufferId id) { return slot_buffers[id]; @@ -154,6 +159,7 @@ private: StreamBuffer staging_buffer; StreamBuffer stream_buffer; Buffer gds_buffer; + Buffer lds_buffer; std::shared_mutex mutex; Common::SlotVector slot_buffers; RangeSet gpu_modified_ranges; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 23faacfc2..afa598fca 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -29,6 +29,14 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler u32 binding{}; boost::container::small_vector bindings; + if (info->has_emulated_shared_memory) { + bindings.push_back({ + .binding = binding++, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute, + }); + } if (info->has_readconst) { bindings.push_back({ .binding = binding++, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 88b510eca..d03972b3a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -175,7 +175,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { const bool is_depth_clear = regs.depth_render_control.depth_clear_enable || texture_cache.IsMetaCleared(htile_address, slice); const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable; - ASSERT(desc.view_info.range.extent.layers == 1); + ASSERT(desc.view_info.range.extent.levels == 1); state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); @@ -554,6 +554,21 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding } } + // Bind a SSBO to act as shared memory in case of not being able to use a workgroup buffer + // (e.g. when the compute shared memory is bigger than the GPU's shared memory) + if (stage.has_emulated_shared_memory) { + const auto* lds_buf = buffer_cache.GetLdsBuffer(); + buffer_infos.emplace_back(lds_buf->Handle(), 0, lds_buf->SizeBytes()); + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding.unified++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .pBufferInfo = &buffer_infos.back(), + }); + } + // Bind the flattened user data buffer as a UBO so it's accessible to the shader if (stage.has_readconst) { const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);