Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-01-29 01:38:24 +00:00
renderer_vulkan: use LDS buffer as SSBO on unsupported shared memory size
This commit is contained in:
parent 461148c227
commit 06fe4cff42
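The gist of the change: PS4 compute shaders can declare up to 64 KiB of LDS (local data share), which can exceed the host GPU's reported maxComputeSharedMemorySize. When the declared size does not fit, the recompiler now emits the shared-memory array as an SSBO instead of a Workgroup variable, and the renderer binds a device-local LDS buffer to back it. A minimal sketch of the deciding check, assuming the fields below (the real check lives inline in EmitContext::DefineSharedMemory()):

    #include <cstdint>
    using u32 = std::uint32_t;

    struct ComputeInfo {
        u32 shared_memory_size;     // LDS bytes the guest shader declares
        u32 max_shared_memory_size; // host limit (maxComputeSharedMemorySize)
    };

    // Hypothetical helper: <= limit keeps a plain Workgroup array,
    // > limit switches to the SSBO-backed emulation added by this commit.
    bool NeedsEmulatedSharedMemory(const ComputeInfo& cs_info) {
        return cs_info.shared_memory_size > cs_info.max_shared_memory_size;
    }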
@@ -9,18 +9,33 @@ namespace Shader::Backend::SPIRV {
 Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
     const Id shift_id{ctx.ConstU32(2U)};
     const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
-    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
-    return ctx.OpLoad(ctx.U32[1], pointer);
+    if (ctx.info.has_emulated_shared_memory) {
+        const Id pointer =
+            ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+        return ctx.OpLoad(ctx.U32[1], pointer);
+    } else {
+        const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
+        return ctx.OpLoad(ctx.U32[1], pointer);
+    }
 }
 
 Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
     const Id shift_id{ctx.ConstU32(2U)};
     const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
     const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
-    const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
-    const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
-    return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
-                                    ctx.OpLoad(ctx.U32[1], rhs_pointer));
+    if (ctx.info.has_emulated_shared_memory) {
+        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
+                                               ctx.u32_zero_value, base_index)};
+        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
+                                               ctx.u32_zero_value, next_index)};
+        return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
+                                        ctx.OpLoad(ctx.U32[1], rhs_pointer));
+    } else {
+        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
+        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
+        return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
+                                        ctx.OpLoad(ctx.U32[1], rhs_pointer));
+    }
 }
 
 Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {

@@ -29,8 +44,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
     std::array<Id, 4> values{};
     for (u32 i = 0; i < 4; ++i) {
         const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
-        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
-        values[i] = ctx.OpLoad(ctx.U32[1], pointer);
+        if (ctx.info.has_emulated_shared_memory) {
+            const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
+                                               ctx.u32_zero_value, index)};
+            values[i] = ctx.OpLoad(ctx.U32[1], pointer);
+        } else {
+            const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+            values[i] = ctx.OpLoad(ctx.U32[1], pointer);
+        }
     }
     return ctx.OpCompositeConstruct(ctx.U32[4], values);
 }

@@ -38,18 +59,33 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
 void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
     const Id shift{ctx.ConstU32(2U)};
     const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
-    const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
-    ctx.OpStore(pointer, value);
+    if (ctx.info.has_emulated_shared_memory) {
+        const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
+                                             ctx.u32_zero_value, word_offset);
+        ctx.OpStore(pointer, value);
+    } else {
+        const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
+        ctx.OpStore(pointer, value);
+    }
 }
 
 void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
     const Id shift{ctx.ConstU32(2U)};
     const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
     const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
-    const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
-    const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
-    ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
-    ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+    if (ctx.info.has_emulated_shared_memory) {
+        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
+                                               ctx.u32_zero_value, word_offset)};
+        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
+                                               ctx.u32_zero_value, next_offset)};
+        ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
+        ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+    } else {
+        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
+        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
+        ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
+        ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+    }
 }
 
 void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {

@@ -57,8 +93,14 @@ void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
     const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
     for (u32 i = 0; i < 4; ++i) {
         const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
-        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
-        ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+        if (ctx.info.has_emulated_shared_memory) {
+            const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
+                                               ctx.u32_zero_value, index)};
+            ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+        } else {
+            const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+            ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+        }
     }
 }
 
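The only functional difference between the two branches above is the extra ctx.u32_zero_value index: a Workgroup variable is the u32 array itself, so a single index selects an element, while the emulated path wraps the array in an SSBO Block struct (member 0, named "data" further down), so the access chain must first step into the struct. A distilled sketch of that addressing, as a hypothetical helper the emitters could share:

    // Hypothetical helper, not part of the commit; each emitter above
    // repeats this branch inline.
    Id SharedWordPointer(EmitContext& ctx, Id index) {
        if (ctx.info.has_emulated_shared_memory) {
            // SSBO: struct { uint data[N]; } -> select member 0, then element.
            return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
                                     ctx.u32_zero_value, index);
        }
        // Workgroup: the variable is the array -> index the element directly.
        return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
    }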
@@ -65,17 +65,17 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
 
 } // Anonymous namespace
 
-EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
-                         const Info& info_, Bindings& binding_)
+EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, Info& info_,
+                         Bindings& binding_)
     : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
       profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
     AddCapability(spv::Capability::Shader);
     DefineArithmeticTypes();
     DefineInterfaces();
+    DefineSharedMemory();
     DefineBuffers();
     DefineTextureBuffers();
     DefineImagesAndSamplers();
-    DefineSharedMemory();
 }
 
 EmitContext::~EmitContext() = default;
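Two details in this hunk carry the rest of the change: info becomes a mutable reference because DefineSharedMemory() now writes has_emulated_shared_memory and shared_memory_size back into it, and DefineSharedMemory() moves ahead of DefineBuffers() so that, in the emulated case, the shared-memory SSBO claims the first unified binding slot before any regular buffers are assigned theirs. A sketch of the resulting slot order (illustrative only; the real counters are advanced inside the Define* passes):

    #include <cstdint>

    struct Bindings {
        std::uint32_t unified{};
    };

    std::uint32_t FirstRegularBufferSlot(bool has_emulated_shared_memory,
                                         bool has_readconst) {
        Bindings binding{};
        if (has_emulated_shared_memory) {
            binding.unified++; // slot 0: emulated shared-memory SSBO
        }
        if (has_readconst) {
            binding.unified++; // next: flattened user-data UBO
        }
        return binding.unified; // regular buffers start here
    }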
@@ -852,20 +852,45 @@ void EmitContext::DefineSharedMemory() {
     if (!info.uses_shared) {
         return;
     }
+    const u32 max_shared_memory_size = runtime_info.cs_info.max_shared_memory_size;
     u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
     if (shared_memory_size == 0) {
         shared_memory_size = DefaultSharedMemSize;
     }
-
-    const u32 max_shared_memory_size = runtime_info.cs_info.max_shared_memory_size;
-    ASSERT(shared_memory_size <= max_shared_memory_size);
-
     const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
     const Id type{TypeArray(U32[1], ConstU32(num_elements))};
-    shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
-    shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
-    shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
-    interfaces.push_back(shared_memory_u32);
+    if (shared_memory_size <= max_shared_memory_size) {
+        shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
+        shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
+        shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
+        Name(shared_memory_u32, "shared_mem");
+        interfaces.push_back(shared_memory_u32);
+    } else {
+        shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type);
+        shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
+
+        Decorate(type, spv::Decoration::ArrayStride, 4);
+
+        const Id struct_type{TypeStruct(type)};
+        Name(struct_type, "shared_memory_buf");
+        Decorate(struct_type, spv::Decoration::Block);
+        MemberName(struct_type, 0, "data");
+        MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
+
+        const Id struct_pointer_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
+        const Id ssbo_id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::StorageBuffer)};
+        Decorate(ssbo_id, spv::Decoration::Binding, binding.unified++);
+        Decorate(ssbo_id, spv::Decoration::DescriptorSet, 0U);
+        Name(ssbo_id, "shared_mem_ssbo");
+
+        shared_memory_u32 = ssbo_id;
+
+        info.has_emulated_shared_memory = true;
+        info.shared_memory_size = shared_memory_size;
+        interfaces.push_back(ssbo_id);
+    }
 }
 
 } // namespace Shader::Backend::SPIRV
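Dropping the ASSERT is the point of the commit: instead of aborting when the declared LDS size exceeds the device limit, the emitter switches the array to StorageBuffer storage, decorates it as a Block (ArrayStride and Offset make the tightly packed layout explicit), and records the emulation in info for the pipeline and rasterizer to pick up. The limit it compares against would normally originate from the device properties; a hedged sketch of that query (assumed plumbing, not code from this commit):

    #include <vulkan/vulkan.h>

    // Where a value like cs_info.max_shared_memory_size plausibly comes from.
    uint32_t QueryMaxComputeSharedMemory(VkPhysicalDevice physical_device) {
        VkPhysicalDeviceProperties props{};
        vkGetPhysicalDeviceProperties(physical_device, &props);
        // Hard cap for Workgroup storage; PS4 titles may declare up to
        // 64 KiB of LDS, which can exceed this on some host GPUs.
        return props.limits.maxComputeSharedMemorySize;
    }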
@@ -37,7 +37,7 @@ struct VectorIds {
 
 class EmitContext final : public Sirit::Module {
 public:
-    explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, const Info& info,
+    explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, Info& info,
                          Bindings& binding);
     ~EmitContext();
 
@@ -132,7 +132,7 @@ public:
         return ConstantComposite(type, constituents);
     }
 
-    const Info& info;
+    Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;
     Stage stage;
@@ -259,9 +259,9 @@ void GcnDecodeContext::updateInstructionMeta(InstEncoding encoding) {
 
     ASSERT_MSG(instFormat.src_type != ScalarType::Undefined &&
                    instFormat.dst_type != ScalarType::Undefined,
-               "Instruction format table incomplete for opcode {} ({}, encoding = {})",
+               "Instruction format table incomplete for opcode {} ({}, encoding = 0x{:x})",
                magic_enum::enum_name(m_instruction.opcode), u32(m_instruction.opcode),
-               magic_enum::enum_name(encoding));
+               u32(encoding));
 
     m_instruction.inst_class = instFormat.inst_class;
     m_instruction.category = instFormat.inst_category;
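A drive-by logging fix: magic_enum::enum_name() only reflects enum values inside its configured range (roughly -128..127 by default), and InstEncoding values are large bit patterns, so enum_name(encoding) produced an empty string in the assert message. Printing the raw value in hex always works. A small repro under that assumption (the enum value below is hypothetical):

    #include <cstdint>
    #include <cstdio>
    #include <magic_enum.hpp>

    enum class InstEncoding : std::uint32_t { SOP2 = 0x80000000u }; // hypothetical value

    int main() {
        constexpr auto enc = InstEncoding::SOP2;
        const auto name = magic_enum::enum_name(enc); // empty: outside reflection range
        std::printf("name='%.*s' encoding = 0x%x\n", static_cast<int>(name.size()),
                    name.data(), static_cast<std::uint32_t>(enc));
    }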
@@ -207,7 +207,9 @@ struct Info {
     bool stores_tess_level_outer{};
     bool stores_tess_level_inner{};
     bool translation_failed{}; // indicates that shader has unsupported instructions
+    bool has_emulated_shared_memory{};
     bool has_readconst{};
+    u32 shared_memory_size{};
     u8 mrt_mask{0u};
     bool has_fetch_shader{false};
     u32 fetch_shader_sgpr_base{0u};
@@ -100,6 +100,9 @@ struct StageSpecialization {
         });
     }
     u32 binding{};
+    if (info->has_emulated_shared_memory) {
+        binding++;
+    }
     if (info->has_readconst) {
         binding++;
     }

@@ -195,9 +198,15 @@
         }
     }
     u32 binding{};
+    if (info->has_emulated_shared_memory != other.info->has_emulated_shared_memory) {
+        return false;
+    }
     if (info->has_readconst != other.info->has_readconst) {
         return false;
     }
+    if (info->has_emulated_shared_memory) {
+        binding++;
+    }
     if (info->has_readconst) {
         binding++;
     }
@@ -17,7 +17,7 @@
 
 namespace VideoCore {
 
-static constexpr size_t GdsBufferSize = 64_KB;
+static constexpr size_t DataShareBufferSize = 64_KB;
 static constexpr size_t StagingBufferSize = 1_GB;
 static constexpr size_t UboStreamBufferSize = 64_MB;
 
@@ -28,9 +28,11 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       texture_cache{texture_cache_}, tracker{tracker_},
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
-      gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, GdsBufferSize},
+      gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
+      lds_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, DataShareBufferSize},
       memory_tracker{&tracker} {
     Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
+    Vulkan::SetObjectName(instance.GetDevice(), lds_buffer.Handle(), "LDS Buffer");
 
     // Ensure the first slot is used for the null buffer
     const auto null_id =
@@ -68,6 +68,11 @@ public:
         return &gds_buffer;
     }
 
+    /// Returns a pointer to LDS device local buffer.
+    [[nodiscard]] const Buffer* GetLdsBuffer() const noexcept {
+        return &lds_buffer;
+    }
+
     /// Retrieves the buffer with the specified id.
     [[nodiscard]] Buffer& GetBuffer(BufferId id) {
         return slot_buffers[id];

@@ -154,6 +159,7 @@ private:
     StreamBuffer staging_buffer;
     StreamBuffer stream_buffer;
     Buffer gds_buffer;
+    Buffer lds_buffer;
     std::shared_mutex mutex;
     Common::SlotVector<Buffer> slot_buffers;
     RangeSet gpu_modified_ranges;
@@ -29,6 +29,14 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
     u32 binding{};
     boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
 
+    if (info->has_emulated_shared_memory) {
+        bindings.push_back({
+            .binding = binding++,
+            .descriptorType = vk::DescriptorType::eStorageBuffer,
+            .descriptorCount = 1,
+            .stageFlags = vk::ShaderStageFlagBits::eCompute,
+        });
+    }
     if (info->has_readconst) {
         bindings.push_back({
             .binding = binding++,
@@ -175,7 +175,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
     const bool is_depth_clear = regs.depth_render_control.depth_clear_enable ||
                                 texture_cache.IsMetaCleared(htile_address, slice);
     const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable;
-    ASSERT(desc.view_info.range.extent.layers == 1);
+    ASSERT(desc.view_info.range.extent.levels == 1);
 
     state.width = std::min<u32>(state.width, image.info.size.width);
     state.height = std::min<u32>(state.height, image.info.size.height);

@@ -554,6 +554,21 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
         }
     }
 
+    // Bind a SSBO to act as shared memory in case of not being able to use a workgroup buffer
+    // (e.g. when the compute shared memory is bigger than the GPU's shared memory)
+    if (stage.has_emulated_shared_memory) {
+        const auto* lds_buf = buffer_cache.GetLdsBuffer();
+        buffer_infos.emplace_back(lds_buf->Handle(), 0, lds_buf->SizeBytes());
+        set_writes.push_back({
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = binding.unified++,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = vk::DescriptorType::eStorageBuffer,
+            .pBufferInfo = &buffer_infos.back(),
+        });
+    }
+
     // Bind the flattened user data buffer as a UBO so it's accessible to the shader
     if (stage.has_readconst) {
         const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
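Note that dstSet stays VK_NULL_HANDLE, which suggests these writes are applied through push descriptors rather than pre-allocated descriptor sets, and that the backing store is the single 64 KiB device-local lds_buffer created in BufferCache above, mirroring the size of GCN's on-chip LDS. A hedged sketch of how such writes would be consumed (assumed usage, not code from this commit):

    #include <vulkan/vulkan.hpp>

    // Assumed consumption via VK_KHR_push_descriptor, consistent with
    // dstSet = VK_NULL_HANDLE in the writes built above.
    void PushComputeDescriptors(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout,
                                const vk::WriteDescriptorSet* writes, uint32_t count) {
        cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, layout,
                                    /*set=*/0, count, writes);
    }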