video_core: Add fallback path for pipelines with more than 32 bindings (#837)

* video_core: Small fixes

* renderer_vulkan: Add fallback path for pipelines with more than 32 bindings

* vk_resource_pool: Rewrite desc heap

* work
This commit is contained in:
TheTurtle 2024-09-10 20:54:39 +03:00 committed by GitHub
parent 96bf77e680
commit af7e826dee
27 changed files with 223 additions and 148 deletions

View file

@ -2155,6 +2155,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() {
int PS4_SYSV_ABI sceGnmSubmitDone() { int PS4_SYSV_ABI sceGnmSubmitDone() {
LOG_DEBUG(Lib_GnmDriver, "called"); LOG_DEBUG(Lib_GnmDriver, "called");
WaitGpuIdle();
if (!liverpool->IsGpuIdle()) { if (!liverpool->IsGpuIdle()) {
submission_lock = true; submission_lock = true;
} }

View file

@ -26,7 +26,6 @@
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
#include "core/libraries/ngs2/ngs2.h" #include "core/libraries/ngs2/ngs2.h"
#include "core/libraries/rtc/rtc.h" #include "core/libraries/rtc/rtc.h"
#include "core/libraries/videoout/video_out.h"
#include "core/linker.h" #include "core/linker.h"
#include "core/memory.h" #include "core/memory.h"
#include "emulator.h" #include "emulator.h"

View file

@ -208,6 +208,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
if (info.uses_group_quad) { if (info.uses_group_quad) {
ctx.AddCapability(spv::Capability::GroupNonUniformQuad); ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
} }
if (info.uses_group_ballot) {
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
}
switch (program.info.stage) { switch (program.info.stage) {
case Stage::Compute: { case Stage::Compute: {
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size}; const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};

View file

@ -305,7 +305,7 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
if (buffer.is_integer) { if (buffer.is_integer) {
value = ctx.OpBitcast(ctx.U32[4], value); value = ctx.OpBitcast(ctx.S32[4], value);
} }
ctx.OpImageWrite(tex_buffer, coord, value); ctx.OpImageWrite(tex_buffer, coord, value);
} }

View file

@ -27,7 +27,8 @@ Id EmitReadFirstLane(EmitContext& ctx, Id value) {
} }
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) { Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) {
UNREACHABLE(); return ctx.OpGroupNonUniformBroadcast(ctx.U32[1], SubgroupScope(ctx), value,
ctx.ConstU32(lane));
} }
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) { Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {

View file

@ -324,16 +324,18 @@ void EmitContext::DefineOutputs() {
void EmitContext::DefinePushDataBlock() { void EmitContext::DefinePushDataBlock() {
// Create push constants block for instance steps rates // Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4]), "AuxData")}; const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4]), "AuxData")};
Decorate(struct_type, spv::Decoration::Block); Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0"); MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1"); MemberName(struct_type, 1, "sr1");
MemberName(struct_type, 2, "buf_offsets0"); MemberName(struct_type, 2, "buf_offsets0");
MemberName(struct_type, 3, "buf_offsets1"); MemberName(struct_type, 3, "buf_offsets1");
MemberName(struct_type, 4, "buf_offsets2");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U); MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U); MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U); MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U);
MemberDecorate(struct_type, 4, spv::Decoration::Offset, 40U);
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(push_data_block, "push_data"); Name(push_data_block, "push_data");
interfaces.push_back(push_data_block); interfaces.push_back(push_data_block);

View file

@ -171,7 +171,7 @@ T Translator::GetSrc(const InstOperand& operand) {
} }
} else { } else {
if (operand.input_modifier.abs) { if (operand.input_modifier.abs) {
LOG_WARNING(Render_Vulkan, "Input abs modifier on integer instruction"); value = ir.IAbs(value);
} }
if (operand.input_modifier.neg) { if (operand.input_modifier.neg) {
UNREACHABLE(); UNREACHABLE();

View file

@ -117,6 +117,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_ATOMIC(AtomicOp::Add, inst); return BUFFER_ATOMIC(AtomicOp::Add, inst);
case Opcode::BUFFER_ATOMIC_SWAP: case Opcode::BUFFER_ATOMIC_SWAP:
return BUFFER_ATOMIC(AtomicOp::Swap, inst); return BUFFER_ATOMIC(AtomicOp::Swap, inst);
case Opcode::BUFFER_ATOMIC_UMIN:
return BUFFER_ATOMIC(AtomicOp::Umin, inst);
case Opcode::BUFFER_ATOMIC_UMAX:
return BUFFER_ATOMIC(AtomicOp::Umax, inst);
default: default:
LogMissingOpcode(inst); LogMissingOpcode(inst);
} }
@ -280,6 +284,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
info.has_bias.Assign(flags.test(MimgModifier::LodBias)); info.has_bias.Assign(flags.test(MimgModifier::LodBias));
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp)); info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
info.force_level0.Assign(flags.test(MimgModifier::Level0)); info.force_level0.Assign(flags.test(MimgModifier::Level0));
info.has_offset.Assign(flags.test(MimgModifier::Offset));
// info.explicit_lod.Assign(explicit_lod); // info.explicit_lod.Assign(explicit_lod);
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);

View file

@ -1,6 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <span> #include <span>
@ -89,7 +88,7 @@ struct PushData {
u32 step0; u32 step0;
u32 step1; u32 step1;
std::array<u8, 32> buf_offsets; std::array<u8, 48> buf_offsets;
void AddOffset(u32 binding, u32 offset) { void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 256 && binding < buf_offsets.size()); ASSERT(offset < 256 && binding < buf_offsets.size());
@ -166,6 +165,7 @@ struct Info {
bool has_image_query{}; bool has_image_query{};
bool uses_lane_id{}; bool uses_lane_id{};
bool uses_group_quad{}; bool uses_group_quad{};
bool uses_group_ballot{};
bool uses_shared{}; bool uses_shared{};
bool uses_fp16{}; bool uses_fp16{};
bool uses_step_rates{}; bool uses_step_rates{};
@ -181,6 +181,7 @@ struct Info {
const u32* base = user_data.data(); const u32* base = user_data.data();
if (ptr_index != IR::NumScalarRegs) { if (ptr_index != IR::NumScalarRegs) {
std::memcpy(&base, &user_data[ptr_index], sizeof(base)); std::memcpy(&base, &user_data[ptr_index], sizeof(base));
base = reinterpret_cast<const u32*>(VAddr(base) & 0xFFFFFFFFFFFFULL);
} }
std::memcpy(&data, base + dword_offset, sizeof(T)); std::memcpy(&data, base + dword_offset, sizeof(T));
return data; return data;

View file

@ -21,8 +21,7 @@ void LowerSharedMemToRegisters(IR::Program& program) {
const IR::Inst* prod = inst.Arg(0).InstRecursive(); const IR::Inst* prod = inst.Arg(0).InstRecursive();
const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) { const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) {
const IR::Inst* write_prod = write->Arg(0).InstRecursive(); const IR::Inst* write_prod = write->Arg(0).InstRecursive();
return write_prod->Arg(1).U32() == prod->Arg(1).U32() && return write_prod->Arg(1).U32() == prod->Arg(1).U32();
write_prod->Arg(0) == prod->Arg(0);
}); });
ASSERT(it != ds_writes.end()); ASSERT(it != ds_writes.end());
// Replace data read with value written. // Replace data read with value written.

View file

@ -98,22 +98,7 @@ bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
} }
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
switch (inst.GetOpcode()) { return IR::Type::U32;
case IR::Opcode::LoadBufferU32:
case IR::Opcode::LoadBufferU32x2:
case IR::Opcode::LoadBufferU32x3:
case IR::Opcode::LoadBufferU32x4:
case IR::Opcode::StoreBufferU32:
case IR::Opcode::StoreBufferU32x2:
case IR::Opcode::StoreBufferU32x3:
case IR::Opcode::StoreBufferU32x4:
case IR::Opcode::ReadConstBuffer:
case IR::Opcode::BufferAtomicIAdd32:
case IR::Opcode::BufferAtomicSwap32:
return IR::Type::U32;
default:
UNREACHABLE();
}
} }
bool IsImageAtomicInstruction(const IR::Inst& inst) { bool IsImageAtomicInstruction(const IR::Inst& inst) {
@ -223,12 +208,8 @@ public:
u32 Add(const SamplerResource& desc) { u32 Add(const SamplerResource& desc) {
const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) { const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
if (desc.sgpr_base == existing.sgpr_base && return desc.sgpr_base == existing.sgpr_base &&
desc.dword_offset == existing.dword_offset) { desc.dword_offset == existing.dword_offset;
return true;
}
// Samplers with different bindings might still be the same.
return existing.GetSharp(info) == desc.GetSharp(info);
})}; })};
return index; return index;
} }

View file

@ -39,6 +39,11 @@ void Visit(Info& info, IR::Inst& inst) {
case IR::Opcode::QuadShuffle: case IR::Opcode::QuadShuffle:
info.uses_group_quad = true; info.uses_group_quad = true;
break; break;
case IR::Opcode::ReadLane:
case IR::Opcode::ReadFirstLane:
case IR::Opcode::WriteLane:
info.uses_group_ballot = true;
break;
case IR::Opcode::Discard: case IR::Opcode::Discard:
case IR::Opcode::DiscardCond: case IR::Opcode::DiscardCond:
info.has_discard = true; info.has_discard = true;

View file

@ -37,14 +37,14 @@ struct ImageSpecialization {
* after the first compilation of a module. * after the first compilation of a module.
*/ */
struct StageSpecialization { struct StageSpecialization {
static constexpr size_t MaxStageResources = 32; static constexpr size_t MaxStageResources = 64;
const Shader::Info* info; const Shader::Info* info;
RuntimeInfo runtime_info; RuntimeInfo runtime_info;
std::bitset<MaxStageResources> bitset{}; std::bitset<MaxStageResources> bitset{};
boost::container::small_vector<BufferSpecialization, 16> buffers; boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers; boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
boost::container::small_vector<ImageSpecialization, 8> images; boost::container::small_vector<ImageSpecialization, 16> images;
u32 start_binding{}; u32 start_binding{};
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,

View file

@ -187,6 +187,11 @@ struct PM4CmdSetData {
BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+ BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+
///< Program to zero for other opcodes and on SI ///< Program to zero for other opcodes and on SI
}; };
u32 data[0];
[[nodiscard]] u32 Size() const {
return header.count << 2u;
}
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args> template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
static constexpr u32* SetContextReg(u32* cmdbuf, Args... data) { static constexpr u32* SetContextReg(u32* cmdbuf, Args... data) {
@ -350,6 +355,16 @@ struct PM4CmdEventWriteEop {
} }
}; };
struct PM4CmdAcquireMem {
PM4Type3Header header;
u32 cp_coher_cntl;
u32 cp_coher_size_lo;
u32 cp_coher_size_hi;
u32 cp_coher_base_lo;
u32 cp_coher_base_hi;
u32 poll_interval;
};
enum class DmaDataDst : u32 { enum class DmaDataDst : u32 {
Memory = 0, Memory = 0,
Gds = 1, Gds = 1,
@ -467,6 +482,10 @@ struct PM4CmdWriteData {
}; };
u32 data[0]; u32 data[0];
u32 Size() const {
return (header.count.Value() - 2) * 4;
}
template <typename T> template <typename T>
void Address(T addr) { void Address(T addr) {
addr64 = static_cast<u64>(addr); addr64 = static_cast<u64>(addr);

View file

@ -577,9 +577,6 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
return false; return false;
} }
Image& image = texture_cache.GetImage(image_id); Image& image = texture_cache.GetImage(image_id);
if (image.info.guest_size_bytes > size) {
return false;
}
boost::container::small_vector<vk::BufferImageCopy, 8> copies; boost::container::small_vector<vk::BufferImageCopy, 8> copies;
u32 offset = buffer.Offset(image.cpu_addr); u32 offset = buffer.Offset(image.cpu_addr);
const u32 num_layers = image.info.resources.layers; const u32 num_layers = image.info.resources.layers;
@ -604,11 +601,13 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
}); });
offset += mip_ofs * num_layers; offset += mip_ofs * num_layers;
} }
scheduler.EndRendering(); if (!copies.empty()) {
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, const auto cmdbuf = scheduler.CommandBuffer();
copies); cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
copies);
}
return true; return true;
} }

View file

@ -12,9 +12,11 @@
namespace Vulkan { namespace Vulkan {
ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_, ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
vk::PipelineCache pipeline_cache, u64 compute_key_, DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache,
const Shader::Info& info_, vk::ShaderModule module) u64 compute_key_, const Shader::Info& info_,
: instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{&info_} { vk::ShaderModule module)
: instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, compute_key{compute_key_},
info{&info_} {
const vk::PipelineShaderStageCreateInfo shader_ci = { const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute, .stage = vk::ShaderStageFlagBits::eCompute,
.module = module, .module = module,
@ -66,8 +68,12 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
.size = sizeof(Shader::PushData), .size = sizeof(Shader::PushData),
}; };
uses_push_descriptors = binding < instance.MaxPushDescriptors();
const auto flags = uses_push_descriptors
? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR
: vk::DescriptorSetLayoutCreateFlagBits{};
const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = { const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR, .flags = flags,
.bindingCount = static_cast<u32>(bindings.size()), .bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(), .pBindings = bindings.data(),
}; };
@ -101,8 +107,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const { VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures. // Bind resource buffers and textures.
boost::container::static_vector<vk::BufferView, 8> buffer_views; boost::container::static_vector<vk::BufferView, 8> buffer_views;
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos; boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos; boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes; boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers; boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
Shader::PushData push_data{}; Shader::PushData push_data{};
@ -265,9 +271,21 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
cmdbuf.pipelineBarrier2(dependencies); cmdbuf.pipelineBarrier2(dependencies);
} }
if (uses_push_descriptors) {
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0,
set_writes);
} else {
const auto desc_set = desc_heap.Commit(*desc_layout);
for (auto& set_write : set_writes) {
set_write.dstSet = desc_set;
}
instance.GetDevice().updateDescriptorSets(set_writes, {});
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set,
{});
}
cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data), cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data),
&push_data); &push_data);
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes);
return true; return true;
} }

View file

@ -16,12 +16,13 @@ namespace Vulkan {
class Instance; class Instance;
class Scheduler; class Scheduler;
class DescriptorHeap;
class ComputePipeline { class ComputePipeline {
public: public:
explicit ComputePipeline(const Instance& instance, Scheduler& scheduler, explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
vk::PipelineCache pipeline_cache, u64 compute_key, DescriptorHeap& desc_heap, vk::PipelineCache pipeline_cache,
const Shader::Info& info, vk::ShaderModule module); u64 compute_key, const Shader::Info& info, vk::ShaderModule module);
~ComputePipeline(); ~ComputePipeline();
[[nodiscard]] vk::Pipeline Handle() const noexcept { [[nodiscard]] vk::Pipeline Handle() const noexcept {
@ -34,11 +35,13 @@ public:
private: private:
const Instance& instance; const Instance& instance;
Scheduler& scheduler; Scheduler& scheduler;
DescriptorHeap& desc_heap;
vk::UniquePipeline pipeline; vk::UniquePipeline pipeline;
vk::UniquePipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
vk::UniqueDescriptorSetLayout desc_layout; vk::UniqueDescriptorSetLayout desc_layout;
u64 compute_key; u64 compute_key;
const Shader::Info* info; const Shader::Info* info;
bool uses_push_descriptors{};
}; };
} // namespace Vulkan } // namespace Vulkan

View file

@ -17,11 +17,11 @@
namespace Vulkan { namespace Vulkan {
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_, GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
const GraphicsPipelineKey& key_, DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
vk::PipelineCache pipeline_cache, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos, std::span<const Shader::Info*, MaxShaderStages> infos,
std::span<const vk::ShaderModule> modules) std::span<const vk::ShaderModule> modules)
: instance{instance_}, scheduler{scheduler_}, key{key_} { : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, key{key_} {
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
std::ranges::copy(infos, stages.begin()); std::ranges::copy(infos, stages.begin());
BuildDescSetLayout(); BuildDescSetLayout();
@ -301,7 +301,6 @@ GraphicsPipeline::~GraphicsPipeline() = default;
void GraphicsPipeline::BuildDescSetLayout() { void GraphicsPipeline::BuildDescSetLayout() {
u32 binding{}; u32 binding{};
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
for (const auto* stage : stages) { for (const auto* stage : stages) {
if (!stage) { if (!stage) {
continue; continue;
@ -343,8 +342,12 @@ void GraphicsPipeline::BuildDescSetLayout() {
}); });
} }
} }
uses_push_descriptors = binding < instance.MaxPushDescriptors();
const auto flags = uses_push_descriptors
? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR
: vk::DescriptorSetLayoutCreateFlagBits{};
const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = { const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR, .flags = flags,
.bindingCount = static_cast<u32>(bindings.size()), .bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(), .pBindings = bindings.data(),
}; };
@ -446,10 +449,10 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
}); });
} }
boost::container::static_vector<AmdGpu::Image, 16> tsharps; boost::container::static_vector<AmdGpu::Image, 32> tsharps;
for (const auto& image_desc : stage->images) { for (const auto& image_desc : stage->images) {
const auto tsharp = image_desc.GetSharp(*stage); const auto tsharp = image_desc.GetSharp(*stage);
if (tsharp) { if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
tsharps.emplace_back(tsharp); tsharps.emplace_back(tsharp);
VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth};
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
@ -510,8 +513,18 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
} }
if (!set_writes.empty()) { if (!set_writes.empty()) {
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, if (uses_push_descriptors) {
set_writes); cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
set_writes);
} else {
const auto desc_set = desc_heap.Commit(*desc_layout);
for (auto& set_write : set_writes) {
set_write.dstSet = desc_set;
}
instance.GetDevice().updateDescriptorSets(set_writes, {});
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
desc_set, {});
}
} }
cmdbuf.pushConstants(*pipeline_layout, cmdbuf.pushConstants(*pipeline_layout,
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, 0U, vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, 0U,

View file

@ -19,6 +19,7 @@ static constexpr u32 MaxShaderStages = 5;
class Instance; class Instance;
class Scheduler; class Scheduler;
class DescriptorHeap;
using Liverpool = AmdGpu::Liverpool; using Liverpool = AmdGpu::Liverpool;
@ -59,7 +60,8 @@ struct GraphicsPipelineKey {
class GraphicsPipeline { class GraphicsPipeline {
public: public:
explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler, explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, DescriptorHeap& desc_heap, const GraphicsPipelineKey& key,
vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> stages, std::span<const Shader::Info*, MaxShaderStages> stages,
std::span<const vk::ShaderModule> modules); std::span<const vk::ShaderModule> modules);
~GraphicsPipeline(); ~GraphicsPipeline();
@ -98,11 +100,14 @@ private:
private: private:
const Instance& instance; const Instance& instance;
Scheduler& scheduler; Scheduler& scheduler;
DescriptorHeap& desc_heap;
vk::UniquePipeline pipeline; vk::UniquePipeline pipeline;
vk::UniquePipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
vk::UniqueDescriptorSetLayout desc_layout; vk::UniqueDescriptorSetLayout desc_layout;
std::array<const Shader::Info*, MaxShaderStages> stages{}; std::array<const Shader::Info*, MaxShaderStages> stages{};
GraphicsPipelineKey key; GraphicsPipelineKey key;
bool uses_push_descriptors{};
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
}; };
} // namespace Vulkan } // namespace Vulkan

View file

@ -176,8 +176,10 @@ bool Instance::CreateDevice() {
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
const vk::StructureChain properties_chain = physical_device.getProperties2< const vk::StructureChain properties_chain = physical_device.getProperties2<
vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR, vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>(); vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties,
vk::PhysicalDevicePushDescriptorPropertiesKHR>();
subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize; subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
push_descriptor_props = properties_chain.get<vk::PhysicalDevicePushDescriptorPropertiesKHR>();
LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size); LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
features = feature_chain.get().features; features = feature_chain.get().features;

View file

@ -207,6 +207,11 @@ public:
return properties.limits.maxTexelBufferElements; return properties.limits.maxTexelBufferElements;
} }
/// Returns the maximum number of push descriptors.
u32 MaxPushDescriptors() const {
return push_descriptor_props.maxPushDescriptors;
}
/// Returns true if shaders can declare the ClipDistance attribute /// Returns true if shaders can declare the ClipDistance attribute
bool IsShaderClipDistanceSupported() const { bool IsShaderClipDistanceSupported() const {
return features.shaderClipDistance; return features.shaderClipDistance;
@ -242,6 +247,7 @@ private:
vk::PhysicalDevice physical_device; vk::PhysicalDevice physical_device;
vk::UniqueDevice device; vk::UniqueDevice device;
vk::PhysicalDeviceProperties properties; vk::PhysicalDeviceProperties properties;
vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
vk::PhysicalDeviceFeatures features; vk::PhysicalDeviceFeatures features;
vk::DriverIdKHR driver_id; vk::DriverIdKHR driver_id;
vk::UniqueDebugUtilsMessengerEXT debug_callback{}; vk::UniqueDebugUtilsMessengerEXT debug_callback{};

View file

@ -24,6 +24,15 @@ using Shader::VsOutput;
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
} }
constexpr static std::array DescriptorHeapSizes = {
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 8192},
vk::DescriptorPoolSize{vk::DescriptorType::eStorageBuffer, 1024},
vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 128},
vk::DescriptorPoolSize{vk::DescriptorType::eStorageTexelBuffer, 128},
vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 8192},
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
};
void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
const AmdGpu::Liverpool::VsOutputControl& ctl) { const AmdGpu::Liverpool::VsOutputControl& ctl) {
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
@ -120,7 +129,8 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_) AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_} { : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
desc_heap{instance, scheduler.GetMasterSemaphore(), DescriptorHeapSizes} {
profile = Shader::Profile{ profile = Shader::Profile{
.supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U, .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U,
.subgroup_size = instance.SubgroupSize(), .subgroup_size = instance.SubgroupSize(),
@ -153,8 +163,8 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
} }
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
if (is_new) { if (is_new) {
it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, it.value() = std::make_unique<GraphicsPipeline>(
*pipeline_cache, infos, modules); instance, scheduler, desc_heap, graphics_key, *pipeline_cache, infos, modules);
} }
const GraphicsPipeline* pipeline = it->second.get(); const GraphicsPipeline* pipeline = it->second.get();
return pipeline; return pipeline;
@ -166,8 +176,8 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
} }
const auto [it, is_new] = compute_pipelines.try_emplace(compute_key); const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
if (is_new) { if (is_new) {
it.value() = std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache, it.value() = std::make_unique<ComputePipeline>(
compute_key, *infos[0], modules[0]); instance, scheduler, desc_heap, *pipeline_cache, compute_key, *infos[0], modules[0]);
} }
const ComputePipeline* pipeline = it->second.get(); const ComputePipeline* pipeline = it->second.get();
return pipeline; return pipeline;

View file

@ -9,6 +9,7 @@
#include "shader_recompiler/specialization.h" #include "shader_recompiler/specialization.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Shader { namespace Shader {
struct Info; struct Info;
@ -66,6 +67,7 @@ private:
const Instance& instance; const Instance& instance;
Scheduler& scheduler; Scheduler& scheduler;
AmdGpu::Liverpool* liverpool; AmdGpu::Liverpool* liverpool;
DescriptorHeap desc_heap;
vk::UniquePipelineCache pipeline_cache; vk::UniquePipelineCache pipeline_cache;
vk::UniquePipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
Shader::Profile profile{}; Shader::Profile profile{};

View file

@ -3,8 +3,8 @@
#include <cstddef> #include <cstddef>
#include <optional> #include <optional>
#include <unordered_map>
#include "common/assert.h" #include "common/assert.h"
#include "common/scope_exit.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_resource_pool.h"
@ -103,88 +103,86 @@ vk::CommandBuffer CommandPool::Commit() {
return cmd_buffers[index]; return cmd_buffers[index];
} }
constexpr u32 DESCRIPTOR_SET_BATCH = 32; DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore_,
std::span<const vk::DescriptorPoolSize> pool_sizes_,
DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
std::span<const vk::DescriptorSetLayoutBinding> bindings,
u32 descriptor_heap_count_) u32 descriptor_heap_count_)
: ResourcePool{master_semaphore, DESCRIPTOR_SET_BATCH}, device{instance.GetDevice()}, : device{instance.GetDevice()}, master_semaphore{master_semaphore_},
descriptor_heap_count{descriptor_heap_count_} { descriptor_heap_count{descriptor_heap_count_}, pool_sizes{pool_sizes_} {
// Create descriptor set layout. CreateDescriptorPool();
const vk::DescriptorSetLayoutCreateInfo layout_ci = {
.bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(),
};
descriptor_set_layout = device.createDescriptorSetLayoutUnique(layout_ci);
if (instance.HasDebuggingToolAttached()) {
SetObjectName(device, *descriptor_set_layout, "DescriptorSetLayout");
}
// Build descriptor set pool counts.
std::unordered_map<vk::DescriptorType, u16> descriptor_type_counts;
for (const auto& binding : bindings) {
descriptor_type_counts[binding.descriptorType] += binding.descriptorCount;
}
for (const auto& [type, count] : descriptor_type_counts) {
auto& pool_size = pool_sizes.emplace_back();
pool_size.descriptorCount = count * descriptor_heap_count;
pool_size.type = type;
}
// Create descriptor pool
AppendDescriptorPool();
} }
DescriptorHeap::~DescriptorHeap() = default; DescriptorHeap::~DescriptorHeap() {
device.destroyDescriptorPool(curr_pool);
for (const auto [pool, tick] : pending_pools) {
master_semaphore->Wait(tick);
device.destroyDescriptorPool(pool);
}
}
void DescriptorHeap::Allocate(std::size_t begin, std::size_t end) { vk::DescriptorSet DescriptorHeap::Commit(vk::DescriptorSetLayout set_layout) {
ASSERT(end - begin == DESCRIPTOR_SET_BATCH); const u64 set_key = std::bit_cast<u64>(set_layout);
descriptor_sets.resize(end); const auto [it, _] = descriptor_sets.try_emplace(set_key);
hashes.resize(end);
std::array<vk::DescriptorSetLayout, DESCRIPTOR_SET_BATCH> layouts; // Check if allocated sets exist and pick one.
layouts.fill(*descriptor_set_layout); if (!it->second.empty()) {
const auto desc_set = it->second.back();
it.value().pop_back();
return desc_set;
}
DescSetBatch desc_sets(DescriptorSetBatch);
std::array<vk::DescriptorSetLayout, DescriptorSetBatch> layouts;
layouts.fill(set_layout);
u32 current_pool = 0;
vk::DescriptorSetAllocateInfo alloc_info = { vk::DescriptorSetAllocateInfo alloc_info = {
.descriptorPool = *pools[current_pool], .descriptorPool = curr_pool,
.descriptorSetCount = DESCRIPTOR_SET_BATCH, .descriptorSetCount = DescriptorSetBatch,
.pSetLayouts = layouts.data(), .pSetLayouts = layouts.data(),
}; };
// Attempt to allocate the descriptor set batch. If the pool has run out of space, use a new // Attempt to allocate the descriptor set batch.
// one. auto result = device.allocateDescriptorSets(&alloc_info, desc_sets.data());
while (true) { if (result == vk::Result::eSuccess) {
const auto result = const auto desc_set = desc_sets.back();
device.allocateDescriptorSets(&alloc_info, descriptor_sets.data() + begin); desc_sets.pop_back();
if (result == vk::Result::eSuccess) { it.value() = std::move(desc_sets);
break; return desc_set;
}
if (result == vk::Result::eErrorOutOfPoolMemory) {
current_pool++;
if (current_pool == pools.size()) {
LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!");
AppendDescriptorPool();
}
alloc_info.descriptorPool = *pools[current_pool];
}
} }
// The pool has run out. Record current tick and place it in pending list.
ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory,
"Unexpected error during descriptor set allocation {}", vk::to_string(result));
pending_pools.emplace_back(curr_pool, master_semaphore->CurrentTick());
if (const auto [pool, tick] = pending_pools.front(); master_semaphore->IsFree(tick)) {
curr_pool = pool;
pending_pools.pop_front();
device.resetDescriptorPool(curr_pool);
} else {
CreateDescriptorPool();
}
// Attempt to allocate again with fresh pool.
alloc_info.descriptorPool = curr_pool;
result = device.allocateDescriptorSets(&alloc_info, desc_sets.data());
ASSERT_MSG(result == vk::Result::eSuccess,
"Unexpected error during descriptor set allocation {}", vk::to_string(result));
// We've changed pool so also reset descriptor batch cache.
descriptor_sets.clear();
const auto desc_set = desc_sets.back();
desc_sets.pop_back();
descriptor_sets[set_key] = std::move(desc_sets);
return desc_set;
} }
vk::DescriptorSet DescriptorHeap::Commit() { void DescriptorHeap::CreateDescriptorPool() {
const std::size_t index = CommitResource();
return descriptor_sets[index];
}
void DescriptorHeap::AppendDescriptorPool() {
const vk::DescriptorPoolCreateInfo pool_info = { const vk::DescriptorPoolCreateInfo pool_info = {
.flags = vk::DescriptorPoolCreateFlagBits::eUpdateAfterBind, .flags = vk::DescriptorPoolCreateFlagBits::eUpdateAfterBind,
.maxSets = descriptor_heap_count, .maxSets = descriptor_heap_count,
.poolSizeCount = static_cast<u32>(pool_sizes.size()), .poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data(), .pPoolSizes = pool_sizes.data(),
}; };
auto& pool = pools.emplace_back(); curr_pool = device.createDescriptorPool(pool_info);
pool = device.createDescriptorPoolUnique(pool_info);
} }
} // namespace Vulkan } // namespace Vulkan

View file

@ -3,7 +3,9 @@
#pragma once #pragma once
#include <deque>
#include <vector> #include <vector>
#include <boost/container/static_vector.hpp>
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
#include "common/types.h" #include "common/types.h"
@ -62,32 +64,29 @@ private:
std::vector<vk::CommandBuffer> cmd_buffers; std::vector<vk::CommandBuffer> cmd_buffers;
}; };
class DescriptorHeap final : public ResourcePool { class DescriptorHeap final {
static constexpr u32 DescriptorSetBatch = 32;
public: public:
explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore, explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
std::span<const vk::DescriptorSetLayoutBinding> bindings, std::span<const vk::DescriptorPoolSize> pool_sizes,
u32 descriptor_heap_count = 1024); u32 descriptor_heap_count = 1024);
~DescriptorHeap() override; ~DescriptorHeap();
const vk::DescriptorSetLayout& Layout() const { vk::DescriptorSet Commit(vk::DescriptorSetLayout set_layout);
return *descriptor_set_layout;
}
void Allocate(std::size_t begin, std::size_t end) override;
vk::DescriptorSet Commit();
private: private:
void AppendDescriptorPool(); void CreateDescriptorPool();
private: private:
vk::Device device; vk::Device device;
vk::UniqueDescriptorSetLayout descriptor_set_layout; MasterSemaphore* master_semaphore;
u32 descriptor_heap_count; u32 descriptor_heap_count;
std::vector<vk::DescriptorPoolSize> pool_sizes; std::span<const vk::DescriptorPoolSize> pool_sizes;
std::vector<vk::UniqueDescriptorPool> pools; vk::DescriptorPool curr_pool;
std::vector<vk::DescriptorSet> descriptor_sets; std::deque<std::pair<vk::DescriptorPool, u64>> pending_pools;
std::vector<std::size_t> hashes; using DescSetBatch = boost::container::static_vector<vk::DescriptorSet, DescriptorSetBatch>;
tsl::robin_map<u64, DescSetBatch> descriptor_sets;
}; };
} // namespace Vulkan } // namespace Vulkan

View file

@ -73,7 +73,6 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
if (!info.IsBlockCoded() && !info.IsPacked()) { if (!info.IsBlockCoded() && !info.IsPacked()) {
usage |= vk::ImageUsageFlagBits::eColorAttachment; usage |= vk::ImageUsageFlagBits::eColorAttachment;
} }
// In cases where an image is created as a render/depth target and cleared with compute, // In cases where an image is created as a render/depth target and cleared with compute,
// we cannot predict whether it will be used as a storage image. A proper solution would // we cannot predict whether it will be used as a storage image. A proper solution would
// involve re-creating the resource with a new configuration and copying previous content // involve re-creating the resource with a new configuration and copying previous content

View file

@ -69,7 +69,12 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept
: is_storage{is_storage_} { : is_storage{is_storage_} {
type = ConvertImageViewType(image.GetType()); type = ConvertImageViewType(image.GetType());
format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); const auto dfmt = image.GetDataFmt();
auto nfmt = image.GetNumberFmt();
if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) {
nfmt = AmdGpu::NumberFormat::Unorm;
}
format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt);
range.base.level = image.base_level; range.base.level = image.base_level;
range.base.layer = image.base_array; range.base.layer = image.base_array;
range.extent.levels = image.last_level + 1; range.extent.levels = image.last_level + 1;
@ -143,7 +148,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
.aspectMask = aspect, .aspectMask = aspect,
.baseMipLevel = info.range.base.level, .baseMipLevel = info.range.base.level,
.levelCount = info.range.extent.levels - info.range.base.level, .levelCount = info.range.extent.levels - info.range.base.level,
.baseArrayLayer = info_.range.base.layer, .baseArrayLayer = info.range.base.layer,
.layerCount = info.range.extent.layers - info.range.base.layer, .layerCount = info.range.extent.layers - info.range.base.layer,
}, },
}; };