renderer: added support for instance step rates

This commit is contained in:
psucien 2024-07-06 17:01:43 +02:00
parent 986ed0662c
commit cfbe8b9e6d
9 changed files with 129 additions and 26 deletions

View file

@ -135,15 +135,33 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)};
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
if (param.num_components > 1) {
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
if (param.buffer_handle < 0) {
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
if (param.num_components > 1) {
const Id pointer{
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
} else {
return ctx.OpLoad(param.component_type, param.id);
}
} else {
return ctx.OpLoad(param.component_type, param.id);
const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value;
const auto step_rate = ctx.OpLoad(
ctx.U32[1],
ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.instance_step_rates, rate_idx));
const auto offset = ctx.OpIAdd(
ctx.U32[1],
ctx.OpIMul(
ctx.U32[1],
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
ctx.ConstU32(param.num_components)),
ctx.ConstU32(comp));
return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
}
}
switch (attr) {

View file

@ -171,17 +171,47 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
void EmitContext::DefineInputs(const Info& info) {
switch (stage) {
case Stage::Vertex:
case Stage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
// Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(instance_step_rates, "step_rates");
interfaces.push_back(instance_step_rates);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
const u32 rate_idx =
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
: 1;
// Note that we pass index rather than Id
input_params[input.binding] = {
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
};
} else {
Id id{DefineInput(type, input.binding)};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
Name(id, fmt::format("vs_instance_attr{}", input.binding));
} else {
Name(id, fmt::format("vs_in_attr{}", input.binding));
}
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
}
}
break;
}
case Stage::Fragment:
if (info.uses_group_quad) {
subgroup_local_invocation_id = DefineVariable(
@ -276,7 +306,10 @@ void EmitContext::DefineBuffers(const Info& info) {
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
const auto name =
fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
buffer.is_instance_data
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
sizeof(float) * CHAR_BIT)
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
Name(struct_type, name);
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");

View file

@ -165,6 +165,8 @@ public:
Id output_position{};
Id vertex_index{};
Id instance_id{};
Id instance_step_rates{};
Id base_vertex{};
Id frag_coord{};
Id front_facing{};
@ -214,6 +216,7 @@ public:
Id pointer_type;
Id component_type;
u32 num_components;
s32 buffer_handle{-1};
};
std::array<SpirvAttribute, 32> input_params{};
std::array<SpirvAttribute, 32> output_params{};

View file

@ -235,9 +235,21 @@ void Translator::EmitFetch(const GcnInst& inst) {
ir.SetVectorReg(dst_reg++, comp);
}
if (attrib.instance_data == 2 || attrib.instance_data == 3) {
LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}",
attrib.instance_data);
// In case of programmable step rates we need to fallback to instance data pulling in
// shader, so VBs should be bound as regular data buffers
s32 instance_buf_handle = -1;
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) {
info.buffers.push_back({
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.stride = buffer.GetStride(),
.num_records = buffer.num_records,
.used_types = IR::Type::F32,
.is_instance_data = true,
});
instance_buf_handle = s32(info.buffers.size() - 1);
}
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
@ -247,7 +259,8 @@ void Translator::EmitFetch(const GcnInst& inst) {
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.instance_step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data),
.instance_step_rate = step_rate,
.instance_data_buf = instance_buf_handle,
});
}
}

View file

@ -77,7 +77,8 @@ struct BufferResource {
u32 num_records;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
bool is_storage;
bool is_storage{false};
bool is_instance_data{false};
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
};
@ -116,6 +117,7 @@ struct Info {
u8 sgpr_base;
u8 dword_offset;
InstanceIdType instance_step_rate;
s32 instance_data_buf;
};
boost::container::static_vector<VsInput, 32> vs_inputs{};

View file

@ -887,7 +887,10 @@ struct Liverpool {
IndexBufferType index_buffer_type;
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
u32 enable_primitive_id;
INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1);
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1);
u32 vgt_instance_step_rate_0;
u32 vgt_instance_step_rate_1;
INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1);
PolygonOffset poly_offset;
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
AaConfig aa_config;
@ -1046,6 +1049,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);

View file

@ -30,12 +30,19 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
stages[i] = *infos[i];
}
BuildDescSetLayout();
const vk::PushConstantRange push_constants = {
.stageFlags = vk::ShaderStageFlagBits::eVertex,
.offset = 0,
.size = 2 * sizeof(u32),
};
const vk::DescriptorSetLayout set_layout = *desc_layout;
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1U,
.pSetLayouts = &set_layout,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constants,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
@ -43,6 +50,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
// Skip attribute binding as the data will be pulled by shader
continue;
}
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
@ -420,6 +433,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
// Calculate buffers memory overlaps
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
continue;
}
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
if (buffer.GetSize() == 0) {
continue;

View file

@ -67,20 +67,24 @@ public:
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
VideoCore::TextureCache& texture_cache) const;
[[nodiscard]] vk::Pipeline Handle() const noexcept {
vk::Pipeline Handle() const noexcept {
return *pipeline;
}
[[nodiscard]] bool IsEmbeddedVs() const noexcept {
vk::PipelineLayout GetLayout() const {
return *pipeline_layout;
}
bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[0] == EmbeddedVsHash;
}
[[nodiscard]] auto GetWriteMasks() const {
auto GetWriteMasks() const {
return key.write_masks;
}
[[nodiscard]] bool IsDepthEnabled() const {
bool IsDepthEnabled() const {
return key.depth.depth_enable.Value();
}

View file

@ -54,6 +54,13 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
UpdateDynamicState(*pipeline);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
const u32 step_rates[] = {
regs.vgt_instance_step_rate_0,
regs.vgt_instance_step_rate_1,
};
cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u,
sizeof(step_rates), &step_rates);
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {