mirror of https://github.com/shadps4-emu/shadPS4.git
synced 2025-02-18 11:30:11 +00:00

renderer_vulkan: Parse fetch shader per-pipeline (#1656)

* shader_recompiler: Read image format info directly from sharps instead of storing it in shader info.
* renderer_vulkan: Parse fetch shader per-pipeline
* Few minor fixes.
* shader_recompiler: Specialize on vertex attribute number types.
* shader_recompiler: Move GetDrawOffsets to fetch shader
This commit is contained in:
parent
74b091fd08
commit
920acb8d8b
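
The central change: the fetch shader is no longer parsed once at translation time into Info::vs_inputs. Instead, Info records only has_fetch_shader and fetch_shader_sgpr_base, and every consumer of the attribute layout (SPIR-V emission, pipeline construction, vertex-buffer binding) calls Gcn::ParseFetchShader(info) on demand. A minimal standalone sketch of the pointer recovery this relies on, using simplified stand-in types rather than the emulator's real headers:

    #include <array>
    #include <cstdint>
    #include <cstring>
    #include <optional>

    // Simplified stand-in for Shader::Info; the real struct lives in
    // shader_recompiler/info.h and carries far more state.
    struct MiniInfo {
        bool has_fetch_shader = false;
        uint32_t fetch_shader_sgpr_base = 0;
        std::array<uint32_t, 16> user_data{}; // user-data SGPRs captured at bind time
    };

    // Two consecutive user-data SGPRs hold the 64-bit host pointer to the fetch
    // shader code, so it is recovered with a memcpy, as ParseFetchShader now does.
    std::optional<const uint32_t*> ReadFetchShaderCode(const MiniInfo& info) {
        if (!info.has_fetch_shader) {
            return std::nullopt; // shader was translated without an EmitFetch
        }
        const uint32_t* code = nullptr;
        std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
        return code;
    }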
@@ -187,7 +187,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
 Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool has_mips) {
     const auto& texture = ctx.images[handle & 0xFFFF];
     const Id image = ctx.OpLoad(texture.image_type, texture.id);
-    const auto type = ctx.info.images[handle & 0xFFFF].type;
+    const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info);
+    const auto type = sharp.GetBoundType();
     const Id zero = ctx.u32_zero_value;
     const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }};
     const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage};
@@ -4,6 +4,7 @@
 #include "common/assert.h"
 #include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/ir/passes/srt.h"
 #include "video_core/amdgpu/types.h"
@@ -155,18 +156,12 @@ void EmitContext::DefineInterfaces() {
 }
 
 const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
         return ctx.F32;
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
         return ctx.S32;
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
         return ctx.U32;
     default:
         break;
@@ -176,18 +171,12 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
 
 EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
                                                           u32 num_components, bool output) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
         return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
         return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
         return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
     default:
         break;
@@ -280,33 +269,42 @@ void EmitContext::DefineInputs() {
         base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
         instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
 
-        for (const auto& input : info.vs_inputs) {
-            ASSERT(input.binding < IR::NumParams);
-            const Id type{GetAttributeType(*this, input.fmt)[4]};
-            if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
-                input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
+        const auto fetch_shader = Gcn::ParseFetchShader(info);
+        if (!fetch_shader) {
+            break;
+        }
+        for (const auto& attrib : fetch_shader->attributes) {
+            ASSERT(attrib.semantic < IR::NumParams);
+            const auto sharp = attrib.GetSharp(info);
+            const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
+            if (attrib.UsesStepRates()) {
                 const u32 rate_idx =
-                    input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
-                                                                                             : 1;
+                    attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
+                                                                                                : 1;
+                const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
+                const auto buffer =
+                    std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
+                        return buffer.instance_attrib == attrib.semantic;
+                    });
                 // Note that we pass index rather than Id
-                input_params[input.binding] = SpirvAttribute{
+                input_params[attrib.semantic] = SpirvAttribute{
                     .id = rate_idx,
                     .pointer_type = input_u32,
                     .component_type = U32[1],
-                    .num_components = input.num_components,
+                    .num_components = std::min<u16>(attrib.num_elements, num_components),
                     .is_integer = true,
                     .is_loaded = false,
-                    .buffer_handle = input.instance_data_buf,
+                    .buffer_handle = int(buffer - info.buffers.begin()),
                 };
             } else {
-                Id id{DefineInput(type, input.binding)};
-                if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
-                    Name(id, fmt::format("vs_instance_attr{}", input.binding));
+                Id id{DefineInput(type, attrib.semantic)};
+                if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
+                    Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
                 } else {
-                    Name(id, fmt::format("vs_in_attr{}", input.binding));
+                    Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
                 }
-                input_params[input.binding] = GetAttributeInfo(input.fmt, id, 4, false);
+                input_params[attrib.semantic] =
+                    GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
                 interfaces.push_back(id);
             }
         }
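With vs_inputs gone, the instance-data buffer handle is no longer stored on the attribute; DefineInputs recovers it by searching info.buffers for the entry whose instance_attrib matches the attribute's semantic. A hedged sketch of that lookup over a plain vector (simplified fields, and unlike the real code it handles the not-found case explicitly):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct MiniBuffer {
        uint8_t instance_attrib = 0; // semantic of the attribute this buffer feeds
    };

    // Mirrors the std::ranges::find_if + iterator-arithmetic pattern in the diff;
    // returns -1 when no buffer feeds the given semantic.
    int FindInstanceBufferHandle(const std::vector<MiniBuffer>& buffers, uint8_t semantic) {
        const auto it = std::ranges::find_if(buffers, [semantic](const MiniBuffer& b) {
            return b.instance_attrib == semantic;
        });
        return it == buffers.end() ? -1 : static_cast<int>(it - buffers.begin());
    }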
@@ -553,9 +551,10 @@ void EmitContext::DefineBuffers() {
 
 void EmitContext::DefineTextureBuffers() {
     for (const auto& desc : info.texture_buffers) {
-        const bool is_integer =
-            desc.nfmt == AmdGpu::NumberFormat::Uint || desc.nfmt == AmdGpu::NumberFormat::Sint;
-        const VectorIds& sampled_type{GetAttributeType(*this, desc.nfmt)};
+        const auto sharp = desc.GetSharp(info);
+        const auto nfmt = sharp.GetNumberFmt();
+        const bool is_integer = AmdGpu::IsInteger(nfmt);
+        const VectorIds& sampled_type{GetAttributeType(*this, nfmt)};
         const u32 sampled = desc.is_written ? 2 : 1;
         const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false,
                                       sampled, spv::ImageFormat::Unknown)};
@@ -650,10 +649,11 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
 }
 
 Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
-    const auto image = ctx.info.ReadUdSharp<AmdGpu::Image>(desc.sharp_idx);
+    const auto image = desc.GetSharp(ctx.info);
     const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
+    const auto type = image.GetBoundType();
     const u32 sampled = desc.is_storage ? 2 : 1;
-    switch (desc.type) {
+    switch (type) {
     case AmdGpu::ImageType::Color1D:
         return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
     case AmdGpu::ImageType::Color1DArray:
@@ -672,14 +672,15 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
     default:
         break;
     }
-    throw InvalidArgument("Invalid texture type {}", desc.type);
+    throw InvalidArgument("Invalid texture type {}", type);
 }
 
 void EmitContext::DefineImagesAndSamplers() {
     for (const auto& image_desc : info.images) {
-        const bool is_integer = image_desc.nfmt == AmdGpu::NumberFormat::Uint ||
-                                image_desc.nfmt == AmdGpu::NumberFormat::Sint;
-        const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt);
+        const auto sharp = image_desc.GetSharp(info);
+        const auto nfmt = sharp.GetNumberFmt();
+        const bool is_integer = AmdGpu::IsInteger(nfmt);
+        const VectorIds& data_types = GetAttributeType(*this, nfmt);
         const Id sampled_type = data_types[1];
         const Id image_type{ImageType(*this, image_desc, sampled_type)};
         const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
@@ -34,8 +34,14 @@ namespace Shader::Gcn {
  * We take the reverse way, extract the original input semantics from these instructions.
  **/
 
-FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
-    FetchShaderData data{};
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
+    if (!info.has_fetch_shader) {
+        return std::nullopt;
+    }
+    const u32* code;
+    std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
+
+    FetchShaderData data{.code = code};
     GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
     GcnDecodeContext decoder;
@@ -49,7 +55,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
     u32 semantic_index = 0;
     while (!code_slice.atEnd()) {
         const auto inst = decoder.decodeInstruction(code_slice);
-        *out_size += inst.length;
+        data.size += inst.length;
 
         if (inst.opcode == Opcode::S_SETPC_B64) {
             break;
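ParseFetchShader now returns std::optional<FetchShaderData>, and the code size is accumulated into the result (data.size += inst.length) instead of through an out-parameter. A hypothetical caller shape under the new signature, with simplified stand-in types:

    #include <cstdint>
    #include <optional>

    struct MiniFetchData {
        const uint32_t* code = nullptr;
        uint32_t size = 0; // bytes decoded up to and including S_SETPC_B64
    };

    // Absence of a fetch shader is now an explicit empty optional rather than an
    // unchecked precondition on the caller.
    uint32_t FetchCodeSize(const std::optional<MiniFetchData>& parsed) {
        return parsed ? parsed->size : 0;
    }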
@@ -3,26 +3,80 @@
 
 #pragma once
 
+#include <ranges>
 #include <vector>
 #include "common/types.h"
+#include "shader_recompiler/info.h"
 
 namespace Shader::Gcn {
 
 struct VertexAttribute {
+    enum InstanceIdType : u8 {
+        None = 0,
+        OverStepRate0 = 1,
+        OverStepRate1 = 2,
+        Plain = 3,
+    };
+
     u8 semantic;      ///< Semantic index of the attribute
     u8 dest_vgpr;     ///< Destination VGPR to load first component.
     u8 num_elements;  ///< Number of components to load
     u8 sgpr_base;     ///< SGPR that contains the pointer to the list of vertex V#
     u8 dword_offset;  ///< The dword offset of the V# that describes this attribute.
     u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
 
+    [[nodiscard]] InstanceIdType GetStepRate() const {
+        return static_cast<InstanceIdType>(instance_data);
+    }
+
+    [[nodiscard]] bool UsesStepRates() const {
+        const auto step_rate = GetStepRate();
+        return step_rate == OverStepRate0 || step_rate == OverStepRate1;
+    }
+
+    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
+        return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
+    }
+
+    bool operator==(const VertexAttribute& other) const {
+        return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
+               num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
+               dword_offset == other.dword_offset && instance_data == other.instance_data;
+    }
 };
 
 struct FetchShaderData {
+    const u32* code;
+    u32 size = 0;
     std::vector<VertexAttribute> attributes;
     s8 vertex_offset_sgpr = -1;   ///< SGPR of vertex offset from VADDR
     s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
 
+    [[nodiscard]] bool UsesStepRates() const {
+        return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
+                   return attribute.UsesStepRates();
+               }) != attributes.end();
+    }
+
+    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs,
+                                                     const Info& info) const {
+        u32 vertex_offset = regs.index_offset;
+        u32 instance_offset = 0;
+        if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
+            vertex_offset = info.user_data[vertex_offset_sgpr];
+        }
+        if (instance_offset_sgpr != -1) {
+            instance_offset = info.user_data[instance_offset_sgpr];
+        }
+        return {vertex_offset, instance_offset};
+    }
+
+    bool operator==(const FetchShaderData& other) const {
+        return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
+               instance_offset_sgpr == other.instance_offset_sgpr;
+    }
 };
 
-FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);
 
 } // namespace Shader::Gcn
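GetDrawOffsets moved from Info onto FetchShaderData because the offset SGPR indices are discovered while parsing the fetch shader; the Info is still needed to read the live SGPR values. A standalone sketch of the same logic with simplified stand-ins for Liverpool::Regs and Shader::Info:

    #include <array>
    #include <cstdint>
    #include <utility>

    struct MiniRegs { uint32_t index_offset = 0; };            // stand-in for Liverpool::Regs
    struct MiniInfo { std::array<uint32_t, 16> user_data{}; }; // stand-in for Shader::Info

    struct MiniFetchShaderData {
        int8_t vertex_offset_sgpr = -1;   // SGPR of vertex offset from VADDR
        int8_t instance_offset_sgpr = -1; // SGPR of instance offset from VADDR

        // Same logic as FetchShaderData::GetDrawOffsets: prefer the register-
        // provided index offset, else fall back to the value held in the SGPR.
        std::pair<uint32_t, uint32_t> GetDrawOffsets(const MiniRegs& regs,
                                                     const MiniInfo& info) const {
            uint32_t vertex_offset = regs.index_offset;
            uint32_t instance_offset = 0;
            if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
                vertex_offset = info.user_data[vertex_offset_sgpr];
            }
            if (instance_offset_sgpr != -1) {
                instance_offset = info.user_data[instance_offset_sgpr];
            }
            return {vertex_offset, instance_offset};
        }
    };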
@@ -368,13 +368,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
 
 void Translator::EmitFetch(const GcnInst& inst) {
     // Read the pointer to the fetch shader assembly.
-    const u32 sgpr_base = inst.src[0].code;
-    const u32* code;
-    std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
+    info.has_fetch_shader = true;
+    info.fetch_shader_sgpr_base = inst.src[0].code;
 
-    // Parse the assembly to generate a list of attributes.
-    u32 fetch_size{};
-    const auto fetch_data = ParseFetchShader(code, &fetch_size);
+    const auto fetch_data = ParseFetchShader(info);
+    ASSERT(fetch_data.has_value());
 
     if (Config::dumpShaders()) {
         using namespace Common::FS;
@@ -384,13 +382,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
         }
         const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
         const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
-        file.WriteRaw<u8>(code, fetch_size);
+        file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
     }
 
-    info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr;
-    info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
-
-    for (const auto& attrib : fetch_data.attributes) {
+    for (const auto& attrib : fetch_data->attributes) {
         const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
         IR::VectorReg dst_reg{attrib.dest_vgpr};
@@ -420,29 +415,14 @@ void Translator::EmitFetch(const GcnInst& inst) {
 
         // In case of programmable step rates we need to fallback to instance data pulling in
         // shader, so VBs should be bound as regular data buffers
-        s32 instance_buf_handle = -1;
-        const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
-        if (step_rate == Info::VsInput::OverStepRate0 ||
-            step_rate == Info::VsInput::OverStepRate1) {
+        if (attrib.UsesStepRates()) {
             info.buffers.push_back({
                 .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
                 .used_types = IR::Type::F32,
                 .is_instance_data = true,
+                .instance_attrib = attrib.semantic,
             });
-            instance_buf_handle = s32(info.buffers.size() - 1);
-            info.uses_step_rates = true;
         }
-
-        const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
-        info.vs_inputs.push_back({
-            .fmt = buffer.GetNumberFmt(),
-            .binding = attrib.semantic,
-            .num_components = std::min<u16>(attrib.num_elements, num_components),
-            .sgpr_base = attrib.sgpr_base,
-            .dword_offset = attrib.dword_offset,
-            .instance_step_rate = step_rate,
-            .instance_data_buf = instance_buf_handle,
-        });
     }
 }
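The deciding predicate for this fallback is whether an attribute uses one of the programmable step rates; such attributes are never expressed as Vulkan vertex bindings and are instead pulled from a regular data buffer inside the shader. Reduced to its essence:

    #include <cstdint>

    enum class InstanceIdType : uint8_t { None, OverStepRate0, OverStepRate1, Plain };

    // Matches VertexAttribute::UsesStepRates: only the two programmable step
    // rates force shader-side data pulling; None/Plain map to normal bindings.
    constexpr bool NeedsShaderPull(InstanceIdType rate) {
        return rate == InstanceIdType::OverStepRate0 || rate == InstanceIdType::OverStepRate1;
    }

    static_assert(NeedsShaderPull(InstanceIdType::OverStepRate1));
    static_assert(!NeedsShaderPull(InstanceIdType::Plain));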
@@ -45,6 +45,7 @@ struct BufferResource {
     AmdGpu::Buffer inline_cbuf;
     bool is_gds_buffer{};
     bool is_instance_data{};
+    u8 instance_attrib{};
     bool is_written{};
 
     bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
@@ -57,7 +58,6 @@ using BufferResourceList = boost::container::small_vector<BufferResource, 16>;
 
 struct TextureBufferResource {
     u32 sharp_idx;
-    AmdGpu::NumberFormat nfmt;
     bool is_written{};
 
     constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
@@ -66,8 +66,6 @@ using TextureBufferResourceList = boost::container::small_vector<TextureBufferRe
 
 struct ImageResource {
     u32 sharp_idx;
-    AmdGpu::ImageType type;
-    AmdGpu::NumberFormat nfmt;
     bool is_storage{};
     bool is_depth{};
     bool is_atomic{};
@@ -115,24 +113,6 @@ static_assert(sizeof(PushData) <= 128,
  * Contains general information generated by the shader recompiler for an input program.
  */
 struct Info {
-    struct VsInput {
-        enum InstanceIdType : u8 {
-            None = 0,
-            OverStepRate0 = 1,
-            OverStepRate1 = 2,
-            Plain = 3,
-        };
-
-        AmdGpu::NumberFormat fmt;
-        u16 binding;
-        u16 num_components;
-        u8 sgpr_base;
-        u8 dword_offset;
-        InstanceIdType instance_step_rate;
-        s32 instance_data_buf;
-    };
-    boost::container::static_vector<VsInput, 32> vs_inputs{};
-
     struct AttributeFlags {
         bool Get(IR::Attribute attrib, u32 comp = 0) const {
             return flags[Index(attrib)] & (1 << comp);
@@ -179,9 +159,6 @@ struct Info {
 
     CopyShaderData gs_copy_data;
 
-    s8 vertex_offset_sgpr = -1;
-    s8 instance_offset_sgpr = -1;
-
     BufferResourceList buffers;
     TextureBufferResourceList texture_buffers;
     ImageResourceList images;
@@ -208,10 +185,11 @@ struct Info {
     bool uses_shared{};
     bool uses_fp16{};
     bool uses_fp64{};
-    bool uses_step_rates{};
     bool translation_failed{}; // indicates that shader has unsupported instructions
     bool has_readconst{};
     u8 mrt_mask{0u};
+    bool has_fetch_shader{false};
+    u32 fetch_shader_sgpr_base{0u};
 
     explicit Info(Stage stage_, ShaderParams params)
         : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||||
bnd.user_data += ud_mask.NumRegs();
|
bnd.user_data += ud_mask.NumRegs();
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs) const {
|
|
||||||
u32 vertex_offset = regs.index_offset;
|
|
||||||
u32 instance_offset = 0;
|
|
||||||
if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
|
|
||||||
vertex_offset = user_data[vertex_offset_sgpr];
|
|
||||||
}
|
|
||||||
if (instance_offset_sgpr != -1) {
|
|
||||||
instance_offset = user_data[instance_offset_sgpr];
|
|
||||||
}
|
|
||||||
return {vertex_offset, instance_offset};
|
|
||||||
}
|
|
||||||
|
|
||||||
void RefreshFlatBuf() {
|
void RefreshFlatBuf() {
|
||||||
flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
|
flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
|
||||||
ASSERT(user_data.size() <= NumUserDataRegs);
|
ASSERT(user_data.size() <= NumUserDataRegs);
|
||||||
|
@@ -284,7 +250,12 @@ constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const
 }
 
 constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
-    return info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
+    const auto image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
+    if (!image.Valid()) {
+        // Fall back to null image if unbound.
+        return AmdGpu::Image::Null();
+    }
+    return image;
 }
 
 constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
@@ -381,7 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
     const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
     const s32 binding = descriptors.Add(TextureBufferResource{
         .sharp_idx = sharp,
-        .nfmt = buffer.GetNumberFmt(),
         .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
     });
@@ -660,11 +659,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
         }
     }
 
-    const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
     u32 image_binding = descriptors.Add(ImageResource{
         .sharp_idx = tsharp,
-        .type = type,
-        .nfmt = image.GetNumberFmt(),
         .is_storage = is_storage,
         .is_depth = bool(inst_info.is_depth),
         .is_atomic = IsImageAtomicInstruction(inst),
@@ -22,6 +22,7 @@ struct Profile {
     bool support_fp32_denorm_preserve{};
     bool support_fp32_denorm_flush{};
     bool support_explicit_workgroup_layout{};
+    bool support_legacy_vertex_attributes{};
     bool has_broken_spirv_clamp{};
     bool lower_left_origin_mode{};
     bool needs_manual_interpolation{};
@@ -6,12 +6,19 @@
 #include <bitset>
 
 #include "common/types.h"
+#include "frontend/fetch_shader.h"
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/info.h"
 #include "shader_recompiler/ir/passes/srt.h"
 
 namespace Shader {
 
+struct VsAttribSpecialization {
+    AmdGpu::NumberClass num_class{};
+
+    auto operator<=>(const VsAttribSpecialization&) const = default;
+};
+
 struct BufferSpecialization {
     u16 stride : 14;
     u16 is_storage : 1;
@@ -50,6 +57,8 @@ struct StageSpecialization {
 
     const Shader::Info* info;
     RuntimeInfo runtime_info;
+    Gcn::FetchShaderData fetch_shader_data{};
+    boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
     std::bitset<MaxStageResources> bitset{};
    boost::container::small_vector<BufferSpecialization, 16> buffers;
    boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
@@ -57,9 +66,19 @@ struct StageSpecialization {
     boost::container::small_vector<FMaskSpecialization, 8> fmasks;
     Backend::Bindings start{};
 
-    explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
-                                 Backend::Bindings start_)
+    explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
+                                 const Profile& profile_, Backend::Bindings start_)
         : info{&info_}, runtime_info{runtime_info_}, start{start_} {
+        if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) {
+            fetch_shader_data = *fetch_shader;
+            if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
+                // Specialize shader on VS input number types to follow spec.
+                ForEachSharp(vs_attribs, fetch_shader_data.attributes,
+                             [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
+                                 spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
+                             });
+            }
+        }
         u32 binding{};
         if (info->has_readconst) {
             binding++;
|
||||||
});
|
});
|
||||||
ForEachSharp(binding, images, info->images,
|
ForEachSharp(binding, images, info->images,
|
||||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||||
spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray
|
spec.type = sharp.GetBoundType();
|
||||||
: sharp.GetType();
|
|
||||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||||
});
|
});
|
||||||
ForEachSharp(binding, fmasks, info->fmasks,
|
ForEachSharp(binding, fmasks, info->fmasks,
|
||||||
|
@ -86,6 +104,17 @@ struct StageSpecialization {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
|
||||||
|
for (const auto& desc : desc_list) {
|
||||||
|
auto& spec = spec_list.emplace_back();
|
||||||
|
const auto sharp = desc.GetSharp(*info);
|
||||||
|
if (!sharp) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
func(spec, desc, sharp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
||||||
for (const auto& desc : desc_list) {
|
for (const auto& desc : desc_list) {
|
||||||
auto& spec = spec_list.emplace_back();
|
auto& spec = spec_list.emplace_back();
|
||||||
|
@ -106,6 +135,14 @@ struct StageSpecialization {
|
||||||
if (runtime_info != other.runtime_info) {
|
if (runtime_info != other.runtime_info) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (fetch_shader_data != other.fetch_shader_data) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (u32 i = 0; i < vs_attribs.size(); i++) {
|
||||||
|
if (vs_attribs[i] != other.vs_attribs[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
if (info->has_readconst != other.info->has_readconst) {
|
if (info->has_readconst != other.info->has_readconst) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
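Because the fetch shader is now parsed per pipeline, the specialization key must distinguish two shaders with identical GCN bytecode but different vertex layouts: both the parsed FetchShaderData and (without VK_EXT_legacy_vertex_attributes) each attribute's number class participate in operator==. A reduced sketch of the element-wise comparison:

    #include <cstddef>
    #include <vector>

    enum class NumberClass { Float, Sint, Uint };

    struct VsAttribSpecialization {
        NumberClass num_class{NumberClass::Float};
        bool operator==(const VsAttribSpecialization&) const = default;
    };

    // Mirrors the added equality checks: a cached module permutation is only
    // reused when every attribute's number class matches.
    bool SameVsAttribs(const std::vector<VsAttribSpecialization>& a,
                       const std::vector<VsAttribSpecialization>& b) {
        if (a.size() != b.size()) {
            return false;
        }
        for (std::size_t i = 0; i < a.size(); i++) {
            if (!(a[i] == b[i])) {
                return false;
            }
        }
        return true;
    }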
@@ -10,7 +10,24 @@
 
 namespace AmdGpu {
 
+enum NumberClass {
+    Float,
+    Sint,
+    Uint,
+};
+
+[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
+    switch (nfmt) {
+    case NumberFormat::Sint:
+        return Sint;
+    case NumberFormat::Uint:
+        return Uint;
+    default:
+        return Float;
+    }
+}
+
-[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
+[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) {
     return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
 }
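GetNumberClass collapses the many AmdGpu number formats into the three classes that matter for attribute typing: everything that resolves to a float load in the shader (unorm, snorm, scaled, sRGB, plain float) is Float, leaving only true integer formats distinct. A standalone check with a simplified enumerator list (the real AmdGpu::NumberFormat has more values, e.g. SnormNz):

    enum class NumberFormat { Unorm, Snorm, Uscaled, Sscaled, Uint, Sint, Srgb, Float };
    enum class NumberClass { Float, Sint, Uint };

    constexpr NumberClass GetNumberClass(NumberFormat nfmt) {
        switch (nfmt) {
        case NumberFormat::Sint:
            return NumberClass::Sint;
        case NumberFormat::Uint:
            return NumberClass::Uint;
        default:
            return NumberClass::Float; // unorm/snorm/scaled/srgb all load as float
        }
    }

    static_assert(GetNumberClass(NumberFormat::Unorm) == NumberClass::Float);
    static_assert(GetNumberClass(NumberFormat::Sscaled) == NumberClass::Float);
    static_assert(GetNumberClass(NumberFormat::Uint) == NumberClass::Uint);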
@@ -304,6 +304,10 @@ struct Image {
         const auto viewed_slice = last_array - base_array + 1;
         return GetType() == ImageType::Cube && viewed_slice < 6;
     }
+
+    ImageType GetBoundType() const noexcept {
+        return IsPartialCubemap() ? ImageType::Color2DArray : GetType();
+    }
 };
 static_assert(sizeof(Image) == 32); // 256bits
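GetBoundType centralizes the partial-cubemap workaround that was previously duplicated at call sites: a cube image bound with fewer than six slices cannot legally be viewed as a cube, so it is demoted to a 2D array. The rule in isolation:

    #include <cstdint>

    enum class ImageType { Color2DArray, Cube /* other types elided */ };

    // Stand-in for AmdGpu::Image::GetBoundType: IsPartialCubemap() is true for a
    // cube whose viewed slice count is below 6; such images are sampled as 2D
    // arrays instead.
    constexpr ImageType GetBoundType(ImageType type, uint32_t viewed_slices) {
        const bool partial_cubemap = type == ImageType::Cube && viewed_slices < 6;
        return partial_cubemap ? ImageType::Color2DArray : type;
    }

    static_assert(GetBoundType(ImageType::Cube, 4) == ImageType::Color2DArray);
    static_assert(GetBoundType(ImageType::Cube, 6) == ImageType::Cube);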
@@ -5,6 +5,7 @@
 #include "common/alignment.h"
 #include "common/scope_exit.h"
 #include "common/types.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/info.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/buffer_cache/buffer_cache.h"
@@ -107,7 +108,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
     }
 }
 
-bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
+bool BufferCache::BindVertexBuffers(
+    const Shader::Info& vs_info, const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) {
     boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
     boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
     SCOPE_EXIT {
@@ -126,7 +128,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
         }
     };
 
-    if (vs_info.vs_inputs.empty()) {
+    if (!fetch_shader || fetch_shader->attributes.empty()) {
         return false;
     }
@@ -150,30 +152,29 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
     // Calculate buffers memory overlaps
     bool has_step_rate = false;
     boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{};
-    for (const auto& input : vs_info.vs_inputs) {
-        if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
-            input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
+    for (const auto& attrib : fetch_shader->attributes) {
+        if (attrib.UsesStepRates()) {
             has_step_rate = true;
             continue;
         }
 
-        const auto& buffer = vs_info.ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+        const auto& buffer = attrib.GetSharp(vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }
         guest_buffers.emplace_back(buffer);
         ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
         attributes.push_back({
-            .location = input.binding,
-            .binding = input.binding,
+            .location = attrib.semantic,
+            .binding = attrib.semantic,
             .format =
                 Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
             .offset = 0,
         });
         bindings.push_back({
-            .binding = input.binding,
+            .binding = attrib.semantic,
             .stride = buffer.GetStride(),
-            .inputRate = input.instance_step_rate == Shader::Info::VsInput::None
+            .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
                              ? vk::VertexInputRate::eVertex
                              : vk::VertexInputRate::eInstance,
             .divisor = 1,
@@ -20,8 +20,11 @@ struct Liverpool;
 }
 
 namespace Shader {
-struct Info;
+namespace Gcn {
+struct FetchShaderData;
 }
+struct Info;
+} // namespace Shader
 
 namespace VideoCore {
@@ -76,7 +79,8 @@ public:
     void InvalidateMemory(VAddr device_addr, u64 size);
 
     /// Binds host vertex buffers for the current draw.
-    bool BindVertexBuffers(const Shader::Info& vs_info);
+    bool BindVertexBuffers(const Shader::Info& vs_info,
+                           const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);
 
     /// Bind host index buffer for the current draw.
     u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <algorithm>
+#include <utility>
 #include <boost/container/small_vector.hpp>
 #include <boost/container/static_vector.hpp>
 
@@ -10,6 +11,8 @@
 #include "video_core/amdgpu/resource.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/texture_cache/texture_cache.h"
@@ -20,8 +23,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
                                    DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
                                    vk::PipelineCache pipeline_cache,
                                    std::span<const Shader::Info*, MaxShaderStages> infos,
+                                   std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
                                    std::span<const vk::ShaderModule> modules)
-    : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} {
+    : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_},
+      fetch_shader{std::move(fetch_shader_)} {
     const vk::Device device = instance.GetDevice();
     std::ranges::copy(infos, stages.begin());
     BuildDescSetLayout();
@@ -46,32 +51,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
 
     boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
     boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
-    if (!instance.IsVertexInputDynamicState()) {
-        const auto& vs_info = stages[u32(Shader::Stage::Vertex)];
-        for (const auto& input : vs_info->vs_inputs) {
-            if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
-                input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
+    if (fetch_shader && !instance.IsVertexInputDynamicState()) {
+        const auto& vs_info = GetStage(Shader::Stage::Vertex);
+        for (const auto& attrib : fetch_shader->attributes) {
+            if (attrib.UsesStepRates()) {
                 // Skip attribute binding as the data will be pulled by shader
                 continue;
             }
 
-            const auto buffer =
-                vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+            const auto buffer = attrib.GetSharp(vs_info);
             if (buffer.GetSize() == 0) {
                 continue;
             }
             vertex_attributes.push_back({
-                .location = input.binding,
-                .binding = input.binding,
+                .location = attrib.semantic,
+                .binding = attrib.semantic,
                 .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
                 .offset = 0,
             });
             vertex_bindings.push_back({
-                .binding = input.binding,
+                .binding = attrib.semantic,
                 .stride = buffer.GetStride(),
-                .inputRate = input.instance_step_rate == Shader::Info::VsInput::None
-                                 ? vk::VertexInputRate::eVertex
-                                 : vk::VertexInputRate::eInstance,
+                .inputRate =
+                    attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
+                        ? vk::VertexInputRate::eVertex
+                        : vk::VertexInputRate::eInstance,
             });
         }
     }
@@ -4,6 +4,7 @@
 #include <xxhash.h>
 
 #include "common/types.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"
 #include "video_core/renderer_vulkan/vk_common.h"
 #include "video_core/renderer_vulkan/vk_pipeline_common.h"
@@ -59,9 +60,14 @@ public:
     GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
                      const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
                      std::span<const Shader::Info*, MaxShaderStages> stages,
+                     std::optional<const Shader::Gcn::FetchShaderData> fetch_shader,
                      std::span<const vk::ShaderModule> modules);
     ~GraphicsPipeline();
 
+    const std::optional<const Shader::Gcn::FetchShaderData>& GetFetchShader() const noexcept {
+        return fetch_shader;
+    }
+
     bool IsEmbeddedVs() const noexcept {
         static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
         return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
@@ -94,6 +100,7 @@ private:
 
 private:
     GraphicsPipelineKey key;
+    std::optional<const Shader::Gcn::FetchShaderData> fetch_shader{};
 };
 
 } // namespace Vulkan
@@ -265,6 +265,7 @@ bool Instance::CreateDevice() {
     const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
     list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
     maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
+    legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
 
     // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
     // with extensions.
@@ -403,6 +404,9 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR{
             .fragmentShaderBarycentric = true,
         },
+        vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
+            .legacyVertexAttributes = true,
+        },
 #ifdef __APPLE__
         feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
 #endif
@@ -445,6 +449,9 @@ bool Instance::CreateDevice() {
     if (!fragment_shader_barycentric) {
         device_chain.unlink<vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>();
     }
+    if (!legacy_vertex_attributes) {
+        device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
+    }
 
     auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
     if (device_result != vk::Result::eSuccess) {
@@ -148,10 +148,16 @@ public:
         return fragment_shader_barycentric;
     }
 
+    /// Returns true when VK_EXT_primitive_topology_list_restart is supported.
     bool IsListRestartSupported() const {
         return list_restart;
     }
 
+    /// Returns true when VK_EXT_legacy_vertex_attributes is supported.
+    bool IsLegacyVertexAttributesSupported() const {
+        return legacy_vertex_attributes;
+    }
+
     /// Returns true when geometry shaders are supported by the device
     bool IsGeometryStageSupported() const {
         return features.geometryShader;
@@ -320,6 +326,7 @@ private:
     bool null_descriptor{};
     bool maintenance5{};
     bool list_restart{};
+    bool legacy_vertex_attributes{};
     u64 min_imported_host_pointer_alignment{};
     u32 subgroup_size{};
     bool tooling_info{};
@@ -169,6 +169,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
         .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
         .support_explicit_workgroup_layout = true,
+        .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
         .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
                                       instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
     };
@@ -187,7 +188,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
     const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
     if (is_new) {
         it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key,
-                                                   *pipeline_cache, infos, modules);
+                                                   *pipeline_cache, infos, fetch_shader, modules);
     }
     return it->second;
 }
@@ -304,8 +305,12 @@ bool PipelineCache::RefreshGraphicsKey() {
         }
 
         auto params = Liverpool::GetParams(*pgm);
-        std::tie(infos[stage_out_idx], modules[stage_out_idx], key.stage_hashes[stage_out_idx]) =
-            GetProgram(stage_in, params, binding);
+        std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
+        std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
+                 key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
+        if (fetch_shader_) {
+            fetch_shader = fetch_shader_;
+        }
         return true;
     };
@@ -341,16 +346,14 @@ bool PipelineCache::RefreshGraphicsKey() {
         }
     }
 
-    const auto* vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
-    if (vs_info && !instance.IsVertexInputDynamicState()) {
+    const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
+    if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
         u32 vertex_binding = 0;
-        for (const auto& input : vs_info->vs_inputs) {
-            if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
-                input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
+        for (const auto& attrib : fetch_shader->attributes) {
+            if (attrib.UsesStepRates()) {
                 continue;
             }
-            const auto& buffer =
-                vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+            const auto& buffer = attrib.GetSharp(*vs_info);
             if (buffer.GetSize() == 0) {
                 continue;
             }
@@ -394,7 +397,7 @@ bool PipelineCache::RefreshComputeKey() {
     Shader::Backend::Bindings binding{};
     const auto* cs_pgm = &liverpool->regs.cs_program;
     const auto cs_params = Liverpool::GetParams(*cs_pgm);
-    std::tie(infos[0], modules[0], compute_key) =
+    std::tie(infos[0], modules[0], fetch_shader, compute_key) =
         GetProgram(Shader::Stage::Compute, cs_params, binding);
     return true;
 }
@@ -425,24 +428,26 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
     return module;
 }
 
-std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram(
-    Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding) {
+std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
+PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
+                          Shader::Backend::Bindings& binding) {
     const auto runtime_info = BuildRuntimeInfo(stage);
     auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
     if (new_program) {
         Program* program = program_pool.Create(stage, params);
         auto start = binding;
         const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
-        const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
+        const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
         program->AddPermut(module, std::move(spec));
         it_pgm.value() = program;
-        return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
+        return std::make_tuple(&program->info, module, spec.fetch_shader_data,
+                               HashCombine(params.hash, 0));
     }
 
     Program* program = it_pgm->second;
     auto& info = program->info;
     info.RefreshFlatBuf();
-    const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
+    const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
     size_t perm_idx = program->modules.size();
     vk::ShaderModule module{};
@@ -456,7 +461,8 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
         module = it->module;
         perm_idx = std::distance(program->modules.begin(), it);
     }
-    return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx));
+    return std::make_tuple(&info, module, spec.fetch_shader_data,
+                           HashCombine(params.hash, perm_idx));
 }
 
 void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
@@ -47,8 +47,10 @@ public:
 
     const ComputePipeline* GetComputePipeline();
 
-    std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(
-        Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding);
+    std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>,
+               u64>
+    GetProgram(Shader::Stage stage, Shader::ShaderParams params,
+               Shader::Backend::Bindings& binding);
 
 private:
     bool RefreshGraphicsKey();
@@ -80,6 +82,7 @@ private:
     tsl::robin_map<GraphicsPipelineKey, GraphicsPipeline*> graphics_pipelines;
     std::array<const Shader::Info*, MaxShaderStages> infos{};
     std::array<vk::ShaderModule, MaxShaderStages> modules{};
+    std::optional<Shader::Gcn::FetchShaderData> fetch_shader{};
     GraphicsPipelineKey graphics_key{};
     u64 compute_key{};
 };
@@ -187,13 +187,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
     }
 
     const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
-    buffer_cache.BindVertexBuffers(vs_info);
+    const auto& fetch_shader = pipeline->GetFetchShader();
+    buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
     const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
 
     BeginRendering(*pipeline, state);
     UpdateDynamicState(*pipeline);
 
-    const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs);
+    const auto [vertex_offset, instance_offset] = fetch_shader->GetDrawOffsets(regs, vs_info);
 
     const auto cmdbuf = scheduler.CommandBuffer();
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
|
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
|
||||||
buffer_cache.BindVertexBuffers(vs_info);
|
const auto& fetch_shader = pipeline->GetFetchShader();
|
||||||
|
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
|
||||||
buffer_cache.BindIndexBuffer(is_indexed, 0);
|
buffer_cache.BindIndexBuffer(is_indexed, 0);
|
||||||
|
|
||||||
const auto& [buffer, base] =
|
const auto& [buffer, base] =
|
||||||
|
@@ -397,10 +399,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         if (!stage) {
            continue;
         }
-        if (stage->uses_step_rates) {
-            push_data.step0 = regs.vgt_instance_step_rate_0;
-            push_data.step1 = regs.vgt_instance_step_rate_1;
-        }
+        push_data.step0 = regs.vgt_instance_step_rate_0;
+        push_data.step1 = regs.vgt_instance_step_rate_1;
         stage->PushUd(binding, push_data);
 
         BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers);
@@ -87,12 +87,9 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
         range.extent.levels = image.last_level - image.base_level + 1;
     }
     range.extent.layers = image.last_array - image.base_array + 1;
-    type = ConvertImageViewType(image.GetType());
+    type = ConvertImageViewType(image.GetBoundType());
 
-    // Adjust view type for partial cubemaps and arrays
-    if (image.IsPartialCubemap()) {
-        type = vk::ImageViewType::e2DArray;
-    }
+    // Adjust view type for arrays
     if (type == vk::ImageViewType::eCube) {
         if (desc.is_array) {
             type = vk::ImageViewType::eCubeArray;