Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2024-12-28 02:26:07 +00:00)
Tessellation (#1528)
* shader_recompiler: Tessellation WIP
* fix compiler errors after merge
  DONT MERGE set log file to /dev/null
  DONT MERGE linux pthread bb fix
  save work
  DONT MERGE dump ir
  save more work
  fix mistake with ES shader skip list
  add input patch control points dynamic state
  random stuff
* WIP Tessellation partial implementation. Squash commits
* test: make local/tcs use attr arrays
* attr arrays in TCS/TES
* dont define empty attr arrays
* switch to special opcodes for tess tcs/tes reads and tcs writes
* impl tcs/tes read attr insts
* rebase fix
* save some work
* save work probably broken and slow
* put Vertex LogicalStage after TCS and TES to fix bindings
* more refactors
* refactor pattern matching and optimize modulos (disabled)
* enable modulo opt
* copyright
* rebase fixes
* remove some prints
* remove some stuff
* Add TCS/TES support for shader patching and use LogicalStage
* refactor and handle wider DS instructions
* get rid of GetAttributes for special tess constants reads. Immediately replace some upon seeing readconstbuffer. Gets rid of some extra passes over IR
* stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs
* delete some more stuff
* update comments for current implementation
* some cleanup
* uint error
* more cleanup
* remove patch control points dynamic state (because runtime_info already depends on it)
* fix potential problem with determining passthrough

---------

Co-authored-by: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com>
This commit is contained in:
parent
3e22622508
commit
3c0c921ef5
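The organizing idea of this change set is a split between the hardware pipeline stage a shader binary is bound to (Stage) and the API-level stage it is compiled as (LogicalStage). A minimal sketch of the relationship, inferred from the hunks below (the actual enum definitions are not part of this diff):

    // Hardware stages referenced in the hunks below (Export/Local/Hull are GCN-specific).
    enum class Stage { Vertex, Export, Local, Hull, Geometry, Fragment, Compute };
    // Logical stages the SPIR-V backend now switches on.
    enum class LogicalStage {
        Vertex, TessellationControl, TessellationEval, Geometry, Fragment, Compute
    };

The mapping is pipeline-dependent rather than one-to-one: the Hull stage compiles as LogicalStage::TessellationControl, while the tessellation evaluation shader reads its state from runtime_info.vs_info, i.e. the domain shader occupies the hardware vertex stage, and the Local (LS) stage still compiles as a logical vertex shader that feeds the TCS through an attribute array.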
@@ -664,6 +664,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
     src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
     src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
+    src/shader_recompiler/ir/passes/hull_shader_transform.cpp
     src/shader_recompiler/ir/passes/identity_removal_pass.cpp
     src/shader_recompiler/ir/passes/ir_passes.h
     src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
@@ -683,6 +684,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/ir/opcodes.cpp
     src/shader_recompiler/ir/opcodes.h
     src/shader_recompiler/ir/opcodes.inc
+    src/shader_recompiler/ir/patch.cpp
+    src/shader_recompiler/ir/patch.h
     src/shader_recompiler/ir/post_order.cpp
     src/shader_recompiler/ir/post_order.h
     src/shader_recompiler/ir/program.cpp
@@ -177,10 +177,11 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
     }
 }
 
-void DebugStateImpl::CollectShader(const std::string& name, vk::ShaderModule module,
-                                   std::span<const u32> spv, std::span<const u32> raw_code,
-                                   std::span<const u32> patch_spv, bool is_patched) {
-    shader_dump_list.emplace_back(name, module, std::vector<u32>{spv.begin(), spv.end()},
+void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,
+                                   vk::ShaderModule module, std::span<const u32> spv,
+                                   std::span<const u32> raw_code, std::span<const u32> patch_spv,
+                                   bool is_patched) {
+    shader_dump_list.emplace_back(name, l_stage, module, std::vector<u32>{spv.begin(), spv.end()},
                                   std::vector<u32>{raw_code.begin(), raw_code.end()},
                                   std::vector<u32>{patch_spv.begin(), patch_spv.end()}, is_patched);
 }
@@ -76,6 +76,7 @@ struct FrameDump {
 
 struct ShaderDump {
     std::string name;
+    Shader::LogicalStage l_stage;
     vk::ShaderModule module;
 
     std::vector<u32> spv;
@@ -90,16 +91,17 @@ struct ShaderDump {
     std::string cache_isa_disasm{};
     std::string cache_patch_disasm{};
 
-    ShaderDump(std::string name, vk::ShaderModule module, std::vector<u32> spv,
-               std::vector<u32> isa, std::vector<u32> patch_spv, bool is_patched)
-        : name(std::move(name)), module(module), spv(std::move(spv)), isa(std::move(isa)),
-          patch_spv(std::move(patch_spv)), is_patched(is_patched) {}
+    ShaderDump(std::string name, Shader::LogicalStage l_stage, vk::ShaderModule module,
+               std::vector<u32> spv, std::vector<u32> isa, std::vector<u32> patch_spv,
+               bool is_patched)
+        : name(std::move(name)), l_stage(l_stage), module(module), spv(std::move(spv)),
+          isa(std::move(isa)), patch_spv(std::move(patch_spv)), is_patched(is_patched) {}
 
     ShaderDump(const ShaderDump& other) = delete;
     ShaderDump(ShaderDump&& other) noexcept
-        : name{std::move(other.name)}, module{std::move(other.module)}, spv{std::move(other.spv)},
-          isa{std::move(other.isa)}, patch_spv{std::move(other.patch_spv)},
-          patch_source{std::move(other.patch_source)},
+        : name{std::move(other.name)}, l_stage(other.l_stage), module{std::move(other.module)},
+          spv{std::move(other.spv)}, isa{std::move(other.isa)},
+          patch_spv{std::move(other.patch_spv)}, patch_source{std::move(other.patch_source)},
           cache_spv_disasm{std::move(other.cache_spv_disasm)},
           cache_isa_disasm{std::move(other.cache_isa_disasm)},
           cache_patch_disasm{std::move(other.cache_patch_disasm)} {}
@@ -108,6 +110,7 @@ struct ShaderDump {
         if (this == &other)
             return *this;
         name = std::move(other.name);
+        l_stage = other.l_stage;
         module = std::move(other.module);
         spv = std::move(other.spv);
         isa = std::move(other.isa);
@@ -203,7 +206,8 @@ public:
     void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
                       const AmdGpu::Liverpool::Regs& regs, bool is_compute = false);
 
-    void CollectShader(const std::string& name, vk::ShaderModule module, std::span<const u32> spv,
+    void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
+                       vk::ShaderModule module, std::span<const u32> spv,
                        std::span<const u32> raw_code, std::span<const u32> patch_spv,
                        bool is_patched);
 };
@@ -158,16 +158,17 @@ bool ShaderList::Selection::DrawShader(DebugStateType::ShaderDump& value) {
         DebugState.ShowDebugMessage(msg);
     }
     if (compile) {
-        static std::map<std::string, std::string> stage_arg = {
-            {"vs", "vert"},
-            {"gs", "geom"},
-            {"fs", "frag"},
-            {"cs", "comp"},
+        static std::map<Shader::LogicalStage, std::string> stage_arg = {
+            {Shader::LogicalStage::Vertex, "vert"},
+            {Shader::LogicalStage::TessellationControl, "tesc"},
+            {Shader::LogicalStage::TessellationEval, "tese"},
+            {Shader::LogicalStage::Geometry, "geom"},
+            {Shader::LogicalStage::Fragment, "frag"},
+            {Shader::LogicalStage::Compute, "comp"},
         };
-        auto stage = stage_arg.find(value.name.substr(0, 2));
+        auto stage = stage_arg.find(value.l_stage);
         if (stage == stage_arg.end()) {
-            DebugState.ShowDebugMessage(std::string{"Invalid shader stage: "} +
-                                        value.name.substr(0, 2));
+            DebugState.ShowDebugMessage(std::string{"Invalid shader stage"});
         } else {
             std::string cmd =
                 fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 "
@@ -1642,7 +1642,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) {
 
 s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) {
     LOG_TRACE(Lib_GnmDriver, "called");
 
     if (!cmdbuf || size < 0x1E) {
         return -1;
     }
@@ -1660,11 +1659,13 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3
     cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
     cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
                                      hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
-    cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5],
-                                          hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL
+    cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u,
+                                          hs_regs[5],  // VGT_HOS_MAX_TESS_LEVEL
+                                          hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL
     cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
     cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4);     // VGT_LS_HS_CONFIG
+
     // right padding?
     WriteTrailingNop<11>(cmdbuf);
     return ORBIS_OK;
 }
@@ -1,6 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <span>
 #include <type_traits>
 #include <utility>
@@ -13,6 +12,7 @@
 #include "shader_recompiler/frontend/translate/translate.h"
 #include "shader_recompiler/ir/basic_block.h"
 #include "shader_recompiler/ir/program.h"
 #include "shader_recompiler/runtime_info.h"
+#include "video_core/amdgpu/types.h"
 
 namespace Shader::Backend::SPIRV {
@@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
         return arg.VectorReg();
     } else if constexpr (std::is_same_v<ArgType, const char*>) {
         return arg.StringLiteral();
+    } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+        return arg.Patch();
     }
     UNREACHABLE();
 }
 
 template <auto func, bool is_first_arg_inst, size_t... I>
@@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
     return main;
 }
 
+spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) {
+    switch (primitive) {
+    case AmdGpu::TessellationType::Isoline:
+        return spv::ExecutionMode::Isolines;
+    case AmdGpu::TessellationType::Triangle:
+        return spv::ExecutionMode::Triangles;
+    case AmdGpu::TessellationType::Quad:
+        return spv::ExecutionMode::Quads;
+    }
+    UNREACHABLE_MSG("Tessellation primitive {}", primitive);
+}
+
+spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
+    switch (spacing) {
+    case AmdGpu::TessellationPartitioning::Integer:
+        return spv::ExecutionMode::SpacingEqual;
+    case AmdGpu::TessellationPartitioning::FracOdd:
+        return spv::ExecutionMode::SpacingFractionalOdd;
+    case AmdGpu::TessellationPartitioning::FracEven:
+        return spv::ExecutionMode::SpacingFractionalEven;
+    default:
+        break;
+    }
+    UNREACHABLE_MSG("Tessellation spacing {}", spacing);
+}
+
 void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
     ctx.AddCapability(spv::Capability::Image1D);
     ctx.AddCapability(spv::Capability::Sampled1D);
@@ -248,36 +277,55 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
     if (info.uses_group_ballot) {
         ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
     }
-    if (info.stage == Stage::Export || info.stage == Stage::Vertex) {
+    const auto stage = info.l_stage;
+    if (stage == LogicalStage::Vertex) {
         ctx.AddExtension("SPV_KHR_shader_draw_parameters");
         ctx.AddCapability(spv::Capability::DrawParameters);
     }
-    if (info.stage == Stage::Geometry) {
+    if (stage == LogicalStage::Geometry) {
         ctx.AddCapability(spv::Capability::Geometry);
     }
     if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
         ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
         ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
     }
+    if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
+        ctx.AddCapability(spv::Capability::Tessellation);
+    }
 }
 
-void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
-    const auto& info = program.info;
+void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
     const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
     spv::ExecutionModel execution_model{};
-    switch (program.info.stage) {
-    case Stage::Compute: {
+    switch (info.l_stage) {
+    case LogicalStage::Compute: {
         const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
         execution_model = spv::ExecutionModel::GLCompute;
         ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
                              workgroup_size[1], workgroup_size[2]);
         break;
     }
-    case Stage::Export:
-    case Stage::Vertex:
+    case LogicalStage::Vertex:
         execution_model = spv::ExecutionModel::Vertex;
         break;
-    case Stage::Fragment:
+    case LogicalStage::TessellationControl:
+        execution_model = spv::ExecutionModel::TessellationControl;
+        ctx.AddCapability(spv::Capability::Tessellation);
+        ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
+                             ctx.runtime_info.hs_info.NumOutputControlPoints());
+        break;
+    case LogicalStage::TessellationEval: {
+        execution_model = spv::ExecutionModel::TessellationEvaluation;
+        const auto& vs_info = ctx.runtime_info.vs_info;
+        ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
+        ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
+        ctx.AddExecutionMode(main,
+                             vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
+                                 ? spv::ExecutionMode::VertexOrderCcw
+                                 : spv::ExecutionMode::VertexOrderCw);
+        break;
+    }
+    case LogicalStage::Fragment:
         execution_model = spv::ExecutionModel::Fragment;
         if (ctx.profile.lower_left_origin_mode) {
             ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
@@ -292,7 +340,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
             ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
         }
         break;
-    case Stage::Geometry:
+    case LogicalStage::Geometry:
         execution_model = spv::ExecutionModel::Geometry;
         ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
         ctx.AddExecutionMode(main,
@@ -303,7 +351,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
                              ctx.runtime_info.gs_info.num_invocations);
         break;
     default:
-        throw NotImplementedException("Stage {}", u32(program.info.stage));
+        UNREACHABLE_MSG("Stage {}", u32(info.stage));
     }
     ctx.AddEntryPoint(execution_model, main, "main", interfaces);
 }
@@ -349,7 +397,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
                            const IR::Program& program, Bindings& binding) {
     EmitContext ctx{profile, runtime_info, program.info, binding};
     const Id main{DefineMain(ctx, program)};
-    DefineEntryPoint(program, ctx, main);
+    DefineEntryPoint(program.info, ctx, main);
     SetupCapabilities(program.info, profile, ctx);
     SetupFloatMode(ctx, profile, runtime_info, main);
     PatchPhiNodes(program, ctx);
@@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
 
 void EmitBarrier(EmitContext& ctx) {
     const auto execution{spv::Scope::Workgroup};
-    const auto memory{spv::Scope::Workgroup};
-    const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
-                                spv::MemorySemanticsMask::WorkgroupMemory};
+    spv::Scope memory;
+    spv::MemorySemanticsMask memory_semantics;
+    if (ctx.l_stage == Shader::LogicalStage::TessellationControl) {
+        memory = spv::Scope::Invocation;
+        memory_semantics = spv::MemorySemanticsMask::MaskNone;
+    } else {
+        memory = spv::Scope::Workgroup;
+        memory_semantics =
+            spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory;
+    }
     ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
                          ctx.ConstU32(static_cast<u32>(memory)),
                          ctx.ConstU32(static_cast<u32>(memory_semantics)));
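The barrier change above deserves a note: for tessellation control shaders the workgroup memory semantics are dropped, presumably because the values a GCN hull shader synchronizes through LDS live in SPIR-V output arrays in this translation, not in workgroup memory (an inference from this hunk, not stated in the source). Illustratively, the two shapes now emitted are:

    // TCS:        OpControlBarrier Workgroup Invocation None
    // all others: OpControlBarrier Workgroup Workgroup  AcquireRelease|WorkgroupMemory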
@@ -4,6 +4,9 @@
 #include "common/assert.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 #include "shader_recompiler/ir/attribute.h"
+#include "shader_recompiler/ir/patch.h"
+#include "shader_recompiler/runtime_info.h"
 
+#include <magic_enum/magic_enum.hpp>
 
@@ -45,13 +48,19 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
 
 Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
     if (IR::IsParam(attr)) {
-        const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
-        const auto& info{ctx.output_params.at(index)};
-        ASSERT(info.num_components > 0);
-        if (info.num_components == 1) {
-            return info.id;
+        const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
+        if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
+            const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
+            return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index),
+                                     ctx.ConstU32(element));
         } else {
-            return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
+            const auto& info{ctx.output_params.at(attr_index)};
+            ASSERT(info.num_components > 0);
+            if (info.num_components == 1) {
+                return info.id;
+            } else {
+                return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
+            }
         }
     }
     if (IR::IsMrt(attr)) {
@@ -82,9 +91,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
 
 std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
     if (IR::IsParam(attr)) {
-        const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
-        const auto& info{ctx.output_params.at(index)};
-        return {info.component_type, info.is_integer};
+        if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
+            return {ctx.F32[1], false};
+        } else {
+            const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
+            const auto& info{ctx.output_params.at(index)};
+            return {info.component_type, info.is_integer};
+        }
     }
     if (IR::IsMrt(attr)) {
         const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
@@ -171,12 +184,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
                                       rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
 }
 
-Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
+Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
     if (IR::IsPosition(attr)) {
         ASSERT(attr == IR::Attribute::Position0);
         const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
-        const auto pointer{
-            ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
+        const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
         const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
         return ctx.OpLoad(ctx.F32[1],
                           ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@@ -186,7 +198,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
         const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
         const auto param = ctx.input_params.at(param_id).id;
         const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
-        const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
+        const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
         const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
         return ctx.OpLoad(ctx.F32[1],
                           ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@@ -194,9 +206,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, I
     UNREACHABLE();
 }
 
-Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
-    if (ctx.info.stage == Stage::Geometry) {
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
+    if (ctx.info.l_stage == LogicalStage::Geometry) {
         return EmitGetAttributeForGeometry(ctx, attr, comp, index);
+    } else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
+               ctx.info.l_stage == LogicalStage::TessellationEval) {
+        if (IR::IsTessCoord(attr)) {
+            const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
+            const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
+            const auto pointer{
+                ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
+            return ctx.OpLoad(ctx.F32[1], pointer);
+        } else if (IR::IsParam(attr)) {
+            const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
+            const auto param = ctx.input_params.at(param_id).id;
+            const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
+            const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
+            const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
+            return ctx.OpLoad(ctx.F32[1],
+                              ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
+        }
+        UNREACHABLE();
     }
 
     if (IR::IsParam(attr)) {
@@ -242,8 +272,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
         }
         return coord;
     }
+    case IR::Attribute::TessellationEvaluationPointU:
+        return ctx.OpLoad(ctx.F32[1],
+                          ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
+    case IR::Attribute::TessellationEvaluationPointV:
+        return ctx.OpLoad(ctx.F32[1],
+                          ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
     default:
-        throw NotImplementedException("Read attribute {}", attr);
+        UNREACHABLE_MSG("Read attribute {}", attr);
     }
 }
 
@@ -266,10 +302,32 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
         return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
                             ctx.u32_zero_value);
     case IR::Attribute::PrimitiveId:
-        ASSERT(ctx.info.stage == Stage::Geometry);
         return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
+    case IR::Attribute::InvocationId:
+        ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
+               ctx.info.l_stage == LogicalStage::TessellationControl);
+        return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
+    case IR::Attribute::PatchVertices:
+        ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
+        return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
+    case IR::Attribute::PackedHullInvocationInfo: {
+        ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
+        // [0:8]: patch id within VGT
+        // [8:12]: output control point id
+        // But 0:8 should be treated as 0 for attribute addressing purposes
+        if (ctx.runtime_info.hs_info.IsPassthrough()) {
+            // Gcn shader would run with 1 thread, but we need to run a thread for
+            // each output control point.
+            // If Gcn shader uses this value, we should make sure all threads in the
+            // Vulkan shader use 0
+            return ctx.ConstU32(0u);
+        } else {
+            const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
+            return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
+        }
+    }
     default:
-        throw NotImplementedException("Read U32 attribute {}", attr);
+        UNREACHABLE_MSG("Read U32 attribute {}", attr);
     }
 }
 
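A sketch of the packed word described by the comments in the PackedHullInvocationInfo case above; the helper is hypothetical and only illustrates the bit layout:

    // bits [0:8)  - patch id within the VGT (treated as 0 for attribute addressing)
    // bits [8:12) - output control point id
    u32 PackHullInvocationInfo(u32 patch_id, u32 control_point_id) {
        return (control_point_id << 8) | (patch_id & 0xFFu);
    }

This matches the non-passthrough path, which shifts InvocationId left by 8 so the low byte reads as zero.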
@@ -287,6 +345,58 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
     }
 }
 
+Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) {
+    const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
+    return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array,
+                                                    vertex_index, attr_index, comp_index));
+}
+
+void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
+    // Implied vertex index is invocation_id
+    const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
+    Id pointer =
+        ctx.OpAccessChain(component_ptr, ctx.output_attr_array,
+                          ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index);
+    ctx.OpStore(pointer, value);
+}
+
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
+    const u32 index{IR::GenericPatchIndex(patch)};
+    const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
+    const Id type{ctx.l_stage == LogicalStage::TessellationControl ? ctx.output_f32
+                                                                   : ctx.input_f32};
+    const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
+    return ctx.OpLoad(ctx.F32[1], pointer);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
+    const Id pointer{[&] {
+        if (IR::IsGeneric(patch)) {
+            const u32 index{IR::GenericPatchIndex(patch)};
+            const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
+            return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
+        }
+        switch (patch) {
+        case IR::Patch::TessellationLodLeft:
+        case IR::Patch::TessellationLodRight:
+        case IR::Patch::TessellationLodTop:
+        case IR::Patch::TessellationLodBottom: {
+            const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+            const Id index_id{ctx.ConstU32(index)};
+            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
+        }
+        case IR::Patch::TessellationLodInteriorU:
+            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
+                                     ctx.u32_zero_value);
+        case IR::Patch::TessellationLodInteriorV:
+            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
+        default:
+            UNREACHABLE_MSG("Patch {}", u32(patch));
+        }
+    }()};
+    ctx.OpStore(pointer, value);
+}
+
 template <u32 N>
 static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
     auto& buffer = ctx.buffers[handle];
@@ -9,6 +9,7 @@
 namespace Shader::IR {
 enum class Attribute : u64;
 enum class ScalarReg : u32;
+enum class Patch : u64;
 class Inst;
 class Value;
 } // namespace Shader::IR
@@ -27,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
 void EmitReference(EmitContext&);
 void EmitPhiMove(EmitContext&);
 void EmitJoin(EmitContext& ctx);
-void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
-void EmitDeviceMemoryBarrier(EmitContext& ctx);
 void EmitGetScc(EmitContext& ctx);
 void EmitGetExec(EmitContext& ctx);
 void EmitGetVcc(EmitContext& ctx);
@@ -85,9 +84,13 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
 Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
 Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
+Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
+void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index);
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
 void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
 void EmitSetSampleMask(EmitContext& ctx, Id value);
 void EmitSetFragDepth(EmitContext& ctx, Id value);
@@ -6,6 +6,7 @@
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/ir/passes/srt.h"
+#include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/types.h"
 
 #include <boost/container/static_vector.hpp>
@@ -34,7 +35,7 @@ std::string_view StageName(Stage stage) {
     case Stage::Compute:
         return "cs";
     }
-    throw InvalidArgument("Invalid stage {}", u32(stage));
+    UNREACHABLE_MSG("Invalid hw stage {}", u32(stage));
 }
 
 static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) {
@@ -65,7 +66,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
 EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
                          const Info& info_, Bindings& binding_)
     : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
-      profile{profile_}, stage{info.stage}, binding{binding_} {
+      profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
     AddCapability(spv::Capability::Shader);
     DefineArithmeticTypes();
     DefineInterfaces();
@@ -268,9 +269,8 @@ void EmitContext::DefineInputs() {
             U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
         Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
     }
-    switch (stage) {
-    case Stage::Export:
-    case Stage::Vertex: {
+    switch (l_stage) {
+    case LogicalStage::Vertex: {
         vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
         base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
         instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
@@ -311,12 +311,11 @@ void EmitContext::DefineInputs() {
                 }
                 input_params[attrib.semantic] =
                     GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
-                interfaces.push_back(id);
             }
         }
         break;
     }
-    case Stage::Fragment:
+    case LogicalStage::Fragment:
         frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
         frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
         front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
@@ -351,15 +350,14 @@ void EmitContext::DefineInputs() {
             }
             input_params[semantic] =
                 GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
-            interfaces.push_back(attr_id);
         }
         break;
-    case Stage::Compute:
+    case LogicalStage::Compute:
         workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
         local_invocation_id =
             DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
         break;
-    case Stage::Geometry: {
+    case LogicalStage::Geometry: {
         primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
         const auto gl_per_vertex =
             Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
@@ -389,15 +387,129 @@ void EmitContext::DefineInputs() {
         }
         break;
     }
+    case LogicalStage::TessellationControl: {
+        invocation_id =
+            DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
+        patch_vertices =
+            DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
+        primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
+
+        const u32 num_attrs = runtime_info.hs_info.ls_stride >> 4;
+        if (num_attrs > 0) {
+            const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
+            // The input vertex count isn't statically known, so make length 32 (what glslang does)
+            const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
+            input_attr_array = DefineInput(patch_array_type, 0);
+            Name(input_attr_array, "in_attrs");
+        }
+        break;
+    }
+    case LogicalStage::TessellationEval: {
+        tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
+        primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
+
+        const u32 num_attrs = runtime_info.vs_info.hs_output_cp_stride >> 4;
+        if (num_attrs > 0) {
+            const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
+            // The input vertex count isn't statically known, so make length 32 (what glslang does)
+            const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
+            input_attr_array = DefineInput(patch_array_type, 0);
+            Name(input_attr_array, "in_attrs");
+        }
+
+        u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4;
+        for (size_t index = 0; index < 30; ++index) {
+            if (!(info.uses_patches & (1U << index))) {
+                continue;
+            }
+            const Id id{DefineInput(F32[4], patch_base_location + index)};
+            Decorate(id, spv::Decoration::Patch);
+            Name(id, fmt::format("patch_in{}", index));
+            patches[index] = id;
+        }
+        break;
+    }
     default:
         break;
     }
 }
 
 void EmitContext::DefineOutputs() {
-    switch (stage) {
-    case Stage::Export:
-    case Stage::Vertex: {
+    switch (l_stage) {
+    case LogicalStage::Vertex: {
+        // No point in defining builtin outputs (i.e. position) unless next stage is fragment?
+        // Might cause problems linking with tcs
+
         output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
         const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
                                           info.stores.Get(IR::Attribute::Position2) ||
                                           info.stores.Get(IR::Attribute::Position3);
         if (has_extra_pos_stores) {
             const Id type{TypeArray(F32[1], ConstU32(8U))};
             clip_distances =
                 DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
             cull_distances =
                 DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
         }
+        if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) {
+            const u32 num_attrs = runtime_info.ls_info.ls_stride >> 4;
+            if (num_attrs > 0) {
+                const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
+                output_attr_array = DefineOutput(type, 0);
+                Name(output_attr_array, "out_attrs");
+            }
+        } else {
             for (u32 i = 0; i < IR::NumParams; i++) {
                 const IR::Attribute param{IR::Attribute::Param0 + i};
                 if (!info.stores.GetAny(param)) {
                     continue;
                 }
                 const u32 num_components = info.stores.NumComponents(param);
                 const Id id{DefineOutput(F32[num_components], i)};
                 Name(id, fmt::format("out_attr{}", i));
                 output_params[i] =
                     GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
             }
+        }
         break;
     }
+    case LogicalStage::TessellationControl: {
+        if (info.stores_tess_level_outer) {
+            const Id type{TypeArray(F32[1], ConstU32(4U))};
+            output_tess_level_outer =
+                DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
+            Decorate(output_tess_level_outer, spv::Decoration::Patch);
+        }
+        if (info.stores_tess_level_inner) {
+            const Id type{TypeArray(F32[1], ConstU32(2U))};
+            output_tess_level_inner =
+                DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
+            Decorate(output_tess_level_inner, spv::Decoration::Patch);
+        }
+
+        const u32 num_attrs = runtime_info.hs_info.hs_output_cp_stride >> 4;
+        if (num_attrs > 0) {
+            const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
+            // The input vertex count isn't statically known, so make length 32 (what glslang does)
+            const Id patch_array_type{TypeArray(
+                per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
+            output_attr_array = DefineOutput(patch_array_type, 0);
+            Name(output_attr_array, "out_attrs");
+        }
+
+        u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4;
+        for (size_t index = 0; index < 30; ++index) {
+            if (!(info.uses_patches & (1U << index))) {
+                continue;
+            }
+            const Id id{DefineOutput(F32[4], patch_base_location + index)};
+            Decorate(id, spv::Decoration::Patch);
+            Name(id, fmt::format("patch_out{}", index));
+            patches[index] = id;
+        }
+        break;
+    }
+    case LogicalStage::TessellationEval: {
+        output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
+        const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
+                                          info.stores.Get(IR::Attribute::Position2) ||
@@ -419,11 +531,10 @@ void EmitContext::DefineOutputs() {
             Name(id, fmt::format("out_attr{}", i));
             output_params[i] =
                 GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
-            interfaces.push_back(id);
         }
         break;
     }
-    case Stage::Fragment:
+    case LogicalStage::Fragment:
         for (u32 i = 0; i < IR::NumRenderTargets; i++) {
             const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
             if (!info.stores.GetAny(mrt)) {
@@ -435,22 +546,22 @@ void EmitContext::DefineOutputs() {
             const Id id{DefineOutput(type, i)};
             Name(id, fmt::format("frag_color{}", i));
             frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
-            interfaces.push_back(id);
         }
         break;
-    case Stage::Geometry: {
+    case LogicalStage::Geometry: {
         output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
 
         for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
             const Id id{DefineOutput(F32[4], attr_id)};
             Name(id, fmt::format("out_attr{}", attr_id));
             output_params[attr_id] = {id, output_f32, F32[1], 4u};
-            interfaces.push_back(id);
         }
         break;
     }
-    default:
+    case LogicalStage::Compute:
         break;
+    default:
+        UNREACHABLE();
     }
 }
 
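The attribute arrays defined in the hunks above give the stage-to-stage linkage a fixed shape. In GLSL terms (illustrative only, derived from the TypeArray calls; num_attrs is the relevant stride divided by 16):

    // LS out:  layout(location = 0) out vec4 out_attrs[num_attrs];
    // TCS in:  layout(location = 0) in  vec4 in_attrs[32][num_attrs];
    // TCS out: layout(location = 0) out vec4 out_attrs[num_output_cp][num_attrs];
    // TES in:  layout(location = 0) in  vec4 in_attrs[32][num_attrs];

Per-patch parameters follow at location patch_base_location + index and carry the Patch decoration.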
@@ -46,14 +46,18 @@ public:
     void DefineBufferOffsets();
     void DefineInterpolatedAttribs();
 
-    [[nodiscard]] Id DefineInput(Id type, u32 location) {
-        const Id input_id{DefineVar(type, spv::StorageClass::Input)};
-        Decorate(input_id, spv::Decoration::Location, location);
+    [[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
+                                 std::optional<spv::BuiltIn> builtin = std::nullopt) {
+        const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)};
+        if (location) {
+            Decorate(input_id, spv::Decoration::Location, *location);
+        }
         return input_id;
     }
 
-    [[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt) {
-        const Id output_id{DefineVar(type, spv::StorageClass::Output)};
+    [[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt,
+                                  std::optional<spv::BuiltIn> builtin = std::nullopt) {
+        const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)};
         if (location) {
             Decorate(output_id, spv::Decoration::Location, *location);
         }
@@ -131,7 +135,8 @@ public:
     const Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;
-    Stage stage{};
+    Stage stage;
+    LogicalStage l_stage{};
 
     Id void_id{};
     Id U8{};
@@ -188,8 +193,15 @@ public:
     Id clip_distances{};
     Id cull_distances{};
 
+    Id patch_vertices{};
+    Id output_tess_level_outer{};
+    Id output_tess_level_inner{};
+    Id tess_coord;
+    std::array<Id, 30> patches{};
+
     Id workgroup_id{};
     Id local_invocation_id{};
     Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch
     Id subgroup_local_invocation_id{};
     Id image_u32{};
 
@@ -252,6 +264,8 @@ public:
         bool is_loaded{};
         s32 buffer_handle{-1};
     };
+    Id input_attr_array;
+    Id output_attr_array;
     std::array<SpirvAttribute, IR::NumParams> input_params{};
    std::array<SpirvAttribute, IR::NumParams> output_params{};
     std::array<SpirvAttribute, IR::NumRenderTargets> frag_outputs{};
src/shader_recompiler/frontend/tessellation.h (new file, 38 lines)
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/types.h"
+
+namespace Shader {
+
+struct TessellationDataConstantBuffer {
+    u32 ls_stride;
+    u32 hs_cp_stride;      // HullStateConstants::m_cpStride != 0 ? HullStateConstants::m_cpStride :
+                           // ls_stride
+    u32 num_patches;       // num patches submitted in threadgroup
+    u32 hs_output_base;    // HullStateConstants::m_numInputCP::m_cpStride != 0 ?
+                           // HullStateConstants::m_numInputCP * ls_stride * num_patches : 0
+                           // basically 0 when passthrough
+    u32 patch_const_size;  // 16 * num_patch_attrs
+    u32 patch_const_base;  // hs_output_base + patch_output_size
+    u32 patch_output_size; // output_cp_stride * num_output_cp_per_patch
+    f32 off_chip_tessellation_factor_threshold;
+    u32 first_edge_tess_factor_index;
+};
+
+// Assign names to dword fields of TessellationDataConstantBuffer
+enum class TessConstantAttribute : u32 {
+    LsStride,
+    HsCpStride,
+    HsNumPatch,
+    HsOutputBase,
+    PatchConstSize,
+    PatchConstBase,
+    PatchOutputSize,
+    OffChipTessellationFactorThreshold,
+    FirstEdgeTessFactorIndex,
+};
+
+} // namespace Shader
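Reading the field comments literally, the derived members relate as follows. This helper is hypothetical (the real values are written by the guest through GNM) and merely transcribes the comments:

    TessellationDataConstantBuffer MakeTessConstants(u32 ls_stride, u32 cp_stride, u32 num_patches,
                                                     u32 num_input_cp, u32 num_output_cp,
                                                     u32 num_patch_attrs) {
        TessellationDataConstantBuffer tess{};
        tess.ls_stride = ls_stride;
        tess.hs_cp_stride = cp_stride != 0 ? cp_stride : ls_stride;
        tess.num_patches = num_patches;
        // 0 in the passthrough case, per the comment on hs_output_base.
        tess.hs_output_base = cp_stride != 0 ? num_input_cp * ls_stride * num_patches : 0;
        tess.patch_output_size = tess.hs_cp_stride * num_output_cp;
        tess.patch_const_size = 16 * num_patch_attrs;
        tess.patch_const_base = tess.hs_output_base + tess.patch_output_size;
        return tess;
    }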
@@ -1,8 +1,8 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "shader_recompiler/frontend/translate/translate.h"
 #include "shader_recompiler/ir/reg.h"
 #include "shader_recompiler/runtime_info.h"
 
 namespace Shader::Gcn {
 
@@ -73,10 +73,11 @@ void Translator::EmitDataShare(const GcnInst& inst) {
 void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
     const IR::U32 value{GetSrc(inst.src[0])};
 
-    if (info.stage != Stage::Compute) {
-        SetDst(inst.dst[0], value);
-    } else {
+    if (info.l_stage == LogicalStage::Compute ||
+        info.l_stage == LogicalStage::TessellationControl) {
         SetDst(inst.dst[0], ir.ReadFirstLane(value));
+    } else {
+        SetDst(inst.dst[0], value);
     }
 }
 
@@ -1,6 +1,8 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <bit>
+#include "common/assert.h"
 #include "shader_recompiler/frontend/translate/translate.h"
 
 namespace Shader::Gcn {
 
@@ -78,8 +80,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
         return S_BFM_B32(inst);
     case Opcode::S_MUL_I32:
         return S_MUL_I32(inst);
+    case Opcode::S_BFE_I32:
+        return S_BFE(inst, true);
     case Opcode::S_BFE_U32:
-        return S_BFE_U32(inst);
+        return S_BFE(inst, false);
     case Opcode::S_ABSDIFF_I32:
         return S_ABSDIFF_I32(inst);
 
@@ -434,12 +438,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
 }
 
-void Translator::S_BFE_U32(const GcnInst& inst) {
+void Translator::S_BFE(const GcnInst& inst, bool is_signed) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
     const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
     const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
-    const IR::U32 result{ir.BitFieldExtract(src0, offset, count)};
+    const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)};
     SetDst(inst.dst[0], result);
     ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
 }
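For reference, S_BFE packs its bitfield descriptor into the second operand: offset in bits [4:0] (hence the 0x1F mask) and width in bits [22:16] (hence the 7-bit extract at bit 16). A worked example under that encoding:

    // src1 = 0x00080004  ->  offset = 4, width = 8
    // src0 = 0x0000ABCD  ->  unsigned result = (0xABCD >> 4) & 0xFF = 0xBC
    //                        signed (S_BFE_I32) sign-extends bit 7: 0xFFFFFFBC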
@@ -8,6 +8,8 @@
 #include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/frontend/translate/translate.h"
 #include "shader_recompiler/info.h"
+#include "shader_recompiler/ir/attribute.h"
+#include "shader_recompiler/ir/reg.h"
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/resource.h"
 #include "video_core/amdgpu/types.h"
@@ -34,9 +36,8 @@ void Translator::EmitPrologue() {
     }
 
     IR::VectorReg dst_vreg = IR::VectorReg::V0;
-    switch (info.stage) {
-    case Stage::Vertex:
-    case Stage::Export:
+    switch (info.l_stage) {
+    case LogicalStage::Vertex:
         // v0: vertex ID, always present
         ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
         // v1: instance ID, step rate 0
@@ -52,7 +53,7 @@ void Translator::EmitPrologue() {
             ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
         }
         break;
-    case Stage::Fragment:
+    case LogicalStage::Fragment:
         dst_vreg = IR::VectorReg::V0;
         if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
             ++dst_vreg; // I
@@ -122,7 +123,30 @@ void Translator::EmitPrologue() {
             }
         }
         break;
-    case Stage::Compute:
+    case LogicalStage::TessellationControl: {
+        // Should be laid out like:
+        // [0:8]: patch id within VGT
+        // [8:12]: output control point id
+        ir.SetVectorReg(IR::VectorReg::V1,
+                        ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
+        // TODO PrimitiveId is probably V2 but haven't seen it yet
+        break;
+    }
+    case LogicalStage::TessellationEval:
+        ir.SetVectorReg(IR::VectorReg::V0,
+                        ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
+        ir.SetVectorReg(IR::VectorReg::V1,
+                        ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
+        // V2 is similar to PrimitiveID but not the same. It seems to only be used in
+        // compiler-generated address calculations. It's probably the patch id within the
+        // patches running locally on a given VGT (or CU, whichever is the granularity of LDS
+        // memory)
+        // Set to 0. See explanation in comment describing hull/domain passes
+        ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u));
+        // V3 is the actual PrimitiveID as intended by the shader author.
+        ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
+        break;
+    case LogicalStage::Compute:
         ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
         ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
         ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
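Summarizing the register conventions the prologue now seeds for tessellation, as described by the comments above:

    // TCS: V1 = PackedHullInvocationInfo  (PrimitiveId probably lands in V2; see the TODO)
    // TES: V0 = tess coord U, V1 = tess coord V,
    //      V2 = patch index local to the VGT (forced to 0), V3 = PrimitiveId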
@@ -137,7 +161,7 @@ void Translator::EmitPrologue() {
             ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
         }
         break;
-    case Stage::Geometry:
+    case LogicalStage::Geometry:
         switch (runtime_info.gs_info.out_primitive[0]) {
         case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
             ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
@@ -152,7 +176,7 @@ void Translator::EmitPrologue() {
         ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
         break;
     default:
-        throw NotImplementedException("Unknown shader stage");
+        UNREACHABLE_MSG("Unknown shader stage");
     }
 }
 
@@ -503,7 +527,8 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
 
         // Special case for emitting fetch shader.
        if (inst.opcode == Opcode::S_SWAPPC_B64) {
-            ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
+            ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
+                   info.stage == Stage::Local);
             translator.EmitFetch(inst);
             continue;
         }
@@ -94,7 +94,8 @@ public:
     void S_ASHR_I32(const GcnInst& inst);
     void S_BFM_B32(const GcnInst& inst);
     void S_MUL_I32(const GcnInst& inst);
-    void S_BFE_U32(const GcnInst& inst);
+    void S_BFE(const GcnInst& inst, bool is_signed);
     void S_BFE_I32(const GcnInst& inst);
     void S_ABSDIFF_I32(const GcnInst& inst);
     void S_NOT_B32(const GcnInst& inst);
|
|||
|
||||
// VOP3a
|
||||
void V_MAD_F32(const GcnInst& inst);
|
||||
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false);
|
||||
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true);
|
||||
void V_MAD_U32_U24(const GcnInst& inst);
|
||||
void V_CUBEID_F32(const GcnInst& inst);
|
||||
void V_CUBESC_F32(const GcnInst& inst);
|
||||
|
|
|
@@ -1060,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) {
 
 void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
-    const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
-    const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
+    IR::U32 src1{GetSrc(inst.src[1])};
+    IR::U32 src2{GetSrc(inst.src[2])};
+    if (!src1.IsImmediate()) {
+        src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F));
+    }
+    if (!src2.IsImmediate()) {
+        src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F));
+    }
     SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
 }
 
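The immediate special case above leaves constant offsets and widths unmasked; a plausible reading is that literals arrive already in range and keeping them as plain immediates keeps them visible to later pattern matching (an inference, the source does not say). The reference semantics being implemented, per the GCN ISA:

    // V_BFE_U32: result = (src0 >> (src1 & 0x1F)) & ((1u << (src2 & 0x1F)) - 1)
    // width 0 extracts nothing; V_BFE_I32 additionally sign-extends the extracted field.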
@@ -189,7 +189,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
     buffer_info.index_enable.Assign(mtbuf.idxen);
     buffer_info.offset_enable.Assign(mtbuf.offen);
     buffer_info.inst_offset.Assign(mtbuf.offset);
+    buffer_info.ring_access.Assign(is_ring);
     buffer_info.globally_coherent.Assign(mtbuf.glc);
     buffer_info.system_coherent.Assign(mtbuf.slc);
     if (is_typed) {
         const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
         const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
|||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
|
||||
if (info.stage != Stage::Export && info.stage != Stage::Geometry) {
|
||||
if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) {
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
||||
"Non immediate offset not supported");
|
||||
}
|
||||
|
||||
if (info.stage == Stage::Hull) {
|
||||
// printf("here\n"); // break
|
||||
}
|
||||
|
||||
IR::Value address = [&] -> IR::Value {
|
||||
if (is_ring) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
||||
|
@@ -269,7 +274,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
     buffer_info.index_enable.Assign(mtbuf.idxen);
     buffer_info.offset_enable.Assign(mtbuf.offen);
     buffer_info.inst_offset.Assign(mtbuf.offset);
+    buffer_info.ring_access.Assign(is_ring);
     buffer_info.globally_coherent.Assign(mtbuf.glc);
     buffer_info.system_coherent.Assign(mtbuf.slc);
     if (is_typed) {
         const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
         const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
@@ -11,6 +11,7 @@
 #include "common/types.h"
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/frontend/copy_shader.h"
+#include "shader_recompiler/frontend/tessellation.h"
 #include "shader_recompiler/ir/attribute.h"
 #include "shader_recompiler/ir/passes/srt.h"
 #include "shader_recompiler/ir/reg.h"
@@ -163,6 +164,7 @@ struct Info {
     UserDataMask ud_mask{};
 
     CopyShaderData gs_copy_data;
+    u32 uses_patches{};
 
     BufferResourceList buffers;
     TextureBufferResourceList texture_buffers;
@@ -173,8 +175,12 @@ struct Info {
     PersistentSrtInfo srt_info;
     std::vector<u32> flattened_ud_buf;
 
+    IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
+    s32 tess_consts_dword_offset = -1;
+
     std::span<const u32> user_data;
     Stage stage;
+    LogicalStage l_stage;
 
     u64 pgm_hash{};
     VAddr pgm_base;
@@ -190,14 +196,16 @@ struct Info {
     bool uses_shared{};
     bool uses_fp16{};
     bool uses_fp64{};
+    bool stores_tess_level_outer{};
+    bool stores_tess_level_inner{};
     bool translation_failed{}; // indicates that shader has unsupported instructions
     bool has_readconst{};
     u8 mrt_mask{0u};
     bool has_fetch_shader{false};
     u32 fetch_shader_sgpr_base{0u};
 
-    explicit Info(Stage stage_, ShaderParams params)
-        : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
+    explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
+        : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
           user_data{params.user_data} {}
 
     template <typename T>
@@ -244,6 +252,16 @@ struct Info {
             srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
         }
     }
+
+    void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const {
+        ASSERT(tess_consts_dword_offset >= 0); // We've already tracked the V# UD
+        auto buf = ReadUdReg<AmdGpu::Buffer>(static_cast<u32>(tess_consts_ptr_base),
+                                             static_cast<u32>(tess_consts_dword_offset));
+        VAddr tess_constants_addr = buf.base_address;
+        memcpy(&tess_constants,
+               reinterpret_cast<TessellationDataConstantBuffer*>(tess_constants_addr),
+               sizeof(tess_constants));
+    }
 };
 
 constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
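A usage sketch for ReadTessConstantBuffer (hypothetical call site): once the IR passes have recorded which user-data V# holds the tessellation constants, the nine dwords can be pulled straight out of guest memory at translation time:

    Shader::TessellationDataConstantBuffer tess_constants;
    info.ReadTessConstantBuffer(tess_constants); // reads through the tracked V# base address
    const u32 output_cp_stride = tess_constants.hs_cp_stride;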
@@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) {
         return "VertexId";
     case Attribute::InstanceId:
         return "InstanceId";
+    case Attribute::PrimitiveId:
+        return "PrimitiveId";
     case Attribute::FragCoord:
         return "FragCoord";
     case Attribute::IsFrontFace:
@@ -114,6 +116,16 @@ std::string NameOf(Attribute attribute) {
         return "LocalInvocationId";
     case Attribute::LocalInvocationIndex:
         return "LocalInvocationIndex";
+    case Attribute::InvocationId:
+        return "InvocationId";
+    case Attribute::PatchVertices:
+        return "PatchVertices";
+    case Attribute::TessellationEvaluationPointU:
+        return "TessellationEvaluationPointU";
+    case Attribute::TessellationEvaluationPointV:
+        return "TessellationEvaluationPointV";
+    case Attribute::PackedHullInvocationInfo:
+        return "PackedHullInvocationInfo";
     default:
         break;
     }

@ -72,8 +72,13 @@ enum class Attribute : u64 {
    LocalInvocationId = 75,
    LocalInvocationIndex = 76,
    FragCoord = 77,
    InstanceId0 = 78, // step rate 0
    InstanceId1 = 79, // step rate 1
    InvocationId = 80, // TCS id in output patch and instanced geometry shader id
    PatchVertices = 81,
    TessellationEvaluationPointU = 82,
    TessellationEvaluationPointV = 83,
    PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID
    Max,
};

@ -85,6 +90,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept {
    return attribute >= Attribute::Position0 && attribute <= Attribute::Position3;
}

constexpr bool IsTessCoord(Attribute attribute) noexcept {
    return attribute >= Attribute::TessellationEvaluationPointU &&
           attribute <= Attribute::TessellationEvaluationPointV;
}

constexpr bool IsParam(Attribute attribute) noexcept {
    return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
}

@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size
        return fmt::format("{}", arg.VectorReg());
    case Type::Attribute:
        return fmt::format("{}", arg.Attribute());
    case Type::Patch:
        return fmt::format("{}", arg.Patch());
    default:
        return "<unknown immediate type>";
    }

@ -266,8 +266,8 @@ void IREmitter::SetM0(const U32& value) {
    Inst(Opcode::SetM0, value);
}

F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
    return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
    return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), index);
}

U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
@ -278,6 +278,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
    Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
}

F32 IREmitter::GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
                                       const U32& comp_index) {
    return Inst<F32>(IR::Opcode::GetTessGenericAttribute, vertex_index, attr_index, comp_index);
}

void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index,
                                       const U32& comp_index) {
    Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index);
}

F32 IREmitter::GetPatch(Patch patch) {
    return Inst<F32>(Opcode::GetPatch, patch);
}

void IREmitter::SetPatch(Patch patch, const F32& value) {
    Inst(Opcode::SetPatch, patch, value);
}

Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
    switch (bit_size) {
    case 32:
@ -552,6 +570,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
    }
}

Value IREmitter::CompositeConstruct(std::span<const Value> elements) {
    switch (elements.size()) {
    case 2:
        return CompositeConstruct(elements[0], elements[1]);
    case 3:
        return CompositeConstruct(elements[0], elements[1], elements[2]);
    case 4:
        return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]);
    default:
        UNREACHABLE_MSG("Composite construct with greater than 4 elements");
    }
}

Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
    const auto read{[&](Opcode opcode, size_t limit) -> Value {
        if (element >= limit) {

@ -10,6 +10,7 @@
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/condition.h"
#include "shader_recompiler/ir/patch.h"
#include "shader_recompiler/ir/value.h"

namespace Shader::IR {

@ -80,10 +81,18 @@ public:

    [[nodiscard]] U1 Condition(IR::Condition cond);

    [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
    [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
                                   IR::Value index = IR::Value(u32(0u)));
    [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
    void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);

    [[nodiscard]] F32 GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
                                              const U32& comp_index);
    void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index);

    [[nodiscard]] F32 GetPatch(Patch patch);
    void SetPatch(Patch patch, const F32& value);

    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
    void WriteShared(int bit_size, const Value& value, const U32& offset);

@ -138,6 +147,8 @@ public:
    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
                                           const Value& e4);
    [[nodiscard]] Value CompositeConstruct(std::span<const Value> values);

    [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
    [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);

@ -335,6 +346,7 @@ private:
    template <typename T = Value, typename... Args>
    T Inst(Opcode op, Args... args) {
        auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
        it->SetParent(block);
        return T{Value{&*it}};
    }

@ -352,6 +364,7 @@ private:
        u32 raw_flags{};
        std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
        auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
        it->SetParent(block);
        return T{Value{&*it}};
    }
};

@ -52,6 +52,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
    case Opcode::Discard:
    case Opcode::DiscardCond:
    case Opcode::SetAttribute:
    case Opcode::SetTcsGenericAttribute:
    case Opcode::SetPatch:
    case Opcode::StoreBufferU32:
    case Opcode::StoreBufferU32x2:
    case Opcode::StoreBufferU32x3:

@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque};
constexpr Type ScalarReg{Type::ScalarReg};
constexpr Type VectorReg{Type::VectorReg};
constexpr Type Attribute{Type::Attribute};
constexpr Type SystemValue{Type::SystemValue};
constexpr Type Patch{Type::Patch};
constexpr Type U1{Type::U1};
constexpr Type U8{Type::U8};
constexpr Type U16{Type::U16};

@ -60,6 +60,10 @@ OPCODE(SetGotoVariable, Void, U32,
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
OPCODE(GetAttributeU32, U32, Attribute, U32, )
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
OPCODE(GetPatch, F32, Patch, )
OPCODE(SetPatch, Void, Patch, F32, )
OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, )
OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, )

// Flags
OPCODE(GetScc, U1, Void, )

@ -216,6 +216,18 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
    }
}

template <typename T>
void FoldMul(IR::Block& block, IR::Inst& inst) {
    if (!FoldCommutative<T>(inst, [](T a, T b) { return a * b; })) {
        return;
    }
    const IR::Value rhs{inst.Arg(1)};
    if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
        inst.ReplaceUsesWithAndRemove(IR::Value(0u));
        return;
    }
}

void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
    ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
    const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
@ -292,7 +304,19 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
        FoldWhenAllImmediates(inst, [](u32 a) { return static_cast<float>(a); });
        return;
    case IR::Opcode::IMul32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
        FoldMul<u32>(block, inst);
        return;
    case IR::Opcode::UDiv32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
            ASSERT_MSG(b != 0, "Folding UDiv32 with divisor 0");
            return a / b;
        });
        return;
    case IR::Opcode::UMod32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
            ASSERT_MSG(b != 0, "Folding UMod32 with modulo 0");
            return a % b;
        });
        return;
    case IR::Opcode::FPCmpClass32:
        FoldCmpClass(block, inst);

src/shader_recompiler/ir/passes/constant_propogation.h (new file, 4 lines)
@ -0,0 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

src/shader_recompiler/ir/passes/hull_shader_transform.cpp (new file, 744 lines)
@ -0,0 +1,744 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/pattern_matching.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"

namespace Shader::Optimization {

/**
 * Tessellation shaders pass outputs to the next shader using LDS.
 * The Hull shader stage receives input control points stored in LDS.
 *
 * These passes attempt to resolve LDS accesses to attribute accesses and correctly
 * write to the tessellation factor tables.
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2
 * - ...
 * - TCS outputs for patch 0
 * - TCS outputs for patch 1
 * - TCS outputs for patch 2
 * - ...
 * - PatchConst TCS outputs for patch 0
 * - PatchConst TCS outputs for patch 1
 * - PatchConst TCS outputs for patch 2
 *
 *
 * If the Hull stage does not write any new control points the driver will
 * optimize LDS layout so input and output control point spaces overlap.
 * (Passthrough)
 *
 * The gnm driver requires a V# holding special constants to be bound
 * for reads by the shader.
 * The Hull and Domain shaders read values from this buffer, which contains
 * the size and offset information required to address input, output,
 * or PatchConst attributes within the current patch.
 * See the TessellationDataConstantBuffer struct for the layout of this V#.
 *
 * Tessellation factors are stored to a special tessellation factor V# that is automatically bound
 * by the driver. This is the input to the fixed function tessellator that actually subdivides the
 * domain. We translate these to writes to SPIR-V builtins for tessellation factors in the Hull
 * shader.
 * The offset into the tess factor buffer determines which factor the shader is writing.
 * Additionally, most hull shaders seem to redundantly write tess factors to PatchConst
 * attributes, even if dead in the domain shader. We just treat these as generic PatchConst writes.
 *
 * LDS reads in the Hull shader can be from input control points; in the Domain shader they can
 * be hs output control points (output from the perspective of the Hull shader) and patchconst
 * values.
 * LDS stores in the Hull shader can either be output control point writes or per-patch
 * (PatchConst) data writes. The Domain shader exports attributes using EXP instructions, unless
 * it's followed by the geometry stage (but we haven't seen this yet), so nothing special there.
 * The address calculations can vary significantly and can't be easily pattern matched. We are at
 * the mercy of whatever instruction selection the ps4 compiler used.
 * Generally though, they could look something like this:
 * Input control point:
 *     addr = PatchIdInVgt * input_cp_stride * #input_cp_per_patch + index * input_cp_stride
 *            + attr# * 16 + component
 * Output control point:
 *     addr = #patches * input_cp_stride * #input_cp_per_patch
 *            + PatchIdInVgt * output_patch_stride + InvocationID * output_cp_stride
 *            + attr# * 16 + component
 * Per patch output:
 *     addr = #patches * input_cp_stride * #cp_per_input_patch
 *            + #patches * output_patch_stride
 *            + PatchIdInVgt * per_patch_output_stride + attr# * 16 + component
 *
 * output_patch_stride and output_cp_stride are usually compile time constants in the gcn shader.
 *
 * Hull shaders can probably also read output control points corresponding to other threads, like
 * shared memory (but we haven't seen this yet).
 * ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens.
 * They should also be able to read PatchConst values,
 * although we're not sure this happens in practice.
 *
 * To determine which type of attribute (input, output, patchconst) a given LDS load/store
 * is touching, we check the users of the TessConstants V# reads.
 *
 * In the Hull shader, both the PatchId within the VGT group (PatchIdInVgt) and the output control
 * point id (InvocationId) are packed in VGPR1 by the driver like
 * V1 = InvocationId << 8 | PatchIdInVgt
 * The shader typically uses V_BFE_(U|S)32 to extract them. We use the starting bit_pos to determine
 * which is which.
 *
 * This pass does not attempt to deduce the exact attribute referenced in a LDS load/store.
 * Instead, it feeds the address in the LDS load/store to the get/set Insts we use for TCS in/out's,
 * TES in's, and PatchConst in/out's.
 *
 * TCS/TES Input attributes:
 * We define input attributes using an array in the shader roughly like this:
 * // equivalent GLSL in TCS
 * layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES];
 *
 * Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#.
 * We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes.
 * For TES, the number of attributes comes from hs_cp_stride / 16.
 * The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input
 * patch (for Hull) or the hs output patch (for Domain).
 *
 * For input reads in TCS or TES, we emit SPIR-V like:
 * float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2];
 *
 * For output writes, we assume the control point index is InvocationId, since high level languages
 * impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks
 * like this:
 * layout (location = 0) out vec4 out_attrs[][NUM_OUTPUT_ATTRIBUTES];
 * out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value;
 *
 * NUM_OUTPUT_ATTRIBUTES is derived from hs_cp_stride / 16, so it can link with the TES in_attrs
 * variable.
 *
 * Another challenge is the fact that the GCN shader needs to address LDS as a whole, which
 * contains the attributes from many patches. On the other hand, higher level shading
 * languages restrict attribute access to the patch of the current thread, which is naturally a
 * restriction in SPIR-V also.
 * The addresses the ps4 compiler generates for loads/stores and the fact that LDS holds many
 * patches' attributes are just implementation details of the ps4 driver/compiler. To deal with
 * this, we can replace certain TessConstant V# reads with 0; they only contribute to the base
 * address of the current patch's attributes in LDS and not the indexes within the local patch.
 *
 * (A perfect implementation might need emulation of the VGTs in mesh/compute, loading/storing
 * attributes to buffers and not caring about whether they are hs input, hs output, or patchconst
 * attributes)
 */
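
// A minimal illustrative sketch added for this writeup (not part of the commit): how a
// byte address into an attribute region decomposes into the indices used by the
// in_attrs/out_attrs arrays described above. The struct and helper names are hypothetical,
// and we assume the stride is a multiple of 16.
struct TessAddrSketch {
    u32 control_point; // vertex within the current patch
    u32 attr;          // vec4 attribute index within that vertex
    u32 comp;          // component within the vec4
};
inline TessAddrSketch DecomposeTessAddrSketch(u32 addr, u32 stride) {
    return {addr / stride, (addr % stride) >> 4, (addr & 0xFu) >> 2};
}
// e.g. with ls_stride = 32 (two vec4 attributes per vertex), addr = 52 yields
// control point 1, attribute 1, component 1 (52 = 1 * 32 + 1 * 16 + 1 * 4).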

namespace {

using namespace Shader::Optimiation::PatternMatching;

static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset,
                              Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                              TessellationDataConstantBuffer& tess_constants) {
    info.tess_consts_ptr_base = sharp_ptr_base;
    info.tess_consts_dword_offset = sharp_dword_offset;
    info.ReadTessConstantBuffer(tess_constants);
    if (info.l_stage == LogicalStage::TessellationControl) {
        runtime_info.hs_info.InitFromTessConstants(tess_constants);
    } else {
        runtime_info.vs_info.InitFromTessConstants(tess_constants);
    }

    return;
}

struct TessSharpLocation {
    IR::ScalarReg ptr_base;
    u32 dword_off;
};

std::optional<TessSharpLocation> FindTessConstantSharp(IR::Inst* read_const_buffer) {
    IR::Value sharp_ptr_base;
    IR::Value sharp_dword_offset;

    IR::Value rv = IR::Value{read_const_buffer};
    IR::Value handle = read_const_buffer->Arg(0);

    if (M_COMPOSITECONSTRUCTU32X4(M_GETUSERDATA(MatchImm(sharp_dword_offset)), MatchIgnore(),
                                  MatchIgnore(), MatchIgnore())
            .Match(handle)) {
        return TessSharpLocation{.ptr_base = IR::ScalarReg::Max,
                                 .dword_off = static_cast<u32>(sharp_dword_offset.ScalarReg())};
    } else if (M_COMPOSITECONSTRUCTU32X4(
                   M_READCONST(M_COMPOSITECONSTRUCTU32X2(M_GETUSERDATA(MatchImm(sharp_ptr_base)),
                                                         MatchIgnore()),
                               MatchImm(sharp_dword_offset)),
                   MatchIgnore(), MatchIgnore(), MatchIgnore())
                   .Match(handle)) {
        return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(),
                                 .dword_off = sharp_dword_offset.U32()};
    }
    return {};
}

// Walker that helps deduce what type of attribute a DS instruction is reading
// or writing, which could be an input control point, output control point,
// or per-patch constant (PatchConst).
// For certain ReadConstBuffer instructions using the tess constants V#, we visit the users
// recursively and increment a counter on the Load/WriteShared users.
// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase),
// and PatchConstBase (m_patchConstBase).
// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch
// is used as an addend to skip the region for input control points, and similarly
// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region
// for output control points.
//
// TODO: this will break if AMD compiler used distributive property like
// TcsNumPatches * (ls_stride * #input_cp_in_patch + hs_cp_stride * #output_cp_in_patch)
class TessConstantUseWalker {
public:
    void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) {
        u32 inc;
        switch (attr) {
        case TessConstantAttribute::HsNumPatch:
        case TessConstantAttribute::HsOutputBase:
            inc = 1;
            break;
        case TessConstantAttribute::PatchConstBase:
            inc = 2;
            break;
        default:
            UNREACHABLE();
        }

        for (IR::Use use : read_const_buffer->Uses()) {
            MarkTessAttributeUsersHelper(use, inc);
        }

        ++seq_num;
    }

private:
    void MarkTessAttributeUsersHelper(IR::Use use, u32 inc) {
        IR::Inst* inst = use.user;

        switch (use.user->GetOpcode()) {
        case IR::Opcode::LoadSharedU32:
        case IR::Opcode::LoadSharedU64:
        case IR::Opcode::LoadSharedU128:
        case IR::Opcode::WriteSharedU32:
        case IR::Opcode::WriteSharedU64:
        case IR::Opcode::WriteSharedU128: {
            u32 counter = inst->Flags<u32>();
            inst->SetFlags<u32>(counter + inc);
            // Stop here
            return;
        }
        case IR::Opcode::Phi: {
            struct PhiCounter {
                u16 seq_num;
                u8 unique_edge;
                u8 counter;
            };

            PhiCounter count = inst->Flags<PhiCounter>();
            ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand);
            // The point of seq_num is to tell us if we've already traversed this
            // phi on the current walk. Alternatively we could keep a set of phis
            // seen on the current walk. This is to handle phi cycles.
            if (count.seq_num == 0) {
                // First time we've encountered this phi
                count.seq_num = seq_num;
                // Mark the phi as having been traversed originally through this edge
                count.unique_edge = use.operand;
                count.counter = inc;
            } else if (count.seq_num < seq_num) {
                count.seq_num = seq_num;
                // For now, assume we are visiting this phi via the same edge
                // as on other walks. If not, some dataflow analysis might be necessary
                ASSERT(count.unique_edge == use.operand);
                count.counter += inc;
            } else {
                // count.seq_num == seq_num
                // there's a cycle, and we've already been here on this walk
                return;
            }
            inst->SetFlags<PhiCounter>(count);
            break;
        }
        default:
            break;
        }

        for (IR::Use use : inst->Uses()) {
            MarkTessAttributeUsersHelper(use, inc);
        }
    }

    u32 seq_num{1u};
};

enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst };

static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info,
                                              const Shader::RuntimeInfo& runtime_info) {
    u32 count = ring_access->Flags<u32>();
    if (count == 0) {
        return AttributeRegion::InputCP;
    } else if (info.l_stage == LogicalStage::TessellationControl &&
               runtime_info.hs_info.IsPassthrough()) {
        ASSERT(count <= 1);
        return AttributeRegion::PatchConst;
    } else {
        ASSERT(count <= 2);
        return AttributeRegion(count);
    }
}
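
// A compile-time restatement of the counter scheme above (added for illustration):
// GetAttributeRegionKind's final cast relies on the walker's increments lining up with
// the enum values, i.e. 0 base-term additions -> InputCP, 1 -> OutputCP, 2 -> PatchConst.
static_assert(static_cast<u32>(AttributeRegion::InputCP) == 0 &&
              static_cast<u32>(AttributeRegion::OutputCP) == 1 &&
              static_cast<u32>(AttributeRegion::PatchConst) == 2);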

static bool IsDivisibleByStride(IR::Value term, u32 stride) {
    IR::Value a, b;
    if (MatchU32(stride).Match(term)) {
        return true;
    } else if (M_BITFIELDUEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term) ||
               M_BITFIELDSEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term)) {
        return IsDivisibleByStride(a, stride);
    } else if (M_IMUL32(MatchValue(a), MatchValue(b)).Match(term)) {
        return IsDivisibleByStride(a, stride) || IsDivisibleByStride(b, stride);
    }
    return false;
}

// Return true if we can eliminate any addends
static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector<IR::U32>& addends) {
    IR::Value a, b;
    if (M_IADD32(MatchValue(a), MatchValue(b)).Match(addend)) {
        bool ret = false;
        ret = TryOptimizeAddendInModulo(a, stride, addends);
        ret |= TryOptimizeAddendInModulo(b, stride, addends);
        return ret;
    } else if (!IsDivisibleByStride(addend, stride)) {
        addends.push_back(IR::U32{addend});
        return false;
    } else {
        return true;
    }
}

// In calculation (a + b + ...) % stride
// Use this fact:
// (a + b) mod N = (a mod N + b mod N) mod N
// If any addend is divisible by stride, then we can replace it with 0 in the attribute
// or component index calculation
static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& ir) {
    std::vector<IR::U32> addends;
    if (TryOptimizeAddendInModulo(addr, stride, addends)) {
        addr = ir.Imm32(0);
        for (auto& addend : addends) {
            addr = ir.IAdd(addr, addend);
        }
    }
    return addr;
}
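
// Worked example of the identity above (added for illustration, not part of the commit):
// with hs_cp_stride = 64, an address PatchIdInVgt * 64 * num_cp + InvocationId * 64 +
// attr * 16 + comp * 4 reduces modulo 64 to just attr * 16 + comp * 4, since every other
// addend is divisible by the stride. Concretely, for PatchIdInVgt = 3, num_cp = 4,
// InvocationId = 2, attr = 1, comp = 2:
static_assert((3u * 64 * 4 + 2u * 64 + 1u * 16 + 2u * 4) % 64 == 1u * 16 + 2u * 4);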

// TODO: can optimize div in control point index similarly to mod

// Read a TCS input (InputCP region) or TES input (OutputCP region)
static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir,
                                      u32 off_dw) {
    if (off_dw > 0) {
        addr = ir.IAdd(addr, ir.Imm32(off_dw));
    }
    const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride));
    const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
    const IR::U32 attr_index =
        ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
    const IR::U32 comp_index =
        ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
    return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index);
}

} // namespace

void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
    const Info& info = program.info;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::StoreBufferU32:
            case IR::Opcode::StoreBufferU32x2:
            case IR::Opcode::StoreBufferU32x3:
            case IR::Opcode::StoreBufferU32x4: {
                const auto info = inst.Flags<IR::BufferInstInfo>();
                if (!info.globally_coherent) {
                    break;
                }
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const auto GetValue = [&](IR::Value data) -> IR::F32 {
                    if (auto* inst = data.TryInstRecursive();
                        inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) {
                        return IR::F32{inst->Arg(0)};
                    }
                    return ir.BitCast<IR::F32, IR::U32>(IR::U32{data});
                };
                const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
                IR::U32 index = IR::U32{inst.Arg(1)};
                ASSERT(index.IsImmediate());
                const u32 gcn_factor_idx = (info.inst_offset.Value() + index.U32()) >> 2;

                const IR::Value data = inst.Arg(2);
                auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch {
                    // The hull outputs tess factors in different formats depending on the shader.
                    // For triangle domains, it seems to pack the entries into 4 consecutive floats,
                    // with the 3 edge factors followed by the 1 interior factor.
                    // For quads, it does 4 edge factors then 2 interior.
                    // There is a tess factor stride member of the GNMX hull constants struct in
                    // a hull program shader binary archive, but this doesn't seem to be
                    // communicated to the driver.
                    // The layout seems to be implied by the type of the abstract domain.
                    switch (runtime_info.hs_info.tess_type) {
                    case AmdGpu::TessellationType::Quad:
                        ASSERT(gcn_factor_idx < 6);
                        return IR::PatchFactor(gcn_factor_idx);
                    case AmdGpu::TessellationType::Triangle:
                        ASSERT(gcn_factor_idx < 4);
                        if (gcn_factor_idx == 3) {
                            return IR::Patch::TessellationLodInteriorU;
                        }
                        return IR::PatchFactor(gcn_factor_idx);
                    default:
                        // Point domain types haven't been seen so far
                        UNREACHABLE_MSG("Unhandled tess type");
                    }
                };
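
                // The mapping above leans on the first six IR::Patch values being the
                // factor builtins in declaration order; a compile-time restatement of
                // that assumption (added for illustration):
                static_assert(IR::PatchFactor(0) == IR::Patch::TessellationLodLeft &&
                              IR::PatchFactor(3) == IR::Patch::TessellationLodBottom &&
                              IR::PatchFactor(4) == IR::Patch::TessellationLodInteriorU);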

                inst.Invalidate();
                if (num_dwords == 1) {
                    ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data));
                    break;
                }
                auto* inst = data.TryInstRecursive();
                ASSERT(inst && (inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
                                inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 ||
                                inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4));
                for (s32 i = 0; i < num_dwords; i++) {
                    ir.SetPatch(get_factor_attr(gcn_factor_idx + i), GetValue(inst->Arg(i)));
                }
                break;
            }

            case IR::Opcode::WriteSharedU32:
            case IR::Opcode::WriteSharedU64:
            case IR::Opcode::WriteSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4);
                const IR::U32 addr{inst.Arg(0)};
                const IR::U32 data{inst.Arg(1).Resolve()};

                const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind,
                                           u32 off_dw) {
                    const IR::F32 data_component = ir.BitCast<IR::F32, IR::U32>(value);

                    if (output_kind == AttributeRegion::OutputCP) {
                        if (off_dw > 0) {
                            addr = ir.IAdd(addr, ir.Imm32(off_dw));
                        }
                        u32 stride = runtime_info.hs_info.hs_output_cp_stride;
                        // Invocation ID array index is implicit, handled by SPIRV backend
                        const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
                        const IR::U32 attr_index = ir.ShiftRightLogical(
                            ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
                        const IR::U32 comp_index = ir.ShiftRightLogical(
                            ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
                        ir.SetTcsGenericAttribute(data_component, attr_index, comp_index);
                    } else {
                        ASSERT(output_kind == AttributeRegion::PatchConst);
                        ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
                                   fmt::ptr(addr.Inst()));
                        ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component);
                    }
                };

                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                if (num_dwords == 1) {
                    SetOutput(addr, data, region, 0);
                } else {
                    for (auto i = 0; i < num_dwords; i++) {
                        SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i);
                    }
                }
                inst.Invalidate();
                break;
            }

            case IR::Opcode::LoadSharedU32:
            case IR::Opcode::LoadSharedU64:
            case IR::Opcode::LoadSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const IR::U32 addr{inst.Arg(0)};
                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
                ASSERT_MSG(region == AttributeRegion::InputCP,
                           "Unhandled read of output or patchconst attribute in hull shader");
                IR::Value attr_read;
                if (num_dwords == 1) {
                    attr_read = ir.BitCast<IR::U32>(
                        ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0));
                } else {
                    boost::container::static_vector<IR::Value, 4> read_components;
                    for (auto i = 0; i < num_dwords; i++) {
                        const IR::F32 component =
                            ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i);
                        read_components.push_back(ir.BitCast<IR::U32>(component));
                    }
                    attr_read = ir.CompositeConstruct(read_components);
                }
                inst.ReplaceUsesWithAndRemove(attr_read);
                break;
            }

            default:
                break;
            }
        }
    }

    if (runtime_info.hs_info.IsPassthrough()) {
        // Copy input attributes to output attributes, indexed by InvocationID
        // Passthrough should imply that input and output patches have the same number of vertices
        IR::Block* entry_block = *program.blocks.begin();
        auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) {
            return inst.GetOpcode() == IR::Opcode::Prologue;
        });
        ASSERT(it != entry_block->end());
        ++it;
        ASSERT(it != entry_block->end());
        ++it;
        // Prologue
        // SetExec #true
        // <- insert here
        // ...
        IR::IREmitter ir{*entry_block, it};

        ASSERT(runtime_info.hs_info.ls_stride % 16 == 0);
        u32 num_attributes = runtime_info.hs_info.ls_stride / 16;
        const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId);
        for (u32 attr_no = 0; attr_no < num_attributes; attr_no++) {
            for (u32 comp = 0; comp < 4; comp++) {
                IR::F32 attr_read =
                    ir.GetTessGenericAttribute(invocation_id, ir.Imm32(attr_no), ir.Imm32(comp));
                // InvocationId is the implicit index for output control point writes
                ir.SetTcsGenericAttribute(attr_read, ir.Imm32(attr_no), ir.Imm32(comp));
            }
        }
        // We could wrap the rest of the program in an if stmt
        // CopyInputAttrsToOutputs(); // pseudocode
        // if (InvocationId == 0) {
        //     PatchConstFunction();
        // }
        // But as long as we treat invocation ID as 0 for all threads, it shouldn't matter
        // functionally
    }
}

void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
    Info& info = program.info;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
            const auto opcode = inst.GetOpcode();
            switch (inst.GetOpcode()) {
            case IR::Opcode::LoadSharedU32:
            case IR::Opcode::LoadSharedU64:
            case IR::Opcode::LoadSharedU128: {
                const IR::U32 addr{inst.Arg(0)};
                AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
                                           ? 1
                                           : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
                const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
                    if (region == AttributeRegion::OutputCP) {
                        return ReadTessInputComponent(
                            addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw);
                    } else {
                        ASSERT(region == AttributeRegion::PatchConst);
                        return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));
                    }
                };
                IR::Value attr_read;
                if (num_dwords == 1) {
                    attr_read = ir.BitCast<IR::U32>(GetInput(addr, 0));
                } else {
                    boost::container::static_vector<IR::Value, 4> read_components;
                    for (auto i = 0; i < num_dwords; i++) {
                        const IR::F32 component = GetInput(addr, i);
                        read_components.push_back(ir.BitCast<IR::U32>(component));
                    }
                    attr_read = ir.CompositeConstruct(read_components);
                }
                inst.ReplaceUsesWithAndRemove(attr_read);
                break;
            }
            default:
                break;
            }
        }
    }
}

// Run before either hull or domain transform
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
    TessellationDataConstantBuffer tess_constants;
    Shader::Info& info = program.info;
    // Find the TessellationDataConstantBuffer V#
    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            auto found_tess_consts_sharp = [&]() -> bool {
                switch (inst.GetOpcode()) {
                case IR::Opcode::LoadSharedU32:
                case IR::Opcode::LoadSharedU64:
                case IR::Opcode::LoadSharedU128:
                case IR::Opcode::WriteSharedU32:
                case IR::Opcode::WriteSharedU64:
                case IR::Opcode::WriteSharedU128: {
                    IR::Value addr = inst.Arg(0);
                    auto read_const_buffer = IR::BreadthFirstSearch(
                        addr, [](IR::Inst* maybe_tess_const) -> std::optional<IR::Inst*> {
                            if (maybe_tess_const->GetOpcode() == IR::Opcode::ReadConstBuffer) {
                                return maybe_tess_const;
                            }
                            return std::nullopt;
                        });
                    if (read_const_buffer) {
                        auto sharp_location = FindTessConstantSharp(read_const_buffer.value());
                        if (sharp_location) {
                            if (info.tess_consts_dword_offset >= 0) {
                                // It's possible there's a readconstbuffer that contributes to an
                                // LDS address and isn't a TessConstant V# read. Could improve on
                                // this somehow
                                ASSERT_MSG(static_cast<s32>(sharp_location->dword_off) ==
                                                   info.tess_consts_dword_offset &&
                                               sharp_location->ptr_base ==
                                                   info.tess_consts_ptr_base,
                                           "TessConstants V# is ambiguous");
                            }
                            InitTessConstants(sharp_location->ptr_base,
                                              static_cast<s32>(sharp_location->dword_off), info,
                                              runtime_info, tess_constants);
                            return true;
                        }
                        UNREACHABLE_MSG("Failed to match tess constant sharp");
                    }
                    return false;
                }
                default:
                    return false;
                }
            }();

            if (found_tess_consts_sharp) {
                break;
            }
        }
    }

    ASSERT(info.tess_consts_dword_offset >= 0);

    TessConstantUseWalker walker;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
                auto sharp_location = FindTessConstantSharp(&inst);
                if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base &&
                    sharp_location->dword_off == info.tess_consts_dword_offset) {
                    // The shader is reading from the TessConstants V#
                    IR::Value index = inst.Arg(1);

                    ASSERT_MSG(index.IsImmediate(),
                               "Tessellation constant read with dynamic index");
                    u32 off_dw = index.U32();
                    ASSERT(off_dw <=
                           static_cast<u32>(TessConstantAttribute::FirstEdgeTessFactorIndex));

                    auto tess_const_attr = static_cast<TessConstantAttribute>(off_dw);
                    switch (tess_const_attr) {
                    case TessConstantAttribute::LsStride:
                        // If not, we may need to make this runtime state for TES
                        ASSERT(info.l_stage == LogicalStage::TessellationControl);
                        inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.ls_stride));
                        break;
                    case TessConstantAttribute::HsCpStride:
                        inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.hs_cp_stride));
                        break;
                    case TessConstantAttribute::HsNumPatch:
                    case TessConstantAttribute::HsOutputBase:
                    case TessConstantAttribute::PatchConstBase:
                        walker.MarkTessAttributeUsers(&inst, tess_const_attr);
                        // We should be able to safely set these to 0 so that indexing happens only
                        // within the local patch in the recompiled Vulkan shader. This assumes
                        // these values only contribute to address calculations for in/out
                        // attributes in the original gcn shader.
                        // See the explanation for why we set V2 to 0 when emitting the prologue.
                        inst.ReplaceUsesWithAndRemove(IR::Value(0u));
                        break;
                    case Shader::TessConstantAttribute::PatchConstSize:
                    case Shader::TessConstantAttribute::PatchOutputSize:
                    case Shader::TessConstantAttribute::OffChipTessellationFactorThreshold:
                    case Shader::TessConstantAttribute::FirstEdgeTessFactorIndex:
                        // May need to replace PatchConstSize and PatchOutputSize with 0
                        break;
                    default:
                        UNREACHABLE_MSG("Read past end of TessConstantsBuffer");
                    }
                }
            }
        }
    }

    // This pattern matching is necessary for now, unless we support dynamic indexing of
    // PatchConst attributes and tess factors. PatchConst should be easy: turn those into a single
    // vec4 array like in/out attrs. Not sure about tess factors.
    if (info.l_stage == LogicalStage::TessellationControl) {
        // Replace the BFEs on V1 (packed with patch id within VGT and output cp id)
        for (IR::Block* block : program.blocks) {
            for (auto it = block->Instructions().begin(); it != block->Instructions().end(); it++) {
                IR::Inst& inst = *it;
                if (M_BITFIELDUEXTRACT(
                        M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
                                          MatchIgnore()),
                        MatchU32(0), MatchU32(8))
                        .Match(IR::Value{&inst})) {
                    IR::IREmitter emit(*block, it);
                    // This is the patch id within the VGT, not the actual PrimitiveId
                    // in the draw
                    IR::Value replacement(0u);
                    inst.ReplaceUsesWithAndRemove(replacement);
                } else if (M_BITFIELDUEXTRACT(
                               M_GETATTRIBUTEU32(
                                   MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
                                   MatchIgnore()),
                               MatchU32(8), MatchU32(5))
                               .Match(IR::Value{&inst})) {
                    IR::IREmitter ir(*block, it);
                    IR::Value replacement;
                    if (runtime_info.hs_info.IsPassthrough()) {
                        // Deal with annoying pattern in BB where InvocationID use makes no
                        // sense (in addr calculation for patchconst or tess factor write)
                        replacement = ir.Imm32(0);
                    } else {
                        replacement = ir.GetAttributeU32(IR::Attribute::InvocationId);
                    }
                    inst.ReplaceUsesWithAndRemove(replacement);
                }
            }
        }
    }
}
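
// Illustrative sketch (added; not part of the commit): the two fields the BFE patterns
// above extract from the packed V1 value, V1 = InvocationId << 8 | PatchIdInVgt.
[[maybe_unused]] static u32 SketchPatchIdInVgt(u32 v1) {
    return v1 & 0xFFu; // bits [7:0]; the pass replaces uses of this with 0
}
[[maybe_unused]] static u32 SketchInvocationId(u32 v1) {
    return (v1 >> 8) & 0x1Fu; // bits [12:8]; becomes InvocationId, or 0 for passthrough
}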

} // namespace Shader::Optimization

@ -18,5 +18,8 @@ void CollectShaderInfoPass(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
                           Stage stage);
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
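
// Rough sketch of where these run, pieced together from the comments in
// hull_shader_transform.cpp (the pass-manager code itself is not part of this diff,
// and the TessellationEval stage name is an assumption):
// if (program.info.l_stage == LogicalStage::TessellationControl) {
//     TessellationPreprocess(program, runtime_info);
//     HullShaderTransform(program, runtime_info);
// } else if (program.info.l_stage == LogicalStage::TessellationEval) {
//     TessellationPreprocess(program, runtime_info);
//     DomainShaderTransform(program, runtime_info);
// }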

} // namespace Shader::Optimization

@ -1,11 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"

namespace Shader::Optimization {

@ -23,12 +25,45 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
    };

    switch (stage) {
    case Stage::Local: {
        ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::WriteSharedU64:
            case IR::Opcode::WriteSharedU32: {
                bool is_composite = opcode == IR::Opcode::WriteSharedU64;
                u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;

                u32 offset = 0;
                const auto* addr = inst.Arg(0).InstRecursive();
                if (addr->GetOpcode() == IR::Opcode::IAdd32) {
                    ASSERT(addr->Arg(1).IsImmediate());
                    offset = addr->Arg(1).U32();
                }
                IR::Value data = inst.Arg(1).Resolve();
                for (s32 i = 0; i < num_components; i++) {
                    const auto attrib = IR::Attribute::Param0 + (offset / 16);
                    const auto comp = (offset / 4) % 4;
                    const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
                    ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
                    offset += 4;
                }
                inst.Invalidate();
                break;
            }
            default:
                break;
            }
        });
        break;
    }
    case Stage::Export: {
        ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::StoreBufferU32: {
                if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
                const auto info = inst.Flags<IR::BufferInstInfo>();
                if (!info.system_coherent || !info.globally_coherent) {
                    break;
                }

@ -61,12 +96,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::LoadBufferU32: {
                if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
                const auto info = inst.Flags<IR::BufferInstInfo>();
                if (!info.system_coherent || !info.globally_coherent) {
                    break;
                }

                const auto shl_inst = inst.Arg(1).TryInstRecursive();
                const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
                const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
                const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
                const auto bucket = offset.Resolve().U32() / 256u;
                const auto attrib = bucket < 4 ? IR::Attribute::Position0
@ -80,7 +116,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
                break;
            }
            case IR::Opcode::StoreBufferU32: {
                if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
                const auto buffer_info = inst.Flags<IR::BufferInstInfo>();
                if (!buffer_info.system_coherent || !buffer_info.globally_coherent) {
                    break;
                }

@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) {
    case IR::Opcode::GetUserData:
        info.ud_mask.Set(inst.Arg(0).ScalarReg());
        break;
    case IR::Opcode::SetPatch: {
        const auto patch = inst.Arg(0).Patch();
        if (patch <= IR::Patch::TessellationLodBottom) {
            info.stores_tess_level_outer = true;
        } else if (patch <= IR::Patch::TessellationLodInteriorV) {
            info.stores_tess_level_inner = true;
        } else {
            info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
        }
        break;
    }
    case IR::Opcode::GetPatch: {
        const auto patch = inst.Arg(0).Patch();
        info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
        break;
    }
    case IR::Opcode::LoadSharedU32:
    case IR::Opcode::LoadSharedU64:
    case IR::Opcode::WriteSharedU32:

src/shader_recompiler/ir/patch.cpp (new file, 28 lines)
@ -0,0 +1,28 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/ir/patch.h"

namespace Shader::IR {

std::string NameOf(Patch patch) {
    switch (patch) {
    case Patch::TessellationLodLeft:
        return "TessellationLodLeft";
    case Patch::TessellationLodTop:
        return "TessellationLodTop";
    case Patch::TessellationLodRight:
        return "TessellationLodRight";
    case Patch::TessellationLodBottom:
        return "TessellationLodBottom";
    case Patch::TessellationLodInteriorU:
        return "TessellationLodInteriorU";
    case Patch::TessellationLodInteriorV:
        return "TessellationLodInteriorV";
    default:
        const u32 index = u32(patch) - u32(Patch::Component0);
        return fmt::format("Component{}", index);
    }
}

} // namespace Shader::IR

src/shader_recompiler/ir/patch.h (new file, 173 lines)
@ -0,0 +1,173 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <fmt/format.h>
#include "common/types.h"

namespace Shader::IR {

enum class Patch : u64 {
    TessellationLodLeft,
    TessellationLodTop,
    TessellationLodRight,
    TessellationLodBottom,
    TessellationLodInteriorU,
    TessellationLodInteriorV,
    Component0,
    Component1,
    Component2,
    Component3,
    Component4,
    Component5,
    Component6,
    Component7,
    Component8,
    Component9,
    Component10,
    Component11,
    Component12,
    Component13,
    Component14,
    Component15,
    Component16,
    Component17,
    Component18,
    Component19,
    Component20,
    Component21,
    Component22,
    Component23,
    Component24,
    Component25,
    Component26,
    Component27,
    Component28,
    Component29,
    Component30,
    Component31,
    Component32,
    Component33,
    Component34,
    Component35,
    Component36,
    Component37,
    Component38,
    Component39,
    Component40,
    Component41,
    Component42,
    Component43,
    Component44,
    Component45,
    Component46,
    Component47,
    Component48,
    Component49,
    Component50,
    Component51,
    Component52,
    Component53,
    Component54,
    Component55,
    Component56,
    Component57,
    Component58,
    Component59,
    Component60,
    Component61,
    Component62,
    Component63,
    Component64,
    Component65,
    Component66,
    Component67,
    Component68,
    Component69,
    Component70,
    Component71,
    Component72,
    Component73,
    Component74,
    Component75,
    Component76,
    Component77,
    Component78,
    Component79,
    Component80,
    Component81,
    Component82,
    Component83,
    Component84,
    Component85,
    Component86,
    Component87,
    Component88,
    Component89,
    Component90,
    Component91,
    Component92,
    Component93,
    Component94,
    Component95,
    Component96,
    Component97,
    Component98,
    Component99,
    Component100,
    Component101,
    Component102,
    Component103,
    Component104,
    Component105,
    Component106,
    Component107,
    Component108,
    Component109,
    Component110,
    Component111,
    Component112,
    Component113,
    Component114,
    Component115,
    Component116,
    Component117,
    Component118,
    Component119,
};
static_assert(static_cast<u64>(Patch::Component119) == 125);

constexpr bool IsGeneric(Patch patch) noexcept {
    return patch >= Patch::Component0 && patch <= Patch::Component119;
}

constexpr Patch PatchFactor(u32 index) {
    return static_cast<Patch>(index);
}

constexpr Patch PatchGeneric(u32 index) {
    return static_cast<Patch>(static_cast<u32>(Patch::Component0) + index);
}

constexpr u32 GenericPatchIndex(Patch patch) {
    return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4;
}

constexpr u32 GenericPatchElement(Patch patch) {
    return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4;
}

[[nodiscard]] std::string NameOf(Patch patch);

} // namespace Shader::IR

template <>
struct fmt::formatter<Shader::IR::Patch> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    auto format(const Shader::IR::Patch patch, format_context& ctx) const {
        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch));
    }
};

src/shader_recompiler/ir/pattern_matching.h (new file, 127 lines)
@ -0,0 +1,127 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/value.h"

namespace Shader::Optimiation::PatternMatching {

// Attempt at pattern matching for Insts and Values
// Needs improvement, mostly a convenience

template <typename Derived>
struct MatchObject {
    inline bool Match(IR::Value v) {
        return static_cast<Derived*>(this)->Match(v);
    }
};

struct MatchValue : MatchObject<MatchValue> {
    MatchValue(IR::Value& return_val_) : return_val(return_val_) {}

    inline bool Match(IR::Value v) {
        return_val = v;
        return true;
    }

private:
    IR::Value& return_val;
};

struct MatchIgnore : MatchObject<MatchIgnore> {
    MatchIgnore() {}

    inline bool Match(IR::Value v) {
        return true;
    }
};

struct MatchImm : MatchObject<MatchImm> {
    MatchImm(IR::Value& v) : return_val(v) {}

    inline bool Match(IR::Value v) {
        if (!v.IsImmediate()) {
            return false;
        }

        return_val = v;
        return true;
    }

private:
    IR::Value& return_val;
};

struct MatchAttribute : MatchObject<MatchAttribute> {
    MatchAttribute(IR::Attribute attribute_) : attribute(attribute_) {}

    inline bool Match(IR::Value v) {
        return v.Type() == IR::Type::Attribute && v.Attribute() == attribute;
    }

private:
    IR::Attribute attribute;
};

struct MatchU32 : MatchObject<MatchU32> {
    MatchU32(u32 imm_) : imm(imm_) {}

    inline bool Match(IR::Value v) {
        return v.IsImmediate() && v.Type() == IR::Type::U32 && v.U32() == imm;
    }

private:
    u32 imm;
};

template <IR::Opcode opcode, typename... Args>
struct MatchInstObject : MatchObject<MatchInstObject<opcode>> {
    static_assert(sizeof...(Args) == IR::NumArgsOf(opcode));
    MatchInstObject(Args&&... args) : pattern(std::forward_as_tuple(args...)) {}

    inline bool Match(IR::Value v) {
        IR::Inst* inst = v.TryInstRecursive();
        if (!inst || inst->GetOpcode() != opcode) {
            return false;
        }

        bool matched = true;

        [&]<std::size_t... Is>(std::index_sequence<Is...>) {
            ((matched = matched && std::get<Is>(pattern).Match(inst->Arg(Is))), ...);
        }(std::make_index_sequence<sizeof...(Args)>{});

        return matched;
    }

private:
    using MatchArgs = std::tuple<Args&...>;
    MatchArgs pattern;
};

template <IR::Opcode opcode, typename... Args>
inline auto MakeInstPattern(Args&&... args) {
    return MatchInstObject<opcode, Args...>(std::forward<Args>(args)...);
}
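
// Hedged usage sketch (added for illustration; mirrors how hull_shader_transform.cpp
// uses these matchers): capture the lhs of an IAdd32 whose rhs must be the immediate 4.
// IR::Value base;
// if (MakeInstPattern<IR::Opcode::IAdd32>(MatchValue(base), MatchU32(4)).Match(some_value)) {
//     // base now holds the lhs operand of the add
// }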
|
||||
|
||||
// Conveniences. TODO probably simpler way of doing this
|
||||
#define M_READCONST(...) MakeInstPattern<IR::Opcode::ReadConst>(__VA_ARGS__)
|
||||
#define M_GETUSERDATA(...) MakeInstPattern<IR::Opcode::GetUserData>(__VA_ARGS__)
|
||||
#define M_BITFIELDUEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldUExtract>(__VA_ARGS__)
|
||||
#define M_BITFIELDSEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldSExtract>(__VA_ARGS__)
|
||||
#define M_GETATTRIBUTEU32(...) MakeInstPattern<IR::Opcode::GetAttributeU32>(__VA_ARGS__)
|
||||
#define M_UMOD32(...) MakeInstPattern<IR::Opcode::UMod32>(__VA_ARGS__)
|
||||
#define M_SHIFTRIGHTLOGICAL32(...) MakeInstPattern<IR::Opcode::ShiftRightLogical32>(__VA_ARGS__)
|
||||
#define M_IADD32(...) MakeInstPattern<IR::Opcode::IAdd32>(__VA_ARGS__)
|
||||
#define M_IMUL32(...) MakeInstPattern<IR::Opcode::IMul32>(__VA_ARGS__)
|
||||
#define M_BITWISEAND32(...) MakeInstPattern<IR::Opcode::BitwiseAnd32>(__VA_ARGS__)
|
||||
#define M_GETTESSGENERICATTRIBUTE(...) \
|
||||
MakeInstPattern<IR::Opcode::GetTessGenericAttribute>(__VA_ARGS__)
|
||||
#define M_SETTCSGENERICATTRIBUTE(...) \
|
||||
MakeInstPattern<IR::Opcode::SetTcsGenericAttribute>(__VA_ARGS__)
|
||||
#define M_COMPOSITECONSTRUCTU32X2(...) \
|
||||
MakeInstPattern<IR::Opcode::CompositeConstructU32x2>(__VA_ARGS__)
|
||||
#define M_COMPOSITECONSTRUCTU32X4(...) \
|
||||
MakeInstPattern<IR::Opcode::CompositeConstructU32x4>(__VA_ARGS__)
|
||||
|
||||
} // namespace Shader::Optimiation::PatternMatching
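The matchers above compose recursively: MakeInstPattern builds a MatchInstObject whose sub-matchers are applied to the instruction's operands in order, and MatchImm captures the matched operand through a reference. Since MatchArgs is a tuple of references to the sub-matchers, a pattern is meant to be built and consumed within a single expression. A minimal usage sketch, not part of the commit (`addr` is assumed to be an IR::Value, and the matched shape is illustrative):

    // Does `addr` compute (x * imm) + 16? Capture the immediate multiplier on success.
    IR::Value stride;
    if (M_IADD32(M_IMUL32(MatchIgnore(), MatchImm(stride)), MatchU32(16)).Match(addr)) {
        // `stride` now holds the immediate operand as an IR::Value.
    }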

@@ -49,7 +49,8 @@ union BufferInstInfo {
    BitField<0, 1, u32> index_enable;
    BitField<1, 1, u32> offset_enable;
    BitField<2, 12, u32> inst_offset;
-   BitField<14, 1, u32> ring_access; // global + system coherency
+   BitField<14, 1, u32> system_coherent;
+   BitField<15, 1, u32> globally_coherent;
};
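As a side note, the two new bits overlay the same control word as the other fields. A hedged sketch of writing and reading them back, assuming the common BitField's Assign/Value API used throughout the codebase:

    BufferInstInfo info{};
    info.inst_offset.Assign(0x10);    // bits [2, 14)
    info.system_coherent.Assign(1);   // bit 14, previously the combined ring_access bit
    const u32 offset = info.inst_offset.Value(); // reads back 0x10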

enum class ScalarReg : u32 {

@@ -15,7 +15,7 @@ enum class Type {
    ScalarReg = 1 << 1,
    VectorReg = 1 << 2,
    Attribute = 1 << 3,
-   SystemValue = 1 << 4,
+   Patch = 1 << 4,
    U1 = 1 << 5,
    U8 = 1 << 6,
    U16 = 1 << 7,

@@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {}

Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}

+Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {}
+
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}

Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}

@@ -16,6 +16,7 @@
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/opcodes.h"
+#include "shader_recompiler/ir/patch.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"

@@ -34,6 +35,7 @@ public:
    explicit Value(IR::ScalarReg reg) noexcept;
    explicit Value(IR::VectorReg reg) noexcept;
    explicit Value(IR::Attribute value) noexcept;
+   explicit Value(IR::Patch patch) noexcept;
    explicit Value(bool value) noexcept;
    explicit Value(u8 value) noexcept;
    explicit Value(u16 value) noexcept;

@@ -56,6 +58,7 @@ public:
    [[nodiscard]] IR::ScalarReg ScalarReg() const;
    [[nodiscard]] IR::VectorReg VectorReg() const;
    [[nodiscard]] IR::Attribute Attribute() const;
+   [[nodiscard]] IR::Patch Patch() const;
    [[nodiscard]] bool U1() const;
    [[nodiscard]] u8 U8() const;
    [[nodiscard]] u16 U16() const;

@@ -75,6 +78,7 @@ private:
    IR::ScalarReg sreg;
    IR::VectorReg vreg;
    IR::Attribute attribute;
+   IR::Patch patch;
    bool imm_u1;
    u8 imm_u8;
    u16 imm_u16;

@@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const {
    return attribute;
}

+inline IR::Patch Value::Patch() const {
+    DEBUG_ASSERT(type == Type::Patch);
+    return patch;
+}
+
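A short sketch of how the new variant flows through IR::Value; the specific IR::Patch enumerator is an assumption, since patch.h itself is not shown in this excerpt:

    IR::Value v{IR::Patch::TessellationLodLeft}; // assumed enumerator from patch.h
    ASSERT(v.Type() == IR::Type::Patch);
    const IR::Patch p = v.Patch(); // guarded by the DEBUG_ASSERT above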
inline bool Value::U1() const {
    if (IsIdentity()) {
        return inst->Arg(0).U1();

@@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

+#include "common/config.h"
+#include "common/io_file.h"
+#include "common/path_util.h"
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h"

@@ -29,7 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
}

IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
-                            const RuntimeInfo& runtime_info, const Profile& profile) {
+                            RuntimeInfo& runtime_info, const Profile& profile) {
    // Ensure first instruction is expected.
    constexpr u32 token_mov_vcchi = 0xBEEB03FF;
    if (code[0] != token_mov_vcchi) {

@@ -60,12 +63,29 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
    program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());

    // Run optimization passes
+   const auto stage = program.info.stage;
+
    Shader::Optimization::SsaRewritePass(program.post_order_blocks);
+   Shader::Optimization::IdentityRemovalPass(program.blocks);
+   if (info.l_stage == LogicalStage::TessellationControl) {
+       // Tess passes require previous const prop passes for now (for simplicity). TODO allow
+       // fine grained folding or opportunistic folding we set an operand to an immediate
+       Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
+       Shader::Optimization::TessellationPreprocess(program, runtime_info);
+       Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
+       Shader::Optimization::HullShaderTransform(program, runtime_info);
+   } else if (info.l_stage == LogicalStage::TessellationEval) {
+       Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
+       Shader::Optimization::TessellationPreprocess(program, runtime_info);
+       Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
+       Shader::Optimization::DomainShaderTransform(program, runtime_info);
+   }
    Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
-   if (program.info.stage != Stage::Compute) {
+   Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
+   if (stage != Stage::Compute) {
        Shader::Optimization::LowerSharedMemToRegisters(program);
    }
-   Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);
    Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
    Shader::Optimization::FlattenExtendedUserdataPass(program);
    Shader::Optimization::ResourceTrackingPass(program);
    Shader::Optimization::IdentityRemovalPass(program.blocks);

@@ -28,6 +28,6 @@ struct Pools {
};

[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
-                                          const RuntimeInfo& runtime_info, const Profile& profile);
+                                          RuntimeInfo& runtime_info, const Profile& profile);

} // namespace Shader

@@ -7,6 +7,7 @@
#include <span>
#include <boost/container/static_vector.hpp>
#include "common/types.h"
+#include "shader_recompiler/frontend/tessellation.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/types.h"

@@ -21,12 +22,31 @@ enum class Stage : u32 {
    Local,
    Compute,
};
-constexpr u32 MaxStageTypes = 7;

+// Vertex intentionally comes after TCS/TES due to order of compilation
+enum class LogicalStage : u32 {
+    Fragment,
+    TessellationControl,
+    TessellationEval,
+    Vertex,
+    Geometry,
+    Compute,
+    NumLogicalStages
+};
+
+constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);

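LogicalStage is the Vulkan-facing stage identity, while Stage keeps naming the hardware pipeline slot (Local/Export/Hull/...). Containers in the pipeline code are sized by the logical count; a minimal sketch of the indexing this enables (illustrative, mirroring the static_assert that appears later in GetStages):

    std::array<const Shader::Info*, Shader::MaxStageTypes> infos{};
    const auto idx = static_cast<u32>(Shader::LogicalStage::Vertex);
    infos[idx] = nullptr; // a plain VS or, with tessellation, the LS program compiled as the vertex stage
    static_assert(static_cast<u32>(Shader::LogicalStage::Compute) == Shader::MaxStageTypes - 1);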
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
    return static_cast<Stage>(index);
}

struct LocalRuntimeInfo {
    u32 ls_stride;
+   bool links_with_tcs;

    auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
};

struct ExportRuntimeInfo {
    u32 vertex_data_size;

@@ -64,9 +84,57 @@ struct VertexRuntimeInfo {
    u32 num_outputs;
    std::array<VsOutputMap, 3> outputs;
    bool emulate_depth_negative_one_to_one{};
+   // Domain
+   AmdGpu::TessellationType tess_type;
+   AmdGpu::TessellationTopology tess_topology;
+   AmdGpu::TessellationPartitioning tess_partitioning;
+   u32 hs_output_cp_stride{};

    bool operator==(const VertexRuntimeInfo& other) const noexcept {
-       return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one;
+       return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
+              tess_type == other.tess_type && tess_topology == other.tess_topology &&
+              tess_partitioning == other.tess_partitioning &&
+              hs_output_cp_stride == other.hs_output_cp_stride;
    }

+   void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
+       hs_output_cp_stride = tess_constants.hs_cp_stride;
+   }
};

+struct HullRuntimeInfo {
+    // from registers
+    u32 num_input_control_points;
+    u32 num_threads;
+    AmdGpu::TessellationType tess_type;
+
+    // from tess constants buffer
+    u32 ls_stride;
+    u32 hs_output_cp_stride;
+    u32 hs_output_base;
+
+    auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
+
+    // It might be possible for a non-passthrough TCS to have these conditions, in some
+    // dumb situation.
+    // In that case, it should be fine to assume passthrough and declare some extra
+    // output control points and attributes that shouldnt be read by the TES anyways
+    bool IsPassthrough() const {
+        return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1;
+    };
+
+    // regs.ls_hs_config.hs_output_control_points contains the number of threads, which
+    // isn't exactly the number of output control points.
+    // For passthrough shaders, the register field is set to 1, so use the number of
+    // input control points
+    u32 NumOutputControlPoints() const {
+        return IsPassthrough() ? num_input_control_points : num_threads;
+    }
+
+    void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
+        ls_stride = tess_constants.ls_stride;
+        hs_output_cp_stride = tess_constants.hs_cp_stride;
+        hs_output_base = tess_constants.hs_output_base;
+    }
+};
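A worked example of the passthrough heuristic above (values illustrative): a passthrough HS copies each input control point unchanged, so the LS and HS output strides match, only one HS thread runs, and outputs start at base 0.

    HullRuntimeInfo hs{};
    hs.num_input_control_points = 3; // triangle patches
    hs.num_threads = 1;              // register reports 1 control point for passthrough
    hs.ls_stride = 64;
    hs.hs_output_cp_stride = 64;
    hs.hs_output_base = 0;
    ASSERT(hs.IsPassthrough());
    ASSERT(hs.NumOutputControlPoints() == 3); // falls back to the input CP count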

@@ -150,8 +218,10 @@ struct RuntimeInfo {
    AmdGpu::FpDenormMode fp_denorm_mode32;
    AmdGpu::FpRoundMode fp_round_mode32;
    union {
+       LocalRuntimeInfo ls_info;
        ExportRuntimeInfo es_info;
        VertexRuntimeInfo vs_info;
+       HullRuntimeInfo hs_info;
        GeometryRuntimeInfo gs_info;
        FragmentRuntimeInfo fs_info;
        ComputeRuntimeInfo cs_info;

@@ -174,6 +244,10 @@ struct RuntimeInfo {
            return es_info == other.es_info;
        case Stage::Geometry:
            return gs_info == other.gs_info;
+       case Stage::Hull:
+           return hs_info == other.hs_info;
+       case Stage::Local:
+           return ls_info == other.ls_info;
        default:
            return true;
        }

@@ -127,6 +127,18 @@ struct StageSpecialization {
            [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
                spec.force_unnormalized = sharp.force_unnormalized;
            });
+
+       // Initialize runtime_info fields that rely on analysis in tessellation passes
+       if (info->l_stage == LogicalStage::TessellationControl ||
+           info->l_stage == LogicalStage::TessellationEval) {
+           Shader::TessellationDataConstantBuffer tess_constants;
+           info->ReadTessConstantBuffer(tess_constants);
+           if (info->l_stage == LogicalStage::TessellationControl) {
+               runtime_info.hs_info.InitFromTessConstants(tess_constants);
+           } else {
+               runtime_info.vs_info.InitFromTessConstants(tess_constants);
+           }
+       }
    }

    void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {

@@ -143,6 +143,13 @@ struct Liverpool {
        }
    };

+   struct HsTessFactorClamp {
+       // I've only seen min=0.0, max=1.0 so far.
+       // TODO why is max set to 1.0? Makes no sense
+       float hs_max_tess;
+       float hs_min_tess;
+   };
+
    struct ComputeProgram {
        u32 dispatch_initiator;
        u32 dim_x;

@@ -956,6 +963,7 @@ struct Liverpool {
        enum VgtStages : u32 {
            Vs = 0u, // always enabled
            EsGs = 0xB0u,
+           LsHs = 0x45u,
        };

        VgtStages raw;

@@ -963,7 +971,8 @@ struct Liverpool {
        BitField<2, 1, u32> hs_en;
        BitField<3, 2, u32> es_en;
        BitField<5, 1, u32> gs_en;
-       BitField<6, 1, u32> vs_en;
+       BitField<6, 2, u32> vs_en;
+       BitField<8, 1, u32> dynamic_hs;

        bool IsStageEnabled(u32 stage) const {
            switch (stage) {
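For reference, the new LsHs stage-enable value decodes against the widened fields above; a sketch (the bit below hs_en is assumed to be the LS enable, which is not shown in this hunk):

    Liverpool::ShaderStageEnable en{};
    en.raw = Liverpool::ShaderStageEnable::VgtStages::LsHs; // 0x45 = 0b100'0101
    ASSERT(en.hs_en == 1); // bit 2
    ASSERT(en.vs_en == 1); // bits [6, 8), now two bits wide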

@@ -1059,6 +1068,28 @@ struct Liverpool {
        };
    };

+   union LsHsConfig {
+       u32 raw;
+       BitField<0, 8, u32> num_patches;
+       BitField<8, 6, u32> hs_input_control_points;
+       BitField<14, 6, u32> hs_output_control_points;
+   };
+
+   union TessellationConfig {
+       u32 raw;
+       BitField<0, 2, TessellationType> type;
+       BitField<2, 3, TessellationPartitioning> partitioning;
+       BitField<5, 3, TessellationTopology> topology;
+   };
+
+   union TessFactorMemoryBase {
+       u32 base;
+
+       u64 MemoryBase() const {
+           return static_cast<u64>(base) << 8;
+       }
+   };

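These unions mirror raw registers: TessFactorMemoryBase stores a 256-byte-aligned address (hence the << 8 in MemoryBase), and TessellationConfig packs domain, partitioning, and topology into one word. A decode sketch with an illustrative value:

    Liverpool::TessellationConfig tess{};
    tess.raw = 0x4E; // 0b010'011'10: topology = 2, partitioning = 3, type = 2
    ASSERT(tess.type == AmdGpu::TessellationType::Quad);
    ASSERT(tess.partitioning == AmdGpu::TessellationPartitioning::FracEven);
    ASSERT(tess.topology == AmdGpu::TessellationTopology::TriangleCw);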
    union Eqaa {
        u32 raw;
        BitField<0, 1, u32> max_anchor_samples;

@@ -1109,7 +1140,7 @@ struct Liverpool {
        ShaderProgram es_program;
        INSERT_PADDING_WORDS(0x2C);
        ShaderProgram hs_program;
-       INSERT_PADDING_WORDS(0x2C);
+       INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
        ShaderProgram ls_program;
        INSERT_PADDING_WORDS(0xA4);
        ComputeProgram cs_program;

@@ -1176,7 +1207,9 @@ struct Liverpool {
        PolygonControl polygon_control;
        ViewportControl viewport_control;
        VsOutputControl vs_output_control;
-       INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1);
+       INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
+       HsTessFactorClamp hs_clamp;
+       INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
        GsMode vgt_gs_mode;
        INSERT_PADDING_WORDS(1);
        ModeControl mode_control;

@@ -1200,9 +1233,10 @@ struct Liverpool {
        BitField<0, 11, u32> vgt_gs_max_vert_out;
        INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
        ShaderStageEnable stage_enable;
-       INSERT_PADDING_WORDS(1);
+       LsHsConfig ls_hs_config;
        u32 vgt_gs_vert_itemsize[4];
-       INSERT_PADDING_WORDS(4);
+       TessellationConfig tess_config;
+       INSERT_PADDING_WORDS(3);
        PolygonOffset poly_offset;
        GsInstances vgt_gs_instance_cnt;
        StreamOutConfig vgt_strmout_config;

@@ -1216,6 +1250,8 @@ struct Liverpool {
        INSERT_PADDING_WORDS(0xC24C - 0xC243);
        u32 num_indices;
        VgtNumInstances num_instances;
+       INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1);
+       TessFactorMemoryBase vgt_tf_memory_base;
    };
    std::array<u32, NumRegs> reg_array{};

@@ -1431,6 +1467,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
+static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);

@@ -1445,6 +1482,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
+static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);

@@ -1456,6 +1494,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
+static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);

#undef GFX6_3D_REG_INDEX

@@ -3,6 +3,8 @@

#pragma once

+#include <string_view>
+#include <fmt/format.h>
#include "common/types.h"

namespace AmdGpu {

@@ -21,6 +23,69 @@ enum class FpDenormMode : u32 {
    InOutAllow = 3,
};

+enum class TessellationType : u32 {
+    Isoline = 0,
+    Triangle = 1,
+    Quad = 2,
+};
+
+constexpr std::string_view NameOf(TessellationType type) {
+    switch (type) {
+    case TessellationType::Isoline:
+        return "Isoline";
+    case TessellationType::Triangle:
+        return "Triangle";
+    case TessellationType::Quad:
+        return "Quad";
+    default:
+        return "Unknown";
+    }
+}
+
+enum class TessellationPartitioning : u32 {
+    Integer = 0,
+    Pow2 = 1,
+    FracOdd = 2,
+    FracEven = 3,
+};
+
+constexpr std::string_view NameOf(TessellationPartitioning partitioning) {
+    switch (partitioning) {
+    case TessellationPartitioning::Integer:
+        return "Integer";
+    case TessellationPartitioning::Pow2:
+        return "Pow2";
+    case TessellationPartitioning::FracOdd:
+        return "FracOdd";
+    case TessellationPartitioning::FracEven:
+        return "FracEven";
+    default:
+        return "Unknown";
+    }
+}
+
+enum class TessellationTopology : u32 {
+    Point = 0,
+    Line = 1,
+    TriangleCw = 2,
+    TriangleCcw = 3,
+};
+
+constexpr std::string_view NameOf(TessellationTopology topology) {
+    switch (topology) {
+    case TessellationTopology::Point:
+        return "Point";
+    case TessellationTopology::Line:
+        return "Line";
+    case TessellationTopology::TriangleCw:
+        return "TriangleCw";
+    case TessellationTopology::TriangleCcw:
+        return "TriangleCcw";
+    default:
+        return "Unknown";
+    }
+}
+
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
enum class PrimitiveType : u32 {
    None = 0,

@@ -118,3 +183,33 @@ enum class NumberFormat : u32 {
};

} // namespace AmdGpu

+template <>
+struct fmt::formatter<AmdGpu::TessellationType> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    auto format(AmdGpu::TessellationType type, format_context& ctx) const {
+        return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
+    }
+};
+
+template <>
+struct fmt::formatter<AmdGpu::TessellationPartitioning> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const {
+        return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
+    }
+};
+
+template <>
+struct fmt::formatter<AmdGpu::TessellationTopology> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    auto format(AmdGpu::TessellationTopology type, format_context& ctx) const {
+        return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
+    }
+};
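With these specializations in scope the enums can be formatted directly, for example inside LOG_* format strings; a usage sketch:

    const auto s = fmt::format("tess: {} / {} / {}", AmdGpu::TessellationType::Quad,
                               AmdGpu::TessellationPartitioning::FracOdd,
                               AmdGpu::TessellationTopology::TriangleCcw);
    // s == "tess: Quad / FracOdd / TriangleCcw"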

@@ -16,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
                                 ComputePipelineKey compute_key_, const Shader::Info& info_,
                                 vk::ShaderModule module)
    : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
-   auto& info = stages[int(Shader::Stage::Compute)];
+   auto& info = stages[int(Shader::LogicalStage::Compute)];
    info = &info_;

    const vk::PipelineShaderStageCreateInfo shader_ci = {

@@ -8,6 +8,7 @@

#include "common/assert.h"
#include "common/scope_exit.h"
+#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"

@@ -52,7 +53,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
    boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
    boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
    if (fetch_shader && !instance.IsVertexInputDynamicState()) {
-       const auto& vs_info = GetStage(Shader::Stage::Vertex);
+       const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
        for (const auto& attrib : fetch_shader->attributes) {
            if (attrib.UsesStepRates()) {
                // Skip attribute binding as the data will be pulled by shader

@@ -106,6 +107,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
               key.primitive_restart_index == 0xFFFFFFFF,
               "Primitive restart index other than -1 is not supported yet");

+   const vk::PipelineTessellationStateCreateInfo tessellation_state = {
+       .patchControlPoints = key.patch_control_points,
+   };
+
    const vk::PipelineRasterizationStateCreateInfo raster_state = {
        .depthClampEnable = false,
        .rasterizerDiscardEnable = false,

@@ -204,7 +209,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul

    boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
        shader_stages;
-   auto stage = u32(Shader::Stage::Vertex);
+   auto stage = u32(Shader::LogicalStage::Vertex);
    if (infos[stage]) {
        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
            .stage = vk::ShaderStageFlagBits::eVertex,

@@ -212,7 +217,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
            .pName = "main",
        });
    }
-   stage = u32(Shader::Stage::Geometry);
+   stage = u32(Shader::LogicalStage::Geometry);
    if (infos[stage]) {
        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
            .stage = vk::ShaderStageFlagBits::eGeometry,

@@ -220,7 +225,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
            .pName = "main",
        });
    }
-   stage = u32(Shader::Stage::Fragment);
+   stage = u32(Shader::LogicalStage::TessellationControl);
+   if (infos[stage]) {
+       shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
+           .stage = vk::ShaderStageFlagBits::eTessellationControl,
+           .module = modules[stage],
+           .pName = "main",
+       });
+   }
+   stage = u32(Shader::LogicalStage::TessellationEval);
+   if (infos[stage]) {
+       shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
+           .stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
+           .module = modules[stage],
+           .pName = "main",
+       });
+   }
+   stage = u32(Shader::LogicalStage::Fragment);
    if (infos[stage]) {
        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
            .stage = vk::ShaderStageFlagBits::eFragment,

@@ -301,6 +322,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
        .pStages = shader_stages.data(),
        .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
        .pInputAssemblyState = &input_assembly,
+       .pTessellationState =
+           stages[u32(Shader::LogicalStage::TessellationControl)] ? &tessellation_state : nullptr,
        .pViewportState = &viewport_info,
        .pRasterizationState = &raster_state,
        .pMultisampleState = &multisampling,

@@ -327,7 +350,6 @@ void GraphicsPipeline::BuildDescSetLayout() {
        if (!stage) {
            continue;
        }

        if (stage->has_readconst) {
            bindings.push_back({
                .binding = binding++,

@@ -52,6 +52,7 @@ struct GraphicsPipelineKey {
    std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
    std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
    std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
+   u32 patch_control_points;

    bool operator==(const GraphicsPipelineKey& key) const noexcept {
        return std::memcmp(this, &key, sizeof(key)) == 0;

@@ -73,7 +74,7 @@ public:

    bool IsEmbeddedVs() const noexcept {
        static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
-       return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
+       return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
    }

    auto GetWriteMasks() const {

@@ -327,6 +327,7 @@ bool Instance::CreateDevice() {
        .imageCubeArray = features.imageCubeArray,
        .independentBlend = features.independentBlend,
        .geometryShader = features.geometryShader,
+       .tessellationShader = features.tessellationShader,
        .logicOp = features.logicOp,
        .depthBiasClamp = features.depthBiasClamp,
        .fillModeNonSolid = features.fillModeNonSolid,

@@ -378,6 +379,7 @@ bool Instance::CreateDevice() {
        vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{
            .extendedDynamicState = true,
        },
        vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{},
        vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{
            .extendedDynamicState3ColorWriteMask = true,
        },

@@ -22,6 +22,8 @@ extern std::unique_ptr<Vulkan::Presenter> presenter;

namespace Vulkan {

+using Shader::LogicalStage;
+using Shader::Stage;
using Shader::VsOutput;

constexpr static std::array DescriptorHeapSizes = {

@@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
                   : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
}

-Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
+Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
    auto info = Shader::RuntimeInfo{stage};
    const auto& regs = liverpool->regs;
    const auto BuildCommon = [&](const auto& program) {

@@ -89,20 +91,47 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
        info.fp_round_mode32 = program.settings.fp_round_mode32;
    };
    switch (stage) {
-   case Shader::Stage::Export: {
+   case Stage::Local: {
+       BuildCommon(regs.ls_program);
+       if (regs.stage_enable.IsStageEnabled(static_cast<u32>(Stage::Hull))) {
+           info.ls_info.links_with_tcs = true;
+           Shader::TessellationDataConstantBuffer tess_constants;
+           const auto* pgm = regs.ProgramForStage(static_cast<u32>(Stage::Hull));
+           const auto params = Liverpool::GetParams(*pgm);
+           const auto& hull_info = program_cache.at(params.hash)->info;
+           hull_info.ReadTessConstantBuffer(tess_constants);
+           info.ls_info.ls_stride = tess_constants.ls_stride;
+       }
+       break;
+   }
+   case Stage::Hull: {
+       BuildCommon(regs.hs_program);
+       info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value();
+       info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value();
+       info.hs_info.tess_type = regs.tess_config.type;
+
+       // We need to initialize most hs_info fields after finding the V# with tess constants
+       break;
+   }
+   case Stage::Export: {
        BuildCommon(regs.es_program);
        info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
        break;
    }
-   case Shader::Stage::Vertex: {
+   case Stage::Vertex: {
        BuildCommon(regs.vs_program);
        GatherVertexOutputs(info.vs_info, regs.vs_output_control);
        info.vs_info.emulate_depth_negative_one_to_one =
            !instance.IsDepthClipControlSupported() &&
            regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
+       if (l_stage == LogicalStage::TessellationEval) {
+           info.vs_info.tess_type = regs.tess_config.type;
+           info.vs_info.tess_topology = regs.tess_config.topology;
+           info.vs_info.tess_partitioning = regs.tess_config.partitioning;
+       }
        break;
    }
-   case Shader::Stage::Geometry: {
+   case Stage::Geometry: {
        BuildCommon(regs.gs_program);
        auto& gs_info = info.gs_info;
        gs_info.output_vertices = regs.vgt_gs_max_vert_out;

@@ -121,7 +150,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
        DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
        break;
    }
-   case Shader::Stage::Fragment: {
+   case Stage::Fragment: {
        BuildCommon(regs.ps_program);
        info.fs_info.en_flags = regs.ps_input_ena;
        info.fs_info.addr_flags = regs.ps_input_addr;

@@ -143,7 +172,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
        }
        break;
    }
-   case Shader::Stage::Compute: {
+   case Stage::Compute: {
        const auto& cs_pgm = regs.cs_program;
        info.num_user_data = cs_pgm.settings.num_user_regs;
        info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4;

@@ -277,6 +306,11 @@ bool PipelineCache::RefreshGraphicsKey() {
    key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
    key.vertex_buffer_formats.fill(vk::Format::eUndefined);

+   key.patch_control_points = 0;
+   if (regs.stage_enable.hs_en.Value()) {
+       key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value();
+   }
+
    // First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader
    // recompiler.
    for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {

@@ -305,7 +339,7 @@ bool PipelineCache::RefreshGraphicsKey() {
    fetch_shader = std::nullopt;

    Shader::Backend::Bindings binding{};
-   const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool {
+   const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool {
        const auto stage_in_idx = static_cast<u32>(stage_in);
        const auto stage_out_idx = static_cast<u32>(stage_out);
        if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {

@@ -332,23 +366,23 @@ bool PipelineCache::RefreshGraphicsKey() {
        auto params = Liverpool::GetParams(*pgm);
        std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
        std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
-                key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
+                key.stage_hashes[stage_out_idx]) =
+           GetProgram(stage_in, stage_out, params, binding);
        if (fetch_shader_) {
            fetch_shader = fetch_shader_;
        }
        return true;
    };

-   const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); };

    const auto& IsGsFeaturesSupported = [&]() -> bool {
        // These checks are temporary until all functionality is implemented.
        return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
    };

-   TryBindStage(Shader::Stage::Fragment);
+   infos.fill(nullptr);
+   TryBindStage(Stage::Fragment, LogicalStage::Fragment);

-   const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
+   const auto* fs_info = infos[static_cast<u32>(LogicalStage::Fragment)];
    key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;

    switch (regs.stage_enable.raw) {

@@ -356,22 +390,36 @@ bool PipelineCache::RefreshGraphicsKey() {
        if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
            return false;
        }
-       if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) {
+       if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) {
            return false;
        }
-       if (!TryBindStage(Shader::Stage::Geometry)) {
+       if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) {
            return false;
        }
        break;
    }
+   case Liverpool::ShaderStageEnable::VgtStages::LsHs: {
+       if (!instance.IsTessellationSupported()) {
+           break;
+       }
+       if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) {
+           return false;
+       }
+       if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) {
+           return false;
+       }
+       if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) {
+           return false;
+       }
+       break;
+   }
    default: {
-       TryBindStage(Shader::Stage::Vertex);
-       infos[static_cast<u32>(Shader::Stage::Geometry)] = nullptr;
+       TryBindStage(Stage::Vertex, LogicalStage::Vertex);
        break;
    }
    }

-   const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
+   const auto vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
    if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
        u32 vertex_binding = 0;
        for (const auto& attrib : fetch_shader->attributes) {

@@ -424,19 +472,18 @@ bool PipelineCache::RefreshGraphicsKey() {
    key.num_samples = num_samples;

    return true;
}
} // namespace Vulkan

bool PipelineCache::RefreshComputeKey() {
    Shader::Backend::Bindings binding{};
    const auto* cs_pgm = &liverpool->regs.cs_program;
    const auto cs_params = Liverpool::GetParams(*cs_pgm);
    std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
-       GetProgram(Shader::Stage::Compute, cs_params, binding);
+       GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding);
    return true;
}

-vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
-                                              const Shader::RuntimeInfo& runtime_info,
+vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                                              std::span<const u32> code, size_t perm_idx,
                                              Shader::Backend::Bindings& binding) {
    LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,

@@ -461,19 +508,19 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
    const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx);
    Vulkan::SetObjectName(instance.GetDevice(), module, name);
    if (Config::collectShadersForDebug()) {
-       DebugState.CollectShader(name, module, spv, code, patch ? *patch : std::span<const u32>{},
-                                is_patched);
+       DebugState.CollectShader(name, info.l_stage, module, spv, code,
+                                patch ? *patch : std::span<const u32>{}, is_patched);
    }
    return module;
}

-std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
-PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
-                          Shader::Backend::Bindings& binding) {
-   const auto runtime_info = BuildRuntimeInfo(stage);
+PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage,
+                                                Shader::ShaderParams params,
+                                                Shader::Backend::Bindings& binding) {
+   auto runtime_info = BuildRuntimeInfo(stage, l_stage);
    auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
    if (new_program) {
-       it_pgm.value() = std::make_unique<Program>(stage, params);
+       it_pgm.value() = std::make_unique<Program>(stage, l_stage, params);
        auto& program = it_pgm.value();
        auto start = binding;
        const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);

@@ -492,7 +539,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
    const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
    if (it == program->modules.end()) {
-       auto new_info = Shader::Info(stage, params);
+       auto new_info = Shader::Info(stage, l_stage, params);
        module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
        program->AddPermut(module, std::move(spec));
    } else {

@@ -34,11 +34,13 @@ struct Program {
        vk::ShaderModule module;
        Shader::StageSpecialization spec;
    };
+   using ModuleList = boost::container::small_vector<Module, 8>;

    Shader::Info info;
-   boost::container::small_vector<Module, 8> modules;
+   ModuleList modules;

-   explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {}
+   explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params)
+       : info{stage, l_stage, params} {}

    void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) {
        modules.emplace_back(module, std::move(spec));

@@ -55,10 +57,10 @@ public:

    const ComputePipeline* GetComputePipeline();

-   std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>,
-              u64>
-   GetProgram(Shader::Stage stage, Shader::ShaderParams params,
-              Shader::Backend::Bindings& binding);
+   using Result = std::tuple<const Shader::Info*, vk::ShaderModule,
+                             std::optional<Shader::Gcn::FetchShaderData>, u64>;
+   Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage,
+                     Shader::ShaderParams params, Shader::Backend::Bindings& binding);

    std::optional<vk::ShaderModule> ReplaceShader(vk::ShaderModule module,
                                                  std::span<const u32> spv_code);

@@ -71,10 +73,10 @@ private:
                                                   std::string_view ext);
    std::optional<std::vector<u32>> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx,
                                                   std::string_view ext);
-   vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info,
+   vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                                   std::span<const u32> code, size_t perm_idx,
                                   Shader::Backend::Bindings& binding);
-   Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage);
+   Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);

private:
    const Instance& instance;

@@ -14,9 +14,10 @@ class BufferCache;

namespace Vulkan {

-static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex |
-                                       vk::ShaderStageFlagBits::eGeometry |
-                                       vk::ShaderStageFlagBits::eFragment;
+static constexpr auto gp_stage_flags =
+    vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eTessellationControl |
+    vk::ShaderStageFlagBits::eTessellationEvaluation | vk::ShaderStageFlagBits::eGeometry |
+    vk::ShaderStageFlagBits::eFragment;

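gp_stage_flags now covers every graphics stage the emulator can bind, so shared descriptor bindings stay visible to the new tessellation stages. A hedged sketch of the kind of binding it is used for (the descriptor layout details are illustrative, not from this diff):

    const vk::DescriptorSetLayoutBinding binding = {
        .binding = 0,
        .descriptorType = vk::DescriptorType::eUniformBuffer,
        .descriptorCount = 1,
        .stageFlags = gp_stage_flags,
    };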
class Instance;
class Scheduler;

@@ -37,6 +38,7 @@ public:
    }

    auto GetStages() const {
+       static_assert(static_cast<u32>(Shader::LogicalStage::Compute) == Shader::MaxStageTypes - 1);
        if (is_compute) {
            return std::span{stages.cend() - 1, stages.cend()};
        } else {

@@ -44,7 +46,7 @@ public:
        }
    }

-   const Shader::Info& GetStage(Shader::Stage stage) const noexcept {
+   const Shader::Info& GetStage(Shader::LogicalStage stage) const noexcept {
        return *stages[u32(stage)];
    }

@@ -4,6 +4,7 @@
#include "common/config.h"
#include "common/debug.h"
#include "core/memory.h"
+#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"

@@ -48,10 +49,6 @@ void Rasterizer::CpSync() {

bool Rasterizer::FilterDraw() {
    const auto& regs = liverpool->regs;
-   // Tessellation is unsupported so skip the draw to avoid locking up the driver.
-   if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) {
-       return false;
-   }
    // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
    // actual draw hence can skip pipeline creation.
    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {

@@ -214,7 +211,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
        return;
    }

-   const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
+   const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
    const auto& fetch_shader = pipeline->GetFetchShader();
    buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
    const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);

@@ -271,7 +268,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
        return;
    }

-   const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
+   const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
    const auto& fetch_shader = pipeline->GetFetchShader();
    buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
    buffer_cache.BindIndexBuffer(is_indexed, 0);

@@ -326,7 +323,7 @@ void Rasterizer::DispatchDirect() {
        return;
    }

-   const auto& cs = pipeline->GetStage(Shader::Stage::Compute);
+   const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute);
    if (ExecuteShaderHLE(cs, liverpool->regs, *this)) {
        return;
    }

@@ -387,7 +384,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
    const auto& regs = liverpool->regs;

    if (pipeline->IsCompute()) {
-       const auto& info = pipeline->GetStage(Shader::Stage::Compute);
+       const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);

        // Most of the time when a metadata is updated with a shader it gets cleared. It means
        // we can skip the whole dispatch and update the tracked state instead. Also, it is not