mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-01 04:35:59 +00:00
Initial support of Geometry shaders (#1244)
* video_core: initial GS support * fix for components mapping; missing prim type
This commit is contained in:
parent
04b0dc9a23
commit
fbc52610ba
|
@ -518,6 +518,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||
src/shader_recompiler/frontend/translate/data_share.cpp
|
||||
src/shader_recompiler/frontend/translate/export.cpp
|
||||
src/shader_recompiler/frontend/translate/scalar_alu.cpp
|
||||
src/shader_recompiler/frontend/translate/scalar_flow.cpp
|
||||
src/shader_recompiler/frontend/translate/scalar_memory.cpp
|
||||
src/shader_recompiler/frontend/translate/translate.cpp
|
||||
src/shader_recompiler/frontend/translate/translate.h
|
||||
|
@ -526,6 +527,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||
src/shader_recompiler/frontend/translate/vector_memory.cpp
|
||||
src/shader_recompiler/frontend/control_flow_graph.cpp
|
||||
src/shader_recompiler/frontend/control_flow_graph.h
|
||||
src/shader_recompiler/frontend/copy_shader.cpp
|
||||
src/shader_recompiler/frontend/copy_shader.h
|
||||
src/shader_recompiler/frontend/decode.cpp
|
||||
src/shader_recompiler/frontend/decode.h
|
||||
src/shader_recompiler/frontend/fetch_shader.cpp
|
||||
|
@ -542,6 +545,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||
src/shader_recompiler/ir/passes/ir_passes.h
|
||||
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
|
||||
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
|
||||
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
|
||||
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
|
||||
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
|
||||
src/shader_recompiler/ir/abstract_syntax_list.h
|
||||
|
@ -574,6 +578,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
|||
src/video_core/amdgpu/pm4_cmds.h
|
||||
src/video_core/amdgpu/pm4_opcodes.h
|
||||
src/video_core/amdgpu/resource.h
|
||||
src/video_core/amdgpu/types.h
|
||||
src/video_core/amdgpu/default_context.cpp
|
||||
src/video_core/buffer_cache/buffer.cpp
|
||||
src/video_core/buffer_cache/buffer.h
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "common/assert.h"
|
||||
#include "common/func_traits.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||
|
@ -12,10 +13,38 @@
|
|||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
|
||||
static constexpr spv::ExecutionMode GetInputPrimitiveType(AmdGpu::PrimitiveType type) {
|
||||
switch (type) {
|
||||
case AmdGpu::PrimitiveType::PointList:
|
||||
return spv::ExecutionMode::InputPoints;
|
||||
case AmdGpu::PrimitiveType::LineList:
|
||||
return spv::ExecutionMode::InputLines;
|
||||
case AmdGpu::PrimitiveType::TriangleList:
|
||||
case AmdGpu::PrimitiveType::TriangleStrip:
|
||||
return spv::ExecutionMode::Triangles;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr spv::ExecutionMode GetOutputPrimitiveType(AmdGpu::GsOutputPrimitiveType type) {
|
||||
switch (type) {
|
||||
case AmdGpu::GsOutputPrimitiveType::PointList:
|
||||
return spv::ExecutionMode::OutputVertices;
|
||||
case AmdGpu::GsOutputPrimitiveType::LineStrip:
|
||||
return spv::ExecutionMode::OutputLineStrip;
|
||||
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
||||
return spv::ExecutionMode::OutputTriangleStrip;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
template <auto func, typename... Args>
|
||||
void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
|
||||
inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
|
||||
|
@ -222,6 +251,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
workgroup_size[1], workgroup_size[2]);
|
||||
break;
|
||||
}
|
||||
case Stage::Export:
|
||||
case Stage::Vertex:
|
||||
execution_model = spv::ExecutionModel::Vertex;
|
||||
break;
|
||||
|
@ -240,6 +270,16 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry:
|
||||
execution_model = spv::ExecutionModel::Geometry;
|
||||
ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
|
||||
ctx.AddExecutionMode(main,
|
||||
GetOutputPrimitiveType(ctx.runtime_info.gs_info.out_primitive[0]));
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
|
||||
ctx.runtime_info.gs_info.output_vertices);
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations,
|
||||
ctx.runtime_info.gs_info.num_invocations);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Stage {}", u32(program.info.stage));
|
||||
}
|
||||
|
@ -270,11 +310,20 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
|
|||
EmitContext ctx{profile, runtime_info, program.info, binding};
|
||||
const Id main{DefineMain(ctx, program)};
|
||||
DefineEntryPoint(program, ctx, main);
|
||||
if (program.info.stage == Stage::Vertex) {
|
||||
switch (program.info.stage) {
|
||||
case Stage::Export:
|
||||
case Stage::Vertex:
|
||||
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
||||
ctx.AddCapability(spv::Capability::DrawParameters);
|
||||
break;
|
||||
case Stage::Geometry:
|
||||
ctx.AddCapability(spv::Capability::Geometry);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
PatchPhiNodes(program, ctx);
|
||||
binding.user_data += program.info.ud_mask.NumRegs();
|
||||
return ctx.Assemble();
|
||||
}
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||
if (IR::IsParam(attr)) {
|
||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
const auto& info{ctx.output_params.at(index)};
|
||||
ASSERT(info.num_components > 0);
|
||||
if (info.num_components == 1) {
|
||||
return info.id;
|
||||
} else {
|
||||
|
@ -164,7 +165,30 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
|
|||
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
|
||||
}
|
||||
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
|
||||
if (ctx.info.stage == Stage::Geometry) {
|
||||
if (IR::IsPosition(attr)) {
|
||||
ASSERT(attr == IR::Attribute::Position0);
|
||||
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
||||
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index),
|
||||
ctx.ConstU32(0u))};
|
||||
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||
return ctx.OpLoad(ctx.F32[1],
|
||||
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
||||
}
|
||||
|
||||
if (IR::IsParam(attr)) {
|
||||
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
const auto param = ctx.input_params.at(param_id).id;
|
||||
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
||||
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
|
||||
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
||||
return ctx.OpLoad(ctx.F32[1],
|
||||
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
if (IR::IsParam(attr)) {
|
||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
const auto& param{ctx.input_params.at(index)};
|
||||
|
@ -232,6 +256,9 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
case IR::Attribute::IsFrontFace:
|
||||
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
|
||||
ctx.u32_zero_value);
|
||||
case IR::Attribute::PrimitiveId:
|
||||
ASSERT(ctx.info.stage == Stage::Geometry);
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
|
||||
default:
|
||||
throw NotImplementedException("Read U32 attribute {}", attr);
|
||||
}
|
||||
|
|
|
@ -27,7 +27,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
|
|||
void EmitReference(EmitContext&);
|
||||
void EmitPhiMove(EmitContext&);
|
||||
void EmitJoin(EmitContext& ctx);
|
||||
void EmitBarrier(EmitContext& ctx);
|
||||
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
|
||||
void EmitDeviceMemoryBarrier(EmitContext& ctx);
|
||||
void EmitGetScc(EmitContext& ctx);
|
||||
|
@ -85,7 +84,7 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
|
|||
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
|
||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
|
||||
|
@ -409,4 +408,7 @@ Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
|
|||
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding);
|
||||
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding);
|
||||
|
||||
void EmitEmitVertex(EmitContext& ctx);
|
||||
void EmitEmitPrimitive(EmitContext& ctx);
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -41,6 +41,14 @@ void EmitDiscardCond(EmitContext& ctx, Id condition) {
|
|||
ctx.AddLabel(merge_label);
|
||||
}
|
||||
|
||||
void EmitEmitVertex(EmitContext& ctx) {
|
||||
ctx.OpEmitVertex();
|
||||
}
|
||||
|
||||
void EmitEmitPrimitive(EmitContext& ctx) {
|
||||
ctx.OpEndPrimitive();
|
||||
}
|
||||
|
||||
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
|
||||
throw NotImplementedException("Geometry streams");
|
||||
}
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <fmt/format.h>
|
||||
|
@ -32,6 +34,19 @@ std::string_view StageName(Stage stage) {
|
|||
throw InvalidArgument("Invalid stage {}", u32(stage));
|
||||
}
|
||||
|
||||
static constexpr u32 NumVertices(AmdGpu::GsOutputPrimitiveType type) {
|
||||
switch (type) {
|
||||
case AmdGpu::GsOutputPrimitiveType::PointList:
|
||||
return 1u;
|
||||
case AmdGpu::GsOutputPrimitiveType::LineStrip:
|
||||
return 2u;
|
||||
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
||||
return 3u;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) {
|
||||
ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage),
|
||||
|
@ -222,6 +237,7 @@ void EmitContext::DefineInputs() {
|
|||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
}
|
||||
switch (stage) {
|
||||
case Stage::Export:
|
||||
case Stage::Vertex: {
|
||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||
|
@ -290,6 +306,38 @@ void EmitContext::DefineInputs() {
|
|||
local_invocation_id =
|
||||
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
|
||||
break;
|
||||
case Stage::Geometry: {
|
||||
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||
const auto gl_per_vertex =
|
||||
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
|
||||
"gl_PerVertex");
|
||||
MemberName(gl_per_vertex, 0, "gl_Position");
|
||||
MemberName(gl_per_vertex, 1, "gl_PointSize");
|
||||
MemberName(gl_per_vertex, 2, "gl_ClipDistance");
|
||||
MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn,
|
||||
static_cast<std::uint32_t>(spv::BuiltIn::Position));
|
||||
MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn,
|
||||
static_cast<std::uint32_t>(spv::BuiltIn::PointSize));
|
||||
MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn,
|
||||
static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance));
|
||||
Decorate(gl_per_vertex, spv::Decoration::Block);
|
||||
const auto vertices_in =
|
||||
TypeArray(gl_per_vertex, ConstU32(NumVertices(runtime_info.gs_info.out_primitive[0])));
|
||||
gl_in = Name(DefineVar(vertices_in, spv::StorageClass::Input), "gl_in");
|
||||
interfaces.push_back(gl_in);
|
||||
|
||||
const auto num_params = runtime_info.gs_info.in_vertex_data_size / 4 - 1u;
|
||||
for (int param_id = 0; param_id < num_params; ++param_id) {
|
||||
const IR::Attribute param{IR::Attribute::Param0 + param_id};
|
||||
const Id type{
|
||||
TypeArray(F32[4], ConstU32(NumVertices(runtime_info.gs_info.out_primitive[0])))};
|
||||
const Id id{DefineInput(type, param_id)};
|
||||
Name(id, fmt::format("in_attr{}", param_id));
|
||||
input_params[param_id] = {id, input_f32, F32[1], 4};
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -297,6 +345,7 @@ void EmitContext::DefineInputs() {
|
|||
|
||||
void EmitContext::DefineOutputs() {
|
||||
switch (stage) {
|
||||
case Stage::Export:
|
||||
case Stage::Vertex: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
||||
|
@ -338,6 +387,18 @@ void EmitContext::DefineOutputs() {
|
|||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
|
||||
for (u32 attr_id = 0; attr_id < runtime_info.gs_info.copy_data.num_attrs; attr_id++) {
|
||||
const IR::Attribute param{IR::Attribute::Param0 + attr_id};
|
||||
const Id id{DefineOutput(F32[4], attr_id)};
|
||||
Name(id, fmt::format("out_attr{}", attr_id));
|
||||
output_params[attr_id] = {id, output_f32, F32[1], 4u};
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -168,9 +168,12 @@ public:
|
|||
Id output_f32{};
|
||||
Id output_s32{};
|
||||
|
||||
Id gl_in{};
|
||||
|
||||
boost::container::small_vector<Id, 16> interfaces;
|
||||
|
||||
Id output_position{};
|
||||
Id primitive_id{};
|
||||
Id vertex_index{};
|
||||
Id instance_id{};
|
||||
Id push_data_block{};
|
||||
|
|
65
src/shader_recompiler/frontend/copy_shader.cpp
Normal file
65
src/shader_recompiler/frontend/copy_shader.cpp
Normal file
|
@ -0,0 +1,65 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/copy_shader.h"
|
||||
#include "shader_recompiler/frontend/decode.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
CopyShaderData ParseCopyShader(const std::span<const u32>& code) {
|
||||
Gcn::GcnCodeSlice code_slice{code.data(), code.data() + code.size()};
|
||||
Gcn::GcnDecodeContext decoder;
|
||||
|
||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||
ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
|
||||
|
||||
std::array<s32, 32> offsets{};
|
||||
std::fill(offsets.begin(), offsets.end(), -1);
|
||||
|
||||
CopyShaderData data{};
|
||||
Gcn::OperandField sgpr{};
|
||||
auto last_attr{IR::Attribute::Position0};
|
||||
s32 soffset{0};
|
||||
while (!code_slice.atEnd()) {
|
||||
auto inst = decoder.decodeInstruction(code_slice);
|
||||
switch (inst.opcode) {
|
||||
case Gcn::Opcode::S_MOVK_I32: {
|
||||
sgpr = inst.dst[0].field;
|
||||
soffset = inst.control.sopk.simm;
|
||||
break;
|
||||
}
|
||||
case Gcn::Opcode::EXP: {
|
||||
const auto& exp = inst.control.exp;
|
||||
const IR::Attribute semantic = static_cast<IR::Attribute>(exp.target);
|
||||
for (int i = 0; i < inst.src_count; ++i) {
|
||||
const auto ofs = offsets[inst.src[i].code];
|
||||
if (ofs != -1) {
|
||||
data.attr_map[ofs] = {semantic, i};
|
||||
if (semantic > last_attr) {
|
||||
last_attr = semantic;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Gcn::Opcode::BUFFER_LOAD_DWORD: {
|
||||
offsets[inst.src[1].code] = inst.control.mubuf.offset;
|
||||
if (inst.src[3].field != Gcn::OperandField::ConstZero) {
|
||||
ASSERT(inst.src[3].field == sgpr);
|
||||
offsets[inst.src[1].code] += soffset;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_attr != IR::Attribute::Position0) {
|
||||
data.num_attrs = static_cast<u32>(last_attr) - static_cast<u32>(IR::Attribute::Param0) + 1;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
} // namespace Shader
|
21
src/shader_recompiler/frontend/copy_shader.h
Normal file
21
src/shader_recompiler/frontend/copy_shader.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
struct CopyShaderData {
|
||||
std::unordered_map<u32, std::pair<Shader::IR::Attribute, u32>> attr_map;
|
||||
u32 num_attrs{0};
|
||||
};
|
||||
|
||||
CopyShaderData ParseCopyShader(const std::span<const u32>& code);
|
||||
|
||||
} // namespace Shader
|
|
@ -2491,4 +2491,23 @@ enum class ImageAddrComponent : u32 {
|
|||
Clamp,
|
||||
};
|
||||
|
||||
struct SendMsgSimm {
|
||||
enum class Message : u32 {
|
||||
Interrupt = 1,
|
||||
Gs = 2,
|
||||
GsDone = 3,
|
||||
System = 15,
|
||||
};
|
||||
|
||||
enum class GsOp : u32 {
|
||||
Nop = 0,
|
||||
Cut = 1,
|
||||
Emit = 2,
|
||||
EmitCut = 3,
|
||||
};
|
||||
|
||||
Message msg : 4;
|
||||
GsOp op : 2;
|
||||
};
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -55,12 +55,6 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
// SOPP
|
||||
|
||||
void Translator::S_BARRIER() {
|
||||
ir.Barrier();
|
||||
}
|
||||
|
||||
// VOP2
|
||||
|
||||
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
||||
|
|
|
@ -540,14 +540,6 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
|
||||
}
|
||||
|
||||
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
||||
// This only really exists to let resource tracking pass know
|
||||
// there is an inline cbuf.
|
||||
const IR::ScalarReg dst{inst.dst[0].code};
|
||||
ir.SetScalarReg(dst, ir.Imm32(pc));
|
||||
ir.SetScalarReg(dst + 1, ir.Imm32(0));
|
||||
}
|
||||
|
||||
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
||||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||
|
|
75
src/shader_recompiler/frontend/translate/scalar_flow.cpp
Normal file
75
src/shader_recompiler/frontend/translate/scalar_flow.cpp
Normal file
|
@ -0,0 +1,75 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/opcodes.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_BARRIER:
|
||||
return S_BARRIER();
|
||||
case Opcode::S_TTRACEDATA:
|
||||
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||
return;
|
||||
case Opcode::S_GETPC_B64:
|
||||
return S_GETPC_B64(pc, inst);
|
||||
case Opcode::S_WAITCNT:
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_ENDPGM:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
case Opcode::S_CBRANCH_SCC1:
|
||||
case Opcode::S_CBRANCH_VCCNZ:
|
||||
case Opcode::S_CBRANCH_VCCZ:
|
||||
case Opcode::S_CBRANCH_EXECNZ:
|
||||
case Opcode::S_BRANCH:
|
||||
return;
|
||||
case Opcode::S_SENDMSG:
|
||||
S_SENDMSG(inst);
|
||||
return;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_BARRIER() {
|
||||
ir.Barrier();
|
||||
}
|
||||
|
||||
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
||||
// This only really exists to let resource tracking pass know
|
||||
// there is an inline cbuf.
|
||||
const IR::ScalarReg dst{inst.dst[0].code};
|
||||
ir.SetScalarReg(dst, ir.Imm32(pc));
|
||||
ir.SetScalarReg(dst + 1, ir.Imm32(0));
|
||||
}
|
||||
|
||||
void Translator::S_SENDMSG(const GcnInst& inst) {
|
||||
const auto& simm = reinterpret_cast<const SendMsgSimm&>(inst.control.sopp.simm);
|
||||
switch (simm.msg) {
|
||||
case SendMsgSimm::Message::Gs: {
|
||||
switch (simm.op) {
|
||||
case SendMsgSimm::GsOp::Nop:
|
||||
break;
|
||||
case SendMsgSimm::GsOp::Cut:
|
||||
ir.EmitPrimitive();
|
||||
break;
|
||||
case SendMsgSimm::GsOp::Emit:
|
||||
ir.EmitVertex();
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SendMsgSimm::Message::GsDone: {
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
|
@ -10,6 +10,7 @@
|
|||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
#define MAGIC_ENUM_RANGE_MIN 0
|
||||
#define MAGIC_ENUM_RANGE_MAX 1515
|
||||
|
@ -35,6 +36,7 @@ void Translator::EmitPrologue() {
|
|||
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
||||
switch (info.stage) {
|
||||
case Stage::Vertex:
|
||||
case Stage::Export:
|
||||
// v0: vertex ID, always present
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
||||
// v1: instance ID, step rate 0
|
||||
|
@ -76,6 +78,20 @@ void Translator::EmitPrologue() {
|
|||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry:
|
||||
switch (runtime_info.gs_info.out_primitive[0]) {
|
||||
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
||||
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
|
||||
[[fallthrough]];
|
||||
case AmdGpu::GsOutputPrimitiveType::LineStrip:
|
||||
ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1
|
||||
[[fallthrough]];
|
||||
default:
|
||||
ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0
|
||||
break;
|
||||
}
|
||||
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Unknown shader stage");
|
||||
}
|
||||
|
@ -359,7 +375,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
if (!std::filesystem::exists(dump_dir)) {
|
||||
std::filesystem::create_directories(dump_dir);
|
||||
}
|
||||
const auto filename = fmt::format("vs_{:#018x}_fetch.bin", info.pgm_hash);
|
||||
const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
|
||||
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
||||
file.WriteRaw<u8>(code, fetch_size);
|
||||
}
|
||||
|
@ -424,31 +440,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_BARRIER:
|
||||
return S_BARRIER();
|
||||
case Opcode::S_TTRACEDATA:
|
||||
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||
return;
|
||||
case Opcode::S_GETPC_B64:
|
||||
return S_GETPC_B64(pc, inst);
|
||||
case Opcode::S_WAITCNT:
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_ENDPGM:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
case Opcode::S_CBRANCH_SCC1:
|
||||
case Opcode::S_CBRANCH_VCCNZ:
|
||||
case Opcode::S_CBRANCH_VCCZ:
|
||||
case Opcode::S_CBRANCH_EXECNZ:
|
||||
case Opcode::S_BRANCH:
|
||||
return;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::LogMissingOpcode(const GcnInst& inst) {
|
||||
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})",
|
||||
magic_enum::enum_name(inst.opcode), u32(inst.opcode),
|
||||
|
@ -467,7 +458,7 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
|
|||
|
||||
// Special case for emitting fetch shader.
|
||||
if (inst.opcode == Opcode::S_SWAPPC_B64) {
|
||||
ASSERT(info.stage == Stage::Vertex);
|
||||
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
|
||||
translator.EmitFetch(inst);
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -116,6 +116,7 @@ public:
|
|||
|
||||
// SOPP
|
||||
void S_BARRIER();
|
||||
void S_SENDMSG(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
// SMRD
|
||||
|
|
|
@ -160,9 +160,19 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
|
||||
void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const bool is_ring = mtbuf.glc && mtbuf.slc;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
if (info.stage != Stage::Geometry) {
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
||||
"Non immediate offset not supported");
|
||||
}
|
||||
|
||||
const IR::Value address = [&] -> IR::Value {
|
||||
if (is_ring) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
||||
}
|
||||
if (mtbuf.idxen && mtbuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
||||
}
|
||||
|
@ -171,13 +181,12 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
|
|||
}
|
||||
return {};
|
||||
}();
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
|
||||
|
||||
IR::BufferInstInfo info{};
|
||||
info.index_enable.Assign(mtbuf.idxen);
|
||||
info.offset_enable.Assign(mtbuf.offen);
|
||||
info.inst_offset.Assign(mtbuf.offset);
|
||||
IR::BufferInstInfo buffer_info{};
|
||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||
buffer_info.ring_access.Assign(is_ring);
|
||||
if (is_typed) {
|
||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||
|
@ -190,7 +199,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
|
|||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, buffer_info);
|
||||
const IR::VectorReg dst_reg{inst.src[1].code};
|
||||
if (num_dwords == 1) {
|
||||
ir.SetVectorReg(dst_reg, IR::U32{value});
|
||||
|
@ -230,9 +239,20 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
|
|||
|
||||
void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const bool is_ring = mtbuf.glc && mtbuf.slc;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value address = [&] -> IR::Value {
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
|
||||
if (info.stage != Stage::Export && info.stage != Stage::Geometry) {
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
||||
"Non immediate offset not supported");
|
||||
}
|
||||
|
||||
IR::Value address = [&] -> IR::Value {
|
||||
if (is_ring) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
||||
}
|
||||
if (mtbuf.idxen && mtbuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
||||
}
|
||||
|
@ -241,13 +261,12 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
|||
}
|
||||
return {};
|
||||
}();
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
|
||||
|
||||
IR::BufferInstInfo info{};
|
||||
info.index_enable.Assign(mtbuf.idxen);
|
||||
info.offset_enable.Assign(mtbuf.offen);
|
||||
info.inst_offset.Assign(mtbuf.offset);
|
||||
IR::BufferInstInfo buffer_info{};
|
||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||
buffer_info.ring_access.Assign(is_ring);
|
||||
if (is_typed) {
|
||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||
|
@ -278,7 +297,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
|||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
ir.StoreBuffer(num_dwords, handle, address, value, info);
|
||||
ir.StoreBuffer(num_dwords, handle, address, value, buffer_info);
|
||||
}
|
||||
|
||||
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
|
||||
|
|
|
@ -225,8 +225,9 @@ struct Info {
|
|||
}
|
||||
|
||||
void AddBindings(Backend::Bindings& bnd) const {
|
||||
bnd.buffer += buffers.size() + texture_buffers.size();
|
||||
bnd.unified += bnd.buffer + images.size() + samplers.size();
|
||||
const auto total_buffers = buffers.size() + texture_buffers.size();
|
||||
bnd.buffer += total_buffers;
|
||||
bnd.unified += total_buffers + images.size() + samplers.size();
|
||||
bnd.user_data += ud_mask.NumRegs();
|
||||
}
|
||||
|
||||
|
|
|
@ -6,14 +6,6 @@
|
|||
|
||||
namespace Shader::IR {
|
||||
|
||||
bool IsParam(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
|
||||
}
|
||||
|
||||
bool IsMrt(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
|
||||
}
|
||||
|
||||
std::string NameOf(Attribute attribute) {
|
||||
switch (attribute) {
|
||||
case Attribute::RenderTarget0:
|
||||
|
|
|
@ -81,9 +81,17 @@ constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
|
|||
constexpr size_t NumRenderTargets = 8;
|
||||
constexpr size_t NumParams = 32;
|
||||
|
||||
[[nodiscard]] bool IsParam(Attribute attribute) noexcept;
|
||||
constexpr bool IsPosition(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::Position0 && attribute <= Attribute::Position3;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsMrt(Attribute attribute) noexcept;
|
||||
constexpr bool IsParam(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
|
||||
}
|
||||
|
||||
constexpr bool IsMrt(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::string NameOf(Attribute attribute);
|
||||
|
||||
|
|
|
@ -249,8 +249,8 @@ void IREmitter::SetM0(const U32& value) {
|
|||
Inst(Opcode::SetM0, value);
|
||||
}
|
||||
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
|
||||
}
|
||||
|
||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
|
||||
|
@ -1553,4 +1553,12 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value
|
|||
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color);
|
||||
}
|
||||
|
||||
void IREmitter::EmitVertex() {
|
||||
Inst(Opcode::EmitVertex);
|
||||
}
|
||||
|
||||
void IREmitter::EmitPrimitive() {
|
||||
Inst(Opcode::EmitPrimitive);
|
||||
}
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
|
|
@ -78,7 +78,7 @@ public:
|
|||
|
||||
[[nodiscard]] U1 Condition(IR::Condition cond);
|
||||
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0);
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
|
||||
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
||||
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
||||
|
||||
|
@ -310,6 +310,9 @@ public:
|
|||
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
||||
TextureInstInfo info);
|
||||
|
||||
void EmitVertex();
|
||||
void EmitPrimitive();
|
||||
|
||||
private:
|
||||
IR::Block::iterator insertion_point;
|
||||
|
||||
|
|
|
@ -89,6 +89,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::ImageAtomicOr32:
|
||||
case Opcode::ImageAtomicXor32:
|
||||
case Opcode::ImageAtomicExchange32:
|
||||
case Opcode::EmitVertex:
|
||||
case Opcode::EmitPrimitive:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -24,6 +24,10 @@ OPCODE(Barrier, Void,
|
|||
OPCODE(WorkgroupMemoryBarrier, Void, )
|
||||
OPCODE(DeviceMemoryBarrier, Void, )
|
||||
|
||||
// Geometry shader specific
|
||||
OPCODE(EmitVertex, Void, )
|
||||
OPCODE(EmitPrimitive, Void, )
|
||||
|
||||
// Shared memory operations
|
||||
OPCODE(LoadSharedU32, U32, U32, )
|
||||
OPCODE(LoadSharedU64, U32x2, U32, )
|
||||
|
@ -49,7 +53,7 @@ OPCODE(GetVectorRegister, U32, Vect
|
|||
OPCODE(SetVectorRegister, Void, VectorReg, U32, )
|
||||
OPCODE(GetGotoVariable, U1, U32, )
|
||||
OPCODE(SetGotoVariable, Void, U32, U1, )
|
||||
OPCODE(GetAttribute, F32, Attribute, U32, )
|
||||
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
|
||||
OPCODE(GetAttributeU32, U32, Attribute, U32, )
|
||||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||
|
||||
|
|
|
@ -15,5 +15,7 @@ void ConstantPropagationPass(IR::BlockList& program);
|
|||
void ResourceTrackingPass(IR::Program& program);
|
||||
void CollectShaderInfoPass(IR::Program& program);
|
||||
void LowerSharedMemToRegisters(IR::Program& program);
|
||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
||||
Stage stage);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
|
110
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
Normal file
110
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
Normal file
|
@ -0,0 +1,110 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
||||
Stage stage) {
|
||||
const auto& ForEachInstruction = [&](auto func) {
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
func(ir, inst);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
switch (stage) {
|
||||
case Stage::Export: {
|
||||
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::StoreBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
break;
|
||||
}
|
||||
|
||||
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
|
||||
ASSERT(offset < runtime_info.es_info.vertex_data_size * 4);
|
||||
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
|
||||
const auto attrib =
|
||||
IR::Value{offset < 16 ? IR::Attribute::Position0
|
||||
: IR::Attribute::Param0 + (offset / 16 - 1)};
|
||||
const auto comp = (offset / 4) % 4;
|
||||
|
||||
inst.ReplaceOpcode(IR::Opcode::SetAttribute);
|
||||
inst.ClearArgs();
|
||||
inst.SetArg(0, attrib);
|
||||
inst.SetArg(1, data);
|
||||
inst.SetArg(2, ir.Imm32(comp));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
case Stage::Geometry: {
|
||||
ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::LoadBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
break;
|
||||
}
|
||||
|
||||
const auto shl_inst = inst.Arg(1).TryInstRecursive();
|
||||
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
|
||||
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
|
||||
const auto bucket = offset.Resolve().U32() / 256u;
|
||||
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
||||
: IR::Attribute::Param0 + (bucket / 4 - 1);
|
||||
const auto comp = bucket % 4;
|
||||
|
||||
auto attr_value = ir.GetAttribute(attrib, comp, vertex_id);
|
||||
inst.ReplaceOpcode(IR::Opcode::BitCastU32F32);
|
||||
inst.ClearArgs();
|
||||
inst.SetArg(0, attr_value);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::StoreBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
break;
|
||||
}
|
||||
|
||||
const auto offset = inst.Flags<IR::BufferInstInfo>().inst_offset.Value();
|
||||
const auto data = ir.BitCast<IR::F32>(IR::U32{inst.Arg(2)});
|
||||
const auto comp_ofs = runtime_info.gs_info.output_vertices * 4u;
|
||||
const auto output_size = comp_ofs * runtime_info.gs_info.out_vertex_data_size;
|
||||
|
||||
const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u;
|
||||
const auto& it = runtime_info.gs_info.copy_data.attr_map.find(vc_read_ofs);
|
||||
ASSERT(it != runtime_info.gs_info.copy_data.attr_map.cend());
|
||||
const auto& [attr, comp] = it->second;
|
||||
|
||||
inst.ReplaceOpcode(IR::Opcode::SetAttribute);
|
||||
inst.ClearArgs();
|
||||
inst.SetArg(0, IR::Value{attr});
|
||||
inst.SetArg(1, data);
|
||||
inst.SetArg(2, ir.Imm32(comp));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
|
@ -7,7 +7,6 @@
|
|||
#include "common/bit_field.h"
|
||||
#include "common/enum.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
|
@ -67,6 +66,7 @@ union BufferInstInfo {
|
|||
BitField<0, 1, u32> index_enable;
|
||||
BitField<1, 1, u32> offset_enable;
|
||||
BitField<2, 12, u32> inst_offset;
|
||||
BitField<14, 1, u32> ring_access; // global + system coherency
|
||||
};
|
||||
|
||||
enum class ScalarReg : u32 {
|
||||
|
|
|
@ -63,6 +63,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
|
|||
if (program.info.stage != Stage::Compute) {
|
||||
Shader::Optimization::LowerSharedMemToRegisters(program);
|
||||
}
|
||||
Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);
|
||||
Shader::Optimization::ResourceTrackingPass(program);
|
||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
#include "frontend/copy_shader.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
|
@ -26,13 +28,11 @@ constexpr u32 MaxStageTypes = 6;
|
|||
return static_cast<Stage>(index);
|
||||
}
|
||||
|
||||
enum class MrtSwizzle : u8 {
|
||||
Identity = 0,
|
||||
Alt = 1,
|
||||
Reverse = 2,
|
||||
ReverseAlt = 3,
|
||||
struct ExportRuntimeInfo {
|
||||
u32 vertex_data_size;
|
||||
|
||||
auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
|
||||
};
|
||||
static constexpr u32 MaxColorBuffers = 8;
|
||||
|
||||
enum class VsOutput : u8 {
|
||||
None,
|
||||
|
@ -70,6 +70,33 @@ struct VertexRuntimeInfo {
|
|||
}
|
||||
};
|
||||
|
||||
static constexpr auto GsMaxOutputStreams = 4u;
|
||||
using GsOutputPrimTypes = std::array<AmdGpu::GsOutputPrimitiveType, GsMaxOutputStreams>;
|
||||
struct GeometryRuntimeInfo {
|
||||
u32 num_invocations{};
|
||||
u32 output_vertices{};
|
||||
u32 in_vertex_data_size{};
|
||||
u32 out_vertex_data_size{};
|
||||
AmdGpu::PrimitiveType in_primitive;
|
||||
GsOutputPrimTypes out_primitive;
|
||||
CopyShaderData copy_data;
|
||||
|
||||
bool operator==(const GeometryRuntimeInfo& other) const noexcept {
|
||||
return num_invocations && other.num_invocations &&
|
||||
output_vertices == other.output_vertices && in_primitive == other.in_primitive &&
|
||||
std::ranges::equal(out_primitive, other.out_primitive) &&
|
||||
std::ranges::equal(copy_data.attr_map, other.copy_data.attr_map);
|
||||
}
|
||||
};
|
||||
|
||||
enum class MrtSwizzle : u8 {
|
||||
Identity = 0,
|
||||
Alt = 1,
|
||||
Reverse = 2,
|
||||
ReverseAlt = 3,
|
||||
};
|
||||
static constexpr u32 MaxColorBuffers = 8;
|
||||
|
||||
struct FragmentRuntimeInfo {
|
||||
struct PsInput {
|
||||
u8 param_index;
|
||||
|
@ -114,7 +141,9 @@ struct RuntimeInfo {
|
|||
u32 num_user_data;
|
||||
u32 num_input_vgprs;
|
||||
u32 num_allocated_vgprs;
|
||||
ExportRuntimeInfo es_info;
|
||||
VertexRuntimeInfo vs_info;
|
||||
GeometryRuntimeInfo gs_info;
|
||||
FragmentRuntimeInfo fs_info;
|
||||
ComputeRuntimeInfo cs_info;
|
||||
|
||||
|
@ -128,6 +157,10 @@ struct RuntimeInfo {
|
|||
return vs_info == other.vs_info;
|
||||
case Stage::Compute:
|
||||
return cs_info == other.cs_info;
|
||||
case Stage::Export:
|
||||
return es_info == other.es_info;
|
||||
case Stage::Geometry:
|
||||
return gs_info == other.gs_info;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "common/types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "shader_recompiler/params.h"
|
||||
#include "types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
|
@ -842,26 +843,6 @@ struct Liverpool {
|
|||
}
|
||||
};
|
||||
|
||||
enum class PrimitiveType : u32 {
|
||||
None = 0,
|
||||
PointList = 1,
|
||||
LineList = 2,
|
||||
LineStrip = 3,
|
||||
TriangleList = 4,
|
||||
TriangleFan = 5,
|
||||
TriangleStrip = 6,
|
||||
PatchPrimitive = 9,
|
||||
AdjLineList = 10,
|
||||
AdjLineStrip = 11,
|
||||
AdjTriangleList = 12,
|
||||
AdjTriangleStrip = 13,
|
||||
RectList = 17,
|
||||
LineLoop = 18,
|
||||
QuadList = 19,
|
||||
QuadStrip = 20,
|
||||
Polygon = 21,
|
||||
};
|
||||
|
||||
enum ContextRegs : u32 {
|
||||
DbZInfo = 0xA010,
|
||||
CbColor0Base = 0xA318,
|
||||
|
@ -936,7 +917,12 @@ struct Liverpool {
|
|||
};
|
||||
|
||||
union ShaderStageEnable {
|
||||
u32 raw;
|
||||
enum VgtStages : u32 {
|
||||
Vs = 0u, // always enabled
|
||||
EsGs = 0xB0u,
|
||||
};
|
||||
|
||||
VgtStages raw;
|
||||
BitField<0, 2, u32> ls_en;
|
||||
BitField<2, 1, u32> hs_en;
|
||||
BitField<3, 2, u32> es_en;
|
||||
|
@ -962,6 +948,81 @@ struct Liverpool {
|
|||
}
|
||||
};
|
||||
|
||||
union GsInstances {
|
||||
u32 raw;
|
||||
struct {
|
||||
u32 enable : 2;
|
||||
u32 count : 6;
|
||||
};
|
||||
|
||||
bool IsEnabled() const {
|
||||
return enable && count > 0;
|
||||
}
|
||||
};
|
||||
|
||||
union GsOutPrimitiveType {
|
||||
u32 raw;
|
||||
struct {
|
||||
GsOutputPrimitiveType outprim_type : 6;
|
||||
GsOutputPrimitiveType outprim_type1 : 6;
|
||||
GsOutputPrimitiveType outprim_type2 : 6;
|
||||
GsOutputPrimitiveType outprim_type3 : 6;
|
||||
u32 reserved : 3;
|
||||
u32 unique_type_per_stream : 1;
|
||||
};
|
||||
|
||||
GsOutputPrimitiveType GetPrimitiveType(u32 stream) const {
|
||||
if (unique_type_per_stream == 0) {
|
||||
return outprim_type;
|
||||
}
|
||||
|
||||
switch (stream) {
|
||||
case 0:
|
||||
return outprim_type;
|
||||
case 1:
|
||||
return outprim_type1;
|
||||
case 2:
|
||||
return outprim_type2;
|
||||
case 3:
|
||||
return outprim_type3;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
union GsMode {
|
||||
u32 raw;
|
||||
BitField<0, 3, u32> mode;
|
||||
BitField<3, 2, u32> cut_mode;
|
||||
BitField<22, 2, u32> onchip;
|
||||
};
|
||||
|
||||
union StreamOutConfig {
|
||||
u32 raw;
|
||||
struct {
|
||||
u32 streamout_0_en : 1;
|
||||
u32 streamout_1_en : 1;
|
||||
u32 streamout_2_en : 1;
|
||||
u32 streamout_3_en : 1;
|
||||
u32 rast_stream : 3;
|
||||
u32 : 1;
|
||||
u32 rast_stream_mask : 4;
|
||||
u32 : 19;
|
||||
u32 use_rast_stream_mask : 1;
|
||||
};
|
||||
};
|
||||
|
||||
union StreamOutBufferConfig {
|
||||
u32 raw;
|
||||
struct {
|
||||
u32 stream_0_buf_en : 4;
|
||||
u32 stream_1_buf_en : 4;
|
||||
u32 stream_2_buf_en : 4;
|
||||
u32 stream_3_buf_en : 4;
|
||||
};
|
||||
};
|
||||
|
||||
union Eqaa {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> max_anchor_samples;
|
||||
|
@ -1053,9 +1114,13 @@ struct Liverpool {
|
|||
PolygonControl polygon_control;
|
||||
ViewportControl viewport_control;
|
||||
VsOutputControl vs_output_control;
|
||||
INSERT_PADDING_WORDS(0xA292 - 0xA207 - 1);
|
||||
INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1);
|
||||
GsMode vgt_gs_mode;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
ModeControl mode_control;
|
||||
INSERT_PADDING_WORDS(0xA29D - 0xA292 - 1);
|
||||
INSERT_PADDING_WORDS(8);
|
||||
GsOutPrimitiveType vgt_gs_out_prim_type;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 index_size;
|
||||
u32 max_index_size;
|
||||
IndexBufferType index_buffer_type;
|
||||
|
@ -1066,11 +1131,21 @@ struct Liverpool {
|
|||
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1);
|
||||
u32 vgt_instance_step_rate_0;
|
||||
u32 vgt_instance_step_rate_1;
|
||||
INSERT_PADDING_WORDS(0xA2D5 - 0xA2A9 - 1);
|
||||
INSERT_PADDING_WORDS(0xA2AB - 0xA2A9 - 1);
|
||||
u32 vgt_esgs_ring_itemsize;
|
||||
u32 vgt_gsvs_ring_itemsize;
|
||||
INSERT_PADDING_WORDS(0xA2CE - 0xA2AC - 1);
|
||||
BitField<0, 11, u32> vgt_gs_max_vert_out;
|
||||
INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
|
||||
ShaderStageEnable stage_enable;
|
||||
INSERT_PADDING_WORDS(9);
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 vgt_gs_vert_itemsize[4];
|
||||
INSERT_PADDING_WORDS(4);
|
||||
PolygonOffset poly_offset;
|
||||
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
|
||||
GsInstances vgt_gs_instance_cnt;
|
||||
StreamOutConfig vgt_strmout_config;
|
||||
StreamOutBufferConfig vgt_strmout_buffer_config;
|
||||
INSERT_PADDING_WORDS(0xA2F8 - 0xA2E6 - 1);
|
||||
AaConfig aa_config;
|
||||
INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1);
|
||||
ColorBuffer color_buffers[NumColorBuffers];
|
||||
|
@ -1291,15 +1366,24 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
|
|||
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
||||
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
|
||||
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
|
||||
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
|
||||
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
||||
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
||||
static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
|
||||
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
|
||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_buffer_config) == 0xA2E6);
|
||||
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
|
||||
|
|
|
@ -6,78 +6,10 @@
|
|||
#include <string_view>
|
||||
#include <fmt/format.h>
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
|
||||
enum class DataFormat : u32 {
|
||||
FormatInvalid = 0,
|
||||
Format8 = 1,
|
||||
Format16 = 2,
|
||||
Format8_8 = 3,
|
||||
Format32 = 4,
|
||||
Format16_16 = 5,
|
||||
Format10_11_11 = 6,
|
||||
Format11_11_10 = 7,
|
||||
Format10_10_10_2 = 8,
|
||||
Format2_10_10_10 = 9,
|
||||
Format8_8_8_8 = 10,
|
||||
Format32_32 = 11,
|
||||
Format16_16_16_16 = 12,
|
||||
Format32_32_32 = 13,
|
||||
Format32_32_32_32 = 14,
|
||||
Format5_6_5 = 16,
|
||||
Format1_5_5_5 = 17,
|
||||
Format5_5_5_1 = 18,
|
||||
Format4_4_4_4 = 19,
|
||||
Format8_24 = 20,
|
||||
Format24_8 = 21,
|
||||
FormatX24_8_32 = 22,
|
||||
FormatGB_GR = 32,
|
||||
FormatBG_RG = 33,
|
||||
Format5_9_9_9 = 34,
|
||||
FormatBc1 = 35,
|
||||
FormatBc2 = 36,
|
||||
FormatBc3 = 37,
|
||||
FormatBc4 = 38,
|
||||
FormatBc5 = 39,
|
||||
FormatBc6 = 40,
|
||||
FormatBc7 = 41,
|
||||
FormatFmask8_1 = 47,
|
||||
FormatFmask8_2 = 48,
|
||||
FormatFmask8_4 = 49,
|
||||
FormatFmask16_1 = 50,
|
||||
FormatFmask16_2 = 51,
|
||||
FormatFmask32_2 = 52,
|
||||
FormatFmask32_4 = 53,
|
||||
FormatFmask32_8 = 54,
|
||||
FormatFmask64_4 = 55,
|
||||
FormatFmask64_8 = 56,
|
||||
Format4_4 = 57,
|
||||
Format6_5_5 = 58,
|
||||
Format1 = 59,
|
||||
Format1_Reversed = 60,
|
||||
Format32_As_8 = 61,
|
||||
Format32_As_8_8 = 62,
|
||||
Format32_As_32_32_32_32 = 63,
|
||||
};
|
||||
|
||||
enum class NumberFormat : u32 {
|
||||
Unorm = 0,
|
||||
Snorm = 1,
|
||||
Uscaled = 2,
|
||||
Sscaled = 3,
|
||||
Uint = 4,
|
||||
Sint = 5,
|
||||
SnormNz = 6,
|
||||
Float = 7,
|
||||
Srgb = 9,
|
||||
Ubnorm = 10,
|
||||
UbnromNz = 11,
|
||||
Ubint = 12,
|
||||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
|
||||
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
|
||||
}
|
||||
|
|
106
src/video_core/amdgpu/types.h
Normal file
106
src/video_core/amdgpu/types.h
Normal file
|
@ -0,0 +1,106 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
|
||||
enum class PrimitiveType : u32 {
|
||||
None = 0,
|
||||
PointList = 1,
|
||||
LineList = 2,
|
||||
LineStrip = 3,
|
||||
TriangleList = 4,
|
||||
TriangleFan = 5,
|
||||
TriangleStrip = 6,
|
||||
PatchPrimitive = 9,
|
||||
AdjLineList = 10,
|
||||
AdjLineStrip = 11,
|
||||
AdjTriangleList = 12,
|
||||
AdjTriangleStrip = 13,
|
||||
RectList = 17,
|
||||
LineLoop = 18,
|
||||
QuadList = 19,
|
||||
QuadStrip = 20,
|
||||
Polygon = 21,
|
||||
};
|
||||
|
||||
enum class GsOutputPrimitiveType : u32 {
|
||||
PointList = 0,
|
||||
LineStrip = 1,
|
||||
TriangleStrip = 2,
|
||||
};
|
||||
|
||||
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
|
||||
enum class DataFormat : u32 {
|
||||
FormatInvalid = 0,
|
||||
Format8 = 1,
|
||||
Format16 = 2,
|
||||
Format8_8 = 3,
|
||||
Format32 = 4,
|
||||
Format16_16 = 5,
|
||||
Format10_11_11 = 6,
|
||||
Format11_11_10 = 7,
|
||||
Format10_10_10_2 = 8,
|
||||
Format2_10_10_10 = 9,
|
||||
Format8_8_8_8 = 10,
|
||||
Format32_32 = 11,
|
||||
Format16_16_16_16 = 12,
|
||||
Format32_32_32 = 13,
|
||||
Format32_32_32_32 = 14,
|
||||
Format5_6_5 = 16,
|
||||
Format1_5_5_5 = 17,
|
||||
Format5_5_5_1 = 18,
|
||||
Format4_4_4_4 = 19,
|
||||
Format8_24 = 20,
|
||||
Format24_8 = 21,
|
||||
FormatX24_8_32 = 22,
|
||||
FormatGB_GR = 32,
|
||||
FormatBG_RG = 33,
|
||||
Format5_9_9_9 = 34,
|
||||
FormatBc1 = 35,
|
||||
FormatBc2 = 36,
|
||||
FormatBc3 = 37,
|
||||
FormatBc4 = 38,
|
||||
FormatBc5 = 39,
|
||||
FormatBc6 = 40,
|
||||
FormatBc7 = 41,
|
||||
FormatFmask8_1 = 47,
|
||||
FormatFmask8_2 = 48,
|
||||
FormatFmask8_4 = 49,
|
||||
FormatFmask16_1 = 50,
|
||||
FormatFmask16_2 = 51,
|
||||
FormatFmask32_2 = 52,
|
||||
FormatFmask32_4 = 53,
|
||||
FormatFmask32_8 = 54,
|
||||
FormatFmask64_4 = 55,
|
||||
FormatFmask64_8 = 56,
|
||||
Format4_4 = 57,
|
||||
Format6_5_5 = 58,
|
||||
Format1 = 59,
|
||||
Format1_Reversed = 60,
|
||||
Format32_As_8 = 61,
|
||||
Format32_As_8_8 = 62,
|
||||
Format32_As_32_32_32_32 = 63,
|
||||
};
|
||||
|
||||
enum class NumberFormat : u32 {
|
||||
Unorm = 0,
|
||||
Snorm = 1,
|
||||
Uscaled = 2,
|
||||
Sscaled = 3,
|
||||
Uint = 4,
|
||||
Sint = 5,
|
||||
SnormNz = 6,
|
||||
Float = 7,
|
||||
Srgb = 9,
|
||||
Ubnorm = 10,
|
||||
UbnromNz = 11,
|
||||
Ubint = 12,
|
||||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
|
@ -224,7 +224,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
|||
u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
|
||||
// Emulate QuadList primitive type with CPU made index buffer.
|
||||
const auto& regs = liverpool->regs;
|
||||
if (regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::QuadList) {
|
||||
if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) {
|
||||
is_indexed = true;
|
||||
|
||||
// Emit indices.
|
||||
|
|
|
@ -61,34 +61,34 @@ vk::CompareOp CompareOp(Liverpool::CompareFunc func) {
|
|||
}
|
||||
}
|
||||
|
||||
vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) {
|
||||
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
|
||||
switch (type) {
|
||||
case Liverpool::PrimitiveType::PointList:
|
||||
case AmdGpu::PrimitiveType::PointList:
|
||||
return vk::PrimitiveTopology::ePointList;
|
||||
case Liverpool::PrimitiveType::LineList:
|
||||
case AmdGpu::PrimitiveType::LineList:
|
||||
return vk::PrimitiveTopology::eLineList;
|
||||
case Liverpool::PrimitiveType::LineStrip:
|
||||
case AmdGpu::PrimitiveType::LineStrip:
|
||||
return vk::PrimitiveTopology::eLineStrip;
|
||||
case Liverpool::PrimitiveType::TriangleList:
|
||||
case AmdGpu::PrimitiveType::TriangleList:
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
case Liverpool::PrimitiveType::TriangleFan:
|
||||
case AmdGpu::PrimitiveType::TriangleFan:
|
||||
return vk::PrimitiveTopology::eTriangleFan;
|
||||
case Liverpool::PrimitiveType::TriangleStrip:
|
||||
case AmdGpu::PrimitiveType::TriangleStrip:
|
||||
return vk::PrimitiveTopology::eTriangleStrip;
|
||||
case Liverpool::PrimitiveType::AdjLineList:
|
||||
case AmdGpu::PrimitiveType::AdjLineList:
|
||||
return vk::PrimitiveTopology::eLineListWithAdjacency;
|
||||
case Liverpool::PrimitiveType::AdjLineStrip:
|
||||
case AmdGpu::PrimitiveType::AdjLineStrip:
|
||||
return vk::PrimitiveTopology::eLineStripWithAdjacency;
|
||||
case Liverpool::PrimitiveType::AdjTriangleList:
|
||||
case AmdGpu::PrimitiveType::AdjTriangleList:
|
||||
return vk::PrimitiveTopology::eTriangleListWithAdjacency;
|
||||
case Liverpool::PrimitiveType::AdjTriangleStrip:
|
||||
case AmdGpu::PrimitiveType::AdjTriangleStrip:
|
||||
return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
|
||||
case Liverpool::PrimitiveType::PatchPrimitive:
|
||||
case AmdGpu::PrimitiveType::PatchPrimitive:
|
||||
return vk::PrimitiveTopology::ePatchList;
|
||||
case Liverpool::PrimitiveType::QuadList:
|
||||
case AmdGpu::PrimitiveType::QuadList:
|
||||
// Needs to generate index buffer on the fly.
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
case Liverpool::PrimitiveType::RectList:
|
||||
case AmdGpu::PrimitiveType::RectList:
|
||||
return vk::PrimitiveTopology::eTriangleStrip;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
|
|
@ -18,7 +18,7 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op);
|
|||
|
||||
vk::CompareOp CompareOp(Liverpool::CompareFunc func);
|
||||
|
||||
vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type);
|
||||
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type);
|
||||
|
||||
vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
|
||||
|
||||
|
|
|
@ -16,6 +16,10 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex |
|
||||
vk::ShaderStageFlagBits::eGeometry |
|
||||
vk::ShaderStageFlagBits::eFragment;
|
||||
|
||||
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
|
||||
DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
|
||||
vk::PipelineCache pipeline_cache,
|
||||
|
@ -27,7 +31,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
BuildDescSetLayout();
|
||||
|
||||
const vk::PushConstantRange push_constants = {
|
||||
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
|
||||
.stageFlags = gp_stage_flags,
|
||||
.offset = 0,
|
||||
.size = sizeof(Shader::PushData),
|
||||
};
|
||||
|
@ -83,7 +87,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
.pVertexAttributeDescriptions = vertex_attributes.data(),
|
||||
};
|
||||
|
||||
if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) {
|
||||
if (key.prim_type == AmdGpu::PrimitiveType::RectList && !IsEmbeddedVs()) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Rectangle List primitive type is only supported for embedded VS");
|
||||
}
|
||||
|
@ -196,9 +200,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
},
|
||||
};
|
||||
|
||||
auto stage = u32(Shader::Stage::Vertex);
|
||||
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
|
||||
shader_stages;
|
||||
auto stage = u32(Shader::Stage::Vertex);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eVertex,
|
||||
|
@ -206,6 +210,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::Stage::Geometry);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eGeometry,
|
||||
.module = modules[stage],
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::Stage::Fragment);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
|
@ -322,7 +334,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
|||
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
|
||||
: vk::DescriptorType::eUniformBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
|
||||
.stageFlags = gp_stage_flags,
|
||||
});
|
||||
}
|
||||
for (const auto& tex_buffer : stage->texture_buffers) {
|
||||
|
@ -331,7 +343,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
|||
.descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
|
||||
.stageFlags = gp_stage_flags,
|
||||
});
|
||||
}
|
||||
for (const auto& image : stage->images) {
|
||||
|
@ -340,7 +352,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
|||
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
|
||||
: vk::DescriptorType::eSampledImage,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
|
||||
.stageFlags = gp_stage_flags,
|
||||
});
|
||||
}
|
||||
for (const auto& sampler : stage->samplers) {
|
||||
|
@ -348,7 +360,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
|||
.binding = binding++,
|
||||
.descriptorType = vk::DescriptorType::eSampler,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
|
||||
.stageFlags = gp_stage_flags,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -518,9 +530,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
desc_set, {});
|
||||
}
|
||||
}
|
||||
cmdbuf.pushConstants(*pipeline_layout,
|
||||
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, 0U,
|
||||
sizeof(push_data), &push_data);
|
||||
cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data);
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle());
|
||||
}
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ struct GraphicsPipelineKey {
|
|||
u32 num_samples;
|
||||
u32 mrt_mask;
|
||||
Liverpool::StencilControl stencil;
|
||||
Liverpool::PrimitiveType prim_type;
|
||||
AmdGpu::PrimitiveType prim_type;
|
||||
u32 enable_primitive_restart;
|
||||
u32 primitive_restart_index;
|
||||
Liverpool::PolygonMode polygon_mode;
|
||||
|
@ -86,13 +86,13 @@ public:
|
|||
}
|
||||
|
||||
[[nodiscard]] bool IsPrimitiveListTopology() const {
|
||||
return key.prim_type == Liverpool::PrimitiveType::PointList ||
|
||||
key.prim_type == Liverpool::PrimitiveType::LineList ||
|
||||
key.prim_type == Liverpool::PrimitiveType::TriangleList ||
|
||||
key.prim_type == Liverpool::PrimitiveType::AdjLineList ||
|
||||
key.prim_type == Liverpool::PrimitiveType::AdjTriangleList ||
|
||||
key.prim_type == Liverpool::PrimitiveType::RectList ||
|
||||
key.prim_type == Liverpool::PrimitiveType::QuadList;
|
||||
return key.prim_type == AmdGpu::PrimitiveType::PointList ||
|
||||
key.prim_type == AmdGpu::PrimitiveType::LineList ||
|
||||
key.prim_type == AmdGpu::PrimitiveType::TriangleList ||
|
||||
key.prim_type == AmdGpu::PrimitiveType::AdjLineList ||
|
||||
key.prim_type == AmdGpu::PrimitiveType::AdjTriangleList ||
|
||||
key.prim_type == AmdGpu::PrimitiveType::RectList ||
|
||||
key.prim_type == AmdGpu::PrimitiveType::QuadList;
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
|
@ -322,6 +322,7 @@ bool Instance::CreateDevice() {
|
|||
.geometryShader = features.geometryShader,
|
||||
.logicOp = features.logicOp,
|
||||
.depthBiasClamp = features.depthBiasClamp,
|
||||
.fillModeNonSolid = features.fillModeNonSolid,
|
||||
.multiViewport = features.multiViewport,
|
||||
.samplerAnisotropy = features.samplerAnisotropy,
|
||||
.vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics,
|
||||
|
|
|
@ -147,6 +147,16 @@ public:
|
|||
return list_restart;
|
||||
}
|
||||
|
||||
/// Returns true when geometry shaders are supported by the device
|
||||
bool IsGeometryStageSupported() const {
|
||||
return features.geometryShader;
|
||||
}
|
||||
|
||||
/// Returns true when tessellation is supported by the device
|
||||
bool IsTessellationSupported() const {
|
||||
return features.tessellationShader;
|
||||
}
|
||||
|
||||
/// Returns the vendor ID of the physical device
|
||||
u32 GetVendorID() const {
|
||||
return properties.vendorID;
|
||||
|
|
|
@ -7,7 +7,10 @@
|
|||
#include "common/io_file.h"
|
||||
#include "common/path_util.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/frontend/copy_shader.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
|
@ -82,6 +85,13 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
|||
auto info = Shader::RuntimeInfo{stage};
|
||||
const auto& regs = liverpool->regs;
|
||||
switch (stage) {
|
||||
case Shader::Stage::Export: {
|
||||
info.num_user_data = regs.es_program.settings.num_user_regs;
|
||||
info.num_input_vgprs = regs.es_program.settings.vgpr_comp_cnt;
|
||||
info.num_allocated_vgprs = regs.es_program.settings.num_vgprs * 4;
|
||||
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Vertex: {
|
||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
||||
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
|
||||
|
@ -92,6 +102,29 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
|||
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Geometry: {
|
||||
info.num_user_data = regs.gs_program.settings.num_user_regs;
|
||||
info.num_input_vgprs = regs.gs_program.settings.vgpr_comp_cnt;
|
||||
info.num_allocated_vgprs = regs.gs_program.settings.num_vgprs * 4;
|
||||
info.gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
||||
info.gs_info.num_invocations =
|
||||
regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
|
||||
info.gs_info.in_primitive = regs.primitive_type;
|
||||
for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) {
|
||||
info.gs_info.out_primitive[stream_id] =
|
||||
regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id);
|
||||
}
|
||||
info.gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
||||
info.gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
|
||||
|
||||
// Extract semantics offsets from a copy shader
|
||||
const auto vc_stage = Shader::Stage::Vertex;
|
||||
const auto* pgm_vc = regs.ProgramForStage(static_cast<u32>(vc_stage));
|
||||
const auto params_vc = Liverpool::GetParams(*pgm_vc);
|
||||
DumpShader(params_vc.code, params_vc.hash, Shader::Stage::Vertex, 0, "copy.bin");
|
||||
info.gs_info.copy_data = Shader::ParseCopyShader(params_vc.code);
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Fragment: {
|
||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
||||
info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4;
|
||||
|
@ -149,7 +182,7 @@ PipelineCache::~PipelineCache() = default;
|
|||
const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
|
||||
const auto& regs = liverpool->regs;
|
||||
// Tessellation is unsupported so skip the draw to avoid locking up the driver.
|
||||
if (regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) {
|
||||
if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) {
|
||||
return nullptr;
|
||||
}
|
||||
// There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
|
||||
|
@ -163,7 +196,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
|
|||
LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
|
||||
return nullptr;
|
||||
}
|
||||
if (regs.primitive_type == Liverpool::PrimitiveType::None) {
|
||||
if (regs.primitive_type == AmdGpu::PrimitiveType::None) {
|
||||
LOG_TRACE(Render_Vulkan, "Primitive type 'None' skipped");
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -190,15 +223,6 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
|
|||
return it->second;
|
||||
}
|
||||
|
||||
bool ShouldSkipShader(u64 shader_hash, const char* shader_type) {
|
||||
static constexpr std::array<u64, 0> skip_hashes = {};
|
||||
if (std::ranges::contains(skip_hashes, shader_hash)) {
|
||||
LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PipelineCache::RefreshGraphicsKey() {
|
||||
std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey));
|
||||
|
||||
|
@ -275,46 +299,66 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
}
|
||||
|
||||
Shader::Backend::Bindings binding{};
|
||||
for (u32 i = 0; i < MaxShaderStages; i++) {
|
||||
if (!regs.stage_enable.IsStageEnabled(i)) {
|
||||
key.stage_hashes[i] = 0;
|
||||
infos[i] = nullptr;
|
||||
continue;
|
||||
const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool {
|
||||
const auto stage_in_idx = static_cast<u32>(stage_in);
|
||||
const auto stage_out_idx = static_cast<u32>(stage_out);
|
||||
if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {
|
||||
key.stage_hashes[stage_out_idx] = 0;
|
||||
infos[stage_out_idx] = nullptr;
|
||||
return false;
|
||||
}
|
||||
auto* pgm = regs.ProgramForStage(i);
|
||||
|
||||
const auto* pgm = regs.ProgramForStage(stage_in_idx);
|
||||
if (!pgm || !pgm->Address<u32*>()) {
|
||||
key.stage_hashes[i] = 0;
|
||||
infos[i] = nullptr;
|
||||
continue;
|
||||
key.stage_hashes[stage_out_idx] = 0;
|
||||
infos[stage_out_idx] = nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
|
||||
if (!bininfo->Valid()) {
|
||||
LOG_WARNING(Render_Vulkan, "Invalid binary info structure!");
|
||||
key.stage_hashes[i] = 0;
|
||||
infos[i] = nullptr;
|
||||
continue;
|
||||
}
|
||||
if (ShouldSkipShader(bininfo->shader_hash, "graphics")) {
|
||||
return false;
|
||||
}
|
||||
const auto stage = Shader::StageFromIndex(i);
|
||||
const auto params = Liverpool::GetParams(*pgm);
|
||||
|
||||
if (stage != Shader::Stage::Vertex && stage != Shader::Stage::Fragment) {
|
||||
key.stage_hashes[stage_out_idx] = 0;
|
||||
infos[stage_out_idx] = nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool TessMissingLogged = false;
|
||||
if (auto* pgm = regs.ProgramForStage(3);
|
||||
regs.stage_enable.IsStageEnabled(3) && pgm->Address() != 0) {
|
||||
if (!TessMissingLogged) {
|
||||
LOG_WARNING(Render_Vulkan, "Tess pipeline compilation skipped");
|
||||
TessMissingLogged = true;
|
||||
auto params = Liverpool::GetParams(*pgm);
|
||||
std::tie(infos[stage_out_idx], modules[stage_out_idx], key.stage_hashes[stage_out_idx]) =
|
||||
GetProgram(stage_in, params, binding);
|
||||
return true;
|
||||
};
|
||||
|
||||
const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); };
|
||||
|
||||
const auto& IsGsFeaturesSupported = [&]() -> bool {
|
||||
// These checks are temporary until all functionality is implemented.
|
||||
return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
|
||||
};
|
||||
|
||||
TryBindStage(Shader::Stage::Fragment);
|
||||
|
||||
const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
|
||||
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
|
||||
|
||||
switch (regs.stage_enable.raw) {
|
||||
case Liverpool::ShaderStageEnable::VgtStages::EsGs: {
|
||||
if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
|
||||
break;
|
||||
}
|
||||
if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding);
|
||||
if (!TryBindStage(Shader::Stage::Geometry)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
TryBindStage(Shader::Stage::Vertex);
|
||||
infos[static_cast<u32>(Shader::Stage::Geometry)] = nullptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const auto* vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
|
||||
|
@ -336,9 +380,6 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
}
|
||||
}
|
||||
|
||||
const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
|
||||
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
|
||||
|
||||
// Second pass to fill remain CB pipeline key data
|
||||
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
||||
auto const& col_buf = regs.color_buffers[cb];
|
||||
|
@ -364,9 +405,6 @@ bool PipelineCache::RefreshComputeKey() {
|
|||
Shader::Backend::Bindings binding{};
|
||||
const auto* cs_pgm = &liverpool->regs.cs_program;
|
||||
const auto cs_params = Liverpool::GetParams(*cs_pgm);
|
||||
if (ShouldSkipShader(cs_params.hash, "compute")) {
|
||||
return false;
|
||||
}
|
||||
std::tie(infos[0], modules[0], compute_key) =
|
||||
GetProgram(Shader::Stage::Compute, cs_params, binding);
|
||||
return true;
|
||||
|
@ -378,15 +416,11 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
|
|||
Shader::Backend::Bindings& binding) {
|
||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
|
||||
perm_idx != 0 ? "(permutation)" : "");
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
|
||||
}
|
||||
|
||||
const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile);
|
||||
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding);
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
|
||||
}
|
||||
|
||||
const auto module = CompileSPV(spv, instance.GetDevice());
|
||||
const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx);
|
||||
|
@ -429,6 +463,10 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
|
|||
|
||||
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
||||
size_t perm_idx, std::string_view ext) {
|
||||
if (!Config::dumpShaders()) {
|
||||
return;
|
||||
}
|
||||
|
||||
using namespace Common::FS;
|
||||
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
|
||||
if (!std::filesystem::exists(dump_dir)) {
|
||||
|
|
|
@ -70,9 +70,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
|
||||
instance_offset);
|
||||
} else {
|
||||
const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList
|
||||
? 4
|
||||
: regs.num_indices;
|
||||
const u32 num_vertices =
|
||||
regs.primitive_type == AmdGpu::PrimitiveType::RectList ? 4 : regs.num_indices;
|
||||
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset,
|
||||
instance_offset);
|
||||
}
|
||||
|
@ -88,7 +87,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si
|
|||
return;
|
||||
}
|
||||
|
||||
ASSERT_MSG(regs.primitive_type != AmdGpu::Liverpool::PrimitiveType::RectList,
|
||||
ASSERT_MSG(regs.primitive_type != AmdGpu::PrimitiveType::RectList,
|
||||
"Unsupported primitive type for indirect draw");
|
||||
|
||||
try {
|
||||
|
|
Loading…
Reference in a new issue