Implement shader resource tables (#1165)
* Implement shader resource tables
* fix after rebase + squash
* address some review comments
* fix pipeline_common
* cleanup debug stuff
* switch to using single codegenerator
This commit is contained in:
parent 7b16085c59
commit 9ec75c3feb
@@ -590,6 +590,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/frontend/structured_control_flow.h
     src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
     src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
+    src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
     src/shader_recompiler/ir/passes/identity_removal_pass.cpp
     src/shader_recompiler/ir/passes/ir_passes.h
     src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
@@ -13,6 +13,15 @@ DecoderImpl::DecoderImpl() {
 DecoderImpl::~DecoderImpl() = default;

+std::string DecoderImpl::disassembleInst(ZydisDecodedInstruction& inst,
+                                         ZydisDecodedOperand* operands, u64 address) {
+    const int bufLen = 256;
+    char szBuffer[bufLen];
+    ZydisFormatterFormatInstruction(&m_formatter, &inst, operands, inst.operand_count_visible,
+                                    szBuffer, sizeof(szBuffer), address, ZYAN_NULL);
+    return szBuffer;
+}
+
 void DecoderImpl::printInstruction(void* code, u64 address) {
     ZydisDecodedInstruction instruction;
     ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT_VISIBLE];
@@ -27,11 +36,8 @@ void DecoderImpl::printInstruction(void* code, u64 address) {

 void DecoderImpl::printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands,
                             u64 address) {
-    const int bufLen = 256;
-    char szBuffer[bufLen];
-    ZydisFormatterFormatInstruction(&m_formatter, &inst, operands, inst.operand_count_visible,
-                                    szBuffer, sizeof(szBuffer), address, ZYAN_NULL);
-    fmt::print("instruction: {}\n", szBuffer);
+    std::string s = disassembleInst(inst, operands, address);
+    fmt::print("instruction: {}\n", s);
 }

 ZyanStatus DecoderImpl::decodeInstruction(ZydisDecodedInstruction& inst,
@@ -14,6 +14,8 @@ public:
     DecoderImpl();
     ~DecoderImpl();

+    std::string disassembleInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands,
+                                u64 address);
     void printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands, u64 address);
     void printInstruction(void* code, u64 address);
     ZyanStatus decodeInstruction(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands,
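Note: the refactor above splits formatting from printing so callers can capture the disassembly text instead of only writing it to stdout. A sketch of the intended call pattern (variable names illustrative; it mirrors the SRT dump helper added later in this commit):

    ZydisDecodedInstruction instruction;
    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
    // decodeInstruction fills instruction/operands; disassembleInst renders them as text.
    if (ZYAN_SUCCESS(Common::Decoder::Instance()->decodeInstruction(instruction, operands, code))) {
        std::string text = Common::Decoder::Instance()->disassembleInst(instruction, operands, address);
    }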
src/common/hash.h (new file)
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/types.h"
+
+[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
+    return seed ^ (hash + 0x9e3779b9 + (seed << 12) + (seed >> 4));
+}
+
+[[nodiscard]] inline u32 HashCombine(const u32 seed, const u32 hash) {
+    return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
+}
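Note: both overloads above are the Boost-style golden-ratio mix (the 0x9e3779b9 constant), with different shift widths for 32- and 64-bit seeds. A minimal sketch of folding several fields into one key (the struct and helper here are hypothetical, not part of the commit):

    #include "common/hash.h"

    struct ExampleKey {
        u64 module_hash;
        u32 binding;
    };

    inline u64 HashExampleKey(const ExampleKey& k) {
        // Fold fields left to right; keep the order stable so keys stay comparable.
        return HashCombine(k.module_hash, static_cast<u64>(k.binding));
    }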
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include "common/assert.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"

@@ -146,9 +147,14 @@ void EmitGetGotoVariable(EmitContext&) {
     UNREACHABLE_MSG("Unreachable instruction");
 }

-Id EmitReadConst(EmitContext& ctx) {
-    return ctx.u32_zero_value;
-    UNREACHABLE_MSG("Unreachable instruction");
+Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
+    u32 flatbuf_off_dw = inst->Flags<u32>();
+    ASSERT(ctx.srt_flatbuf.binding >= 0);
+    ASSERT(flatbuf_off_dw > 0);
+    Id index = ctx.ConstU32(flatbuf_off_dw);
+    auto& buffer = ctx.srt_flatbuf;
+    const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
+    return ctx.OpLoad(ctx.U32[1], ptr);
 }

 Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
@@ -62,7 +62,7 @@ void EmitSetVectorRegister(EmitContext& ctx);
 void EmitSetGotoVariable(EmitContext& ctx);
 void EmitGetGotoVariable(EmitContext& ctx);
 void EmitSetScc(EmitContext& ctx);
-Id EmitReadConst(EmitContext& ctx);
+Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
 Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
 Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
 Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
@@ -4,12 +4,14 @@
 #include "common/assert.h"
+#include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+#include "shader_recompiler/ir/passes/srt.h"
 #include "video_core/amdgpu/types.h"

 #include <boost/container/static_vector.hpp>
 #include <fmt/format.h>

 #include <numbers>
 #include <string_view>

 namespace Shader::Backend::SPIRV {
 namespace {
@@ -435,14 +437,16 @@ void EmitContext::DefinePushDataBlock() {

 void EmitContext::DefineBuffers() {
     boost::container::small_vector<Id, 8> type_ids;
-    const auto define_struct = [&](Id record_array_type, bool is_instance_data) {
+    const auto define_struct = [&](Id record_array_type, bool is_instance_data,
+                                   std::optional<std::string_view> explicit_name = {}) {
         const Id struct_type{TypeStruct(record_array_type)};
         if (std::ranges::find(type_ids, record_array_type.value, &Id::value) != type_ids.end()) {
             return struct_type;
         }
         Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
-        const auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
-                                           : fmt::format("{}_cbuf_block_f32", stage);
+        auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
+                                     : fmt::format("{}_cbuf_block_f32", stage);
+        name = explicit_name.value_or(name);
         Name(struct_type, name);
         Decorate(struct_type, spv::Decoration::Block);
         MemberName(struct_type, 0, "data");
@@ -451,6 +455,29 @@ void EmitContext::DefineBuffers() {
         return struct_type;
     };

+    if (info.has_readconst) {
+        const Id data_type = U32[1];
+        const auto storage_class = spv::StorageClass::Uniform;
+        const Id pointer_type = TypePointer(storage_class, data_type);
+        const Id record_array_type{
+            TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.size())))};
+
+        const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};
+
+        const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
+        const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
+        Decorate(id, spv::Decoration::Binding, binding.unified++);
+        Decorate(id, spv::Decoration::DescriptorSet, 0U);
+        Name(id, "srt_flatbuf_ubo");
+
+        srt_flatbuf = {
+            .id = id,
+            .binding = binding.buffer++,
+            .pointer_type = pointer_type,
+        };
+        interfaces.push_back(id);
+    }
+
     for (const auto& desc : info.buffers) {
         const auto sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(sharp);
@@ -471,7 +498,7 @@ void EmitContext::DefineBuffers() {
         if (is_storage && !desc.is_written) {
             Decorate(id, spv::Decoration::NonWritable);
         }
-        Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base));
+        Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sharp_idx));

         buffers.push_back({
             .id = id,
@@ -495,7 +522,7 @@ void EmitContext::DefineTextureBuffers() {
         const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
         Decorate(id, spv::Decoration::Binding, binding.unified++);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
-        Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base));
+        Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sharp_idx));
         texture_buffers.push_back({
             .id = id,
             .binding = binding.buffer++,
@@ -582,7 +609,7 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
 }

 Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
-    const auto image = ctx.info.ReadUd<AmdGpu::Image>(desc.sgpr_base, desc.dword_offset);
+    const auto image = ctx.info.ReadUdSharp<AmdGpu::Image>(desc.sharp_idx);
     const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
     const u32 sampled = desc.is_storage ? 2 : 1;
     switch (desc.type) {
@@ -618,8 +645,7 @@ void EmitContext::DefineImagesAndSamplers() {
         const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
         Decorate(id, spv::Decoration::Binding, binding.unified++);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
-        Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
-                             image_desc.dword_offset));
+        Name(id, fmt::format("{}_{}{}", stage, "img", image_desc.sharp_idx));
         images.push_back({
             .data_types = &data_types,
             .id = id,
@@ -643,8 +669,7 @@ void EmitContext::DefineImagesAndSamplers() {
         const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
         Decorate(id, spv::Decoration::Binding, binding.unified++);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
-        Name(id, fmt::format("{}_{}{}_{:02x}", stage, "samp", samp_desc.sgpr_base,
-                             samp_desc.dword_offset));
+        Name(id, fmt::format("{}_{}{}", stage, "samp", samp_desc.sharp_idx));
         samplers.push_back(id);
         interfaces.push_back(id);
     }
@@ -228,6 +228,7 @@ public:
     Bindings& binding;
     boost::container::small_vector<BufferDefinition, 16> buffers;
     boost::container::small_vector<TextureBufferDefinition, 8> texture_buffers;
+    BufferDefinition srt_flatbuf;
     boost::container::small_vector<TextureDefinition, 8> images;
     boost::container::small_vector<Id, 4> samplers;

@@ -10,6 +10,10 @@ static constexpr u32 SQ_SRC_LITERAL = 0xFF;
 void Translator::EmitScalarMemory(const GcnInst& inst) {
     switch (inst.opcode) {
         // SMRD
+    case Opcode::S_LOAD_DWORD:
+        return S_LOAD_DWORD(1, inst);
+    case Opcode::S_LOAD_DWORDX2:
+        return S_LOAD_DWORD(2, inst);
     case Opcode::S_LOAD_DWORDX4:
         return S_LOAD_DWORD(4, inst);
     case Opcode::S_LOAD_DWORDX8:
@@ -388,7 +388,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
     IR::VectorReg dst_reg{attrib.dest_vgpr};

     // Read the V# of the attribute to figure out component number and type.
-    const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
+    const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
     for (u32 i = 0; i < 4; i++) {
         const IR::F32 comp = [&] {
             switch (buffer.GetSwizzle(i)) {
@@ -418,8 +418,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
         if (step_rate == Info::VsInput::OverStepRate0 ||
             step_rate == Info::VsInput::OverStepRate1) {
             info.buffers.push_back({
-                .sgpr_base = attrib.sgpr_base,
-                .dword_offset = attrib.dword_offset,
+                .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
                 .used_types = IR::Type::F32,
                 .is_instance_data = true,
             });
@@ -2,7 +2,9 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #pragma once

+#include <algorithm>
 #include <span>
+#include <vector>
 #include <boost/container/small_vector.hpp>
 #include <boost/container/static_vector.hpp>
 #include "common/assert.h"
@@ -10,6 +12,7 @@
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/frontend/copy_shader.h"
 #include "shader_recompiler/ir/attribute.h"
+#include "shader_recompiler/ir/passes/srt.h"
 #include "shader_recompiler/ir/reg.h"
 #include "shader_recompiler/ir/type.h"
 #include "shader_recompiler/params.h"
@@ -36,8 +39,7 @@ constexpr u32 NUM_TEXTURE_TYPES = 7;
 struct Info;

 struct BufferResource {
-    u32 sgpr_base;
-    u32 dword_offset;
+    u32 sharp_idx;
     IR::Type used_types;
     AmdGpu::Buffer inline_cbuf;
     bool is_gds_buffer{};
@@ -53,8 +55,7 @@ struct BufferResource {
 using BufferResourceList = boost::container::small_vector<BufferResource, 16>;

 struct TextureBufferResource {
-    u32 sgpr_base;
-    u32 dword_offset;
+    u32 sharp_idx;
     AmdGpu::NumberFormat nfmt;
     bool is_written{};

@@ -63,8 +64,7 @@ struct TextureBufferResource {
 using TextureBufferResourceList = boost::container::small_vector<TextureBufferResource, 16>;

 struct ImageResource {
-    u32 sgpr_base;
-    u32 dword_offset;
+    u32 sharp_idx;
     AmdGpu::ImageType type;
     AmdGpu::NumberFormat nfmt;
     bool is_storage{};
@@ -77,8 +77,7 @@ struct ImageResource {
 using ImageResourceList = boost::container::small_vector<ImageResource, 16>;

 struct SamplerResource {
-    u32 sgpr_base;
-    u32 dword_offset;
+    u32 sharp_idx;
     AmdGpu::Sampler inline_sampler{};
     u32 associated_image : 4;
     u32 disable_aniso : 1;
@@ -180,6 +179,9 @@ struct Info {
     ImageResourceList images;
     SamplerResourceList samplers;

+    PersistentSrtInfo srt_info;
+    std::vector<u32> flattened_ud_buf;
+
     std::span<const u32> user_data;
     Stage stage;

@@ -199,6 +201,7 @@ struct Info {
     bool uses_fp64{};
     bool uses_step_rates{};
     bool translation_failed{}; // indicates that shader has unsupported instructions
+    bool has_readconst{};
     u8 mrt_mask{0u};

     explicit Info(Stage stage_, ShaderParams params)
@@ -206,7 +209,12 @@ struct Info {
           user_data{params.user_data} {}

     template <typename T>
-    T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
+    inline T ReadUdSharp(u32 sharp_idx) const noexcept {
+        return *reinterpret_cast<const T*>(&flattened_ud_buf[sharp_idx]);
+    }
+
+    template <typename T>
+    T ReadUdReg(u32 ptr_index, u32 dword_offset) const noexcept {
         T data;
         const u32* base = user_data.data();
         if (ptr_index != IR::NumScalarRegs) {
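Note: after this split there are two deliberate read paths. ReadUdSharp reinterprets dwords that the SRT walker staged into flattened_ud_buf (a V# spans four consecutive dwords there), while ReadUdReg keeps the old direct register read used where no SRT indirection exists. A sketch of both calls (the variables are illustrative):

    // Descriptor recorded by the resource-tracking pass; sharp_idx is a dword slot:
    const AmdGpu::Buffer vsharp = info.ReadUdSharp<AmdGpu::Buffer>(desc.sharp_idx);
    // Fetch-shader attribute V#, still read straight from user-data registers:
    const AmdGpu::Buffer attr = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);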
@@ -228,7 +236,8 @@ struct Info {
     }

     void AddBindings(Backend::Bindings& bnd) const {
-        const auto total_buffers = buffers.size() + texture_buffers.size();
+        const auto total_buffers =
+            buffers.size() + texture_buffers.size() + (has_readconst ? 1 : 0);
         bnd.buffer += total_buffers;
         bnd.unified += total_buffers + images.size() + samplers.size();
         bnd.user_data += ud_mask.NumRegs();
@@ -245,22 +254,32 @@ struct Info {
         }
         return {vertex_offset, instance_offset};
     }

+    void RefreshFlatBuf() {
+        flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
+        ASSERT(user_data.size() <= NumUserDataRegs);
+        std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes());
+        // Run the JIT program to walk the SRT and write the leaves to a flat buffer
+        if (srt_info.walker_func) {
+            srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
+        }
+    }
 };

 constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
-    return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
+    return inline_cbuf ? inline_cbuf : info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
 }

 constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
-    return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
+    return info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
 }

 constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
-    return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
+    return info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
 }

 constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
-    return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
+    return inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
 }

 } // namespace Shader
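Note: RefreshFlatBuf first mirrors the raw user-data registers into the head of the flat buffer, then the JIT-compiled walker appends every SRT leaf that was discovered at shader-compile time. A hand-written C++ analogue of one generated walker, for a hypothetical shader with a single root pointer and two leaves (the offsets are made up; the real routine is emitted per shader by Xbyak):

    #include <cstring>

    void ExampleWalker(const u32* user_data, u32* flat_dst) {
        u64 root;
        std::memcpy(&root, &user_data[0], sizeof(root)); // root pointer in a register pair
        const u32* srt = reinterpret_cast<const u32*>(root & 0xFFFF'FFFF'FFFFULL); // 48-bit VA
        u32 dst = 16; // NumUserDataRegs: leaves land after the mirrored registers
        flat_dst[dst++] = srt[0]; // each dword the shader reads gets a fixed slot...
        flat_dst[dst++] = srt[5]; // ...whose index is patched into the ReadConst flags
    }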
@@ -118,6 +118,10 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
     } else {
        ret += fmt::format("         {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
     }
+
+    if (op == Opcode::ReadConst) {
+        ret += fmt::format(" (flags={}) ", inst.Flags<u32>());
+    }
     const size_t arg_count{inst.NumArgs()};
     for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
         const Value arg{inst.Arg(arg_index)};
@@ -11,34 +11,37 @@

 namespace Shader::IR {

-template <typename Pred>
-auto BreadthFirstSearch(const Inst* inst, Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> {
+// Use typename Instruction so the function can be used to return either const or mutable
+// Insts depending on the context.
+template <typename Instruction, typename Pred>
+auto BreadthFirstSearch(Instruction* inst, Pred&& pred)
+    -> std::invoke_result_t<Pred, Instruction*> {
     // Most often case the instruction is the desired already.
-    if (const std::optional result = pred(inst)) {
+    if (std::optional result = pred(inst)) {
         return result;
     }

     // Breadth-first search visiting the right most arguments first
-    boost::container::small_vector<const Inst*, 2> visited;
-    std::queue<const Inst*> queue;
+    boost::container::small_vector<Instruction*, 2> visited;
+    std::queue<Instruction*> queue;
     queue.push(inst);

     while (!queue.empty()) {
         // Pop one instruction from the queue
-        const Inst* const inst{queue.front()};
+        Instruction* inst{queue.front()};
         queue.pop();
-        if (const std::optional result = pred(inst)) {
+        if (std::optional result = pred(inst)) {
             // This is the instruction we were looking for
             return result;
         }
         // Visit the right most arguments first
         for (size_t arg = inst->NumArgs(); arg--;) {
-            const Value arg_value{inst->Arg(arg)};
+            Value arg_value{inst->Arg(arg)};
             if (arg_value.IsImmediate()) {
                 continue;
             }
             // Queue instruction if it hasn't been visited
-            const Inst* const arg_inst{arg_value.InstRecursive()};
+            Instruction* arg_inst{arg_value.InstRecursive()};
             if (std::ranges::find(visited, arg_inst) == visited.end()) {
                 visited.push_back(arg_inst);
                 queue.push(arg_inst);
@@ -59,4 +62,13 @@ auto BreadthFirstSearch(const Value& value, Pred&& pred)
     return BreadthFirstSearch(value.InstRecursive(), pred);
 }

+template <typename Pred>
+auto BreadthFirstSearch(Value value, Pred&& pred) -> std::invoke_result_t<Pred, Inst*> {
+    if (value.IsImmediate()) {
+        // Nothing to do with immediates
+        return std::nullopt;
+    }
+    return BreadthFirstSearch(value.InstRecursive(), pred);
+}
+
 } // namespace Shader::IR
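Note: predicates return std::optional so the search can stop at the first match. The resource passes in this commit use it to find whichever instruction materialized a pointer, e.g.:

    const auto pred = [](IR::Inst* inst) -> std::optional<IR::Inst*> {
        if (inst->GetOpcode() == IR::Opcode::GetUserData ||
            inst->GetOpcode() == IR::Opcode::ReadConst) {
            return inst;
        }
        return std::nullopt;
    };
    // Visits the value's producer, then its operands breadth-first, right-most first.
    const auto base = IR::BreadthFirstSearch(value, pred);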
src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp (new file)
@@ -0,0 +1,249 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <unordered_map>
+#include <boost/container/flat_map.hpp>
+#include <xbyak/xbyak.h>
+#include <xbyak/xbyak_util.h>
+#include "common/config.h"
+#include "common/io_file.h"
+#include "common/logging/log.h"
+#include "common/path_util.h"
+#include "shader_recompiler/info.h"
+#include "shader_recompiler/ir/breadth_first_search.h"
+#include "shader_recompiler/ir/opcodes.h"
+#include "shader_recompiler/ir/passes/srt.h"
+#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/ir/reg.h"
+#include "shader_recompiler/ir/srt_gvn_table.h"
+#include "shader_recompiler/ir/value.h"
+#include "src/common/arch.h"
+#include "src/common/decoder.h"
+
+using namespace Xbyak::util;
+
+static Xbyak::CodeGenerator g_srt_codegen(32_MB);
+
+namespace {
+
+static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t codesize) {
+#ifdef ARCH_X86_64
+    using namespace Common::FS;
+
+    const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
+    if (!std::filesystem::exists(dump_dir)) {
+        std::filesystem::create_directories(dump_dir);
+    }
+    const auto filename = fmt::format("{}_{:#018x}.srtprogram.txt", info.stage, info.pgm_hash);
+    const auto file = IOFile{dump_dir / filename, FileAccessMode::Write, FileType::TextFile};
+
+    u64 address = reinterpret_cast<u64>(code);
+    u64 code_end = address + codesize;
+    ZydisDecodedInstruction instruction;
+    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
+    ZyanStatus status = ZYAN_STATUS_SUCCESS;
+    while (address < code_end && ZYAN_SUCCESS(Common::Decoder::Instance()->decodeInstruction(
+                                     instruction, operands, reinterpret_cast<void*>(address)))) {
+        std::string s =
+            Common::Decoder::Instance()->disassembleInst(instruction, operands, address);
+        s += "\n";
+        file.WriteString(s);
+        address += instruction.length;
+    }
+#endif
+}
+
+using namespace Shader;
+
+struct PassInfo {
+    // map offset to inst
+    using PtrUserList = boost::container::flat_map<u32, Shader::IR::Inst*>;
+
+    Optimization::SrtGvnTable gvn_table;
+    // keys are GetUserData or ReadConst instructions that are used as pointers
+    std::unordered_map<IR::Inst*, PtrUserList> pointer_uses;
+    // GetUserData instructions corresponding to sgpr_base of SRT roots
+    boost::container::small_flat_map<IR::ScalarReg, IR::Inst*, 1> srt_roots;
+
+    // pick a single inst for a given value number
+    std::unordered_map<u32, IR::Inst*> vn_to_inst;
+
+    // Bumped during codegen to assign offsets to readconsts
+    u32 dst_off_dw;
+
+    PtrUserList* GetUsesAsPointer(IR::Inst* inst) {
+        auto it = pointer_uses.find(inst);
+        if (it != pointer_uses.end()) {
+            return &it->second;
+        }
+        return nullptr;
+    }
+
+    // Return a single instruction that this instruction is identical to, according
+    // to value number
+    // The "original" is arbitrary. Here it's the first instruction found for a given value number
+    IR::Inst* DeduplicateInstruction(IR::Inst* inst) {
+        auto it = vn_to_inst.try_emplace(gvn_table.GetValueNumber(inst), inst);
+        return it.first->second;
+    }
+};
+} // namespace
+
+namespace Shader::Optimization {
+
+namespace {
+
+static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
+    c.push(rdi);
+    c.mov(rdi, ptr[rdi + (off_dw << 2)]);
+    c.mov(r10, 0xFFFFFFFFFFFFULL);
+    c.and_(rdi, r10);
+}
+
+static inline void PopPtr(Xbyak::CodeGenerator& c) {
+    c.pop(rdi);
+};
+
+static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info,
+                         Xbyak::CodeGenerator& c) {
+    PushPtr(c, off_dw);
+    PassInfo::PtrUserList* use_list = pass_info.GetUsesAsPointer(subtree);
+    ASSERT(use_list);
+
+    // First copy all the src data from this tree level
+    // That way, all data that was contiguous in the guest SRT is also contiguous in the
+    // flattened buffer.
+    // TODO src and dst are contiguous. Optimize with wider loads/stores
+    // TODO if this subtree is dynamically indexed, don't compact it (keep it sparse)
+    for (auto [src_off_dw, use] : *use_list) {
+        c.mov(r10d, ptr[rdi + (src_off_dw << 2)]);
+        c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r10d);
+
+        use->SetFlags<u32>(pass_info.dst_off_dw);
+        pass_info.dst_off_dw++;
+    }
+
+    // Then visit any children used as pointers
+    for (const auto [src_off_dw, use] : *use_list) {
+        if (pass_info.GetUsesAsPointer(use)) {
+            VisitPointer(src_off_dw, use, pass_info, c);
+        }
+    }
+
+    PopPtr(c);
+}
+
+static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
+    Xbyak::CodeGenerator& c = g_srt_codegen;
+
+    if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) {
+        return;
+    }
+
+    info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
+
+    pass_info.dst_off_dw = NumUserDataRegs;
+
+    // Special case for V# step rate buffers in fetch shader
+    for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) {
+        // get pointer to V#
+        c.mov(r10d, ptr[rdi + (sgpr_base << 2)]);
+
+        u32 src_off = dword_offset << 2;
+
+        for (auto j = 0; j < num_dwords; j++) {
+            c.mov(r11d, ptr[r10d + src_off]);
+            c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
+
+            src_off += 4;
+            ++pass_info.dst_off_dw;
+        }
+    }
+
+    ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
+
+    for (const auto& [sgpr_base, root] : pass_info.srt_roots) {
+        VisitPointer(static_cast<u32>(sgpr_base), root, pass_info, c);
+    }
+
+    c.ret();
+    c.ready();
+
+    if (Config::dumpShaders()) {
+        size_t codesize = c.getCurr() - reinterpret_cast<const u8*>(info.srt_info.walker_func);
+        DumpSrtProgram(info, reinterpret_cast<const u8*>(info.srt_info.walker_func), codesize);
+    }
+
+    info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
+}
+
+}; // namespace
+
+void FlattenExtendedUserdataPass(IR::Program& program) {
+    Shader::Info& info = program.info;
+    PassInfo pass_info;
+
+    // traverse at end and assign offsets to duplicate readconsts, using
+    // vn_to_inst as the source
+    boost::container::small_vector<IR::Inst*, 32> all_readconsts;
+
+    for (auto r_it = program.post_order_blocks.rbegin(); r_it != program.post_order_blocks.rend();
+         r_it++) {
+        IR::Block* block = *r_it;
+        for (IR::Inst& inst : *block) {
+            if (inst.GetOpcode() == IR::Opcode::ReadConst) {
+                if (!inst.Arg(1).IsImmediate()) {
+                    LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset");
+                    continue;
+                }
+
+                all_readconsts.push_back(&inst);
+                if (pass_info.DeduplicateInstruction(&inst) != &inst) {
+                    // This is a duplicate of a readconst we've already visited
+                    continue;
+                }
+
+                IR::Inst* ptr_composite = inst.Arg(0).InstRecursive();
+
+                const auto pred = [](IR::Inst* inst) -> std::optional<IR::Inst*> {
+                    if (inst->GetOpcode() == IR::Opcode::GetUserData ||
+                        inst->GetOpcode() == IR::Opcode::ReadConst) {
+                        return inst;
+                    }
+                    return std::nullopt;
+                };
+                auto base0 = IR::BreadthFirstSearch(ptr_composite->Arg(0), pred);
+                auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred);
+                ASSERT_MSG(base0 && base1 && "ReadConst not from constant memory");
+
+                IR::Inst* ptr_lo = base0.value();
+                ptr_lo = pass_info.DeduplicateInstruction(ptr_lo);
+
+                auto ptr_uses_kv =
+                    pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{});
+                PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second;
+
+                user_list[inst.Arg(1).U32()] = &inst;
+
+                if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) {
+                    IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg();
+                    pass_info.srt_roots[ud_reg] = ptr_lo;
+                }
+            }
+        }
+    }
+
+    GenerateSrtProgram(info, pass_info);
+
+    // Assign offsets to duplicate readconsts
+    for (IR::Inst* readconst : all_readconsts) {
+        ASSERT(pass_info.vn_to_inst.contains(pass_info.gvn_table.GetValueNumber(readconst)));
+        IR::Inst* original = pass_info.DeduplicateInstruction(readconst);
+        readconst->SetFlags<u32>(original->Flags<u32>());
+    }
+
+    info.RefreshFlatBuf();
+}
+
+} // namespace Shader::Optimization
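Note: the pass works in two phases. It first scans every ReadConst, deduplicating by value number and recording, per pointer, which dword offsets are read through it; GenerateSrtProgram then emits one x86-64 routine (rdi = user data, rsi = flat destination, matching the PFN_SrtWalker ABI) that copies exactly those dwords. A worked example of the resulting layout for a hypothetical two-level SRT:

    // Guest view (hypothetical offsets):
    //   s[0:1]           root table pointer P
    //   ReadConst(P, 0)  scalar constant A
    //   ReadConst(P, 5)  nested table pointer Q
    //   ReadConst(Q, 2)  scalar constant B
    // Flattened buffer written by the walker (dwords 0-15 mirror the registers):
    //   flat[16] = P[0]  -> ReadConst flags patched to 16
    //   flat[17] = P[5]  -> ReadConst flags patched to 17
    //   flat[18] = Q[2]  -> ReadConst flags patched to 18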
@@ -12,6 +12,7 @@ void SsaRewritePass(IR::BlockList& program);
 void IdentityRemovalPass(IR::BlockList& program);
 void DeadCodeEliminationPass(IR::Program& program);
 void ConstantPropagationPass(IR::BlockList& program);
+void FlattenExtendedUserdataPass(IR::Program& program);
 void ResourceTrackingPass(IR::Program& program);
 void CollectShaderInfoPass(IR::Program& program);
 void LowerSharedMemToRegisters(IR::Program& program);
@@ -13,12 +13,7 @@
 namespace Shader::Optimization {
 namespace {

-struct SharpLocation {
-    u32 sgpr_base;
-    u32 dword_offset;
-
-    auto operator<=>(const SharpLocation&) const = default;
-};
+using SharpLocation = u32;

 bool IsBufferAtomic(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
@@ -155,9 +150,7 @@ public:
             if (desc.is_gds_buffer && existing.is_gds_buffer) {
                 return true;
             }
-            return desc.sgpr_base == existing.sgpr_base &&
-                   desc.dword_offset == existing.dword_offset &&
-                   desc.inline_cbuf == existing.inline_cbuf;
+            return desc.sharp_idx == existing.sharp_idx && desc.inline_cbuf == existing.inline_cbuf;
         })};
         auto& buffer = buffer_resources[index];
         buffer.used_types |= desc.used_types;
@@ -167,8 +160,7 @@ public:

     u32 Add(const TextureBufferResource& desc) {
         const u32 index{Add(texture_buffer_resources, desc, [&desc](const auto& existing) {
-            return desc.sgpr_base == existing.sgpr_base &&
-                   desc.dword_offset == existing.dword_offset;
+            return desc.sharp_idx == existing.sharp_idx;
         })};
         auto& buffer = texture_buffer_resources[index];
         buffer.is_written |= desc.is_written;
@@ -177,8 +169,7 @@ public:

     u32 Add(const ImageResource& desc) {
         const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
-            return desc.sgpr_base == existing.sgpr_base &&
-                   desc.dword_offset == existing.dword_offset;
+            return desc.sharp_idx == existing.sharp_idx;
         })};
         auto& image = image_resources[index];
         image.is_storage |= desc.is_storage;
@@ -187,8 +178,7 @@ public:

     u32 Add(const SamplerResource& desc) {
         const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
-            return desc.sgpr_base == existing.sgpr_base &&
-                   desc.dword_offset == existing.dword_offset;
+            return desc.sharp_idx == existing.sharp_idx;
         })};
         return index;
     }
@@ -259,48 +249,25 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
     return {prod2, true};
 }

-SharpLocation TrackSharp(const IR::Inst* inst) {
+SharpLocation TrackSharp(const IR::Inst* inst, const Shader::Info& info) {
     // Search until we find a potential sharp source.
-    const auto pred0 = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
+    const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
         if (inst->GetOpcode() == IR::Opcode::GetUserData ||
             inst->GetOpcode() == IR::Opcode::ReadConst) {
             return inst;
         }
         return std::nullopt;
     };
-    const auto result = IR::BreadthFirstSearch(inst, pred0);
+    const auto result = IR::BreadthFirstSearch(inst, pred);
     ASSERT_MSG(result, "Unable to track sharp source");
     inst = result.value();
     // If its from user data not much else to do.
     if (inst->GetOpcode() == IR::Opcode::GetUserData) {
-        return SharpLocation{
-            .sgpr_base = u32(IR::ScalarReg::Max),
-            .dword_offset = u32(inst->Arg(0).ScalarReg()),
-        };
+        return static_cast<u32>(inst->Arg(0).ScalarReg());
+    } else {
+        ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst,
+                   "Sharp load not from constant memory");
+        return inst->Flags<u32>();
     }
-    ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");
-
-    // Retrieve offset from base.
-    const u32 dword_offset = inst->Arg(1).U32();
-    const IR::Inst* spgpr_base = inst->Arg(0).InstRecursive();
-
-    // Retrieve SGPR pair that holds sbase
-    const auto pred1 = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
-        ASSERT(inst->GetOpcode() != IR::Opcode::ReadConst);
-        if (inst->GetOpcode() == IR::Opcode::GetUserData) {
-            return inst->Arg(0).ScalarReg();
-        }
-        return std::nullopt;
-    };
-    const auto base0 = IR::BreadthFirstSearch(spgpr_base->Arg(0), pred1);
-    const auto base1 = IR::BreadthFirstSearch(spgpr_base->Arg(1), pred1);
-    ASSERT_MSG(base0 && base1, "Nested resource loads not supported");
-
-    // Return retrieved location.
-    return SharpLocation{
-        .sgpr_base = u32(base0.value()),
-        .dword_offset = dword_offset,
-    };
 }

 s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
@@ -327,8 +294,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
     cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
     // Assign a binding to this sharp.
     return descriptors.Add(BufferResource{
-        .sgpr_base = std::numeric_limits<u32>::max(),
-        .dword_offset = 0,
+        .sharp_idx = std::numeric_limits<u32>::max(),
         .used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
         .inline_cbuf = cbuf,
     });
@@ -341,11 +307,10 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
     if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
         IR::Inst* handle = inst.Arg(0).InstRecursive();
         IR::Inst* producer = handle->Arg(0).InstRecursive();
-        const auto sharp = TrackSharp(producer);
-        buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
+        const auto sharp = TrackSharp(producer, info);
+        buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
         binding = descriptors.Add(BufferResource{
-            .sgpr_base = sharp.sgpr_base,
-            .dword_offset = sharp.dword_offset,
+            .sharp_idx = sharp,
             .used_types = BufferDataType(inst, buffer.GetNumberFmt()),
             .is_written = IsBufferStore(inst),
         });
@@ -404,11 +369,10 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                                    Descriptors& descriptors) {
     const IR::Inst* handle = inst.Arg(0).InstRecursive();
     const IR::Inst* producer = handle->Arg(0).InstRecursive();
-    const auto sharp = TrackSharp(producer);
-    const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
+    const auto sharp = TrackSharp(producer, info);
+    const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
     const s32 binding = descriptors.Add(TextureBufferResource{
-        .sgpr_base = sharp.sgpr_base,
-        .dword_offset = sharp.dword_offset,
+        .sharp_idx = sharp,
         .nfmt = buffer.GetNumberFmt(),
         .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
     });
@@ -456,18 +420,16 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
     if (handle.IsImmediate()) {
         LOG_WARNING(Render_Vulkan, "Inline sampler detected");
         return descriptors.Add(SamplerResource{
-            .sgpr_base = std::numeric_limits<u32>::max(),
-            .dword_offset = 0,
+            .sharp_idx = std::numeric_limits<u32>::max(),
             .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
         });
     }
     // Normal sampler resource.
     const auto ssharp_handle = handle.InstRecursive();
     const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
-    const auto ssharp = TrackSharp(ssharp_ud);
+    const auto ssharp = TrackSharp(ssharp_ud, info);
     return descriptors.Add(SamplerResource{
-        .sgpr_base = ssharp.sgpr_base,
-        .dword_offset = ssharp.dword_offset,
+        .sharp_idx = ssharp,
         .associated_image = image_binding,
         .disable_aniso = disable_aniso,
     });
@@ -647,9 +609,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
     const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;

     // Read image sharp.
-    const auto tsharp = TrackSharp(tsharp_handle);
+    const auto tsharp = TrackSharp(tsharp_handle, info);
     const auto inst_info = inst.Flags<IR::TextureInstInfo>();
-    auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
+    auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
     if (!image.Valid()) {
         LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
         image = AmdGpu::Image::Null();
@@ -658,8 +620,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
     const bool is_storage = IsImageStorageInstruction(inst);
     const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
     u32 image_binding = descriptors.Add(ImageResource{
-        .sgpr_base = tsharp.sgpr_base,
-        .dword_offset = tsharp.dword_offset,
+        .sharp_idx = tsharp,
         .type = type,
         .nfmt = image.GetNumberFmt(),
         .is_storage = is_storage,
@@ -763,6 +724,7 @@ void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
 void ResourceTrackingPass(IR::Program& program) {
     // Iterate resource instructions and patch them after finding the sharp.
     auto& info = program.info;
+
     Descriptors descriptors{info};
     for (IR::Block* const block : program.blocks) {
         for (IR::Inst& inst : block->Instructions()) {
@@ -63,6 +63,9 @@ void Visit(Info& info, IR::Inst& inst) {
     case IR::Opcode::LaneId:
         info.uses_lane_id = true;
         break;
+    case IR::Opcode::ReadConst:
+        info.has_readconst = true;
+        break;
     default:
         break;
     }
src/shader_recompiler/ir/passes/srt.h (new file)
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <boost/container/set.hpp>
+#include <boost/container/small_vector.hpp>
+#include "common/types.h"
+
+namespace Shader {
+
+using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
+
+struct PersistentSrtInfo {
+    // Special case when fetch shader uses step rates.
+    struct SrtSharpReservation {
+        u32 sgpr_base;
+        u32 dword_offset;
+        u32 num_dwords;
+    };
+
+    PFN_SrtWalker walker_func{};
+    boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
+    u32 flattened_bufsize_dw = 16; // NumUserDataRegs
+
+    // Special case for fetch shaders because we don't generate IR to read from step rate buffers,
+    // so we won't see usage with GetUserData/ReadConst.
+    // Reserve space in the flattened buffer for a sharp ahead of time
+    u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
+        u32 rv = flattened_bufsize_dw;
+        srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
+        flattened_bufsize_dw += num_dwords;
+        return rv;
+    }
+};
+
+} // namespace Shader
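Note: ReserveSharp covers sharps that never appear as IR loads. The fetch-shader change earlier in this diff reserves four dwords for a step-rate V# and stores the returned slot as the buffer's sharp_idx:

    info.buffers.push_back({
        .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
        .used_types = IR::Type::F32,
        .is_instance_data = true,
    });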
src/shader_recompiler/ir/srt_gvn_table.h (new file)
@@ -0,0 +1,157 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <unordered_map>
+#include <boost/container/set.hpp>
+#include <boost/container/small_vector.hpp>
+#include "common/assert.h"
+#include "common/hash.h"
+#include "common/types.h"
+#include "shader_recompiler/ir/breadth_first_search.h"
+#include "shader_recompiler/ir/opcodes.h"
+#include "shader_recompiler/ir/value.h"
+
+namespace Shader::Optimization {
+
+// Does global value numbering on a subset of instructions that are used
+// for loads from shader resource tables.
+// Inspiration from spirv-opt
+
+class SrtGvnTable {
+public:
+    using ValueNumberTable = std::unordered_map<IR::Value, u32>;
+    using ValueNum = u32;
+
+    SrtGvnTable() : value_numbers(), next_num(0) {}
+
+    u32 GetValueNumber(IR::Inst* inst) {
+        return GetValueNumber(IR::Value{inst});
+    }
+
+    u32 GetValueNumber(IR::Value v) {
+        v = v.Resolve();
+        if (auto it = value_numbers.find(v); it != value_numbers.end()) {
+            return it->second;
+        }
+        if (auto inst = v.TryInstRecursive()) {
+            return ComputeInstValueNumber(inst);
+        }
+        return NextValueNumber(v);
+    }
+
+private:
+    u32 ComputeInstValueNumber(IR::Inst* inst) {
+        ASSERT(!value_numbers.contains(
+            IR::Value(inst))); // Should always be checking before calling this function
+
+        if (inst->MayHaveSideEffects()) {
+            return NextValueNumber(IR::Value(inst));
+        }
+
+        u32 vn;
+
+        switch (inst->GetOpcode()) {
+        case IR::Opcode::Phi: {
+            // hack to get to parity with main
+            // Need to fix ssa_rewrite pass to remove certain phis
+            std::optional<IR::Value> source = TryRemoveTrivialPhi(inst);
+            if (!source) {
+                const auto pred = [](IR::Inst* inst) -> std::optional<IR::Inst*> {
+                    if (inst->GetOpcode() == IR::Opcode::GetUserData ||
+                        inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
+                        inst->GetOpcode() == IR::Opcode::ReadConst) {
+                        return inst;
+                    }
+                    return std::nullopt;
+                };
+                source = IR::BreadthFirstSearch(inst, pred).transform([](auto inst) {
+                    return IR::Value{inst};
+                });
+                ASSERT(source);
+            }
+            vn = GetValueNumber(source.value());
+            value_numbers[IR::Value(inst)] = vn;
+            break;
+        }
+        case IR::Opcode::GetUserData:
+        case IR::Opcode::CompositeConstructU32x2:
+        case IR::Opcode::ReadConst: {
+            InstVector iv = MakeInstVector(inst);
+            if (auto it = iv_to_vn.find(iv); it != iv_to_vn.end()) {
+                vn = it->second;
+                value_numbers[IR::Value(inst)] = vn;
+            } else {
+                vn = NextValueNumber(IR::Value(inst));
+                iv_to_vn.emplace(std::move(iv), vn);
+            }
+            break;
+        }
+        default:
+            vn = NextValueNumber(IR::Value(inst));
+            break;
+        }
+
+        return vn;
+    }
+
+    u32 NextValueNumber(IR::Value v) {
+        u32 rv = next_num++;
+        value_numbers[v] = rv;
+        return rv;
+    }
+
+    ValueNumberTable value_numbers;
+    u32 next_num;
+
+    using InstVector = boost::container::small_vector<u32, 8>;
+
+    InstVector MakeInstVector(IR::Inst* inst) {
+        ASSERT(inst->GetOpcode() != IR::Opcode::Identity);
+        InstVector iv;
+        iv.reserve(2 + inst->NumArgs());
+        iv.push_back(static_cast<u32>(inst->GetOpcode()));
+        iv.push_back(inst->Flags<u32>());
+        for (auto i = 0; i < inst->NumArgs(); i++) {
+            iv.push_back(GetValueNumber(inst->Arg(i)));
+        }
+        return iv;
+    }
+
+    // Temp workaround for something like this:
+    // [0000555558a5baf8] %297 = Phi [ %24, {Block $1} ], [ %297, {Block $5} ] (uses: 4)
+    // [0000555558a4e038] %305 = CompositeConstructU32x2 %297, %296 (uses: 4)
+    // [0000555558a4e0a8] %306 = ReadConst %305, #0 (uses: 2)
+    // Should probably be fixed in ssa_rewrite
+    std::optional<IR::Value> TryRemoveTrivialPhi(IR::Inst* phi) {
+        IR::Value single_source{};
+
+        for (auto i = 0; i < phi->NumArgs(); i++) {
+            IR::Value v = phi->Arg(i).Resolve();
+            if (v == IR::Value(phi)) {
+                continue;
+            }
+            if (!single_source.IsEmpty() && single_source != v) {
+                return std::nullopt;
+            }
+            single_source = v;
+        }
+
+        ASSERT(!single_source.IsEmpty());
+        phi->ReplaceUsesWith(single_source);
+        return single_source;
+    }
+
+    struct HashInstVector {
+        size_t operator()(const InstVector& iv) const {
+            u32 h = 0;
+            for (auto vn : iv) {
+                h = HashCombine(vn, h);
+            }
+            return h;
+        }
+    };
+
+    std::unordered_map<InstVector, u32, HashInstVector> iv_to_vn;
+};
+
+} // namespace Shader::Optimization
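Note: the table treats two instructions as equal when opcode, flags, and (recursively) all argument value numbers match, so structurally identical loads collapse onto one flat-buffer slot even across blocks. A sketch of the intended equivalence (a and b stand for the two ReadConst instructions below):

    // %a = ReadConst %ptr, #4    (block 1)
    // %b = ReadConst %ptr, #4    (block 2)
    SrtGvnTable table;
    const bool same = table.GetValueNumber(a) == table.GetValueNumber(b); // expected: true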
@@ -1,7 +1,9 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

-#include <string>
+#include <cstddef>
+#include <string_view>
+#include "common/hash.h"
 #include "shader_recompiler/ir/value.h"

 namespace Shader::IR {
@@ -97,3 +99,52 @@ bool Value::operator!=(const Value& other) const {
 }

 } // namespace Shader::IR
+
+namespace std {
+std::size_t hash<Shader::IR::Value>::operator()(const Shader::IR::Value& v) const {
+    using namespace Shader::IR;
+
+    u64 h = HashCombine(static_cast<u64>(v.type), 0ULL);
+
+    switch (v.type) {
+    case Type::Void:
+        return h;
+    case Type::Opaque:
+        return reinterpret_cast<u64>(v.InstRecursive());
+    case Type::ScalarReg:
+        return HashCombine(static_cast<u64>(v.sreg), h);
+    case Type::VectorReg:
+        return HashCombine(static_cast<u64>(v.vreg), h);
+    case Type::Attribute:
+        return HashCombine(static_cast<u64>(v.attribute), h);
+    case Type::U1:
+        return HashCombine(static_cast<u64>(v.attribute), h);
+    case Type::U8:
+        return HashCombine(static_cast<u64>(v.imm_u8), h);
+    case Type::U16:
+    case Type::F16:
+        return HashCombine(static_cast<u64>(v.imm_u16), h);
+    case Type::U32:
+    case Type::F32:
+        return HashCombine(static_cast<u64>(v.imm_u32), h);
+    case Type::U64:
+    case Type::F64:
+        return HashCombine(static_cast<u64>(v.imm_u64), h);
+    case Type::U32x2:
+    case Type::U32x3:
+    case Type::U32x4:
+    case Type::F16x2:
+    case Type::F16x3:
+    case Type::F16x4:
+    case Type::F32x2:
+    case Type::F32x3:
+    case Type::F32x4:
+    case Type::F64x2:
+    case Type::F64x3:
+    case Type::F64x4:
+    default:
+        break;
+    }
+    UNREACHABLE_MSG("Invalid type {}", v.type);
+}
+} // namespace std
@@ -29,6 +29,7 @@ class Value {
 public:
     Value() noexcept = default;
     explicit Value(IR::Inst* value) noexcept;
+    explicit Value(const IR::Inst* value) noexcept;
     explicit Value(IR::ScalarReg reg) noexcept;
     explicit Value(IR::VectorReg reg) noexcept;
     explicit Value(IR::Attribute value) noexcept;
@@ -82,6 +83,8 @@ private:
         f64 imm_f64;
         const char* string_literal;
     };
+
+    friend class std::hash<Value>;
 };
 static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero");
 static_assert(std::is_trivially_copyable_v<Value>);
@@ -364,3 +367,10 @@ inline const char* Value::StringLiteral() const {
 }

 } // namespace Shader::IR
+
+namespace std {
+template <>
+struct hash<Shader::IR::Value> {
+    std::size_t operator()(const Shader::IR::Value& v) const;
+};
+} // namespace std
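Note: the specialization declared here (defined in value.cpp above) is what lets IR values key hash containers directly, which SrtGvnTable relies on:

    std::unordered_map<Shader::IR::Value, u32> value_numbers; // uses std::hash<Shader::IR::Value>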
@@ -64,6 +64,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
         Shader::Optimization::LowerSharedMemToRegisters(program);
     }
     Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);
+    Shader::Optimization::FlattenExtendedUserdataPass(program);
     Shader::Optimization::ResourceTrackingPass(program);
     Shader::Optimization::IdentityRemovalPass(program.blocks);
     Shader::Optimization::DeadCodeEliminationPass(program);
@@ -8,6 +8,7 @@
 #include "common/types.h"
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/info.h"
+#include "shader_recompiler/ir/passes/srt.h"

 namespace Shader {

@@ -52,6 +53,9 @@ struct StageSpecialization {
                         Backend::Bindings start_)
         : info{&info_}, runtime_info{runtime_info_}, start{start_} {
         u32 binding{};
+        if (info->has_readconst) {
+            binding++;
+        }
         ForEachSharp(binding, buffers, info->buffers,
                      [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                          spec.stride = sharp.GetStride();
@@ -90,6 +94,12 @@ struct StageSpecialization {
             return false;
         }
         u32 binding{};
+        if (info->has_readconst != other.info->has_readconst) {
+            return false;
+        }
+        if (info->has_readconst) {
+            binding++;
+        }
         for (u32 i = 0; i < buffers.size(); i++) {
             if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
                 return false;
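Note: reserving the first slot when has_readconst keeps the specialization bitset aligned with the descriptor bindings, because the flat-buffer UBO occupies binding 0 of the set. Illustrative layout:

    // binding 0:    srt_flatbuf_ubo (present only when has_readconst)
    // binding 1..N: buffers, then texture buffers, images, samplers
    u32 binding{};
    if (info->has_readconst) {
        binding++; // slot 0 belongs to the flat buffer, not to buffers[0]
    }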
@@ -4,6 +4,7 @@
 #include <algorithm>
 #include "common/alignment.h"
 #include "common/scope_exit.h"
+#include "common/types.h"
 #include "shader_recompiler/info.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/buffer_cache/buffer_cache.h"
@@ -156,7 +157,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
             continue;
         }

-        const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+        const auto& buffer = vs_info.ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
         if (buffer.GetSize() == 0) {
             continue;
         }
@@ -301,6 +302,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
     cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
 }

+std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
+    static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
+    ASSERT(data.size_bytes() <= StreamThreshold);
+    const u64 offset = stream_buffer.Copy(reinterpret_cast<VAddr>(data.data()), data.size_bytes(),
+                                          instance.UniformMinAlignment());
+    return {&stream_buffer, offset};
+}
+
 std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
                                                   bool is_texel_buffer, BufferId buffer_id) {
     // For small uniform buffers that have not been modified by gpu
@@ -84,6 +84,8 @@ public:
     /// Writes a value to GPU buffer.
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);

+    [[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);
+
     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
                                                        bool is_texel_buffer = false,
@@ -25,6 +25,15 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler

     u32 binding{};
     boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;

+    if (info->has_readconst) {
+        bindings.push_back({
+            .binding = binding++,
+            .descriptorType = vk::DescriptorType::eUniformBuffer,
+            .descriptorCount = 1,
+            .stageFlags = vk::ShaderStageFlagBits::eCompute,
+        });
+    }
     for (const auto& buffer : info->buffers) {
         const auto sharp = buffer.GetSharp(*info);
         bindings.push_back({
@@ -60,7 +60,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
         }

         const auto buffer =
-            vs_info->ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+            vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
         if (buffer.GetSize() == 0) {
             continue;
         }
@@ -327,6 +327,15 @@ void GraphicsPipeline::BuildDescSetLayout() {
         if (!stage) {
             continue;
        }
+
+        if (stage->has_readconst) {
+            bindings.push_back({
+                .binding = binding++,
+                .descriptorType = vk::DescriptorType::eUniformBuffer,
+                .descriptorCount = 1,
+                .stageFlags = gp_stage_flags,
+            });
+        }
         for (const auto& buffer : stage->buffers) {
             const auto sharp = buffer.GetSharp(*stage);
             bindings.push_back({
@@ -4,6 +4,7 @@
 #include <ranges>

 #include "common/config.h"
+#include "common/hash.h"
 #include "common/io_file.h"
 #include "common/path_util.h"
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
@@ -22,10 +23,6 @@ namespace Vulkan {

 using Shader::VsOutput;

-[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
-    return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
-}
-
 constexpr static std::array DescriptorHeapSizes = {
     vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 8192},
     vk::DescriptorPoolSize{vk::DescriptorType::eStorageBuffer, 1024},
@@ -351,7 +348,7 @@ bool PipelineCache::RefreshGraphicsKey() {
                 continue;
             }
             const auto& buffer =
-                vs_info->ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+                vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
             if (buffer.GetSize() == 0) {
                 continue;
             }
@@ -424,7 +421,8 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
     }

     Program* program = it_pgm->second;
-    const auto& info = program->info;
+    auto& info = program->info;
+    info.RefreshFlatBuf();
     const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
     size_t perm_idx = program->modules.size();
     vk::ShaderModule module{};
@@ -57,6 +57,22 @@ void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache,
         }
     }

+    // Bind the flattened user data buffer as a UBO so it's accessible to the shader
+    if (stage.has_readconst) {
+        const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
+        buffer_infos.emplace_back(vk_buffer->Handle(), offset,
+                                  stage.flattened_ud_buf.size() * sizeof(u32));
+        set_writes.push_back({
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = binding.unified++,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = vk::DescriptorType::eUniformBuffer,
+            .pBufferInfo = &buffer_infos.back(),
+        });
+        ++binding.buffer;
+    }
+
     // Second pass to re-bind buffers that were updated after binding
     for (u32 i = 0; i < buffer_bindings.size(); i++) {
         const auto& [buffer_id, vsharp] = buffer_bindings[i];
@@ -12,6 +12,10 @@
 #include "video_core/texture_cache/texture_cache.h"
 #include "vk_rasterizer.h"

+#ifdef MemoryBarrier
+#undef MemoryBarrier
+#endif
+
 namespace Vulkan {

 Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,