mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-01 04:35:59 +00:00
Image subresources barriers (#904)
* video_core: texture: image subresources state tracking * shader_recompiler: use one binding if the same image is read and written * video_core: added rebinding of changed textures after overlap resolve * don't use pointers; slight `FindTexture` refactoring * video_core: buffer_cache: don't copy over the image size * redundant barriers removed; fixes * regression fixes * texture_cache: 3d texture layers count fixup * shader_recompiler: support for partially bound cubemaps * added support for cubemap arrays * don't bind unused color buffers * fixed depth promotion to do not use stencil * doors * bonfire lit * cubemap array index calculation * final touches
This commit is contained in:
parent
4fc28b39af
commit
3939bc4f10
|
@ -579,6 +579,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
|||
src/video_core/renderer_vulkan/vk_master_semaphore.h
|
||||
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
|
||||
src/video_core/renderer_vulkan/vk_pipeline_cache.h
|
||||
src/video_core/renderer_vulkan/vk_pipeline_common.cpp
|
||||
src/video_core/renderer_vulkan/vk_pipeline_common.h
|
||||
src/video_core/renderer_vulkan/vk_platform.cpp
|
||||
src/video_core/renderer_vulkan/vk_platform.h
|
||||
src/video_core/renderer_vulkan/vk_rasterizer.cpp
|
||||
|
|
|
@ -157,8 +157,11 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
|
|||
ImageOperands operands;
|
||||
operands.AddOffset(ctx, offset);
|
||||
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
||||
return ctx.OpBitcast(
|
||||
ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands));
|
||||
const Id texel =
|
||||
texture.is_storage
|
||||
? ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands)
|
||||
: ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands);
|
||||
return ctx.OpBitcast(ctx.F32[4], texel);
|
||||
}
|
||||
|
||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) {
|
||||
|
|
|
@ -510,7 +510,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
case AmdGpu::ImageType::Color3D:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
|
||||
case AmdGpu::ImageType::Cube:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format);
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled,
|
||||
format);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -534,6 +535,7 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
|
||||
.pointer_type = pointer_type,
|
||||
.image_type = image_type,
|
||||
.is_storage = image_desc.is_storage,
|
||||
});
|
||||
interfaces.push_back(id);
|
||||
++binding;
|
||||
|
|
|
@ -200,6 +200,7 @@ public:
|
|||
Id sampled_type;
|
||||
Id pointer_type;
|
||||
Id image_type;
|
||||
bool is_storage = false;
|
||||
};
|
||||
|
||||
struct BufferDefinition {
|
||||
|
@ -216,8 +217,8 @@ public:
|
|||
u32 binding;
|
||||
Id image_type;
|
||||
Id result_type;
|
||||
bool is_integer;
|
||||
bool is_storage;
|
||||
bool is_integer = false;
|
||||
bool is_storage = false;
|
||||
};
|
||||
|
||||
u32& binding;
|
||||
|
|
|
@ -1032,6 +1032,7 @@ void GcnDecodeContext::decodeInstructionMIMG(uint64_t hexInstruction) {
|
|||
|
||||
m_instruction.control.mimg = *reinterpret_cast<InstControlMIMG*>(&hexInstruction);
|
||||
m_instruction.control.mimg.mod = getMimgModifier(m_instruction.opcode);
|
||||
ASSERT(m_instruction.control.mimg.r128 == 0);
|
||||
}
|
||||
|
||||
void GcnDecodeContext::decodeInstructionDS(uint64_t hexInstruction) {
|
||||
|
|
|
@ -71,6 +71,9 @@ void Translator::EmitExport(const GcnInst& inst) {
|
|||
ir.SetAttribute(attrib, comp, swizzle(i));
|
||||
}
|
||||
}
|
||||
if (IR::IsMrt(attrib)) {
|
||||
info.mrt_mask |= 1u << u8(attrib);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -546,6 +546,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||
info.explicit_lod.Assign(explicit_lod);
|
||||
info.has_derivatives.Assign(has_derivatives);
|
||||
info.is_array.Assign(mimg.da);
|
||||
|
||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||
const IR::Value texel = [&]() -> IR::Value {
|
||||
|
@ -630,6 +631,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
|
|||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||
// info.explicit_lod.Assign(explicit_lod);
|
||||
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
|
||||
info.is_array.Assign(mimg.da);
|
||||
|
||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||
const IR::Value texel = [&]() -> IR::Value {
|
||||
|
|
|
@ -64,9 +64,10 @@ struct ImageResource {
|
|||
u32 dword_offset;
|
||||
AmdGpu::ImageType type;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_storage;
|
||||
bool is_depth;
|
||||
bool is_storage{};
|
||||
bool is_depth{};
|
||||
bool is_atomic{};
|
||||
bool is_array{};
|
||||
|
||||
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
|
@ -171,6 +172,7 @@ struct Info {
|
|||
bool uses_fp64{};
|
||||
bool uses_step_rates{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
u8 mrt_mask{0u};
|
||||
|
||||
explicit Info(Stage stage_, ShaderParams params)
|
||||
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||
|
|
|
@ -200,9 +200,10 @@ public:
|
|||
u32 Add(const ImageResource& desc) {
|
||||
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset && desc.type == existing.type &&
|
||||
desc.is_storage == existing.is_storage;
|
||||
desc.dword_offset == existing.dword_offset;
|
||||
})};
|
||||
auto& image = image_resources[index];
|
||||
image.is_storage |= desc.is_storage;
|
||||
return index;
|
||||
}
|
||||
|
||||
|
@ -441,18 +442,29 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
}
|
||||
|
||||
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
||||
const IR::Value& z, bool is_storage) {
|
||||
const IR::Value& z, bool is_storage, bool is_array) {
|
||||
// When cubemap is written with imageStore it is treated like 2DArray.
|
||||
if (is_storage) {
|
||||
return ir.CompositeConstruct(s, t, z);
|
||||
}
|
||||
|
||||
ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below
|
||||
|
||||
// We need to fix x and y coordinate,
|
||||
// because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32.
|
||||
// We already force the scale value to be 1.0 when handling v_cubema_f32,
|
||||
// here we subtract 1.5 to recover the original value.
|
||||
const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
|
||||
const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
|
||||
if (is_array) {
|
||||
const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z});
|
||||
const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u));
|
||||
const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u));
|
||||
return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id),
|
||||
ir.ConvertIToF(32, 32, false, slice_id));
|
||||
} else {
|
||||
return ir.CompositeConstruct(x, y, z);
|
||||
}
|
||||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
|
@ -481,14 +493,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
const bool is_storage = IsImageStorageInstruction(inst);
|
||||
const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sgpr_base = tsharp.sgpr_base,
|
||||
.dword_offset = tsharp.dword_offset,
|
||||
.type = image.GetType(),
|
||||
.type = type,
|
||||
.nfmt = static_cast<AmdGpu::NumberFormat>(image.GetNumberFmt()),
|
||||
.is_storage = is_storage,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
.is_array = bool(inst_info.is_array),
|
||||
});
|
||||
|
||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||
|
@ -545,7 +559,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
case AmdGpu::ImageType::Color3D: // x, y, z
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
case AmdGpu::ImageType::Cube: // x, y, face
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage),
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage,
|
||||
inst_info.is_array),
|
||||
body->Arg(3)};
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
|
||||
|
|
|
@ -59,6 +59,7 @@ union TextureInstInfo {
|
|||
BitField<5, 1, u32> has_offset;
|
||||
BitField<6, 2, u32> gather_comp;
|
||||
BitField<8, 1, u32> has_derivatives;
|
||||
BitField<9, 1, u32> is_array;
|
||||
};
|
||||
|
||||
union BufferInstInfo {
|
||||
|
|
|
@ -62,7 +62,8 @@ struct StageSpecialization {
|
|||
});
|
||||
ForEachSharp(binding, images, info->images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
spec.type = sharp.GetType();
|
||||
spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray
|
||||
: sharp.GetType();
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
});
|
||||
}
|
||||
|
|
|
@ -238,10 +238,15 @@ struct Image {
|
|||
return pitch + 1;
|
||||
}
|
||||
|
||||
u32 NumLayers() const {
|
||||
u32 NumLayers(bool is_array) const {
|
||||
u32 slices = GetType() == ImageType::Color3D ? 1 : depth + 1;
|
||||
if (GetType() == ImageType::Cube) {
|
||||
slices *= 6;
|
||||
if (is_array) {
|
||||
slices = last_array + 1;
|
||||
ASSERT(slices % 6 == 0);
|
||||
} else {
|
||||
slices = 6;
|
||||
}
|
||||
}
|
||||
if (pow2pad) {
|
||||
slices = std::bit_ceil(slices);
|
||||
|
@ -282,6 +287,11 @@ struct Image {
|
|||
bool IsTiled() const {
|
||||
return GetTilingMode() != TilingMode::Display_Linear;
|
||||
}
|
||||
|
||||
bool IsPartialCubemap() const {
|
||||
const auto viewed_slice = last_array - base_array + 1;
|
||||
return GetType() == ImageType::Cube && viewed_slice < 6;
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Image) == 32); // 256bits
|
||||
|
||||
|
|
|
@ -581,15 +581,23 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
|||
return false;
|
||||
}
|
||||
Image& image = texture_cache.GetImage(image_id);
|
||||
ASSERT_MSG(device_addr == image.info.guest_address,
|
||||
"Texel buffer aliases image subresources {:x} : {:x}", device_addr,
|
||||
image.info.guest_address);
|
||||
boost::container::small_vector<vk::BufferImageCopy, 8> copies;
|
||||
u32 offset = buffer.Offset(image.cpu_addr);
|
||||
const u32 num_layers = image.info.resources.layers;
|
||||
u32 total_size = 0;
|
||||
for (u32 m = 0; m < image.info.resources.levels; m++) {
|
||||
const u32 width = std::max(image.info.size.width >> m, 1u);
|
||||
const u32 height = std::max(image.info.size.height >> m, 1u);
|
||||
const u32 depth =
|
||||
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
|
||||
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
|
||||
offset += mip_ofs * num_layers;
|
||||
if (offset + (mip_size * num_layers) > buffer.SizeBytes()) {
|
||||
break;
|
||||
}
|
||||
copies.push_back({
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = static_cast<u32>(mip_pitch),
|
||||
|
@ -603,11 +611,11 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
|||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {width, height, depth},
|
||||
});
|
||||
offset += mip_ofs * num_layers;
|
||||
total_size += mip_size * num_layers;
|
||||
}
|
||||
if (!copies.empty()) {
|
||||
scheduler.EndRendering();
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
|
||||
copies);
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
@ -55,4 +56,13 @@ vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags support
|
|||
|
||||
void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
|
||||
|
||||
/// Maps a single-channel color format onto the depth format used in its place.
/// Only eR32Sfloat and eR16Unorm are handled; any other input hits UNREACHABLE().
static inline vk::Format PromoteFormatToDepth(vk::Format fmt) {
    switch (fmt) {
    case vk::Format::eR32Sfloat:
        return vk::Format::eD32Sfloat;
    case vk::Format::eR16Unorm:
        return vk::Format::eD16Unorm;
    default:
        break;
    }
    UNREACHABLE();
}
|
||||
|
||||
} // namespace Vulkan::LiverpoolToVK
|
||||
|
|
|
@ -202,7 +202,8 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop
|
|||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf);
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {},
|
||||
cmdbuf);
|
||||
|
||||
const std::array pre_barrier{
|
||||
vk::ImageMemoryBarrier{
|
||||
|
@ -228,7 +229,7 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop
|
|||
|
||||
// Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
|
||||
cmdbuf.blitImage(
|
||||
image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
|
||||
image.image, image.last_state.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
|
||||
MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height),
|
||||
vk::Filter::eLinear);
|
||||
|
||||
|
@ -269,6 +270,9 @@ void RendererVulkan::Present(Frame* frame) {
|
|||
|
||||
auto& scheduler = present_scheduler;
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
|
||||
ImGui::Core::Render(cmdbuf, frame);
|
||||
|
||||
{
|
||||
auto* profiler_ctx = instance.GetProfilerContext();
|
||||
TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame",
|
||||
|
@ -326,8 +330,6 @@ void RendererVulkan::Present(Frame* frame) {
|
|||
},
|
||||
};
|
||||
|
||||
ImGui::Core::Render(cmdbuf, frame);
|
||||
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
|
@ -15,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
|||
DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache,
|
||||
u64 compute_key_, const Shader::Info& info_,
|
||||
vk::ShaderModule module)
|
||||
: instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, compute_key{compute_key_},
|
||||
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, compute_key{compute_key_},
|
||||
info{&info_} {
|
||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||
|
@ -108,12 +109,13 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
// Bind resource buffers and textures.
|
||||
boost::container::static_vector<vk::BufferView, 8> buffer_views;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
|
||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||
boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
|
||||
Shader::PushData push_data{};
|
||||
u32 binding{};
|
||||
|
||||
image_infos.clear();
|
||||
|
||||
for (const auto& desc : info->buffers) {
|
||||
bool is_storage = true;
|
||||
if (desc.is_gds_buffer) {
|
||||
|
@ -213,35 +215,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
});
|
||||
}
|
||||
|
||||
for (const auto& image_desc : info->images) {
|
||||
const auto tsharp = image_desc.GetSharp(*info);
|
||||
if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
|
||||
VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth};
|
||||
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
|
||||
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
|
||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
|
||||
} else if (instance.IsNullDescriptorSupported()) {
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
} else {
|
||||
auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view,
|
||||
vk::ImageLayout::eGeneral);
|
||||
}
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = image_desc.is_storage ? vk::DescriptorType::eStorageImage
|
||||
: vk::DescriptorType::eSampledImage,
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
BindTextures(texture_cache, *info, binding, set_writes);
|
||||
|
||||
if (texture_cache.IsMeta(tsharp.Address())) {
|
||||
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (texture)");
|
||||
}
|
||||
}
|
||||
for (const auto& sampler : info->samplers) {
|
||||
const auto ssharp = sampler.GetSharp(*info);
|
||||
if (ssharp.force_degamma) {
|
||||
|
|
|
@ -3,9 +3,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_common.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class BufferCache;
|
||||
|
@ -18,27 +17,17 @@ class Instance;
|
|||
class Scheduler;
|
||||
class DescriptorHeap;
|
||||
|
||||
class ComputePipeline {
|
||||
class ComputePipeline : public Pipeline {
|
||||
public:
|
||||
explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
|
||||
DescriptorHeap& desc_heap, vk::PipelineCache pipeline_cache,
|
||||
u64 compute_key, const Shader::Info& info, vk::ShaderModule module);
|
||||
ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
|
||||
vk::PipelineCache pipeline_cache, u64 compute_key, const Shader::Info& info,
|
||||
vk::ShaderModule module);
|
||||
~ComputePipeline();
|
||||
|
||||
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
||||
return *pipeline;
|
||||
}
|
||||
|
||||
bool BindResources(VideoCore::BufferCache& buffer_cache,
|
||||
VideoCore::TextureCache& texture_cache) const;
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
DescriptorHeap& desc_heap;
|
||||
vk::UniquePipeline pipeline;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
vk::UniqueDescriptorSetLayout desc_layout;
|
||||
u64 compute_key;
|
||||
const Shader::Info* info;
|
||||
bool uses_push_descriptors{};
|
||||
|
|
|
@ -21,7 +21,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
vk::PipelineCache pipeline_cache,
|
||||
std::span<const Shader::Info*, MaxShaderStages> infos,
|
||||
std::span<const vk::ShaderModule> modules)
|
||||
: instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, key{key_} {
|
||||
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} {
|
||||
const vk::Device device = instance.GetDevice();
|
||||
std::ranges::copy(infos, stages.begin());
|
||||
BuildDescSetLayout();
|
||||
|
@ -41,8 +41,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
};
|
||||
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||
|
||||
boost::container::static_vector<vk::VertexInputBindingDescription, 32> bindings;
|
||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
|
||||
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
|
||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
|
||||
const auto& vs_info = stages[u32(Shader::Stage::Vertex)];
|
||||
for (const auto& input : vs_info->vs_inputs) {
|
||||
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||
|
@ -52,13 +52,13 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
}
|
||||
|
||||
const auto buffer = vs_info->ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||
attributes.push_back({
|
||||
vertex_attributes.push_back({
|
||||
.location = input.binding,
|
||||
.binding = input.binding,
|
||||
.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
|
||||
.offset = 0,
|
||||
});
|
||||
bindings.push_back({
|
||||
vertex_bindings.push_back({
|
||||
.binding = input.binding,
|
||||
.stride = buffer.GetStride(),
|
||||
.inputRate = input.instance_step_rate == Shader::Info::VsInput::None
|
||||
|
@ -68,10 +68,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
}
|
||||
|
||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
||||
.vertexBindingDescriptionCount = static_cast<u32>(bindings.size()),
|
||||
.pVertexBindingDescriptions = bindings.data(),
|
||||
.vertexAttributeDescriptionCount = static_cast<u32>(attributes.size()),
|
||||
.pVertexAttributeDescriptions = attributes.data(),
|
||||
.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
|
||||
.pVertexBindingDescriptions = vertex_bindings.data(),
|
||||
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
|
||||
.pVertexAttributeDescriptions = vertex_attributes.data(),
|
||||
};
|
||||
|
||||
if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) {
|
||||
|
@ -291,8 +291,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
GraphicsPipeline::~GraphicsPipeline() = default;
|
||||
|
||||
void GraphicsPipeline::BuildDescSetLayout() {
|
||||
u32 binding{};
|
||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||
u32 binding{};
|
||||
|
||||
for (const auto* stage : stages) {
|
||||
if (!stage) {
|
||||
continue;
|
||||
|
@ -352,12 +353,13 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
// Bind resource buffers and textures.
|
||||
boost::container::static_vector<vk::BufferView, 8> buffer_views;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
|
||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||
boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
|
||||
Shader::PushData push_data{};
|
||||
u32 binding{};
|
||||
|
||||
image_infos.clear();
|
||||
|
||||
for (const auto* stage : stages) {
|
||||
if (!stage) {
|
||||
continue;
|
||||
|
@ -444,44 +446,15 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
});
|
||||
}
|
||||
|
||||
boost::container::static_vector<AmdGpu::Image, 32> tsharps;
|
||||
for (const auto& image_desc : stage->images) {
|
||||
const auto tsharp = image_desc.GetSharp(*stage);
|
||||
if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
|
||||
tsharps.emplace_back(tsharp);
|
||||
VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth};
|
||||
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
|
||||
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
|
||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
|
||||
} else if (instance.IsNullDescriptorSupported()) {
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
} else {
|
||||
auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view,
|
||||
vk::ImageLayout::eGeneral);
|
||||
}
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = image_desc.is_storage ? vk::DescriptorType::eStorageImage
|
||||
: vk::DescriptorType::eSampledImage,
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
BindTextures(texture_cache, *stage, binding, set_writes);
|
||||
|
||||
if (texture_cache.IsMeta(tsharp.Address())) {
|
||||
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)");
|
||||
}
|
||||
}
|
||||
for (const auto& sampler : stage->samplers) {
|
||||
auto ssharp = sampler.GetSharp(*stage);
|
||||
if (ssharp.force_degamma) {
|
||||
LOG_WARNING(Render_Vulkan, "Texture requires gamma correction");
|
||||
}
|
||||
if (sampler.disable_aniso) {
|
||||
const auto& tsharp = tsharps[sampler.associated_image];
|
||||
const auto& tsharp = stage->images[sampler.associated_image].GetSharp(*stage);
|
||||
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
|
||||
ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One);
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#include "common/types.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_common.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class BufferCache;
|
||||
|
@ -33,6 +33,7 @@ struct GraphicsPipelineKey {
|
|||
Liverpool::DepthControl depth_stencil;
|
||||
u32 depth_bias_enable;
|
||||
u32 num_samples;
|
||||
u32 mrt_mask;
|
||||
Liverpool::StencilControl stencil;
|
||||
Liverpool::PrimitiveType prim_type;
|
||||
u32 enable_primitive_restart;
|
||||
|
@ -50,11 +51,10 @@ struct GraphicsPipelineKey {
|
|||
}
|
||||
};
|
||||
|
||||
class GraphicsPipeline {
|
||||
class GraphicsPipeline : public Pipeline {
|
||||
public:
|
||||
explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
|
||||
DescriptorHeap& desc_heap, const GraphicsPipelineKey& key,
|
||||
vk::PipelineCache pipeline_cache,
|
||||
GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
|
||||
const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
|
||||
std::span<const Shader::Info*, MaxShaderStages> stages,
|
||||
std::span<const vk::ShaderModule> modules);
|
||||
~GraphicsPipeline();
|
||||
|
@ -62,14 +62,6 @@ public:
|
|||
void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache,
|
||||
VideoCore::TextureCache& texture_cache) const;
|
||||
|
||||
vk::Pipeline Handle() const noexcept {
|
||||
return *pipeline;
|
||||
}
|
||||
|
||||
vk::PipelineLayout GetLayout() const {
|
||||
return *pipeline_layout;
|
||||
}
|
||||
|
||||
const Shader::Info& GetStage(Shader::Stage stage) const noexcept {
|
||||
return *stages[u32(stage)];
|
||||
}
|
||||
|
@ -83,6 +75,10 @@ public:
|
|||
return key.write_masks;
|
||||
}
|
||||
|
||||
auto GetMrtMask() const {
|
||||
return key.mrt_mask;
|
||||
}
|
||||
|
||||
bool IsDepthEnabled() const {
|
||||
return key.depth_stencil.depth_enable.Value();
|
||||
}
|
||||
|
@ -91,12 +87,6 @@ private:
|
|||
void BuildDescSetLayout();
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
DescriptorHeap& desc_heap;
|
||||
vk::UniquePipeline pipeline;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
vk::UniqueDescriptorSetLayout desc_layout;
|
||||
std::array<const Shader::Info*, MaxShaderStages> stages{};
|
||||
GraphicsPipelineKey key;
|
||||
bool uses_push_descriptors{};
|
||||
|
|
|
@ -282,6 +282,7 @@ bool Instance::CreateDevice() {
|
|||
vk::PhysicalDeviceFeatures2{
|
||||
.features{
|
||||
.robustBufferAccess = features.robustBufferAccess,
|
||||
.imageCubeArray = features.imageCubeArray,
|
||||
.independentBlend = features.independentBlend,
|
||||
.geometryShader = features.geometryShader,
|
||||
.logicOp = features.logicOp,
|
||||
|
|
|
@ -234,18 +234,20 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
key.front_face = regs.polygon_control.front_face;
|
||||
key.num_samples = regs.aa_config.NumSamples();
|
||||
|
||||
const auto skip_cb_binding =
|
||||
const bool skip_cb_binding =
|
||||
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
|
||||
|
||||
// `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
|
||||
// attachments. This might be not a case as HW color buffers can be bound in an arbitrary order.
|
||||
// We need to do some arrays compaction at this stage
|
||||
// attachments. This might not be the case as HW color buffers can be bound in an arbitrary
|
||||
// order. We need to do some array compaction at this stage
|
||||
key.color_formats.fill(vk::Format::eUndefined);
|
||||
key.blend_controls.fill({});
|
||||
key.write_masks.fill({});
|
||||
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
|
||||
int remapped_cb{};
|
||||
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
||||
|
||||
// First pass of bindings check to identify formats and swizzles and pass them to the shader
|
||||
// recompiler.
|
||||
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
||||
auto const& col_buf = regs.color_buffers[cb];
|
||||
if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) {
|
||||
continue;
|
||||
|
@ -258,11 +260,6 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
if (base_format == key.color_formats[remapped_cb]) {
|
||||
key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value();
|
||||
}
|
||||
key.blend_controls[remapped_cb] = regs.blend_control[cb];
|
||||
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
|
||||
!col_buf.info.blend_bypass);
|
||||
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
|
||||
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
|
||||
|
||||
++remapped_cb;
|
||||
}
|
||||
|
@ -309,6 +306,28 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
|
||||
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding);
|
||||
}
|
||||
|
||||
const auto* fs_info = infos[u32(Shader::Stage::Fragment)];
|
||||
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
|
||||
|
||||
// Second pass to fill the remaining CB pipeline key data
|
||||
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
||||
auto const& col_buf = regs.color_buffers[cb];
|
||||
if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb) ||
|
||||
(key.mrt_mask & (1u << cb)) == 0) {
|
||||
key.color_formats[cb] = vk::Format::eUndefined;
|
||||
key.mrt_swizzles[cb] = Liverpool::ColorBuffer::SwapMode::Standard;
|
||||
continue;
|
||||
}
|
||||
|
||||
key.blend_controls[remapped_cb] = regs.blend_control[cb];
|
||||
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
|
||||
!col_buf.info.blend_bypass);
|
||||
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
|
||||
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
|
||||
|
||||
++remapped_cb;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
80
src/video_core/renderer_vulkan/vk_pipeline_common.cpp
Normal file
80
src/video_core/renderer_vulkan/vk_pipeline_common.cpp
Normal file
|
@ -0,0 +1,80 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_common.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Definition of the static, class-wide list of descriptor image infos. BindTextures() appends
// to it and stores pointers to its elements in the descriptor writes it emits.
boost::container::static_vector<vk::DescriptorImageInfo, 32> Pipeline::image_infos;
|
||||
|
||||
/// Binds the instance, scheduler and descriptor heap references held by the pipeline.
/// The `pipeline_cache` argument is accepted but not used by this constructor.
Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_,
                   vk::PipelineCache pipeline_cache)
    : instance(instance_), scheduler(scheduler_), desc_heap(desc_heap_) {}
|
||||
|
||||
// Defaulted destructor, defined out of line in this translation unit.
Pipeline::~Pipeline() = default;
|
||||
|
||||
/// Emits one image descriptor write for every entry of `stage.images`, in order.
///
/// The work is split in two passes. The first pass resolves each T# that has a valid data
/// format to an image in the texture cache and flags that image as Bound; entries with an
/// invalid data format are recorded with a default-constructed image id. The second pass
/// creates the descriptor infos, looking the image up again when it has been flagged
/// NeedsRebind since the first pass.
///
/// @param texture_cache Cache used to look up images and image views.
/// @param stage         Shader stage info providing the image resources to bind.
/// @param binding       Running binding index; incremented once per emitted write.
/// @param set_writes    Receives the descriptor writes. Each write points at an element of the
///                      static `image_infos` list, which this function appends to.
void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
                            u32& binding, DescriptorWrites& set_writes) const {
    using BoundImage = std::tuple<VideoCore::ImageId, AmdGpu::Image, Shader::ImageResource>;
    boost::container::static_vector<BoundImage, 32> pending;

    // First pass: resolve images and mark them as bound.
    for (const auto& resource : stage.images) {
        const auto sharp = resource.GetSharp(stage);

        // Stays default-constructed when the T# carries no valid data format.
        VideoCore::ImageId resolved_id{};
        if (sharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
            VideoCore::ImageInfo lookup_info{sharp, resource};
            resolved_id = texture_cache.FindImage(lookup_info);
            texture_cache.GetImage(resolved_id).flags |= VideoCore::ImageFlagBits::Bound;
        }
        pending.emplace_back(resolved_id, sharp, resource);

        if (texture_cache.IsMeta(sharp.Address())) {
            LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)");
        }
    }

    // Second pass: build the descriptors, re-resolving images that were updated after binding.
    for (auto [slot_id, sharp, resource] : pending) {
        const bool is_unbound = !slot_id;
        if (!is_unbound) {
            auto& cached = texture_cache.GetImage(slot_id);
            if (True(cached.flags & VideoCore::ImageFlagBits::NeedsRebind)) {
                slot_id = texture_cache.FindImage(cached.info);
            }

            VideoCore::ImageViewInfo view_info{sharp, resource};
            auto& view = texture_cache.FindTexture(slot_id, view_info);
            image_infos.emplace_back(VK_NULL_HANDLE, *view.image_view,
                                     texture_cache.GetImage(slot_id).last_state.layout);

            // The flags are cleared on the image fetched before the optional re-lookup.
            cached.flags &=
                ~(VideoCore::ImageFlagBits::NeedsRebind | VideoCore::ImageFlagBits::Bound);
        } else if (instance.IsNullDescriptorSupported()) {
            image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
        } else {
            // No null descriptor support: fall back to the cache's null image view.
            auto& null_view = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID);
            image_infos.emplace_back(VK_NULL_HANDLE, *null_view.image_view,
                                     vk::ImageLayout::eGeneral);
        }

        const auto descriptor_type = resource.is_storage ? vk::DescriptorType::eStorageImage
                                                         : vk::DescriptorType::eSampledImage;
        set_writes.push_back({
            .dstSet = VK_NULL_HANDLE,
            .dstBinding = binding++,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = descriptor_type,
            .pImageInfo = &image_infos.back(),
        });
    }
}
|
||||
|
||||
} // namespace Vulkan
|
48
src/video_core/renderer_vulkan/vk_pipeline_common.h
Normal file
48
src/video_core/renderer_vulkan/vk_pipeline_common.h
Normal file
|
@ -0,0 +1,48 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class BufferCache;
|
||||
class TextureCache;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
class DescriptorHeap;
|
||||
|
||||
class Pipeline {
|
||||
public:
|
||||
Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
|
||||
vk::PipelineCache pipeline_cache);
|
||||
virtual ~Pipeline();
|
||||
|
||||
vk::Pipeline Handle() const noexcept {
|
||||
return *pipeline;
|
||||
}
|
||||
|
||||
vk::PipelineLayout GetLayout() const noexcept {
|
||||
return *pipeline_layout;
|
||||
}
|
||||
|
||||
using DescriptorWrites = boost::container::small_vector<vk::WriteDescriptorSet, 16>;
|
||||
void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
|
||||
u32& binding, DescriptorWrites& set_writes) const;
|
||||
|
||||
protected:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
DescriptorHeap& desc_heap;
|
||||
vk::UniquePipeline pipeline;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
vk::UniqueDescriptorSetLayout desc_layout;
|
||||
static boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -44,7 +44,6 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(
|
|||
case 0xc81ad50e:
|
||||
case 0xb7c39078:
|
||||
case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE
|
||||
case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error)
|
||||
return VK_FALSE;
|
||||
default:
|
||||
break;
|
||||
|
|
|
@ -62,7 +62,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
buffer_cache.BindVertexBuffers(vs_info);
|
||||
const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
|
||||
|
||||
BeginRendering();
|
||||
BeginRendering(*pipeline);
|
||||
UpdateDynamicState(*pipeline);
|
||||
|
||||
const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets();
|
||||
|
@ -102,7 +102,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si
|
|||
buffer_cache.BindVertexBuffers(vs_info);
|
||||
const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, 0);
|
||||
|
||||
BeginRendering();
|
||||
BeginRendering(*pipeline);
|
||||
UpdateDynamicState(*pipeline);
|
||||
|
||||
const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true);
|
||||
|
@ -179,7 +179,7 @@ void Rasterizer::Finish() {
|
|||
scheduler.Finish();
|
||||
}
|
||||
|
||||
void Rasterizer::BeginRendering() {
|
||||
void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline) {
|
||||
const auto& regs = liverpool->regs;
|
||||
RenderState state;
|
||||
|
||||
|
@ -199,6 +199,13 @@ void Rasterizer::BeginRendering() {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Skip stale color buffers if shader doesn't output to them. Otherwise it will perform
|
||||
// an unnecessary transition and may result in state conflict if the resource is already
|
||||
// bound for reading.
|
||||
if ((pipeline.GetMrtMask() & (1 << col_buf_id)) == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& hint = liverpool->last_cb_extent[col_buf_id];
|
||||
VideoCore::ImageInfo image_info{col_buf, hint};
|
||||
VideoCore::ImageViewInfo view_info{col_buf, false /*!!image.info.usage.vo_buffer*/};
|
||||
|
@ -240,7 +247,7 @@ void Rasterizer::BeginRendering() {
|
|||
state.depth_image = image.image;
|
||||
state.depth_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = image.layout,
|
||||
.imageLayout = image.last_state.layout,
|
||||
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
|
||||
|
|
|
@ -52,7 +52,7 @@ public:
|
|||
void Finish();
|
||||
|
||||
private:
|
||||
void BeginRendering();
|
||||
void BeginRendering(const GraphicsPipeline& pipeline);
|
||||
|
||||
void UpdateDynamicState(const GraphicsPipeline& pipeline);
|
||||
void UpdateViewportScissorState();
|
||||
|
|
|
@ -59,58 +59,6 @@ void Scheduler::EndRendering() {
|
|||
}
|
||||
is_rendering = false;
|
||||
current_cmdbuf.endRendering();
|
||||
|
||||
boost::container::static_vector<vk::ImageMemoryBarrier, 9> barriers;
|
||||
for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
|
||||
barriers.push_back(vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
|
||||
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = render_state.color_images[i],
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
});
|
||||
}
|
||||
if (render_state.has_depth || render_state.has_stencil) {
|
||||
barriers.push_back(vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
|
||||
.oldLayout = render_state.depth_attachment.imageLayout,
|
||||
.newLayout = render_state.depth_attachment.imageLayout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = render_state.depth_image,
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eDepth |
|
||||
(render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil
|
||||
: vk::ImageAspectFlagBits::eNone),
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
if (!barriers.empty()) {
|
||||
const auto src_stages =
|
||||
vk::PipelineStageFlagBits::eColorAttachmentOutput |
|
||||
(render_state.has_depth ? vk::PipelineStageFlagBits::eLateFragmentTests |
|
||||
vk::PipelineStageFlagBits::eEarlyFragmentTests
|
||||
: vk::PipelineStageFlagBits::eNone);
|
||||
current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
|
||||
}
|
||||
}
|
||||
|
||||
void Scheduler::Flush(SubmitInfo& info) {
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#define VULKAN_HPP_NO_EXCEPTIONS
|
||||
#include <ranges>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
@ -124,7 +125,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||
// the texture cache should re-create the resource with the usage requested
|
||||
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
|
||||
vk::ImageCreateFlagBits::eExtendedUsage};
|
||||
if (info.props.is_cube) {
|
||||
if (info.props.is_cube || (info.type == vk::ImageType::e2D && info.resources.layers >= 6)) {
|
||||
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
|
||||
} else if (info.props.is_volume) {
|
||||
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
|
||||
|
@ -179,16 +180,83 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||
info.guest_size_bytes);
|
||||
}
|
||||
|
||||
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
|
||||
vk::CommandBuffer cmdbuf) {
|
||||
if (dst_layout == layout && dst_mask == access_mask) {
|
||||
return;
|
||||
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
|
||||
vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
|
||||
vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range) {
|
||||
const bool needs_partial_transition =
|
||||
subres_range &&
|
||||
(subres_range->base != SubresourceBase{} || subres_range->extent != info.resources);
|
||||
const bool partially_transited = !subresource_states.empty();
|
||||
|
||||
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> barriers{};
|
||||
if (needs_partial_transition || partially_transited) {
|
||||
if (!partially_transited) {
|
||||
subresource_states.resize(info.resources.levels * info.resources.layers);
|
||||
std::fill(subresource_states.begin(), subresource_states.end(), last_state);
|
||||
}
|
||||
|
||||
const vk::ImageMemoryBarrier barrier = {
|
||||
.srcAccessMask = access_mask,
|
||||
// In case of partial transition, we need to change the specified subresources only.
|
||||
// Otherwise all subresources need to be set to the same state so we can use a full
|
||||
// resource transition for the next time.
|
||||
const auto mips =
|
||||
needs_partial_transition
|
||||
? std::ranges::views::iota(subres_range->base.level,
|
||||
subres_range->base.level + subres_range->extent.levels)
|
||||
: std::views::iota(0u, info.resources.levels);
|
||||
const auto layers =
|
||||
needs_partial_transition
|
||||
? std::ranges::views::iota(subres_range->base.layer,
|
||||
subres_range->base.layer + subres_range->extent.layers)
|
||||
: std::views::iota(0u, info.resources.layers);
|
||||
|
||||
for (u32 mip : mips) {
|
||||
for (u32 layer : layers) {
|
||||
// NOTE: these loops may produce a lot of small barriers.
|
||||
// If this becomes a problem, we can optimize it by merging adjacent barriers.
|
||||
const auto subres_idx = mip * info.resources.layers + layer;
|
||||
ASSERT(subres_idx < subresource_states.size());
|
||||
auto& state = subresource_states[subres_idx];
|
||||
|
||||
if (state.layout != dst_layout || state.access_mask != dst_mask) {
|
||||
barriers.emplace_back(vk::ImageMemoryBarrier2{
|
||||
.srcStageMask = state.pl_stage,
|
||||
.srcAccessMask = state.access_mask,
|
||||
.dstStageMask = dst_stage,
|
||||
.dstAccessMask = dst_mask,
|
||||
.oldLayout = layout,
|
||||
.oldLayout = state.layout,
|
||||
.newLayout = dst_layout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = mip,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = layer,
|
||||
.layerCount = 1,
|
||||
},
|
||||
});
|
||||
state.layout = dst_layout;
|
||||
state.access_mask = dst_mask;
|
||||
state.pl_stage = dst_stage;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!needs_partial_transition) {
|
||||
subresource_states.clear();
|
||||
}
|
||||
} else { // Full resource transition
|
||||
if (last_state.layout == dst_layout && last_state.access_mask == dst_mask) {
|
||||
return {};
|
||||
}
|
||||
|
||||
barriers.emplace_back(vk::ImageMemoryBarrier2{
|
||||
.srcStageMask = last_state.pl_stage,
|
||||
.srcAccessMask = last_state.access_mask,
|
||||
.dstStageMask = dst_stage,
|
||||
.dstAccessMask = dst_mask,
|
||||
.oldLayout = last_state.layout,
|
||||
.newLayout = dst_layout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
|
@ -200,31 +268,44 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
|
|||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
last_state.layout = dst_layout;
|
||||
last_state.access_mask = dst_mask;
|
||||
last_state.pl_stage = dst_stage;
|
||||
|
||||
return barriers;
|
||||
}
|
||||
|
||||
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
|
||||
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf /*= {}*/) {
|
||||
// Adjust pipeline stage
|
||||
const vk::PipelineStageFlags dst_pl_stage =
|
||||
(dst_mask == vk::AccessFlagBits::eTransferRead ||
|
||||
dst_mask == vk::AccessFlagBits::eTransferWrite)
|
||||
? vk::PipelineStageFlagBits::eTransfer
|
||||
: vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader;
|
||||
const vk::PipelineStageFlags2 dst_pl_stage =
|
||||
(dst_mask == vk::AccessFlagBits2::eTransferRead ||
|
||||
dst_mask == vk::AccessFlagBits2::eTransferWrite)
|
||||
? vk::PipelineStageFlagBits2::eTransfer
|
||||
: vk::PipelineStageFlagBits2::eAllGraphics | vk::PipelineStageFlagBits2::eComputeShader;
|
||||
|
||||
const auto barriers = GetBarriers(dst_layout, dst_mask, dst_pl_stage, range);
|
||||
if (barriers.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!cmdbuf) {
|
||||
// When using external cmdbuf you are responsible for ending rp.
|
||||
scheduler->EndRendering();
|
||||
cmdbuf = scheduler->CommandBuffer();
|
||||
}
|
||||
cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {},
|
||||
barrier);
|
||||
|
||||
layout = dst_layout;
|
||||
access_mask = dst_mask;
|
||||
pl_stage = dst_pl_stage;
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
|
||||
.pImageMemoryBarriers = barriers.data(),
|
||||
});
|
||||
}
|
||||
|
||||
void Image::Upload(vk::Buffer buffer, u64 offset) {
|
||||
scheduler->EndRendering();
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
// Copy to the image.
|
||||
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil
|
||||
|
@ -248,12 +329,12 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
|
|||
cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||
}
|
||||
|
||||
void Image::CopyImage(const Image& image) {
|
||||
scheduler->EndRendering();
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
auto cmdbuf = scheduler->CommandBuffer();
|
||||
|
||||
|
@ -279,15 +360,16 @@ void Image::CopyImage(const Image& image) {
|
|||
.extent = {mip_w, mip_h, mip_d},
|
||||
});
|
||||
}
|
||||
cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy);
|
||||
cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
|
||||
image_copy);
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||
}
|
||||
|
||||
void Image::CopyMip(const Image& image, u32 mip) {
|
||||
scheduler->EndRendering();
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
auto cmdbuf = scheduler->CommandBuffer();
|
||||
|
||||
|
@ -313,10 +395,11 @@ void Image::CopyMip(const Image& image, u32 mip) {
|
|||
},
|
||||
.extent = {mip_w, mip_h, mip_d},
|
||||
};
|
||||
cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy);
|
||||
cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
|
||||
image_copy);
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||
}
|
||||
|
||||
Image::~Image() = default;
|
||||
|
|
|
@ -32,6 +32,8 @@ enum ImageFlagBits : u32 {
|
|||
Registered = 1 << 6, ///< True when the image is registered
|
||||
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
||||
MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered
|
||||
Bound = 1 << 9, ///< True when the image is bound to a descriptor set
|
||||
NeedsRebind = 1 << 10, ///< True when the image needs to be rebound
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
||||
|
||||
|
@ -91,8 +93,11 @@ struct Image {
|
|||
return image_view_ids[std::distance(image_view_infos.begin(), it)];
|
||||
}
|
||||
|
||||
void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
|
||||
vk::CommandBuffer cmdbuf = {});
|
||||
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
|
||||
vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
|
||||
vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);
|
||||
void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
|
||||
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
|
||||
void Upload(vk::Buffer buffer, u64 offset);
|
||||
|
||||
void CopyImage(const Image& image);
|
||||
|
@ -111,10 +116,14 @@ struct Image {
|
|||
|
||||
// Resource state tracking
|
||||
vk::ImageUsageFlags usage;
|
||||
vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
|
||||
vk::Flags<vk::AccessFlagBits> access_mask = vk::AccessFlagBits::eNone;
|
||||
struct State {
|
||||
vk::Flags<vk::PipelineStageFlagBits2> pl_stage = vk::PipelineStageFlagBits2::eAllCommands;
|
||||
vk::Flags<vk::AccessFlagBits2> access_mask = vk::AccessFlagBits2::eNone;
|
||||
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
|
||||
boost::container::small_vector<u64, 14> mip_hashes;
|
||||
};
|
||||
State last_state{};
|
||||
std::vector<State> subresource_states{};
|
||||
boost::container::small_vector<u64, 14> mip_hashes{};
|
||||
u64 tick_accessed_last{0};
|
||||
};
|
||||
|
||||
|
|
|
@ -200,18 +200,12 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
|
|||
mips_layout.emplace_back(depth_slice_sz, pitch, 0);
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) noexcept {
|
||||
ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept {
|
||||
tiling_mode = image.GetTilingMode();
|
||||
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||
// Override format if image is forced to be a depth target
|
||||
if (force_depth) {
|
||||
if (pixel_format == vk::Format::eR32Sfloat || pixel_format == vk::Format::eR8Unorm) {
|
||||
pixel_format = vk::Format::eD32SfloatS8Uint;
|
||||
} else if (pixel_format == vk::Format::eR16Unorm) {
|
||||
pixel_format = vk::Format::eD16UnormS8Uint;
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
if (desc.is_depth) {
|
||||
pixel_format = LiverpoolToVK::PromoteFormatToDepth(pixel_format);
|
||||
}
|
||||
type = ConvertImageType(image.GetType());
|
||||
props.is_tiled = image.IsTiled();
|
||||
|
@ -224,7 +218,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n
|
|||
size.depth = props.is_volume ? image.depth + 1 : 1;
|
||||
pitch = image.Pitch();
|
||||
resources.levels = image.NumLevels();
|
||||
resources.layers = image.NumLayers();
|
||||
resources.layers = image.NumLayers(desc.is_array);
|
||||
num_bits = NumBits(image.GetDataFmt());
|
||||
usage.texture = true;
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "common/types.h"
|
||||
#include "core/libraries/videoout/buffer.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
|
||||
|
@ -19,7 +20,7 @@ struct ImageInfo {
|
|||
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
|
||||
ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
|
||||
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
|
||||
ImageInfo(const AmdGpu::Image& image, bool force_depth = false) noexcept;
|
||||
ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
|
||||
|
||||
bool IsTiled() const {
|
||||
return tiling_mode != AmdGpu::TilingMode::Display_Linear;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
@ -66,19 +67,40 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
|
|||
return format;
|
||||
}
|
||||
|
||||
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept
|
||||
: is_storage{is_storage_} {
|
||||
type = ConvertImageViewType(image.GetType());
|
||||
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept
|
||||
: is_storage{desc.is_storage} {
|
||||
const auto dfmt = image.GetDataFmt();
|
||||
auto nfmt = image.GetNumberFmt();
|
||||
if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) {
|
||||
nfmt = AmdGpu::NumberFormat::Unorm;
|
||||
}
|
||||
format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt);
|
||||
if (desc.is_depth) {
|
||||
format = Vulkan::LiverpoolToVK::PromoteFormatToDepth(format);
|
||||
}
|
||||
range.base.level = image.base_level;
|
||||
range.base.layer = image.base_array;
|
||||
range.extent.levels = image.last_level + 1;
|
||||
range.extent.layers = image.last_array + 1;
|
||||
range.extent.levels = image.last_level - image.base_level + 1;
|
||||
range.extent.layers = image.last_array - image.base_array + 1;
|
||||
type = ConvertImageViewType(image.GetType());
|
||||
|
||||
// Adjust view type for partial cubemaps and arrays
|
||||
if (image.IsPartialCubemap()) {
|
||||
type = vk::ImageViewType::e2DArray;
|
||||
}
|
||||
if (type == vk::ImageViewType::eCube) {
|
||||
if (desc.is_array) {
|
||||
type = vk::ImageViewType::eCubeArray;
|
||||
} else {
|
||||
// Some games try to bind an array of cubemaps while the shader reads only a single one.
|
||||
range.extent.layers = std::min(range.extent.layers, 6u);
|
||||
}
|
||||
}
|
||||
if (type == vk::ImageViewType::e3D && range.extent.layers > 1) {
|
||||
// Some games pass an incorrect layer count for 3D textures so we need to fix it up.
|
||||
range.extent.layers = 1;
|
||||
}
|
||||
|
||||
if (!is_storage) {
|
||||
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
|
||||
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
|
||||
|
@ -103,7 +125,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
|
|||
const auto base_format =
|
||||
Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat());
|
||||
range.base.layer = col_buffer.view.slice_start;
|
||||
range.extent.layers = col_buffer.NumSlices();
|
||||
range.extent.layers = col_buffer.NumSlices() - range.base.layer;
|
||||
format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(
|
||||
base_format, col_buffer.info.comp_swap.Value(), is_vo_surface);
|
||||
}
|
||||
|
@ -115,7 +137,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
|
|||
depth_buffer.stencil_info.format);
|
||||
is_storage = ctl.depth_write_enable;
|
||||
range.base.layer = view.slice_start;
|
||||
range.extent.layers = view.NumSlices();
|
||||
range.extent.layers = view.NumSlices() - range.base.layer;
|
||||
}
|
||||
|
||||
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
|
||||
|
@ -147,9 +169,9 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
|||
.subresourceRange{
|
||||
.aspectMask = aspect,
|
||||
.baseMipLevel = info.range.base.level,
|
||||
.levelCount = info.range.extent.levels - info.range.base.level,
|
||||
.levelCount = info.range.extent.levels,
|
||||
.baseArrayLayer = info.range.base.layer,
|
||||
.layerCount = info.range.extent.layers - info.range.base.layer,
|
||||
.layerCount = info.range.extent.layers,
|
||||
},
|
||||
};
|
||||
image_view = instance.GetDevice().createImageViewUnique(image_view_ci);
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
@ -17,7 +18,7 @@ namespace VideoCore {
|
|||
|
||||
struct ImageViewInfo {
|
||||
ImageViewInfo() = default;
|
||||
ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept;
|
||||
ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
|
||||
ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept;
|
||||
ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
|
||||
AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl);
|
||||
|
|
|
@ -87,8 +87,7 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Image
|
|||
auto new_image_id = slot_images.insert(instance, scheduler, requested_info);
|
||||
RegisterImage(new_image_id);
|
||||
|
||||
// auto& new_image = slot_images[new_image_id];
|
||||
// TODO: need to run a helper for depth copy here
|
||||
// TODO: perform a depth copy here
|
||||
|
||||
FreeImage(cache_image_id);
|
||||
return new_image_id;
|
||||
|
@ -98,7 +97,11 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Image
|
|||
!requested_info.usage.depth_target &&
|
||||
(requested_info.usage.texture || requested_info.usage.storage);
|
||||
if (cache_info.usage.depth_target && should_bind_as_texture) {
|
||||
if (cache_info.resources == requested_info.resources) {
|
||||
return cache_image_id;
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
|
@ -154,7 +157,7 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_
|
|||
|
||||
if (tex_cache_image.info.IsMipOf(image_info)) {
|
||||
tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal,
|
||||
vk::AccessFlagBits::eTransferRead);
|
||||
vk::AccessFlagBits2::eTransferRead, {});
|
||||
|
||||
const auto num_mips_to_copy = tex_cache_image.info.resources.levels;
|
||||
ASSERT(num_mips_to_copy == 1);
|
||||
|
@ -176,9 +179,13 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
|
|||
auto& src_image = slot_images[image_id];
|
||||
auto& new_image = slot_images[new_image_id];
|
||||
|
||||
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
|
||||
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
new_image.CopyImage(src_image);
|
||||
|
||||
if (True(src_image.flags & ImageFlagBits::Bound)) {
|
||||
src_image.flags |= ImageFlagBits::NeedsRebind;
|
||||
}
|
||||
|
||||
FreeImage(image_id);
|
||||
|
||||
TrackImage(new_image_id);
|
||||
|
@ -255,21 +262,21 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
|
|||
return slot_image_views[view_id];
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
|
||||
const ImageId image_id = FindImage(info);
|
||||
ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) {
|
||||
Image& image = slot_images[image_id];
|
||||
UpdateImage(image_id);
|
||||
auto& usage = image.info.usage;
|
||||
|
||||
if (view_info.is_storage) {
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
|
||||
view_info.range);
|
||||
usage.storage = true;
|
||||
} else {
|
||||
const auto new_layout = image.info.IsDepthStencil()
|
||||
? vk::ImageLayout::eDepthStencilReadOnlyOptimal
|
||||
: vk::ImageLayout::eShaderReadOnlyOptimal;
|
||||
image.Transit(new_layout, vk::AccessFlagBits::eShaderRead);
|
||||
image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead, view_info.range);
|
||||
usage.texture = true;
|
||||
}
|
||||
|
||||
|
@ -284,8 +291,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
|
|||
UpdateImage(image_id);
|
||||
|
||||
image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
|
||||
vk::AccessFlagBits::eColorAttachmentWrite |
|
||||
vk::AccessFlagBits::eColorAttachmentRead);
|
||||
vk::AccessFlagBits2::eColorAttachmentWrite |
|
||||
vk::AccessFlagBits2::eColorAttachmentRead,
|
||||
view_info.range);
|
||||
|
||||
// Register meta data for this color buffer
|
||||
if (!(image.flags & ImageFlagBits::MetaRegistered)) {
|
||||
|
@ -330,8 +338,10 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
|
|||
: vk::ImageLayout::eDepthAttachmentOptimal
|
||||
: has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
|
||||
: vk::ImageLayout::eDepthReadOnlyOptimal;
|
||||
image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentRead);
|
||||
image.Transit(new_layout,
|
||||
vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
|
||||
vk::AccessFlagBits2::eDepthStencilAttachmentRead,
|
||||
view_info.range);
|
||||
|
||||
// Register meta data for this depth buffer
|
||||
if (!(image.flags & ImageFlagBits::MetaRegistered)) {
|
||||
|
@ -404,7 +414,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
|||
sched_ptr->EndRendering();
|
||||
|
||||
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);
|
||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {},
|
||||
cmdbuf);
|
||||
|
||||
const VAddr image_addr = image.info.guest_address;
|
||||
const size_t image_size = image.info.guest_size_bytes;
|
||||
|
|
|
@ -59,9 +59,8 @@ public:
|
|||
/// Retrieves the image handle of the image with the provided attributes.
|
||||
[[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {});
|
||||
|
||||
/// Retrieves an image view with the properties of the specified image descriptor.
|
||||
[[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info,
|
||||
const ImageViewInfo& view_info);
|
||||
/// Retrieves an image view with the properties of the specified image id.
|
||||
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info);
|
||||
|
||||
/// Retrieves the render target with specified properties
|
||||
[[nodiscard]] ImageView& FindRenderTarget(const ImageInfo& image_info,
|
||||
|
|
Loading…
Reference in a new issue