texture_cache: Minor barrier cleanup

* Batch image and buffer barriers in a single command
IndecisiveTurtle 2024-12-24 22:43:08 +02:00
parent a56b092854
commit 33b481fdf5
5 changed files with 158 additions and 77 deletions
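
The pattern adopted throughout this change is VK_KHR_synchronization2: each hazard is described by a vk::BufferMemoryBarrier2 or vk::ImageMemoryBarrier2 that carries its own stage masks, and all of them are recorded together through a single vk::DependencyInfo with one pipelineBarrier2() call, instead of separate vkCmdPipelineBarrier calls per resource type. The sketch below only illustrates that pattern; it is not code from this commit, the function name and handles (BatchedBarrierExample, cmdbuf, buffer, image) are placeholders, and it assumes Vulkan-Hpp with designated initializers enabled and a device with synchronization2 available.

#define VULKAN_HPP_NO_STRUCT_CONSTRUCTORS
#include <vulkan/vulkan.hpp>

// Illustrative only: record one buffer barrier and one image barrier in a
// single synchronization2 submission. All handles are assumed valid and
// created elsewhere; the stage/access masks and layouts are example values.
void BatchedBarrierExample(vk::CommandBuffer cmdbuf, vk::Buffer buffer, vk::Image image) {
    const vk::BufferMemoryBarrier2 buffer_barrier{
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
        .buffer = buffer,
        .offset = 0,
        .size = VK_WHOLE_SIZE,
    };
    const vk::ImageMemoryBarrier2 image_barrier{
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .oldLayout = vk::ImageLayout::eUndefined,
        .newLayout = vk::ImageLayout::eTransferDstOptimal,
        .image = image,
        .subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1},
    };
    // One call covers both resource types; the old path needed a separate
    // vkCmdPipelineBarrier recording for each.
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &buffer_barrier,
        .imageMemoryBarrierCount = 1,
        .pImageMemoryBarriers = &image_barrier,
    });
}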


@@ -510,21 +510,48 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
     };
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
-    static constexpr vk::MemoryBarrier READ_BARRIER{
-        .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
-        .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
+    const std::array pre_barriers = {
+        vk::BufferMemoryBarrier2{
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+            .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+            .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
+            .buffer = overlap.Handle(),
+            .offset = 0,
+            .size = overlap.SizeBytes(),
+        },
     };
-    static constexpr vk::MemoryBarrier WRITE_BARRIER{
-        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
-        .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
+    const std::array post_barriers = {
+        vk::BufferMemoryBarrier2{
+            .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+            .srcAccessMask = vk::AccessFlagBits2::eTransferRead,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
+            .buffer = overlap.Handle(),
+            .offset = 0,
+            .size = overlap.SizeBytes(),
+        },
+        vk::BufferMemoryBarrier2{
+            .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+            .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+            .buffer = new_buffer.Handle(),
+            .offset = dst_base_offset,
+            .size = overlap.SizeBytes(),
+        },
     };
-    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
-                           vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
-                           READ_BARRIER, {}, {});
-    cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy);
-    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
-                           vk::PipelineStageFlagBits::eAllCommands,
-                           vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = pre_barriers.data(),
+    });
+    cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),
+        .pBufferMemoryBarriers = post_barriers.data(),
+    });
     DeleteBuffer(overlap_id);
 }
 
@@ -628,21 +655,35 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
     }
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
-    static constexpr vk::MemoryBarrier READ_BARRIER{
-        .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
-        .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
+    const vk::BufferMemoryBarrier2 pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = buffer.Handle(),
+        .offset = 0,
+        .size = buffer.SizeBytes(),
     };
-    static constexpr vk::MemoryBarrier WRITE_BARRIER{
-        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
-        .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
+    const vk::BufferMemoryBarrier2 post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+        .buffer = buffer.Handle(),
+        .offset = 0,
+        .size = buffer.SizeBytes(),
     };
-    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
-                           vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
-                           READ_BARRIER, {}, {});
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+    });
     cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
-    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
-                           vk::PipelineStageFlagBits::eAllCommands,
-                           vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
 }
 
 bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
@@ -692,10 +733,42 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
     }
     if (!copies.empty()) {
         scheduler.EndRendering();
-        image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
+        const vk::BufferMemoryBarrier2 pre_barrier = {
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+            .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+            .buffer = buffer.Handle(),
+            .offset = max_offset - size,
+            .size = size,
+        };
+        const vk::BufferMemoryBarrier2 post_barrier = {
+            .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+            .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
+            .buffer = buffer.Handle(),
+            .offset = max_offset - size,
+            .size = size,
+        };
+        auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal,
+                                          vk::AccessFlagBits2::eTransferRead,
+                                          vk::PipelineStageFlagBits2::eTransfer, {});
         const auto cmdbuf = scheduler.CommandBuffer();
-        cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
+        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+            .bufferMemoryBarrierCount = 1,
+            .pBufferMemoryBarriers = &pre_barrier,
+            .imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
+            .pImageMemoryBarriers = barriers.data(),
+        });
+        cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(),
                                  copies);
+        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+            .bufferMemoryBarrierCount = 1,
+            .pBufferMemoryBarriers = &post_barrier,
+        });
     }
     return true;
 }


@@ -1,6 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
-
+#pragma clang optimize off
 // Include the vulkan platform specific header
 #if defined(ANDROID)
 #define VK_USE_PLATFORM_ANDROID_KHR


@@ -542,52 +542,60 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
     sched_ptr->EndRendering();
     const auto cmdbuf = sched_ptr->CommandBuffer();
-    image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {},
-                  cmdbuf);
     const VAddr image_addr = image.info.guest_address;
     const size_t image_size = image.info.guest_size_bytes;
     const auto [vk_buffer, buf_offset] =
         buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
-    // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
-    // hazard
+    // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW hazard
     if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
                                              vk::PipelineStageFlagBits2::eTransfer)) {
-        const auto dependencies = vk::DependencyInfo{
+        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
             .dependencyFlags = vk::DependencyFlagBits::eByRegion,
             .bufferMemoryBarrierCount = 1,
             .pBufferMemoryBarriers = &barrier.value(),
-        };
-        cmdbuf.pipelineBarrier2(dependencies);
+        });
     }
-    const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image);
+    const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image.info);
     for (auto& copy : image_copy) {
         copy.bufferOffset += offset;
     }
-    const vk::BufferMemoryBarrier pre_barrier{
-        .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
-        .dstAccessMask = vk::AccessFlagBits::eTransferRead,
+    const vk::BufferMemoryBarrier2 pre_barrier{
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
         .buffer = buffer,
         .offset = offset,
         .size = image_size,
     };
-    const vk::BufferMemoryBarrier post_barrier{
-        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
-        .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
+    const vk::BufferMemoryBarrier2 post_barrier{
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
         .buffer = buffer,
         .offset = offset,
         .size = image_size,
     };
-    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
-                           vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
-                           {}, pre_barrier, {});
+    const auto image_barriers =
+        image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
+                          vk::PipelineStageFlagBits2::eTransfer, {});
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+        .imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
+        .pImageMemoryBarriers = image_barriers.data(),
+    });
     cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
-    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
-                           vk::PipelineStageFlagBits::eAllCommands,
-                           vk::DependencyFlagBits::eByRegion,
-                           {}, post_barrier, {});
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
     image.flags &= ~ImageFlagBits::Dirty;
 }


@@ -4,6 +4,7 @@
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
+#include "video_core/texture_cache/image_info.h"
 #include "video_core/texture_cache/image_view.h"
 #include "video_core/texture_cache/tile_manager.h"
@@ -82,10 +83,10 @@ static vk::Format DemoteImageFormatForDetiling(vk::Format format) {
     return format;
 }
 
-const DetilerContext* TileManager::GetDetiler(const Image& image) const {
-    const auto format = DemoteImageFormatForDetiling(image.info.pixel_format);
-    switch (image.info.tiling_mode) {
+const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
+    const auto format = DemoteImageFormatForDetiling(info.pixel_format);
+    switch (info.tiling_mode) {
     case AmdGpu::TilingMode::Texture_MicroTiled:
         switch (format) {
         case vk::Format::eR8Uint:
@@ -254,23 +255,23 @@ void TileManager::FreeBuffer(ScratchBuffer buffer) {
 }
 
 std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_offset,
-                                                  Image& image) {
-    if (!image.info.props.is_tiled) {
+                                                  const ImageInfo& info) {
+    if (!info.props.is_tiled) {
         return {in_buffer, in_offset};
     }
 
-    const auto* detiler = GetDetiler(image);
+    const auto* detiler = GetDetiler(info);
     if (!detiler) {
-        if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled &&
-            image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled &&
-            image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) {
+        if (info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled &&
+            info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled &&
+            info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) {
             LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
-                      vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
+                      vk::to_string(info.pixel_format), NameOf(info.tiling_mode));
         }
         return {in_buffer, in_offset};
     }
 
-    const u32 image_size = image.info.guest_size_bytes;
+    const u32 image_size = info.guest_size_bytes;
 
     // Prepare output buffer
     auto out_buffer = AllocBuffer(image_size, true);
@@ -313,22 +314,21 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
                                      set_writes);
 
     DetilerParams params;
-    params.num_levels = image.info.resources.levels;
-    params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u);
-    params.height = image.info.size.height;
-    if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) {
-        ASSERT(image.info.resources.levels == 1);
-        ASSERT(image.info.num_bits >= 32);
-        const auto tiles_per_row = image.info.pitch / 8u;
-        const auto tiles_per_slice = tiles_per_row * ((image.info.size.height + 7u) / 8u);
+    params.num_levels = info.resources.levels;
+    params.pitch0 = info.pitch >> (info.props.is_block ? 2u : 0u);
+    params.height = info.size.height;
+    if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) {
+        ASSERT(info.resources.levels == 1);
+        ASSERT(info.num_bits >= 32);
+        const auto tiles_per_row = info.pitch / 8u;
+        const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u);
         params.sizes[0] = tiles_per_row;
         params.sizes[1] = tiles_per_slice;
     } else {
-
-        ASSERT(image.info.resources.levels <= 14);
+        ASSERT(info.resources.levels <= 14);
         std::memset(&params.sizes, 0, sizeof(params.sizes));
-        for (int m = 0; m < image.info.resources.levels; ++m) {
-            params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers +
+        for (int m = 0; m < info.resources.levels; ++m) {
+            params.sizes[m] = info.mips_layout[m].size * info.resources.layers +
                               (m > 0 ? params.sizes[m - 1] : 0);
         }
     }
@@ -337,7 +337,7 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
                          &params);
 
     ASSERT((image_size % 64) == 0);
-    const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u);
+    const auto bpp = info.num_bits * (info.props.is_block ? 16u : 1u);
     const auto num_tiles = image_size / (64 * (bpp / 8));
     cmdbuf.dispatch(num_tiles, 1, 1);
     return {out_buffer.first, 0};


@@ -5,11 +5,11 @@
 #include "common/types.h"
 #include "video_core/buffer_cache/buffer.h"
-#include "video_core/texture_cache/image.h"
 
 namespace VideoCore {
 
 class TextureCache;
+struct ImageInfo;
 
 enum DetilerType : u32 {
     Micro8x1,
@@ -36,14 +36,14 @@ public:
     TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
     ~TileManager();
 
-    std::pair<vk::Buffer, u32> TryDetile(vk::Buffer in_buffer, u32 in_offset, Image& image);
+    std::pair<vk::Buffer, u32> TryDetile(vk::Buffer in_buffer, u32 in_offset, const ImageInfo& info);
 
     ScratchBuffer AllocBuffer(u32 size, bool is_storage = false);
     void Upload(ScratchBuffer buffer, const void* data, size_t size);
     void FreeBuffer(ScratchBuffer buffer);
 
 private:
-    const DetilerContext* GetDetiler(const Image& image) const;
+    const DetilerContext* GetDetiler(const ImageInfo& info) const;
 
 private:
     const Vulkan::Instance& instance;