mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-15 11:25:13 +00:00
renderer_vulkan: Cleanup and improve barriers in caches (#1865)
* texture_cache: Stricter barriers on image upload
* buffer_cache: Stricter barrier for vkCmdUpdateBuffer
* vk_rasterizer: Barrier also normal buffers and make it apply to all stages
* texture_cache: Minor barrier cleanup
* Batch image and buffer barriers in a single command
* clang format
This commit is contained in:
parent
f7a8e2409c
commit
77d2172441
|
@ -259,7 +259,16 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
|
||||||
const BufferId buffer_id = FindBuffer(address, num_bytes);
|
const BufferId buffer_id = FindBuffer(address, num_bytes);
|
||||||
return &slot_buffers[buffer_id];
|
return &slot_buffers[buffer_id];
|
||||||
}();
|
}();
|
||||||
const vk::BufferMemoryBarrier2 buf_barrier = {
|
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||||
|
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||||
|
.buffer = buffer->Handle(),
|
||||||
|
.offset = buffer->Offset(address),
|
||||||
|
.size = num_bytes,
|
||||||
|
};
|
||||||
|
const vk::BufferMemoryBarrier2 post_barrier = {
|
||||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
@ -271,9 +280,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
|
||||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
.bufferMemoryBarrierCount = 1,
|
.bufferMemoryBarrierCount = 1,
|
||||||
.pBufferMemoryBarriers = &buf_barrier,
|
.pBufferMemoryBarriers = &pre_barrier,
|
||||||
|
});
|
||||||
|
cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
|
||||||
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
|
.bufferMemoryBarrierCount = 1,
|
||||||
|
.pBufferMemoryBarriers = &post_barrier,
|
||||||
});
|
});
|
||||||
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
|
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
|
||||||
|
@ -465,21 +479,48 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
|
||||||
};
|
};
|
||||||
scheduler.EndRendering();
|
scheduler.EndRendering();
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
static constexpr vk::MemoryBarrier READ_BARRIER{
|
const std::array pre_barriers = {
|
||||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
vk::BufferMemoryBarrier2{
|
||||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
|
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||||
|
.buffer = overlap.Handle(),
|
||||||
|
.offset = 0,
|
||||||
|
.size = overlap.SizeBytes(),
|
||||||
|
},
|
||||||
};
|
};
|
||||||
static constexpr vk::MemoryBarrier WRITE_BARRIER{
|
const std::array post_barriers = {
|
||||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
vk::BufferMemoryBarrier2{
|
||||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
|
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
|
||||||
|
.buffer = overlap.Handle(),
|
||||||
|
.offset = 0,
|
||||||
|
.size = overlap.SizeBytes(),
|
||||||
|
},
|
||||||
|
vk::BufferMemoryBarrier2{
|
||||||
|
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
|
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||||
|
.buffer = new_buffer.Handle(),
|
||||||
|
.offset = dst_base_offset,
|
||||||
|
.size = overlap.SizeBytes(),
|
||||||
|
},
|
||||||
};
|
};
|
||||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
READ_BARRIER, {}, {});
|
.bufferMemoryBarrierCount = 1,
|
||||||
cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy);
|
.pBufferMemoryBarriers = pre_barriers.data(),
|
||||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
|
});
|
||||||
vk::PipelineStageFlagBits::eAllCommands,
|
cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
|
||||||
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
|
.bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),
|
||||||
|
.pBufferMemoryBarriers = post_barriers.data(),
|
||||||
|
});
|
||||||
DeleteBuffer(overlap_id);
|
DeleteBuffer(overlap_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -583,21 +624,35 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||||
}
|
}
|
||||||
scheduler.EndRendering();
|
scheduler.EndRendering();
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
static constexpr vk::MemoryBarrier READ_BARRIER{
|
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
|
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||||
|
.buffer = buffer.Handle(),
|
||||||
|
.offset = 0,
|
||||||
|
.size = buffer.SizeBytes(),
|
||||||
};
|
};
|
||||||
static constexpr vk::MemoryBarrier WRITE_BARRIER{
|
const vk::BufferMemoryBarrier2 post_barrier = {
|
||||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||||
|
.buffer = buffer.Handle(),
|
||||||
|
.offset = 0,
|
||||||
|
.size = buffer.SizeBytes(),
|
||||||
};
|
};
|
||||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
READ_BARRIER, {}, {});
|
.bufferMemoryBarrierCount = 1,
|
||||||
|
.pBufferMemoryBarriers = &pre_barrier,
|
||||||
|
});
|
||||||
cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
|
cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
|
||||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
vk::PipelineStageFlagBits::eAllCommands,
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
|
.bufferMemoryBarrierCount = 1,
|
||||||
|
.pBufferMemoryBarriers = &post_barrier,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
|
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
|
||||||
|
@ -647,10 +702,42 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
||||||
}
|
}
|
||||||
if (!copies.empty()) {
|
if (!copies.empty()) {
|
||||||
scheduler.EndRendering();
|
scheduler.EndRendering();
|
||||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||||
|
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||||
|
.buffer = buffer.Handle(),
|
||||||
|
.offset = max_offset - size,
|
||||||
|
.size = size,
|
||||||
|
};
|
||||||
|
const vk::BufferMemoryBarrier2 post_barrier = {
|
||||||
|
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
|
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||||
|
.buffer = buffer.Handle(),
|
||||||
|
.offset = max_offset - size,
|
||||||
|
.size = size,
|
||||||
|
};
|
||||||
|
auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal,
|
||||||
|
vk::AccessFlagBits2::eTransferRead,
|
||||||
|
vk::PipelineStageFlagBits2::eTransfer, {});
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
|
.bufferMemoryBarrierCount = 1,
|
||||||
|
.pBufferMemoryBarriers = &pre_barrier,
|
||||||
|
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
|
||||||
|
.pImageMemoryBarriers = barriers.data(),
|
||||||
|
});
|
||||||
|
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(),
|
||||||
copies);
|
copies);
|
||||||
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
|
.bufferMemoryBarrierCount = 1,
|
||||||
|
.pBufferMemoryBarriers = &post_barrier,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -562,6 +562,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||||
push_data.AddOffset(binding.buffer, adjust);
|
push_data.AddOffset(binding.buffer, adjust);
|
||||||
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned,
|
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned,
|
||||||
vsharp.GetSize() + adjust);
|
vsharp.GetSize() + adjust);
|
||||||
|
if (auto barrier =
|
||||||
|
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||||
|
: vk::AccessFlagBits2::eShaderRead,
|
||||||
|
vk::PipelineStageFlagBits2::eAllCommands)) {
|
||||||
|
buffer_barriers.emplace_back(*barrier);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
set_writes.push_back({
|
set_writes.push_back({
|
||||||
|
@ -600,7 +606,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||||
if (auto barrier =
|
if (auto barrier =
|
||||||
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||||
: vk::AccessFlagBits2::eShaderRead,
|
: vk::AccessFlagBits2::eShaderRead,
|
||||||
vk::PipelineStageFlagBits2::eComputeShader)) {
|
vk::PipelineStageFlagBits2::eAllCommands)) {
|
||||||
buffer_barriers.emplace_back(*barrier);
|
buffer_barriers.emplace_back(*barrier);
|
||||||
}
|
}
|
||||||
if (desc.is_written) {
|
if (desc.is_written) {
|
||||||
|
|
|
@ -542,31 +542,62 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
||||||
sched_ptr->EndRendering();
|
sched_ptr->EndRendering();
|
||||||
|
|
||||||
const auto cmdbuf = sched_ptr->CommandBuffer();
|
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {},
|
|
||||||
cmdbuf);
|
|
||||||
|
|
||||||
const VAddr image_addr = image.info.guest_address;
|
const VAddr image_addr = image.info.guest_address;
|
||||||
const size_t image_size = image.info.guest_size_bytes;
|
const size_t image_size = image.info.guest_size_bytes;
|
||||||
const auto [vk_buffer, buf_offset] =
|
const auto [vk_buffer, buf_offset] =
|
||||||
buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
|
buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
|
||||||
|
|
||||||
// The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
|
// The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
|
||||||
// hazard
|
// hazard
|
||||||
if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
||||||
vk::PipelineStageFlagBits2::eTransfer)) {
|
vk::PipelineStageFlagBits2::eTransfer)) {
|
||||||
const auto dependencies = vk::DependencyInfo{
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
.bufferMemoryBarrierCount = 1,
|
.bufferMemoryBarrierCount = 1,
|
||||||
.pBufferMemoryBarriers = &barrier.value(),
|
.pBufferMemoryBarriers = &barrier.value(),
|
||||||
};
|
});
|
||||||
cmdbuf.pipelineBarrier2(dependencies);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image);
|
const auto [buffer, offset] =
|
||||||
|
tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image.info);
|
||||||
for (auto& copy : image_copy) {
|
for (auto& copy : image_copy) {
|
||||||
copy.bufferOffset += offset;
|
copy.bufferOffset += offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const vk::BufferMemoryBarrier2 pre_barrier{
|
||||||
|
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||||
|
.buffer = buffer,
|
||||||
|
.offset = offset,
|
||||||
|
.size = image_size,
|
||||||
|
};
|
||||||
|
const vk::BufferMemoryBarrier2 post_barrier{
|
||||||
|
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||||
|
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||||
|
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||||
|
.buffer = buffer,
|
||||||
|
.offset = offset,
|
||||||
|
.size = image_size,
|
||||||
|
};
|
||||||
|
const auto image_barriers =
|
||||||
|
image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
|
||||||
|
vk::PipelineStageFlagBits2::eTransfer, {});
|
||||||
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
|
.bufferMemoryBarrierCount = 1,
|
||||||
|
.pBufferMemoryBarriers = &pre_barrier,
|
||||||
|
.imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
|
||||||
|
.pImageMemoryBarriers = image_barriers.data(),
|
||||||
|
});
|
||||||
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||||
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
|
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||||
|
.bufferMemoryBarrierCount = 1,
|
||||||
|
.pBufferMemoryBarriers = &post_barrier,
|
||||||
|
});
|
||||||
image.flags &= ~ImageFlagBits::Dirty;
|
image.flags &= ~ImageFlagBits::Dirty;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||||
|
#include "video_core/texture_cache/image_info.h"
|
||||||
#include "video_core/texture_cache/image_view.h"
|
#include "video_core/texture_cache/image_view.h"
|
||||||
#include "video_core/texture_cache/tile_manager.h"
|
#include "video_core/texture_cache/tile_manager.h"
|
||||||
|
|
||||||
|
@ -86,10 +87,10 @@ static vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||||
return format;
|
return format;
|
||||||
}
|
}
|
||||||
|
|
||||||
const DetilerContext* TileManager::GetDetiler(const Image& image) const {
|
const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
|
||||||
const auto format = DemoteImageFormatForDetiling(image.info.pixel_format);
|
const auto format = DemoteImageFormatForDetiling(info.pixel_format);
|
||||||
|
|
||||||
switch (image.info.tiling_mode) {
|
switch (info.tiling_mode) {
|
||||||
case AmdGpu::TilingMode::Texture_MicroTiled:
|
case AmdGpu::TilingMode::Texture_MicroTiled:
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case vk::Format::eR8Uint:
|
case vk::Format::eR8Uint:
|
||||||
|
@ -258,23 +259,23 @@ void TileManager::FreeBuffer(ScratchBuffer buffer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_offset,
|
std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_offset,
|
||||||
Image& image) {
|
const ImageInfo& info) {
|
||||||
if (!image.info.props.is_tiled) {
|
if (!info.props.is_tiled) {
|
||||||
return {in_buffer, in_offset};
|
return {in_buffer, in_offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto* detiler = GetDetiler(image);
|
const auto* detiler = GetDetiler(info);
|
||||||
if (!detiler) {
|
if (!detiler) {
|
||||||
if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled &&
|
if (info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled &&
|
||||||
image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled &&
|
info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled &&
|
||||||
image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) {
|
info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) {
|
||||||
LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
|
LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
|
||||||
vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
|
vk::to_string(info.pixel_format), NameOf(info.tiling_mode));
|
||||||
}
|
}
|
||||||
return {in_buffer, in_offset};
|
return {in_buffer, in_offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 image_size = image.info.guest_size_bytes;
|
const u32 image_size = info.guest_size_bytes;
|
||||||
|
|
||||||
// Prepare output buffer
|
// Prepare output buffer
|
||||||
auto out_buffer = AllocBuffer(image_size, true);
|
auto out_buffer = AllocBuffer(image_size, true);
|
||||||
|
@ -317,22 +318,21 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
|
||||||
set_writes);
|
set_writes);
|
||||||
|
|
||||||
DetilerParams params;
|
DetilerParams params;
|
||||||
params.num_levels = image.info.resources.levels;
|
params.num_levels = info.resources.levels;
|
||||||
params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u);
|
params.pitch0 = info.pitch >> (info.props.is_block ? 2u : 0u);
|
||||||
params.height = image.info.size.height;
|
params.height = info.size.height;
|
||||||
if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) {
|
if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) {
|
||||||
ASSERT(image.info.resources.levels == 1);
|
ASSERT(info.resources.levels == 1);
|
||||||
ASSERT(image.info.num_bits >= 32);
|
ASSERT(info.num_bits >= 32);
|
||||||
const auto tiles_per_row = image.info.pitch / 8u;
|
const auto tiles_per_row = info.pitch / 8u;
|
||||||
const auto tiles_per_slice = tiles_per_row * ((image.info.size.height + 7u) / 8u);
|
const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u);
|
||||||
params.sizes[0] = tiles_per_row;
|
params.sizes[0] = tiles_per_row;
|
||||||
params.sizes[1] = tiles_per_slice;
|
params.sizes[1] = tiles_per_slice;
|
||||||
} else {
|
} else {
|
||||||
|
ASSERT(info.resources.levels <= 14);
|
||||||
ASSERT(image.info.resources.levels <= 14);
|
|
||||||
std::memset(¶ms.sizes, 0, sizeof(params.sizes));
|
std::memset(¶ms.sizes, 0, sizeof(params.sizes));
|
||||||
for (int m = 0; m < image.info.resources.levels; ++m) {
|
for (int m = 0; m < info.resources.levels; ++m) {
|
||||||
params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers +
|
params.sizes[m] = info.mips_layout[m].size * info.resources.layers +
|
||||||
(m > 0 ? params.sizes[m - 1] : 0);
|
(m > 0 ? params.sizes[m - 1] : 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -341,20 +341,9 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
|
||||||
¶ms);
|
¶ms);
|
||||||
|
|
||||||
ASSERT((image_size % 64) == 0);
|
ASSERT((image_size % 64) == 0);
|
||||||
const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u);
|
const auto bpp = info.num_bits * (info.props.is_block ? 16u : 1u);
|
||||||
const auto num_tiles = image_size / (64 * (bpp / 8));
|
const auto num_tiles = image_size / (64 * (bpp / 8));
|
||||||
cmdbuf.dispatch(num_tiles, 1, 1);
|
cmdbuf.dispatch(num_tiles, 1, 1);
|
||||||
|
|
||||||
const vk::BufferMemoryBarrier post_barrier{
|
|
||||||
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
|
|
||||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
|
|
||||||
.buffer = out_buffer.first,
|
|
||||||
.size = image_size,
|
|
||||||
};
|
|
||||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
|
|
||||||
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
|
|
||||||
{}, post_barrier, {});
|
|
||||||
|
|
||||||
return {out_buffer.first, 0};
|
return {out_buffer.first, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,11 +5,11 @@
|
||||||
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "video_core/buffer_cache/buffer.h"
|
#include "video_core/buffer_cache/buffer.h"
|
||||||
#include "video_core/texture_cache/image.h"
|
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
class TextureCache;
|
class TextureCache;
|
||||||
|
struct ImageInfo;
|
||||||
|
|
||||||
enum DetilerType : u32 {
|
enum DetilerType : u32 {
|
||||||
Micro8x1,
|
Micro8x1,
|
||||||
|
@ -36,14 +36,15 @@ public:
|
||||||
TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
|
TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
|
||||||
~TileManager();
|
~TileManager();
|
||||||
|
|
||||||
std::pair<vk::Buffer, u32> TryDetile(vk::Buffer in_buffer, u32 in_offset, Image& image);
|
std::pair<vk::Buffer, u32> TryDetile(vk::Buffer in_buffer, u32 in_offset,
|
||||||
|
const ImageInfo& info);
|
||||||
|
|
||||||
ScratchBuffer AllocBuffer(u32 size, bool is_storage = false);
|
ScratchBuffer AllocBuffer(u32 size, bool is_storage = false);
|
||||||
void Upload(ScratchBuffer buffer, const void* data, size_t size);
|
void Upload(ScratchBuffer buffer, const void* data, size_t size);
|
||||||
void FreeBuffer(ScratchBuffer buffer);
|
void FreeBuffer(ScratchBuffer buffer);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const DetilerContext* GetDetiler(const Image& image) const;
|
const DetilerContext* GetDetiler(const ImageInfo& info) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Vulkan::Instance& instance;
|
const Vulkan::Instance& instance;
|
||||||
|
|
Loading…
Reference in a new issue