diff --git a/CMakeLists.txt b/CMakeLists.txt index 16b49cf08..f59ab063d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -738,6 +738,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_resource_pool.h src/video_core/renderer_vulkan/vk_scheduler.cpp src/video_core/renderer_vulkan/vk_scheduler.h + src/video_core/renderer_vulkan/vk_shader_hle.cpp + src/video_core/renderer_vulkan/vk_shader_hle.h src/video_core/renderer_vulkan/vk_shader_util.cpp src/video_core/renderer_vulkan/vk_shader_util.h src/video_core/renderer_vulkan/vk_swapchain.cpp diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0471fdb0a..33358b850 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -8,6 +8,7 @@ #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_hle.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/texture_cache.h" #include "vk_rasterizer.h" @@ -318,6 +319,11 @@ void Rasterizer::DispatchDirect() { return; } + const auto& cs = pipeline->GetStage(Shader::Stage::Compute); + if (ExecuteShaderHLE(cs, liverpool->regs, *this)) { + return; + } + if (!BindResources(pipeline)) { return; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index fe8aceba7..401619afa 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -28,6 +28,14 @@ public: AmdGpu::Liverpool* liverpool); ~Rasterizer(); + [[nodiscard]] Scheduler& GetScheduler() noexcept { + return scheduler; + } + + [[nodiscard]] VideoCore::BufferCache& GetBufferCache() noexcept { + return buffer_cache; + } + [[nodiscard]] VideoCore::TextureCache& GetTextureCache() noexcept { return texture_cache; } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 1140bfbc2..45a9228c9 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -10,6 +10,10 @@ #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" +namespace tracy { +class VkCtxScope; +} + namespace Vulkan { class Instance; diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp new file mode 100644 index 000000000..7c6b25e7b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -0,0 +1,134 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_hle.h" + +#include "vk_rasterizer.h" + +namespace Vulkan { + +static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; + +bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, + Rasterizer& rasterizer) { + auto& scheduler = rasterizer.GetScheduler(); + auto& buffer_cache = rasterizer.GetBufferCache(); + + // Copy shader defines three formatted buffers as inputs: control, source, and destination. + const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info); + const auto src_buf_sharp = info.texture_buffers[1].GetSharp(info); + const auto dst_buf_sharp = info.texture_buffers[2].GetSharp(info); + const auto buf_stride = src_buf_sharp.GetStride(); + ASSERT(buf_stride == dst_buf_sharp.GetStride()); + + struct CopyShaderControl { + u32 dst_idx; + u32 src_idx; + u32 end; + }; + static_assert(sizeof(CopyShaderControl) == 12); + ASSERT(ctl_buf_sharp.GetStride() == sizeof(CopyShaderControl)); + const auto ctl_buf = reinterpret_cast<CopyShaderControl*>(ctl_buf_sharp.base_address); + + // Add list of copies + boost::container::set<vk::BufferCopy> copies; + for (u32 i = 0; i < regs.cs_program.dim_x; i++) { + const auto& [dst_idx, src_idx, end] = ctl_buf[i]; + const u32 local_dst_offset = dst_idx * buf_stride; + const u32 local_src_offset = src_idx * buf_stride; + const u32 local_size = (end + 1) * buf_stride; + copies.emplace(local_src_offset, local_dst_offset, local_size); + } + + scheduler.EndRendering(); + + static constexpr vk::MemoryBarrier READ_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + }; + static constexpr vk::MemoryBarrier WRITE_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + scheduler.CommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {}); + + static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB; + boost::container::small_vector<vk::BufferCopy, 32> batch_copies; + while (!copies.empty()) { + // Place first copy into the current batch + auto it = copies.begin(); + auto src_offset_min = it->srcOffset; + auto src_offset_max = it->srcOffset + it->size; + auto dst_offset_min = it->dstOffset; + auto dst_offset_max = it->dstOffset + it->size; + batch_copies.emplace_back(*it); + it = copies.erase(it); + + while (it != copies.end()) { + // Compute new src and dst bounds if we were to batch this copy + auto new_src_offset_min = std::min(src_offset_min, it->srcOffset); + auto new_src_offset_max = std::max(src_offset_max, it->srcOffset + it->size); + if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) { + ++it; + continue; + } + + auto new_dst_offset_min = std::min(dst_offset_min, it->dstOffset); + auto new_dst_offset_max = std::max(dst_offset_max, it->dstOffset + it->size); + if (new_dst_offset_max - new_dst_offset_min > MaxDistanceForMerge) { + ++it; + continue; + } + + // We can batch this copy + src_offset_min = new_src_offset_min; + src_offset_max = new_src_offset_max; + dst_offset_min = new_dst_offset_min; + dst_offset_max = new_dst_offset_max; + batch_copies.emplace_back(*it); + it = copies.erase(it); + } + + // Obtain buffers for the total source and destination ranges. + const auto [src_buf, src_buf_offset] = + buffer_cache.ObtainBuffer(src_buf_sharp.base_address + src_offset_min, + src_offset_max - src_offset_min, false, false); + const auto [dst_buf, dst_buf_offset] = + buffer_cache.ObtainBuffer(dst_buf_sharp.base_address + dst_offset_min, + dst_offset_max - dst_offset_min, true, false); + + // Apply found buffer base. + for (auto& vk_copy : batch_copies) { + vk_copy.srcOffset = vk_copy.srcOffset - src_offset_min + src_buf_offset; + vk_copy.dstOffset = vk_copy.dstOffset - dst_offset_min + dst_buf_offset; + } + + // Execute buffer copies. + LOG_TRACE(Render_Vulkan, "HLE buffer copy: src_size = {}, dst_size = {}", + src_offset_max - src_offset_min, dst_offset_max - dst_offset_min); + scheduler.CommandBuffer().copyBuffer(src_buf->Handle(), dst_buf->Handle(), batch_copies); + batch_copies.clear(); + } + + scheduler.CommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + + return true; +} + +bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, + Rasterizer& rasterizer) { + switch (info.pgm_hash) { + case COPY_SHADER_HASH: + return ExecuteCopyShaderHLE(info, regs, rasterizer); + default: + return false; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.h b/src/video_core/renderer_vulkan/vk_shader_hle.h new file mode 100644 index 000000000..fda9b1735 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_hle.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/amdgpu/liverpool.h" + +namespace Shader { +struct Info; +} + +namespace Vulkan { + +class Rasterizer; + +/// Attempts to execute a shader using HLE if possible. +bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, + Rasterizer& rasterizer); + +} // namespace Vulkan