From e9ede8d62749d2697c0b807296846d132acf4919 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sat, 14 Dec 2024 16:17:14 +0200 Subject: [PATCH] Revert "DmaData and Recompiler fixes (#1775)" (#1784) This reverts commit cafd40f2c2f2d0062979ad1ec12b6d755eeb4e81. --- .../frontend/control_flow_graph.cpp | 1 - src/video_core/amdgpu/liverpool.cpp | 26 ++-- src/video_core/buffer_cache/buffer_cache.cpp | 124 +----------------- src/video_core/buffer_cache/buffer_cache.h | 1 - .../renderer_vulkan/vk_rasterizer.cpp | 4 - .../renderer_vulkan/vk_rasterizer.h | 1 - 6 files changed, 17 insertions(+), 140 deletions(-) diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 1fb129f6..8c3122b2 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -80,7 +80,6 @@ void CFG::EmitLabels() { if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); AddLabel(target); - AddLabel(pc + inst.length); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 820903ab..8db2d63c 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -573,21 +573,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), - dma_data->NumBytes(), true, false); + rasterizer->InlineData(dma_data->dst_addr_lo, + dma_data->SrcAddress(), + dma_data->NumBytes(), true); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, - dma_data->NumBytes(), false, true); + // LOG_WARNING(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), - dma_data->SrcAddress(), dma_data->NumBytes(), - false, false); + rasterizer->InlineData(dma_data->DstAddress(), + dma_data->SrcAddress(), + dma_data->NumBytes(), false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); @@ -731,20 +731,20 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), - dma_data->NumBytes(), true, false); + rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress(), + dma_data->NumBytes(), true); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, - dma_data->NumBytes(), false, true); + // LOG_WARNING(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->SrcAddress(), - dma_data->NumBytes(), false, false); + rasterizer->InlineData(dma_data->DstAddress(), + dma_data->SrcAddress(), dma_data->NumBytes(), + false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 31b2a2c5..e9fc0649 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -312,23 +312,8 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo const BufferId buffer_id = FindBuffer(address, num_bytes); return &slot_buffers[buffer_id]; }(); - const vk::BufferMemoryBarrier2 buf_barrier_before = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = buffer->Handle(), - .offset = buffer->Offset(address), - .size = num_bytes, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier_before, - }); - cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value); - const vk::BufferMemoryBarrier2 buf_barrier_after = { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + const vk::BufferMemoryBarrier2 buf_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, @@ -339,96 +324,9 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier_after, - }); -} - -void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { - if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) { - if (!src_gds && !IsRegionRegistered(src, num_bytes)) { - // Both buffers were not transferred to GPU yet. Can safely copy in host memory. - memcpy(std::bit_cast(dst), std::bit_cast(src), num_bytes); - return; - } - // Without a readback there's nothing we can do with this - // Fallback to creating dst buffer on GPU to at least have this data there - } - if (!src_gds && !IsRegionRegistered(src, num_bytes)) { - InlineData(dst, std::bit_cast(src), num_bytes, dst_gds); - return; - } - auto& src_buffer = [&] -> const Buffer& { - if (src_gds) { - return gds_buffer; - } - const BufferId buffer_id = FindBuffer(src, num_bytes); - return slot_buffers[buffer_id]; - }(); - auto& dst_buffer = [&] -> const Buffer& { - if (dst_gds) { - return gds_buffer; - } - const BufferId buffer_id = FindBuffer(dst, num_bytes); - return slot_buffers[buffer_id]; - }(); - vk::BufferCopy region{ - .srcOffset = src_buffer.Offset(src), - .dstOffset = dst_buffer.Offset(dst), - .size = num_bytes, - }; - const vk::BufferMemoryBarrier2 buf_barriers_before[2] = { - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = dst_buffer.Handle(), - .offset = dst_buffer.Offset(dst), - .size = num_bytes, - }, - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eTransferRead, - .buffer = src_buffer.Handle(), - .offset = src_buffer.Offset(src), - .size = num_bytes, - }, - }; - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 2, - .pBufferMemoryBarriers = buf_barriers_before, - }); - cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region); - const vk::BufferMemoryBarrier2 buf_barriers_after[2] = { - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - .buffer = dst_buffer.Handle(), - .offset = dst_buffer.Offset(dst), - .size = num_bytes, - }, - { - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eTransferRead, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, - .buffer = src_buffer.Handle(), - .offset = src_buffer.Offset(src), - .size = num_bytes, - }, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 2, - .pBufferMemoryBarriers = buf_barriers_after, + .pBufferMemoryBarriers = &buf_barrier, }); + cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainHostUBO(std::span data) { @@ -803,22 +701,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, scheduler.EndRendering(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, - }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {}); cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, copies); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); } return true; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 4c57e9c2..e6291341 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -87,7 +87,6 @@ public: /// Writes a value to GPU buffer. void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); - void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); [[nodiscard]] std::pair ObtainHostUBO(std::span data); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9e9b40ca..fef4c7ec 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -840,10 +840,6 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo buffer_cache.InlineData(address, value, num_bytes, is_gds); } -void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { - buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds); -} - u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { auto* gds_buf = buffer_cache.GetGdsBuffer(); u32 value; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b5bead69..ec1b5e13 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -53,7 +53,6 @@ public: void ScopedMarkerInsertColor(const std::string_view& str, const u32 color); void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); - void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); u32 ReadDataFromGds(u32 gsd_offset); bool InvalidateMemory(VAddr addr, u64 size); bool IsMapped(VAddr addr, u64 size);