DmaData and Recompiler fixes (#1775)

* liverpool: fix dmadata packet handling

* recompiler: emit a label right after s_branch to prevent dead code interferrence

* specialize barriers
This commit is contained in:
Vladislav Mikhalin 2024-12-14 15:33:06 +03:00 committed by GitHub
parent c765f16e97
commit b8d1ebe666
6 changed files with 140 additions and 17 deletions

View file

@ -80,6 +80,7 @@ void CFG::EmitLabels() {
if (inst.IsUnconditionalBranch()) {
const u32 target = inst.BranchTarget(pc);
AddLabel(target);
AddLabel(pc + inst.length);
} else if (inst.IsConditionalBranch()) {
const u32 true_label = inst.BranchTarget(pc);
const u32 false_label = pc + inst.length;

View file

@ -573,21 +573,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
true);
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
dma_data->dst_sel == DmaDataDst::Gds) {
rasterizer->InlineData(dma_data->dst_addr_lo,
dma_data->SrcAddress<const void*>(),
dma_data->NumBytes(), true);
rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
dma_data->NumBytes(), true, false);
} else if (dma_data->src_sel == DmaDataSrc::Data &&
dma_data->dst_sel == DmaDataDst::Memory) {
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data,
sizeof(u32), false);
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
dma_data->dst_sel == DmaDataDst::Memory) {
// LOG_WARNING(Render_Vulkan, "GDS memory read");
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
dma_data->NumBytes(), false, true);
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
dma_data->dst_sel == DmaDataDst::Memory) {
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
dma_data->SrcAddress<const void*>(),
dma_data->NumBytes(), false);
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(),
dma_data->SrcAddress<VAddr>(), dma_data->NumBytes(),
false, false);
} else {
UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
@ -731,20 +731,20 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true);
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
dma_data->dst_sel == DmaDataDst::Gds) {
rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress<const void*>(),
dma_data->NumBytes(), true);
rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
dma_data->NumBytes(), true, false);
} else if (dma_data->src_sel == DmaDataSrc::Data &&
dma_data->dst_sel == DmaDataDst::Memory) {
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data, sizeof(u32),
false);
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
dma_data->dst_sel == DmaDataDst::Memory) {
// LOG_WARNING(Render_Vulkan, "GDS memory read");
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
dma_data->NumBytes(), false, true);
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
dma_data->dst_sel == DmaDataDst::Memory) {
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
dma_data->SrcAddress<const void*>(), dma_data->NumBytes(),
false);
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->SrcAddress<VAddr>(),
dma_data->NumBytes(), false, false);
} else {
UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));

View file

@ -312,8 +312,23 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
const BufferId buffer_id = FindBuffer(address, num_bytes);
return &slot_buffers[buffer_id];
}();
const vk::BufferMemoryBarrier2 buf_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
const vk::BufferMemoryBarrier2 buf_barrier_before = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer->Handle(),
.offset = buffer->Offset(address),
.size = num_bytes,
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &buf_barrier_before,
});
cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
const vk::BufferMemoryBarrier2 buf_barrier_after = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
@ -324,9 +339,96 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &buf_barrier,
.pBufferMemoryBarriers = &buf_barrier_after,
});
}
void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) {
if (!src_gds && !IsRegionRegistered(src, num_bytes)) {
// Both buffers were not transferred to GPU yet. Can safely copy in host memory.
memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
return;
}
// Without a readback there's nothing we can do with this
// Fallback to creating dst buffer on GPU to at least have this data there
}
if (!src_gds && !IsRegionRegistered(src, num_bytes)) {
InlineData(dst, std::bit_cast<void*>(src), num_bytes, dst_gds);
return;
}
auto& src_buffer = [&] -> const Buffer& {
if (src_gds) {
return gds_buffer;
}
const BufferId buffer_id = FindBuffer(src, num_bytes);
return slot_buffers[buffer_id];
}();
auto& dst_buffer = [&] -> const Buffer& {
if (dst_gds) {
return gds_buffer;
}
const BufferId buffer_id = FindBuffer(dst, num_bytes);
return slot_buffers[buffer_id];
}();
vk::BufferCopy region{
.srcOffset = src_buffer.Offset(src),
.dstOffset = dst_buffer.Offset(dst),
.size = num_bytes,
};
const vk::BufferMemoryBarrier2 buf_barriers_before[2] = {
{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = dst_buffer.Handle(),
.offset = dst_buffer.Offset(dst),
.size = num_bytes,
},
{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
.buffer = src_buffer.Handle(),
.offset = src_buffer.Offset(src),
.size = num_bytes,
},
};
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 2,
.pBufferMemoryBarriers = buf_barriers_before,
});
cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region);
const vk::BufferMemoryBarrier2 buf_barriers_after[2] = {
{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
.buffer = dst_buffer.Handle(),
.offset = dst_buffer.Offset(dst),
.size = num_bytes,
},
{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
.buffer = src_buffer.Handle(),
.offset = src_buffer.Offset(src),
.size = num_bytes,
},
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 2,
.pBufferMemoryBarriers = buf_barriers_after,
});
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
}
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
@ -701,8 +803,22 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
scheduler.EndRendering();
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
const auto cmdbuf = scheduler.CommandBuffer();
static constexpr vk::MemoryBarrier READ_BARRIER{
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
};
static constexpr vk::MemoryBarrier WRITE_BARRIER{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {});
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
copies);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
}
return true;
}

View file

@ -87,6 +87,7 @@ public:
/// Writes a value to GPU buffer.
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
[[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);

View file

@ -840,6 +840,10 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo
buffer_cache.InlineData(address, value, num_bytes, is_gds);
}
void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds);
}
u32 Rasterizer::ReadDataFromGds(u32 gds_offset) {
auto* gds_buf = buffer_cache.GetGdsBuffer();
u32 value;

View file

@ -53,6 +53,7 @@ public:
void ScopedMarkerInsertColor(const std::string_view& str, const u32 color);
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
u32 ReadDataFromGds(u32 gsd_offset);
bool InvalidateMemory(VAddr addr, u64 size);
bool IsMapped(VAddr addr, u64 size);