mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2024-12-27 18:16:23 +00:00
DmaData and Recompiler fixes (#1775)
* liverpool: fix dmadata packet handling * recompiler: emit a label right after s_branch to prevent dead code interferrence * specialize barriers
This commit is contained in:
parent
3c0c921ef5
commit
cafd40f2c2
|
@ -80,6 +80,7 @@ void CFG::EmitLabels() {
|
|||
if (inst.IsUnconditionalBranch()) {
|
||||
const u32 target = inst.BranchTarget(pc);
|
||||
AddLabel(target);
|
||||
AddLabel(pc + inst.length);
|
||||
} else if (inst.IsConditionalBranch()) {
|
||||
const u32 true_label = inst.BranchTarget(pc);
|
||||
const u32 false_label = pc + inst.length;
|
||||
|
|
|
@ -573,21 +573,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
true);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
|
||||
dma_data->dst_sel == DmaDataDst::Gds) {
|
||||
rasterizer->InlineData(dma_data->dst_addr_lo,
|
||||
dma_data->SrcAddress<const void*>(),
|
||||
dma_data->NumBytes(), true);
|
||||
rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
|
||||
dma_data->NumBytes(), true, false);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Data &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data,
|
||||
sizeof(u32), false);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
// LOG_WARNING(Render_Vulkan, "GDS memory read");
|
||||
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
|
||||
dma_data->NumBytes(), false, true);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
|
||||
dma_data->SrcAddress<const void*>(),
|
||||
dma_data->NumBytes(), false);
|
||||
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(),
|
||||
dma_data->SrcAddress<VAddr>(), dma_data->NumBytes(),
|
||||
false, false);
|
||||
} else {
|
||||
UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
|
||||
u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
|
||||
|
@ -731,20 +731,20 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||
rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
|
||||
dma_data->dst_sel == DmaDataDst::Gds) {
|
||||
rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress<const void*>(),
|
||||
dma_data->NumBytes(), true);
|
||||
rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
|
||||
dma_data->NumBytes(), true, false);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Data &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data, sizeof(u32),
|
||||
false);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
// LOG_WARNING(Render_Vulkan, "GDS memory read");
|
||||
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
|
||||
dma_data->NumBytes(), false, true);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
|
||||
dma_data->SrcAddress<const void*>(), dma_data->NumBytes(),
|
||||
false);
|
||||
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->SrcAddress<VAddr>(),
|
||||
dma_data->NumBytes(), false, false);
|
||||
} else {
|
||||
UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
|
||||
u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
|
||||
|
|
|
@ -312,8 +312,23 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
|
|||
const BufferId buffer_id = FindBuffer(address, num_bytes);
|
||||
return &slot_buffers[buffer_id];
|
||||
}();
|
||||
const vk::BufferMemoryBarrier2 buf_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
const vk::BufferMemoryBarrier2 buf_barrier_before = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = buffer->Handle(),
|
||||
.offset = buffer->Offset(address),
|
||||
.size = num_bytes,
|
||||
};
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &buf_barrier_before,
|
||||
});
|
||||
cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
|
||||
const vk::BufferMemoryBarrier2 buf_barrier_after = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
|
@ -324,9 +339,96 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
|
|||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &buf_barrier,
|
||||
.pBufferMemoryBarriers = &buf_barrier_after,
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
|
||||
if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) {
|
||||
if (!src_gds && !IsRegionRegistered(src, num_bytes)) {
|
||||
// Both buffers were not transferred to GPU yet. Can safely copy in host memory.
|
||||
memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
|
||||
return;
|
||||
}
|
||||
// Without a readback there's nothing we can do with this
|
||||
// Fallback to creating dst buffer on GPU to at least have this data there
|
||||
}
|
||||
if (!src_gds && !IsRegionRegistered(src, num_bytes)) {
|
||||
InlineData(dst, std::bit_cast<void*>(src), num_bytes, dst_gds);
|
||||
return;
|
||||
}
|
||||
auto& src_buffer = [&] -> const Buffer& {
|
||||
if (src_gds) {
|
||||
return gds_buffer;
|
||||
}
|
||||
const BufferId buffer_id = FindBuffer(src, num_bytes);
|
||||
return slot_buffers[buffer_id];
|
||||
}();
|
||||
auto& dst_buffer = [&] -> const Buffer& {
|
||||
if (dst_gds) {
|
||||
return gds_buffer;
|
||||
}
|
||||
const BufferId buffer_id = FindBuffer(dst, num_bytes);
|
||||
return slot_buffers[buffer_id];
|
||||
}();
|
||||
vk::BufferCopy region{
|
||||
.srcOffset = src_buffer.Offset(src),
|
||||
.dstOffset = dst_buffer.Offset(dst),
|
||||
.size = num_bytes,
|
||||
};
|
||||
const vk::BufferMemoryBarrier2 buf_barriers_before[2] = {
|
||||
{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = dst_buffer.Handle(),
|
||||
.offset = dst_buffer.Offset(dst),
|
||||
.size = num_bytes,
|
||||
},
|
||||
{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.buffer = src_buffer.Handle(),
|
||||
.offset = src_buffer.Offset(src),
|
||||
.size = num_bytes,
|
||||
},
|
||||
};
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 2,
|
||||
.pBufferMemoryBarriers = buf_barriers_before,
|
||||
});
|
||||
cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region);
|
||||
const vk::BufferMemoryBarrier2 buf_barriers_after[2] = {
|
||||
{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.buffer = dst_buffer.Handle(),
|
||||
.offset = dst_buffer.Offset(dst),
|
||||
.size = num_bytes,
|
||||
},
|
||||
{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
|
||||
.buffer = src_buffer.Handle(),
|
||||
.offset = src_buffer.Offset(src),
|
||||
.size = num_bytes,
|
||||
},
|
||||
};
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 2,
|
||||
.pBufferMemoryBarriers = buf_barriers_after,
|
||||
});
|
||||
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
|
||||
}
|
||||
|
||||
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
|
||||
|
@ -701,8 +803,22 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
|||
scheduler.EndRendering();
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
static constexpr vk::MemoryBarrier READ_BARRIER{
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
|
||||
};
|
||||
static constexpr vk::MemoryBarrier WRITE_BARRIER{
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
||||
};
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {});
|
||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
|
||||
copies);
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -87,6 +87,7 @@ public:
|
|||
|
||||
/// Writes a value to GPU buffer.
|
||||
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
||||
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
|
||||
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);
|
||||
|
||||
|
|
|
@ -840,6 +840,10 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo
|
|||
buffer_cache.InlineData(address, value, num_bytes, is_gds);
|
||||
}
|
||||
|
||||
void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
|
||||
buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds);
|
||||
}
|
||||
|
||||
u32 Rasterizer::ReadDataFromGds(u32 gds_offset) {
|
||||
auto* gds_buf = buffer_cache.GetGdsBuffer();
|
||||
u32 value;
|
||||
|
|
|
@ -53,6 +53,7 @@ public:
|
|||
void ScopedMarkerInsertColor(const std::string_view& str, const u32 color);
|
||||
|
||||
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
||||
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
|
||||
u32 ReadDataFromGds(u32 gsd_offset);
|
||||
bool InvalidateMemory(VAddr addr, u64 size);
|
||||
bool IsMapped(VAddr addr, u64 size);
|
||||
|
|
Loading…
Reference in a new issue