mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-28 09:18:25 +00:00
vk_shader_hle: Make more efficient
This commit is contained in:
parent
cc0c49876b
commit
363c6c1b7b
|
@ -30,16 +30,18 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg
|
||||||
};
|
};
|
||||||
static_assert(sizeof(CopyShaderControl) == 12);
|
static_assert(sizeof(CopyShaderControl) == 12);
|
||||||
ASSERT(ctl_buf_sharp.GetStride() == sizeof(CopyShaderControl));
|
ASSERT(ctl_buf_sharp.GetStride() == sizeof(CopyShaderControl));
|
||||||
const auto ctl_buf = reinterpret_cast<CopyShaderControl*>(ctl_buf_sharp.base_address);
|
const auto ctl_buf = reinterpret_cast<const CopyShaderControl*>(ctl_buf_sharp.base_address);
|
||||||
|
|
||||||
|
static std::vector<vk::BufferCopy> copies;
|
||||||
|
copies.clear();
|
||||||
|
copies.reserve(regs.cs_program.dim_x);
|
||||||
|
|
||||||
// Add list of copies
|
|
||||||
boost::container::set<vk::BufferCopy> copies;
|
|
||||||
for (u32 i = 0; i < regs.cs_program.dim_x; i++) {
|
for (u32 i = 0; i < regs.cs_program.dim_x; i++) {
|
||||||
const auto& [dst_idx, src_idx, end] = ctl_buf[i];
|
const auto& [dst_idx, src_idx, end] = ctl_buf[i];
|
||||||
const u32 local_dst_offset = dst_idx * buf_stride;
|
const u32 local_dst_offset = dst_idx * buf_stride;
|
||||||
const u32 local_src_offset = src_idx * buf_stride;
|
const u32 local_src_offset = src_idx * buf_stride;
|
||||||
const u32 local_size = (end + 1) * buf_stride;
|
const u32 local_size = (end + 1) * buf_stride;
|
||||||
copies.emplace(local_src_offset, local_dst_offset, local_size);
|
copies.emplace_back(local_src_offset, local_dst_offset, local_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
scheduler.EndRendering();
|
scheduler.EndRendering();
|
||||||
|
@ -57,30 +59,29 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg
|
||||||
vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {});
|
vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {});
|
||||||
|
|
||||||
static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB;
|
static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB;
|
||||||
boost::container::small_vector<vk::BufferCopy, 32> batch_copies;
|
u32 batch_start = 0;
|
||||||
while (!copies.empty()) {
|
u32 batch_end = 1;
|
||||||
// Place first copy into the current batch
|
|
||||||
auto it = copies.begin();
|
|
||||||
auto src_offset_min = it->srcOffset;
|
|
||||||
auto src_offset_max = it->srcOffset + it->size;
|
|
||||||
auto dst_offset_min = it->dstOffset;
|
|
||||||
auto dst_offset_max = it->dstOffset + it->size;
|
|
||||||
batch_copies.emplace_back(*it);
|
|
||||||
it = copies.erase(it);
|
|
||||||
|
|
||||||
while (it != copies.end()) {
|
while (batch_end < copies.size()) {
|
||||||
|
// Place first copy into the current batch
|
||||||
|
const auto& copy = copies[batch_start];
|
||||||
|
auto src_offset_min = copy.srcOffset;
|
||||||
|
auto src_offset_max = copy.srcOffset + copy.size;
|
||||||
|
auto dst_offset_min = copy.dstOffset;
|
||||||
|
auto dst_offset_max = copy.dstOffset + copy.size;
|
||||||
|
|
||||||
|
for (int i = batch_start + 1; i < copies.size(); i++) {
|
||||||
// Compute new src and dst bounds if we were to batch this copy
|
// Compute new src and dst bounds if we were to batch this copy
|
||||||
auto new_src_offset_min = std::min(src_offset_min, it->srcOffset);
|
const auto [src_offset, dst_offset, size] = copies[i];
|
||||||
auto new_src_offset_max = std::max(src_offset_max, it->srcOffset + it->size);
|
auto new_src_offset_min = std::min(src_offset_min, src_offset);
|
||||||
|
auto new_src_offset_max = std::max(src_offset_max, src_offset + size);
|
||||||
if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) {
|
if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) {
|
||||||
++it;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto new_dst_offset_min = std::min(dst_offset_min, it->dstOffset);
|
auto new_dst_offset_min = std::min(dst_offset_min, dst_offset);
|
||||||
auto new_dst_offset_max = std::max(dst_offset_max, it->dstOffset + it->size);
|
auto new_dst_offset_max = std::max(dst_offset_max, dst_offset + size);
|
||||||
if (new_dst_offset_max - new_dst_offset_min > MaxDistanceForMerge) {
|
if (new_dst_offset_max - new_dst_offset_min > MaxDistanceForMerge) {
|
||||||
++it;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,8 +90,10 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg
|
||||||
src_offset_max = new_src_offset_max;
|
src_offset_max = new_src_offset_max;
|
||||||
dst_offset_min = new_dst_offset_min;
|
dst_offset_min = new_dst_offset_min;
|
||||||
dst_offset_max = new_dst_offset_max;
|
dst_offset_max = new_dst_offset_max;
|
||||||
batch_copies.emplace_back(*it);
|
if (i != batch_end) {
|
||||||
it = copies.erase(it);
|
std::swap(copies[i], copies[batch_end]);
|
||||||
|
}
|
||||||
|
++batch_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Obtain buffers for the total source and destination ranges.
|
// Obtain buffers for the total source and destination ranges.
|
||||||
|
@ -102,16 +105,18 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg
|
||||||
dst_offset_max - dst_offset_min, true, false);
|
dst_offset_max - dst_offset_min, true, false);
|
||||||
|
|
||||||
// Apply found buffer base.
|
// Apply found buffer base.
|
||||||
for (auto& vk_copy : batch_copies) {
|
const auto vk_copies = std::span{copies}.subspan(batch_start, batch_end - batch_start);
|
||||||
vk_copy.srcOffset = vk_copy.srcOffset - src_offset_min + src_buf_offset;
|
for (auto& copy : vk_copies) {
|
||||||
vk_copy.dstOffset = vk_copy.dstOffset - dst_offset_min + dst_buf_offset;
|
copy.srcOffset = copy.srcOffset - src_offset_min + src_buf_offset;
|
||||||
|
copy.dstOffset = copy.dstOffset - dst_offset_min + dst_buf_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute buffer copies.
|
// Execute buffer copies.
|
||||||
LOG_TRACE(Render_Vulkan, "HLE buffer copy: src_size = {}, dst_size = {}",
|
LOG_TRACE(Render_Vulkan, "HLE buffer copy: src_size = {}, dst_size = {}",
|
||||||
src_offset_max - src_offset_min, dst_offset_max - dst_offset_min);
|
src_offset_max - src_offset_min, dst_offset_max - dst_offset_min);
|
||||||
scheduler.CommandBuffer().copyBuffer(src_buf->Handle(), dst_buf->Handle(), batch_copies);
|
scheduler.CommandBuffer().copyBuffer(src_buf->Handle(), dst_buf->Handle(), vk_copies);
|
||||||
batch_copies.clear();
|
batch_start = batch_end;
|
||||||
|
++batch_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
scheduler.CommandBuffer().pipelineBarrier(
|
scheduler.CommandBuffer().pipelineBarrier(
|
||||||
|
|
Loading…
Reference in a new issue