texture_cache: Subresource uploads
parent 54cae5dff8
commit 7702ceb8d1
@@ -33,7 +33,7 @@ endif()
 option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)
 option(ENABLE_DISCORD_RPC "Enable the Discord RPC integration" ON)
-CMAKE_DEPENDENT_OPTION(ENABLE_USERFAULTFD "Enable write tracking using userfaultfd on unix" ON "NOT LINUX" OFF)
+CMAKE_DEPENDENT_OPTION(ENABLE_USERFAULTFD "Enable write tracking using userfaultfd on unix" ON "NOT LINUX OR APPLE" OFF)
 
 # First, determine whether to use CMAKE_OSX_ARCHITECTURES or CMAKE_SYSTEM_PROCESSOR.
 if (APPLE AND CMAKE_OSX_ARCHITECTURES)
@@ -550,7 +550,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
                                    sizeof(u32), false);
         } else if (dma_data->src_sel == DmaDataSrc::Gds &&
                    dma_data->dst_sel == DmaDataDst::Memory) {
-            LOG_WARNING(Render_Vulkan, "GDS memory read");
+            LOG_DEBUG(Render_Vulkan, "GDS memory read");
         } else if (dma_data->src_sel == DmaDataSrc::Memory &&
                    dma_data->dst_sel == DmaDataDst::Memory) {
             rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
@@ -137,6 +137,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     : instance{&instance_}, scheduler{&scheduler_}, info{info_},
       image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address},
       cpu_addr_end{cpu_addr + info.guest_size_bytes} {
+    ASSERT(info.resources.layers * info.resources.levels <= 64);
+    subres_state =
+        std::numeric_limits<u64>::max() >> (64 - info.resources.levels * info.resources.layers);
     mip_hashes.resize(info.resources.levels);
     ASSERT(info.pixel_format != vk::Format::eUndefined);
     // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case
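For orientation, here is a minimal standalone sketch of the bookkeeping this hunk sets up; the names (SubresourceDirtyMask, levels, layers) are illustrative only and not code from the repository. One dirty bit is kept per (mip, layer) pair, flattened as mip * layers + layer, and all bits start set so the first refresh uploads every subresource.

    // subres_mask_sketch.cpp -- illustrative only, not part of the commit.
    #include <cassert>
    #include <cstdint>
    #include <limits>

    // One dirty bit per (mip, layer) pair, flattened as mip * layers + layer,
    // mirroring the subres_state member introduced in the diff above.
    struct SubresourceDirtyMask {
        uint32_t levels;
        uint32_t layers;
        uint64_t state;

        SubresourceDirtyMask(uint32_t levels_, uint32_t layers_)
            : levels{levels_}, layers{layers_} {
            assert(levels * layers >= 1 && levels * layers <= 64);
            // Start with every subresource marked dirty, as the Image constructor does.
            state = std::numeric_limits<uint64_t>::max() >> (64 - levels * layers);
        }

        uint32_t Index(uint32_t mip, uint32_t layer) const {
            return mip * layers + layer;
        }

        void MarkDirty(uint32_t mip, uint32_t layer) {
            state |= uint64_t{1} << Index(mip, layer);
        }

        bool IsDirty(uint32_t mip, uint32_t layer) const {
            return (state >> Index(mip, layer)) & 1;
        }
    };

    int main() {
        // Example: 3 mips x 2 layers -> bits 0..5 set initially.
        SubresourceDirtyMask mask{3, 2};
        assert(mask.state == 0x3F);
        assert(mask.Index(1, 0) == 2); // mip 1, layer 0 maps to bit 2
        return 0;
    }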
@@ -91,9 +91,24 @@ struct Image {
         return image_view_ids[std::distance(image_view_infos.begin(), it)];
     }
+
+    void ForEachSubresource(VAddr addr, size_t size, auto&& func) {
+        const u32 num_layers = info.resources.layers;
+        for (u32 m = 0; const auto& mip : info.mips_layout) {
+            for (u32 l = 0; l < num_layers; l++) {
+                const VAddr mip_addr = info.guest_address + mip.offset * num_layers + mip.size * l;
+                const VAddr mip_addr_end = mip_addr + mip.size;
+                if (mip_addr < addr + size && addr < mip_addr_end) {
+                    func(m * num_layers + l);
+                }
+            }
+            m++;
+        }
+    }
+
     boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
         vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
         vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);
 
     void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
                  std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
     void Upload(vk::Buffer buffer, u64 offset);
@@ -111,6 +126,7 @@ struct Image {
     VAddr cpu_addr_end = 0;
     std::vector<ImageViewInfo> image_view_infos;
     std::vector<ImageViewId> image_view_ids;
+    u64 subres_state{};
 
     // Resource state tracking
     vk::ImageUsageFlags usage;
@@ -46,8 +46,10 @@ TextureCache::~TextureCache() = default;
 void TextureCache::InvalidateMemory(VAddr address, size_t size) {
     std::scoped_lock lock{mutex};
     ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
-        // Ensure image is reuploaded when accessed again.
+        // Mark any subresources as dirty.
         image.flags |= ImageFlagBits::CpuDirty;
+        image.ForEachSubresource(address, size,
+                                 [&](u32 index) { image.subres_state |= 1ULL << index; });
         // Untrack image, so the range is unprotected and the guest can write freely.
         UntrackImage(image_id);
     });
@@ -57,12 +59,13 @@ void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) {
     std::scoped_lock lock{mutex};
     ForEachImageInRegion(address, max_size, [&](ImageId image_id, Image& image) {
         // Only consider images that match base address.
-        // TODO: Maybe also consider subresources
         if (image.info.guest_address != address) {
             return;
         }
-        // Ensure image is reuploaded when accessed again.
+        // Mark any subresources as dirty.
        image.flags |= ImageFlagBits::GpuDirty;
+        image.ForEachSubresource(address, max_size,
+                                 [&](u32 index) { image.subres_state |= 1ULL << index; });
     });
 }
 
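As a cross-check of the addressing used by ForEachSubresource and the invalidation paths above, here is a self-contained sketch with simplified stand-in types (MipLayout, MarkDirtySubresources, and the sample offsets are made up for illustration): a write range sets the bit of every (mip, layer) whose guest memory it overlaps.

    // invalidate_sketch.cpp -- illustrative only; names and layout values are assumptions.
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct MipLayout {
        uint64_t offset; // byte offset of one layer of this mip from the image base
        uint64_t size;   // byte size of one layer of this mip
    };

    // Walk every (mip, layer) whose guest range intersects [addr, addr + size)
    // and set its bit, mirroring how InvalidateMemory uses ForEachSubresource.
    uint64_t MarkDirtySubresources(uint64_t dirty, uint64_t base, const std::vector<MipLayout>& mips,
                                   uint32_t num_layers, uint64_t addr, size_t size) {
        uint32_t m = 0;
        for (const auto& mip : mips) {
            for (uint32_t l = 0; l < num_layers; l++) {
                const uint64_t mip_addr = base + mip.offset * num_layers + mip.size * l;
                const uint64_t mip_addr_end = mip_addr + mip.size;
                if (mip_addr < addr + size && addr < mip_addr_end) {
                    dirty |= uint64_t{1} << (m * num_layers + l);
                }
            }
            m++;
        }
        return dirty;
    }

    int main() {
        // 2 mips x 2 layers: mip 0 layers at [0, 256) and [256, 512), mip 1 layers at [512, 576) and [576, 640).
        const std::vector<MipLayout> mips{{0, 256}, {256, 64}};
        // A 16-byte write at offset 300 only touches mip 0, layer 1 -> bit 1.
        const uint64_t dirty = MarkDirtySubresources(0, 0, mips, 2, 300, 16);
        assert(dirty == 0b0010);
        return 0;
    }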
@@ -375,12 +378,18 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
         return;
     }
 
-    const auto& num_layers = image.info.resources.layers;
-    const auto& num_mips = image.info.resources.levels;
+    const u32 num_layers = image.info.resources.layers;
+    const u32 num_mips = image.info.resources.levels;
     ASSERT(num_mips == image.info.mips_layout.size());
 
     boost::container::small_vector<vk::BufferImageCopy, 14> image_copy{};
     for (u32 m = 0; m < num_mips; m++) {
+        const u32 mask = (1 << num_layers) - 1;
+        const u64 subres_state = (image.subres_state >> (m * num_layers)) & mask;
+        if (subres_state == 0) {
+            continue;
+        }
+
         const u32 width = std::max(image.info.size.width >> m, 1u);
         const u32 height = std::max(image.info.size.height >> m, 1u);
         const u32 depth =
@@ -399,19 +408,40 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
             image.mip_hashes[m] = hash;
         }
 
-        image_copy.push_back({
-            .bufferOffset = mip_ofs * num_layers,
-            .bufferRowLength = static_cast<u32>(mip_pitch),
-            .bufferImageHeight = static_cast<u32>(mip_height),
-            .imageSubresource{
-                .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
-                .mipLevel = m,
-                .baseArrayLayer = 0,
-                .layerCount = num_layers,
-            },
-            .imageOffset = {0, 0, 0},
-            .imageExtent = {width, height, depth},
-        });
+        if (subres_state == mask) {
+            image_copy.push_back({
+                .bufferOffset = mip_ofs * num_layers,
+                .bufferRowLength = static_cast<u32>(mip_pitch),
+                .bufferImageHeight = static_cast<u32>(mip_height),
+                .imageSubresource{
+                    .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
+                    .mipLevel = m,
+                    .baseArrayLayer = 0,
+                    .layerCount = num_layers,
+                },
+                .imageOffset = {0, 0, 0},
+                .imageExtent = {width, height, depth},
+            });
+        } else {
+            for (u32 l = 0; l < num_layers; l++) {
+                if (!(subres_state & (1 << l))) {
+                    continue;
+                }
+                image_copy.push_back({
+                    .bufferOffset = mip_ofs * num_layers + mip_size * l,
+                    .bufferRowLength = static_cast<u32>(mip_pitch),
+                    .bufferImageHeight = static_cast<u32>(mip_height),
+                    .imageSubresource{
+                        .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
+                        .mipLevel = m,
+                        .baseArrayLayer = l,
+                        .layerCount = 1,
+                    },
+                    .imageOffset = {0, 0, 0},
+                    .imageExtent = {width, height, depth},
+                });
+            }
+        }
     }
 
     if (image_copy.empty()) {
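To make the per-mip bookkeeping above easier to follow, here is a small self-contained sketch of the mask arithmetic; the sample values are made up and not from the codebase. Each mip extracts its own slice of the dirty mask, skips the mip if the slice is zero, records one whole-mip copy when every layer bit is set, and otherwise records one copy per set layer bit.

    // refresh_mask_sketch.cpp -- illustrative only; demonstrates the mask arithmetic used when building copies.
    #include <cassert>
    #include <cstdint>

    int main() {
        const uint32_t num_layers = 2;
        const uint32_t num_mips = 3;
        // Suppose mip 0 is fully dirty (bits 0-1) and only layer 1 of mip 2 is dirty (bit 5).
        const uint64_t subres_state = 0b100011;

        for (uint32_t m = 0; m < num_mips; m++) {
            const uint32_t mask = (1u << num_layers) - 1; // per-mip layer mask: 0b11
            const uint64_t mip_state = (subres_state >> (m * num_layers)) & mask;
            if (mip_state == 0) {
                continue; // mip 1: nothing dirty, no copy recorded
            }
            if (mip_state == mask) {
                // mip 0: all layers dirty -> one copy covering layerCount = num_layers
                assert(m == 0);
            } else {
                // mip 2: only some layers dirty -> one copy per set bit (here layer 1)
                assert(m == 2 && mip_state == 0b10);
            }
        }
        return 0;
    }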
@@ -447,6 +477,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
 
     cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
     image.flags &= ~ImageFlagBits::Dirty;
+    image.subres_state = 0;
 }
 
 vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {