Build stabilization (#413)

* shader_recompiler: fix for float convert and debug asserts

* libraries: kernel: correct return code on invalid semaphore

* amdgpu: additional case for cb extents retrieval heuristic

* removed redundant check in assert

* amdgpu: fix for linear tiling mode detection in color buffers

* texture_cache: fix for unexpected scheduler flushes by detiler

* renderer_vulkan: missing depth barrier

* texture_cache: missed slices in rt view; + detiler format
This commit is contained in:
psucien 2024-08-12 16:23:01 +02:00 committed by GitHub
parent ace39957ef
commit 3d0fdf11f0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 69 additions and 25 deletions

View file

@ -174,10 +174,16 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3
} }
s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) { s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) {
if (!sem) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
return sem->Wait(true, needCount, pTimeout); return sem->Wait(true, needCount, pTimeout);
} }
s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
if (!sem) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
if (!sem->Signal(signalCount)) { if (!sem->Signal(signalCount)) {
return ORBIS_KERNEL_ERROR_EINVAL; return ORBIS_KERNEL_ERROR_EINVAL;
} }
@ -185,10 +191,16 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
} }
s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) { s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) {
if (!sem) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
return sem->Wait(false, needCount, nullptr); return sem->Wait(false, needCount, nullptr);
} }
int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) { int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) {
if (!sem) {
return ORBIS_KERNEL_ERROR_ESRCH;
}
return sem->Cancel(setCount, pNumWaitThreads); return sem->Cancel(setCount, pNumWaitThreads);
} }

View file

@ -386,19 +386,12 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
if (is_signed) { if (is_signed) {
value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset,
ctx.ConstU32(bit_width)); ctx.ConstU32(bit_width));
value = ctx.OpConvertSToF(ctx.F32[1], value);
} else { } else {
value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset, value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset,
ctx.ConstU32(bit_width)); ctx.ConstU32(bit_width));
value = ctx.OpConvertUToF(ctx.F32[1], value);
}
} else {
if (is_signed) {
value = ctx.OpConvertSToF(ctx.F32[1], value);
} else {
value = ctx.OpConvertUToF(ctx.F32[1], value);
} }
} }
value = ctx.OpBitcast(ctx.F32[1], value);
return ConvertValue(ctx, value, num_format, bit_width); return ConvertValue(ctx, value, num_format, bit_width);
} }
break; break;

View file

@ -33,14 +33,14 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
operands.operands); operands.operands);
} }
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
Id offset) { Id offset) {
const auto& texture = ctx.images[handle & 0xFFFF]; const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords,
spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); spv::ImageOperandsMask::Lod, lod);
} }
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,

View file

@ -359,7 +359,7 @@ Id EmitConvertU32U16(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset); Id offset);
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
Id offset); Id offset);
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias_lc, const IR::Value& offset); Id bias_lc, const IR::Value& offset);

View file

@ -376,9 +376,11 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
return -1; return -1;
} }
// We have found this pattern. Build the sharp. // We have found this pattern. Build the sharp.
std::array<u64, 2> buffer; std::array<u32, 4> buffer;
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32(); buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32; buffer[1] = 0;
buffer[2] = handle->Arg(2).U32();
buffer[3] = handle->Arg(3).U32();
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer); cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
// Assign a binding to this sharp. // Assign a binding to this sharp.
return descriptors.Add(BufferResource{ return descriptors.Add(BufferResource{

View file

@ -116,7 +116,7 @@ struct PushData {
std::array<u8, 32> buf_offsets; std::array<u8, 32> buf_offsets;
void AddOffset(u32 binding, u32 offset) { void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 64 && binding < 32); ASSERT(offset < 256 && binding < buf_offsets.size());
buf_offsets[binding] = offset; buf_offsets[binding] = offset;
} }
}; };

View file

@ -237,7 +237,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
ASSERT(col_buf_id < NumColorBuffers); ASSERT(col_buf_id < NumColorBuffers);
const auto nop_offset = header->type3.count; const auto nop_offset = header->type3.count;
if (nop_offset == 0x0e || nop_offset == 0x0d) { if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
ASSERT_MSG(payload[nop_offset] == 0xc0001000, ASSERT_MSG(payload[nop_offset] == 0xc0001000,
"NOP hint is missing in CB setup sequence"); "NOP hint is missing in CB setup sequence");
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];

View file

@ -766,7 +766,7 @@ struct Liverpool {
} }
TilingMode GetTilingMode() const { TilingMode GetTilingMode() const {
return attrib.tile_mode_index; return info.linear_general ? TilingMode::Display_Linear : attrib.tile_mode_index;
} }
bool IsTiled() const { bool IsTiled() const {

View file

@ -146,6 +146,10 @@ public:
return offset; return offset;
} }
u64 GetFreeSize() const {
return size_bytes - offset - mapped_size;
}
private: private:
struct Watch { struct Watch {
u64 tick{}; u64 tick{};

View file

@ -152,7 +152,8 @@ void Rasterizer::BeginRendering() {
.stencil = regs.stencil_clear}}, .stencil = regs.stencil_clear}},
}; };
texture_cache.TouchMeta(htile_address, false); texture_cache.TouchMeta(htile_address, false);
state.num_depth_attachments++; state.has_depth = true;
state.has_stencil = image.info.usage.stencil;
} }
scheduler.BeginRendering(state); scheduler.BeginRendering(state);
} }

View file

@ -38,8 +38,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
.layerCount = 1, .layerCount = 1,
.colorAttachmentCount = render_state.num_color_attachments, .colorAttachmentCount = render_state.num_color_attachments,
.pColorAttachments = render_state.color_attachments.data(), .pColorAttachments = render_state.color_attachments.data(),
.pDepthAttachment = .pDepthAttachment = render_state.has_depth ? &render_state.depth_attachment : nullptr,
render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr,
}; };
current_cmdbuf.beginRendering(rendering_info); current_cmdbuf.beginRendering(rendering_info);
@ -50,6 +49,8 @@ void Scheduler::EndRendering() {
return; return;
} }
is_rendering = false; is_rendering = false;
current_cmdbuf.endRendering();
boost::container::static_vector<vk::ImageMemoryBarrier, 9> barriers; boost::container::static_vector<vk::ImageMemoryBarrier, 9> barriers;
for (size_t i = 0; i < render_state.num_color_attachments; ++i) { for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
barriers.push_back(vk::ImageMemoryBarrier{ barriers.push_back(vk::ImageMemoryBarrier{
@ -70,10 +71,35 @@ void Scheduler::EndRendering() {
}, },
}); });
} }
current_cmdbuf.endRendering(); if (render_state.has_depth) {
barriers.push_back(vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.oldLayout = render_state.depth_attachment.imageLayout,
.newLayout = render_state.depth_attachment.imageLayout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = render_state.depth_image,
.subresourceRange =
{
.aspectMask = vk::ImageAspectFlagBits::eDepth |
(render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil
: vk::ImageAspectFlagBits::eNone),
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
});
}
if (!barriers.empty()) { if (!barriers.empty()) {
current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, const auto src_stages =
vk::PipelineStageFlagBits::eFragmentShader, vk::PipelineStageFlagBits::eColorAttachmentOutput |
(render_state.has_depth ? vk::PipelineStageFlagBits::eLateFragmentTests |
vk::PipelineStageFlagBits::eEarlyFragmentTests
: vk::PipelineStageFlagBits::eNone);
current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader,
vk::DependencyFlagBits::eByRegion, {}, {}, barriers); vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
} }
} }

View file

@ -20,7 +20,8 @@ struct RenderState {
vk::RenderingAttachmentInfo depth_attachment{}; vk::RenderingAttachmentInfo depth_attachment{};
vk::Image depth_image{}; vk::Image depth_image{};
u32 num_color_attachments{}; u32 num_color_attachments{};
u32 num_depth_attachments{}; bool has_depth{};
bool has_stencil{};
u32 width = std::numeric_limits<u32>::max(); u32 width = std::numeric_limits<u32>::max();
u32 height = std::numeric_limits<u32>::max(); u32 height = std::numeric_limits<u32>::max();

View file

@ -189,6 +189,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
resources.layers = num_slices; resources.layers = num_slices;
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
usage.depth_target = true; usage.depth_target = true;
usage.stencil =
buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
guest_address = buffer.Address(); guest_address = buffer.Address();
const auto depth_slice_sz = buffer.GetDepthSliceSize(); const auto depth_slice_sz = buffer.GetDepthSliceSize();
@ -260,7 +262,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
case AmdGpu::TilingMode::Display_MacroTiled: case AmdGpu::TilingMode::Display_MacroTiled:
case AmdGpu::TilingMode::Texture_MacroTiled: case AmdGpu::TilingMode::Texture_MacroTiled:
case AmdGpu::TilingMode::Depth_MacroTiled: { case AmdGpu::TilingMode::Depth_MacroTiled: {
// ASSERT(!props.is_cube && !props.is_block); ASSERT(!props.is_block);
ASSERT(num_samples == 1); ASSERT(num_samples == 1);
std::tie(mip_info.pitch, mip_info.size) = std::tie(mip_info.pitch, mip_info.size) =
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index);

View file

@ -92,6 +92,8 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
bool is_vo_surface) noexcept { bool is_vo_surface) noexcept {
const auto base_format = const auto base_format =
Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat());
range.base.layer = col_buffer.view.slice_start;
range.extent.layers = col_buffer.NumSlices();
format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(
base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); base_format, col_buffer.info.comp_swap.Value(), is_vo_surface);
} }

View file

@ -194,6 +194,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR32G32Sfloat: case vk::Format::eR32G32Sfloat:
case vk::Format::eR32G32Uint: case vk::Format::eR32G32Uint:
case vk::Format::eR16G16B16A16Unorm: case vk::Format::eR16G16B16A16Unorm:
case vk::Format::eR16G16B16A16Sfloat:
return vk::Format::eR32G32Uint; return vk::Format::eR32G32Uint;
case vk::Format::eBc2SrgbBlock: case vk::Format::eBc2SrgbBlock:
case vk::Format::eBc2UnormBlock: case vk::Format::eBc2UnormBlock:
@ -397,7 +398,7 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
const u32 image_size = image.info.guest_size_bytes; const u32 image_size = image.info.guest_size_bytes;
const auto [in_buffer, in_offset] = [&] -> std::pair<vk::Buffer, u32> { const auto [in_buffer, in_offset] = [&] -> std::pair<vk::Buffer, u32> {
// Use stream buffer for smaller textures. // Use stream buffer for smaller textures.
if (image_size <= StreamBufferSize) { if (image_size <= stream_buffer.GetFreeSize()) {
u32 offset = stream_buffer.Copy(image.info.guest_address, image_size); u32 offset = stream_buffer.Copy(image.info.guest_address, image_size);
return {stream_buffer.Handle(), offset}; return {stream_buffer.Handle(), offset};
} }