Merge pull request #201 from shadps4-emu/stabilization_4

Proper color buffers color handling and various fixes
This commit is contained in:
georgemoralis 2024-06-16 01:18:43 +03:00 committed by GitHub
commit 3552484b33
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 164 additions and 45 deletions

View file

@ -51,4 +51,7 @@ enum MarkersPallete : int {
#define GPU_SCOPE_LOCATION(name, color) \ #define GPU_SCOPE_LOCATION(name, color) \
tracy::SourceLocationData{name, TracyFunction, TracyFile, (uint32_t)TracyLine, color}; tracy::SourceLocationData{name, TracyFunction, TracyFile, (uint32_t)TracyLine, color};
// Builds a tracy::SourceLocationData describing a lock at the current file/line.
// Compared with GPU_SCOPE_LOCATION, the first field is left null and `name` is
// placed in the slot where that macro passes TracyFunction; the last field
// (the colour slot in GPU_SCOPE_LOCATION) is 0.
// NOTE: the expansion already ends with ';', so a use site followed by its own
// ';' produces an extra empty statement.
#define MUTEX_LOCATION(name) \
tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0};
#define FRAME_END FrameMark #define FRAME_END FrameMark

View file

@ -622,7 +622,6 @@ int PS4_SYSV_ABI sceGnmGetShaderStatus() {
VAddr PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress() { VAddr PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress() {
LOG_TRACE(Lib_GnmDriver, "called"); LOG_TRACE(Lib_GnmDriver, "called");
// Actual virtual buffer address is hardcoded in the driver to 0xff00'000
return tessellation_factors_ring_addr; return tessellation_factors_ring_addr;
} }
@ -964,15 +963,16 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1 0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1
0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0 0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0
0x7e020b01u, // v_cvt_f32_i32 v1, v1 0x7e020b01u, // v_cvt_f32_i32 v1, v1
0x7E000B00U, 0x7e000b00U, // v_cvt_f32_i32 v0, v0
0x7e040280u, // v_cvt_f32_i32 v0, v0 0x7e040280u, // v_mov_b32 v2, 0
0x7e0602f2u, // v_mov_b32 v3, 1.0 0x7e0602f2u, // v_mov_b32 v3, 1.0
0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done 0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done
0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3 0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3
0xbf810000u, // s_endpgm 0xbf810000u, // s_endpgm
// OrbShdr header // Binary header
0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, 0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du,
// VS regs
0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u, 0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u,
}; };
// clang-format on // clang-format on
@ -1512,9 +1512,9 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
const auto& ccb_span = std::span<const u32>{ccb, ccb_size_dw}; const auto& ccb_span = std::span<const u32>{ccb, ccb_size_dw};
if (Config::dumpPM4()) { if (Config::dumpPM4()) {
static auto last_frame_num = frames_submitted; static auto last_frame_num = -1LL;
static u32 seq_num{}; static u32 seq_num{};
if (last_frame_num && last_frame_num == frames_submitted) { if (last_frame_num == frames_submitted) {
++seq_num; ++seq_num;
} else { } else {
last_frame_num = frames_submitted; last_frame_num = frames_submitted;

View file

@ -429,7 +429,11 @@ int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMut
int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr); int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr);
static auto mutex_loc = MUTEX_LOCATION("mutex");
(*mutex)->tracy_lock = std::make_unique<tracy::LockableCtx>(&mutex_loc);
if (name != nullptr) { if (name != nullptr) {
(*mutex)->tracy_lock->CustomName(name, std::strlen(name));
LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result); LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
} }
@ -526,7 +530,11 @@ int PS4_SYSV_ABI scePthreadMutexattrSetprotocol(ScePthreadMutexattr* attr, int p
UNREACHABLE_MSG("Invalid protocol: {}", protocol); UNREACHABLE_MSG("Invalid protocol: {}", protocol);
} }
#if _WIN64
int result = 0;
#else
int result = pthread_mutexattr_setprotocol(&(*attr)->pth_mutex_attr, pprotocol); int result = pthread_mutexattr_setprotocol(&(*attr)->pth_mutex_attr, pprotocol);
#endif
(*attr)->pprotocol = pprotocol; (*attr)->pprotocol = pprotocol;
return result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL; return result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL;
} }
@ -537,10 +545,15 @@ int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) {
return SCE_KERNEL_ERROR_EINVAL; return SCE_KERNEL_ERROR_EINVAL;
} }
(*mutex)->tracy_lock->BeforeLock();
int result = pthread_mutex_lock(&(*mutex)->pth_mutex); int result = pthread_mutex_lock(&(*mutex)->pth_mutex);
if (result != 0) { if (result != 0) {
LOG_TRACE(Kernel_Pthread, "Locked name={}, result={}", (*mutex)->name, result); LOG_TRACE(Kernel_Pthread, "Locked name={}, result={}", (*mutex)->name, result);
} }
(*mutex)->tracy_lock->AfterLock();
switch (result) { switch (result) {
case 0: case 0:
return SCE_OK; return SCE_OK;
@ -565,6 +578,9 @@ int PS4_SYSV_ABI scePthreadMutexUnlock(ScePthreadMutex* mutex) {
if (result != 0) { if (result != 0) {
LOG_TRACE(Kernel_Pthread, "Unlocking name={}, result={}", (*mutex)->name, result); LOG_TRACE(Kernel_Pthread, "Unlocking name={}, result={}", (*mutex)->name, result);
} }
(*mutex)->tracy_lock->AfterUnlock();
switch (result) { switch (result) {
case 0: case 0:
return SCE_OK; return SCE_OK;
@ -1095,6 +1111,9 @@ int PS4_SYSV_ABI scePthreadMutexTrylock(ScePthreadMutex* mutex) {
if (result != 0) { if (result != 0) {
LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
} }
(*mutex)->tracy_lock->AfterTryLock(result == 0);
switch (result) { switch (result) {
case 0: case 0:
return ORBIS_OK; return ORBIS_OK;

View file

@ -9,6 +9,7 @@
#include <vector> #include <vector>
#include <pthread.h> #include <pthread.h>
#include <sched.h> #include <sched.h>
#include "common/debug.h"
#include "common/types.h" #include "common/types.h"
namespace Core::Loader { namespace Core::Loader {
@ -72,6 +73,7 @@ struct PthreadMutexInternal {
u8 reserved[256]; u8 reserved[256];
std::string name; std::string name;
pthread_mutex_t pth_mutex; pthread_mutex_t pth_mutex;
std::unique_ptr<tracy::LockableCtx> tracy_lock;
}; };
struct PthreadMutexattrInternal { struct PthreadMutexattrInternal {

View file

@ -134,6 +134,7 @@ int VideoOutDriver::RegisterBuffers(VideoOutPort* port, s32 startIndex, void* co
.address_right = 0, .address_right = 0,
}; };
renderer->RegisterVideoOutSurface(group, address);
LOG_INFO(Lib_VideoOut, "buffers[{}] = {:#x}", i + startIndex, address); LOG_INFO(Lib_VideoOut, "buffers[{}] = {:#x}", i + startIndex, address);
} }

View file

@ -49,7 +49,9 @@ struct Liverpool {
using UserData = std::array<u32, NumShaderUserData>; using UserData = std::array<u32, NumShaderUserData>;
struct BinaryInfo { struct BinaryInfo {
u8 signature[7]; static constexpr u8 signature_ref[] = {0x4f, 0x72, 0x62, 0x53, 0x68, 0x64, 0x72}; // OrbShdr
std::array<u8, sizeof(signature_ref)> signature;
u8 version; u8 version;
u32 pssl_or_cg : 1; u32 pssl_or_cg : 1;
u32 cached : 1; u32 cached : 1;
@ -65,6 +67,11 @@ struct Liverpool {
u8 reserved3; u8 reserved3;
u64 shader_hash; u64 shader_hash;
u32 crc32; u32 crc32;
bool Valid() const {
return shader_hash && crc32 &&
(std::memcmp(signature.data(), signature_ref, sizeof(signature_ref)) == 0);
}
}; };
struct ShaderProgram { struct ShaderProgram {
@ -134,6 +141,14 @@ struct Liverpool {
} }
}; };
/// Locates the BinaryInfo header that accompanies a shader program.
///
/// The second dword of the shader code is read as a length-like value; the
/// header is expected (words[1] + 1) * 2 dwords past the start of the code.
/// NOTE(review): presumably that dword is a literal emitted by the shader
/// compiler to encode the distance to the header - confirm against the
/// binary format.
///
/// @param sh Shader program exposing a templated Address<T>() accessor.
/// @return Pointer to the header; asserts if the header fails Valid().
template <typename Shader>
static constexpr auto* GetBinaryInfo(const Shader& sh) {
    const auto* const words = sh.template Address<u32>();
    // Offset is expressed in u32 units because it is applied to a u32 pointer.
    const auto header_offset_dw = (words[1] + 1) * 2;
    const auto* const header = std::bit_cast<const BinaryInfo*>(words + header_offset_dw);
    ASSERT_MSG(header->Valid(), "Invalid shader binary header");
    return header;
}
union PsInputControl { union PsInputControl {
u32 raw; u32 raw;
BitField<0, 5, u32> input_offset; BitField<0, 5, u32> input_offset;

View file

@ -277,6 +277,7 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color) {
} }
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) { num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32G32B32A32Sfloat; return vk::Format::eR32G32B32A32Sfloat;
@ -291,7 +292,7 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
} }
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) { num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eB8G8R8A8Srgb; return vk::Format::eR8G8B8A8Srgb;
} }
if (data_format == AmdGpu::DataFormat::Format32_32_32 && if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) { num_format == AmdGpu::NumberFormat::Float) {
@ -353,6 +354,31 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
} }
/// Derives the Vulkan format used to render into a colour buffer from the
/// format implied by its data/number format, its component swap mode and
/// whether it is a video-out (presentation) surface.
///
/// - Alternate swap exchanges the R and B channel order (RGBA <-> BGRA).
/// - Video-out surfaces with an sRGB format are handled as UNORM.
/// - All other surfaces keep their sRGB/UNORM encoding; the swap mode only
///   affects channel order, never the transfer function.
///
/// @param base_format   Format as returned by SurfaceFormat().
/// @param comp_swap     Component swap mode; only Standard and Alternate are supported.
/// @param is_vo_surface True when the buffer is a registered video-out surface.
/// @return The adjusted format. Asserts on an unsupported swap mode and is
///         unreachable for base formats other than the four 8-bit RGBA/BGRA ones.
vk::Format AdjustColorBufferFormat(vk::Format base_format,
                                   Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface) {
    ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard ||
                   comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate,
               "Unsupported component swap mode {}", static_cast<u32>(comp_swap));

    const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate;
    switch (base_format) {
    case vk::Format::eR8G8B8A8Unorm:
        return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm : base_format;
    case vk::Format::eB8G8R8A8Unorm:
        return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm : base_format;
    case vk::Format::eR8G8B8A8Srgb:
        if (is_vo_surface) {
            return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm : vk::Format::eR8G8B8A8Unorm;
        }
        // Previously the alternate swap silently dropped sRGB here, so the same
        // surface changed gamma handling depending only on its channel order.
        return comp_swap_alt ? vk::Format::eB8G8R8A8Srgb : base_format;
    case vk::Format::eB8G8R8A8Srgb:
        if (is_vo_surface) {
            return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm : vk::Format::eB8G8R8A8Unorm;
        }
        return comp_swap_alt ? vk::Format::eR8G8B8A8Srgb : base_format;
    default:
        break;
    }
    UNREACHABLE_MSG("Unsupported base format {}", vk::to_string(base_format));
}
vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) { vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) {
if (z_format == DepthBuffer::ZFormat::Z32Float && if (z_format == DepthBuffer::ZFormat::Z32Float &&
stencil_format == DepthBuffer::StencilFormat::Stencil8) { stencil_format == DepthBuffer::StencilFormat::Stencil8) {

View file

@ -40,6 +40,9 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color);
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
vk::Format AdjustColorBufferFormat(vk::Format base_format,
Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface);
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format); Liverpool::DepthBuffer::StencilFormat stencil_format);

View file

@ -192,19 +192,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
return true; return true;
} }
Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address) {
// Request presentation image from the texture cache.
const auto info = VideoCore::ImageInfo{attribute};
auto& image = texture_cache.FindImage(info, cpu_address);
return PrepareFrameInternal(image);
}
Frame* RendererVulkan::PrepareBlankFrame() {
auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
return PrepareFrameInternal(image);
}
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
// Request a free presentation frame. // Request a free presentation frame.
Frame* frame = GetRenderFrame(); Frame* frame = GetRenderFrame();

View file

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <condition_variable> #include <condition_variable>
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/vk_swapchain.h"
@ -38,8 +39,28 @@ public:
~RendererVulkan(); ~RendererVulkan();
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address); VAddr cpu_address) {
Frame* PrepareBlankFrame(); auto& image = RegisterVideoOutSurface(attribute, cpu_address);
return PrepareFrameInternal(image);
}
/// Prepares a presentation frame backed by the texture cache's null image
/// (VideoCore::NULL_IMAGE_ID) instead of a guest video-out buffer.
Frame* PrepareBlankFrame() {
    return PrepareFrameInternal(texture_cache.GetImage(VideoCore::NULL_IMAGE_ID));
}
/// Records `cpu_address` as a video-out surface and returns the texture-cache
/// image that backs it.
///
/// @param attribute   Video-out buffer attributes used to build the ImageInfo.
/// @param cpu_address Guest address of the buffer.
/// @return The image found (or created) by the texture cache for this buffer.
VideoCore::Image& RegisterVideoOutSurface(
    const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
    // PrepareFrame() also routes through this function, so the same address is
    // registered repeatedly. Store each address once; otherwise the list grows
    // without bound and every IsVideoOutSurface() scan gets slower.
    const bool already_known = std::find(vo_buffers_addr.cbegin(), vo_buffers_addr.cend(),
                                         cpu_address) != vo_buffers_addr.cend();
    if (!already_known) {
        vo_buffers_addr.emplace_back(cpu_address);
    }
    const auto info = VideoCore::ImageInfo{attribute};
    return texture_cache.FindImage(info, cpu_address);
}
/// Tells whether the colour buffer's address matches any address previously
/// recorded in vo_buffers_addr (linear scan).
bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
    const auto matches_buffer = [&color_buffer](VAddr registered_addr) {
        return registered_addr == color_buffer.Address();
    };
    return std::any_of(vo_buffers_addr.cbegin(), vo_buffers_addr.cend(), matches_buffer);
}
bool ShowSplash(Frame* frame = nullptr); bool ShowSplash(Frame* frame = nullptr);
void Present(Frame* frame); void Present(Frame* frame);
@ -63,6 +84,7 @@ private:
std::condition_variable free_cv; std::condition_variable free_cv;
std::condition_variable_any frame_cv; std::condition_variable_any frame_cv;
std::optional<VideoCore::Image> splash_img; std::optional<VideoCore::Image> splash_img;
std::vector<VAddr> vo_buffers_addr;
}; };
} // namespace Vulkan } // namespace Vulkan

View file

@ -95,8 +95,9 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
const u32 size = vsharp.GetSize(); const u32 size = vsharp.GetSize();
const VAddr addr = vsharp.base_address.Value(); const VAddr addr = vsharp.base_address.Value();
texture_cache.OnCpuWrite(addr); texture_cache.OnCpuWrite(addr);
const u32 offset = const u32 offset = staging.Copy(addr, size,
staging.Copy(addr, size, buffer.is_storage ? 4 : instance.UniformMinAlignment()); buffer.is_storage ? instance.StorageMinAlignment()
: instance.UniformMinAlignment());
// const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr); // const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
buffer_infos.emplace_back(staging.Handle(), offset, size); buffer_infos.emplace_back(staging.Handle(), offset, size);
set_writes.push_back({ set_writes.push_back({

View file

@ -327,7 +327,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset); const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
const u32 size = vsharp.GetSize(); const u32 size = vsharp.GetSize();
const u32 offset = staging.Copy(vsharp.base_address.Value(), size, const u32 offset = staging.Copy(vsharp.base_address.Value(), size,
buffer.is_storage ? 4 : instance.UniformMinAlignment()); buffer.is_storage ? instance.StorageMinAlignment()
: instance.UniformMinAlignment());
buffer_infos.emplace_back(staging.Handle(), offset, size); buffer_infos.emplace_back(staging.Handle(), offset, size);
set_writes.push_back({ set_writes.push_back({
.dstSet = VK_NULL_HANDLE, .dstSet = VK_NULL_HANDLE,
@ -399,7 +400,7 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
}; };
// Calculate buffers memory overlaps // Calculate buffers memory overlaps
std::vector<BufferRange> ranges{}; boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
for (const auto& input : vs_info.vs_inputs) { for (const auto& input : vs_info.vs_inputs) {
const auto& buffer = guest_buffers.emplace_back( const auto& buffer = guest_buffers.emplace_back(
vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset)); vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset));

View file

@ -71,7 +71,7 @@ public:
} }
[[nodiscard]] bool IsEmbeddedVs() const noexcept { [[nodiscard]] bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x59c556606a027efd; static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[0] == EmbeddedVsHash; return key.stage_hashes[0] == EmbeddedVsHash;
} }

View file

@ -213,6 +213,7 @@ bool Instance::CreateDevice() {
}, },
vk::PhysicalDeviceVulkan12Features{ vk::PhysicalDeviceVulkan12Features{
.scalarBlockLayout = true, .scalarBlockLayout = true,
.uniformBufferStandardLayout = true,
.hostQueryReset = true, .hostQueryReset = true,
.timelineSemaphore = true, .timelineSemaphore = true,
}, },

View file

@ -169,6 +169,11 @@ public:
return properties.limits.minUniformBufferOffsetAlignment; return properties.limits.minUniformBufferOffsetAlignment;
} }
/// Returns the minimum required alignment for storage buffers.
/// This is the physical device's minStorageBufferOffsetAlignment limit, read
/// from the cached device properties; callers use it to align offsets of
/// storage-buffer data placed in a shared staging buffer.
vk::DeviceSize StorageMinAlignment() const {
return properties.limits.minStorageBufferOffsetAlignment;
}
/// Returns the minimum alignment required for accessing host-mapped device memory /// Returns the minimum alignment required for accessing host-mapped device memory
vk::DeviceSize NonCoherentAtomSize() const { vk::DeviceSize NonCoherentAtomSize() const {
return properties.limits.nonCoherentAtomSize; return properties.limits.nonCoherentAtomSize;

View file

@ -1,7 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <xxhash.h>
#include "common/config.h" #include "common/config.h"
#include "common/io_file.h" #include "common/io_file.h"
#include "common/path_util.h" #include "common/path_util.h"
@ -9,11 +8,14 @@
#include "shader_recompiler/exception.h" #include "shader_recompiler/exception.h"
#include "shader_recompiler/recompiler.h" #include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_shader_util.h"
extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
namespace Vulkan { namespace Vulkan {
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data, Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
@ -74,8 +76,8 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
const ComputePipeline* PipelineCache::GetComputePipeline() { const ComputePipeline* PipelineCache::GetComputePipeline() {
const auto& cs_pgm = liverpool->regs.cs_program; const auto& cs_pgm = liverpool->regs.cs_program;
ASSERT(cs_pgm.Address() != nullptr); ASSERT(cs_pgm.Address() != nullptr);
const auto code = cs_pgm.Code(); const auto* bininfo = Liverpool::GetBinaryInfo(cs_pgm);
compute_key = XXH3_64bits(code.data(), code.size_bytes()); compute_key = bininfo->shader_hash;
const auto [it, is_new] = compute_pipelines.try_emplace(compute_key); const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
if (is_new) { if (is_new) {
it.value() = CreateComputePipeline(); it.value() = CreateComputePipeline();
@ -130,8 +132,11 @@ void PipelineCache::RefreshGraphicsKey() {
if (!col_buf) { if (!col_buf) {
continue; continue;
} }
key.color_formats[remapped_cb] = const auto base_format =
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf);
key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat(
base_format, col_buf.info.comp_swap.Value(), is_vo_surface);
key.blend_controls[remapped_cb] = regs.blend_control[cb]; key.blend_controls[remapped_cb] = regs.blend_control[cb];
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
!col_buf.info.blend_bypass); !col_buf.info.blend_bypass);
@ -147,8 +152,8 @@ void PipelineCache::RefreshGraphicsKey() {
key.stage_hashes[i] = 0; key.stage_hashes[i] = 0;
continue; continue;
} }
const auto code = pgm->Code(); const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
key.stage_hashes[i] = XXH3_64bits(code.data(), code.size_bytes()); key.stage_hashes[i] = bininfo->shader_hash;
} }
} }
@ -243,7 +248,7 @@ void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stag
if (!std::filesystem::exists(dump_dir)) { if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir); std::filesystem::create_directories(dump_dir);
} }
const auto filename = fmt::format("{}_{:#X}.{}", stage, hash, ext); const auto filename = fmt::format("{}_{:#018x}.{}", stage, hash, ext);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteSpan(code); file.WriteSpan(code);
} }

View file

@ -124,9 +124,9 @@ void Swapchain::FindPresentFormat() {
const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface); const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface);
// If there is a single undefined surface format, the device doesn't care, so we'll just use // If there is a single undefined surface format, the device doesn't care, so we'll just use
// RGBA. // RGBA sRGB.
if (formats[0].format == vk::Format::eUndefined) { if (formats[0].format == vk::Format::eUndefined) {
surface_format.format = vk::Format::eR8G8B8A8Unorm; surface_format.format = vk::Format::eR8G8B8A8Srgb;
surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear; surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
return; return;
} }
@ -134,7 +134,7 @@ void Swapchain::FindPresentFormat() {
// Try to find a suitable format. // Try to find a suitable format.
for (const vk::SurfaceFormatKHR& sformat : formats) { for (const vk::SurfaceFormatKHR& sformat : formats) {
vk::Format format = sformat.format; vk::Format format = sformat.format;
if (format != vk::Format::eR8G8B8A8Unorm && format != vk::Format::eB8G8R8A8Unorm) { if (format != vk::Format::eR8G8B8A8Srgb && format != vk::Format::eB8G8R8A8Srgb) {
continue; continue;
} }

View file

@ -22,7 +22,7 @@ static vk::Format ConvertPixelFormat(const VideoOutFormat format) {
case VideoOutFormat::A8R8G8B8Srgb: case VideoOutFormat::A8R8G8B8Srgb:
return vk::Format::eB8G8R8A8Srgb; return vk::Format::eB8G8R8A8Srgb;
case VideoOutFormat::A8B8G8R8Srgb: case VideoOutFormat::A8B8G8R8Srgb:
return vk::Format::eA8B8G8R8SrgbPack32; return vk::Format::eR8G8B8A8Srgb;
case VideoOutFormat::A2R10G10B10: case VideoOutFormat::A2R10G10B10:
case VideoOutFormat::A2R10G10B10Srgb: case VideoOutFormat::A2R10G10B10Srgb:
return vk::Format::eA2R10G10B10UnormPack32; return vk::Format::eA2R10G10B10UnormPack32;
@ -57,6 +57,17 @@ bool ImageInfo::IsBlockCoded() const {
} }
} }
/// Returns true when the image uses one of the 16-bit packed formats
/// (B5G5R5A1 or B5G6R5 UNORM); every other format yields false.
bool ImageInfo::IsPacked() const {
    const bool is_b5g5r5a1 = pixel_format == vk::Format::eB5G5R5A1UnormPack16;
    const bool is_b5g6r5 = pixel_format == vk::Format::eB5G6R5UnormPack16;
    return is_b5g5r5a1 || is_b5g6r5;
}
bool ImageInfo::IsDepthStencil() const { bool ImageInfo::IsDepthStencil() const {
switch (pixel_format) { switch (pixel_format) {
case vk::Format::eD16Unorm: case vk::Format::eD16Unorm:
@ -76,7 +87,7 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
if (info.IsDepthStencil()) { if (info.IsDepthStencil()) {
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment; usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else { } else {
if (!info.IsBlockCoded()) { if (!info.IsBlockCoded() && !info.IsPacked()) {
usage |= vk::ImageUsageFlagBits::eColorAttachment; usage |= vk::ImageUsageFlagBits::eColorAttachment;
} }
} }
@ -110,8 +121,7 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
size.width = attrib.width; size.width = attrib.width;
size.height = attrib.height; size.height = attrib.height;
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) >> 7; pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) >> 7;
const bool is_32bpp = pixel_format == vk::Format::eB8G8R8A8Srgb || const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float;
pixel_format == vk::Format::eA8B8G8R8SrgbPack32;
ASSERT(is_32bpp); ASSERT(is_32bpp);
if (!is_tiled) { if (!is_tiled) {
guest_size_bytes = pitch * size.height * 4; guest_size_bytes = pitch * size.height * 4;
@ -122,6 +132,7 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
} else { } else {
guest_size_bytes = pitch * 128 * ((size.height + 63) & (~63)) * 4; guest_size_bytes = pitch * 128 * ((size.height + 63) & (~63)) * 4;
} }
is_vo_surface = true;
} }
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,

View file

@ -43,10 +43,12 @@ struct ImageInfo {
explicit ImageInfo(const AmdGpu::Image& image) noexcept; explicit ImageInfo(const AmdGpu::Image& image) noexcept;
bool IsBlockCoded() const; bool IsBlockCoded() const;
bool IsPacked() const;
bool IsDepthStencil() const; bool IsDepthStencil() const;
bool is_tiled = false; bool is_tiled = false;
bool is_storage = false; bool is_storage = false;
bool is_vo_surface = false;
vk::Format pixel_format = vk::Format::eUndefined; vk::Format pixel_format = vk::Format::eUndefined;
vk::ImageType type = vk::ImageType::e1D; vk::ImageType type = vk::ImageType::e1D;
vk::ImageUsageFlags usage; vk::ImageUsageFlags usage;

View file

@ -62,6 +62,14 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexce
} }
} }
/// Builds view parameters for rendering into a colour buffer.
/// Only `format` is set here: it is the buffer's surface format adjusted for
/// its component swap mode and for whether it is a video-out surface. All
/// other members keep their in-class defaults.
ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
                             bool is_vo_surface) noexcept {
    namespace ToVk = Vulkan::LiverpoolToVK;
    const vk::Format unswapped =
        ToVk::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat());
    const auto swap_mode = col_buffer.info.comp_swap.Value();
    format = ToVk::AdjustColorBufferFormat(unswapped, swap_mode, is_vo_surface);
}
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
std::optional<vk::ImageUsageFlags> usage_override /*= {}*/) std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
: info{info_} { : info{info_} {

View file

@ -3,6 +3,7 @@
#pragma once #pragma once
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h" #include "video_core/texture_cache/types.h"
@ -19,6 +20,8 @@ namespace VideoCore {
struct ImageViewInfo { struct ImageViewInfo {
explicit ImageViewInfo() = default; explicit ImageViewInfo() = default;
explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept;
explicit ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
bool is_vo_surface) noexcept;
vk::ImageViewType type = vk::ImageViewType::e2D; vk::ImageViewType type = vk::ImageViewType::e2D;
vk::Format format = vk::Format::eR8G8B8A8Unorm; vk::Format format = vk::Format::eR8G8B8A8Unorm;

View file

@ -183,8 +183,7 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
vk::AccessFlagBits::eColorAttachmentWrite | vk::AccessFlagBits::eColorAttachmentWrite |
vk::AccessFlagBits::eColorAttachmentRead); vk::AccessFlagBits::eColorAttachmentRead);
ImageViewInfo view_info; ImageViewInfo view_info{buffer, image.info.is_vo_surface};
view_info.format = info.pixel_format;
return RegisterImageView(image, view_info); return RegisterImageView(image, view_info);
} }

View file

@ -178,8 +178,12 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
switch (format) { switch (format) {
case vk::Format::eR8Unorm: case vk::Format::eR8Unorm:
return vk::Format::eR8Uint; return vk::Format::eR8Uint;
case vk::Format::eR8G8B8A8Srgb:
[[fallthrough]];
case vk::Format::eB8G8R8A8Srgb: case vk::Format::eB8G8R8A8Srgb:
[[fallthrough]]; [[fallthrough]];
case vk::Format::eB8G8R8A8Unorm:
[[fallthrough]];
case vk::Format::eR8G8B8A8Unorm: case vk::Format::eR8G8B8A8Unorm:
return vk::Format::eR32Uint; return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock: case vk::Format::eBc1RgbaUnormBlock:
@ -315,7 +319,8 @@ bool TileManager::TryDetile(Image& image) {
return false; return false;
} }
const auto offset = staging.Copy(image.cpu_addr, image.info.guest_size_bytes, 4); const auto offset =
staging.Copy(image.cpu_addr, image.info.guest_size_bytes, instance.StorageMinAlignment());
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
auto cmdbuf = scheduler.CommandBuffer(); auto cmdbuf = scheduler.CommandBuffer();