mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-22 14:31:39 +00:00
Merge pull request #201 from shadps4-emu/stabilization_4
Proper color buffers color handling and various fixes
This commit is contained in:
commit
3552484b33
|
@ -51,4 +51,7 @@ enum MarkersPallete : int {
|
||||||
#define GPU_SCOPE_LOCATION(name, color) \
|
#define GPU_SCOPE_LOCATION(name, color) \
|
||||||
tracy::SourceLocationData{name, TracyFunction, TracyFile, (uint32_t)TracyLine, color};
|
tracy::SourceLocationData{name, TracyFunction, TracyFile, (uint32_t)TracyLine, color};
|
||||||
|
|
||||||
|
#define MUTEX_LOCATION(name) \
|
||||||
|
tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0};
|
||||||
|
|
||||||
#define FRAME_END FrameMark
|
#define FRAME_END FrameMark
|
||||||
|
|
|
@ -622,7 +622,6 @@ int PS4_SYSV_ABI sceGnmGetShaderStatus() {
|
||||||
|
|
||||||
VAddr PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress() {
|
VAddr PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress() {
|
||||||
LOG_TRACE(Lib_GnmDriver, "called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
// Actual virtual buffer address is hardcoded in the driver to 0xff00'000
|
|
||||||
return tessellation_factors_ring_addr;
|
return tessellation_factors_ring_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -964,15 +963,16 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1
|
0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1
|
||||||
0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0
|
0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0
|
||||||
0x7e020b01u, // v_cvt_f32_i32 v1, v1
|
0x7e020b01u, // v_cvt_f32_i32 v1, v1
|
||||||
0x7E000B00U,
|
0x7e000b00U, // v_cvt_f32_i32 v0, v0
|
||||||
0x7e040280u, // v_cvt_f32_i32 v0, v0
|
0x7e040280u, // v_mov_b32 v2, 0
|
||||||
0x7e0602f2u, // v_mov_b32 v3, 1.0
|
0x7e0602f2u, // v_mov_b32 v3, 1.0
|
||||||
0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done
|
0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done
|
||||||
0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3
|
0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3
|
||||||
0xbf810000u, // s_endpgm
|
0xbf810000u, // s_endpgm
|
||||||
|
|
||||||
// OrbShdr header
|
// Binary header
|
||||||
0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du,
|
0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du,
|
||||||
|
// VS regs
|
||||||
0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u,
|
0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u,
|
||||||
};
|
};
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
@ -1512,9 +1512,9 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
|
||||||
const auto& ccb_span = std::span<const u32>{ccb, ccb_size_dw};
|
const auto& ccb_span = std::span<const u32>{ccb, ccb_size_dw};
|
||||||
|
|
||||||
if (Config::dumpPM4()) {
|
if (Config::dumpPM4()) {
|
||||||
static auto last_frame_num = frames_submitted;
|
static auto last_frame_num = -1LL;
|
||||||
static u32 seq_num{};
|
static u32 seq_num{};
|
||||||
if (last_frame_num && last_frame_num == frames_submitted) {
|
if (last_frame_num == frames_submitted) {
|
||||||
++seq_num;
|
++seq_num;
|
||||||
} else {
|
} else {
|
||||||
last_frame_num = frames_submitted;
|
last_frame_num = frames_submitted;
|
||||||
|
|
|
@ -429,7 +429,11 @@ int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMut
|
||||||
|
|
||||||
int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr);
|
int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr);
|
||||||
|
|
||||||
|
static auto mutex_loc = MUTEX_LOCATION("mutex");
|
||||||
|
(*mutex)->tracy_lock = std::make_unique<tracy::LockableCtx>(&mutex_loc);
|
||||||
|
|
||||||
if (name != nullptr) {
|
if (name != nullptr) {
|
||||||
|
(*mutex)->tracy_lock->CustomName(name, std::strlen(name));
|
||||||
LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
|
LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -526,7 +530,11 @@ int PS4_SYSV_ABI scePthreadMutexattrSetprotocol(ScePthreadMutexattr* attr, int p
|
||||||
UNREACHABLE_MSG("Invalid protocol: {}", protocol);
|
UNREACHABLE_MSG("Invalid protocol: {}", protocol);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if _WIN64
|
||||||
|
int result = 0;
|
||||||
|
#else
|
||||||
int result = pthread_mutexattr_setprotocol(&(*attr)->pth_mutex_attr, pprotocol);
|
int result = pthread_mutexattr_setprotocol(&(*attr)->pth_mutex_attr, pprotocol);
|
||||||
|
#endif
|
||||||
(*attr)->pprotocol = pprotocol;
|
(*attr)->pprotocol = pprotocol;
|
||||||
return result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL;
|
return result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL;
|
||||||
}
|
}
|
||||||
|
@ -537,10 +545,15 @@ int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) {
|
||||||
return SCE_KERNEL_ERROR_EINVAL;
|
return SCE_KERNEL_ERROR_EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(*mutex)->tracy_lock->BeforeLock();
|
||||||
|
|
||||||
int result = pthread_mutex_lock(&(*mutex)->pth_mutex);
|
int result = pthread_mutex_lock(&(*mutex)->pth_mutex);
|
||||||
if (result != 0) {
|
if (result != 0) {
|
||||||
LOG_TRACE(Kernel_Pthread, "Locked name={}, result={}", (*mutex)->name, result);
|
LOG_TRACE(Kernel_Pthread, "Locked name={}, result={}", (*mutex)->name, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(*mutex)->tracy_lock->AfterLock();
|
||||||
|
|
||||||
switch (result) {
|
switch (result) {
|
||||||
case 0:
|
case 0:
|
||||||
return SCE_OK;
|
return SCE_OK;
|
||||||
|
@ -565,6 +578,9 @@ int PS4_SYSV_ABI scePthreadMutexUnlock(ScePthreadMutex* mutex) {
|
||||||
if (result != 0) {
|
if (result != 0) {
|
||||||
LOG_TRACE(Kernel_Pthread, "Unlocking name={}, result={}", (*mutex)->name, result);
|
LOG_TRACE(Kernel_Pthread, "Unlocking name={}, result={}", (*mutex)->name, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(*mutex)->tracy_lock->AfterUnlock();
|
||||||
|
|
||||||
switch (result) {
|
switch (result) {
|
||||||
case 0:
|
case 0:
|
||||||
return SCE_OK;
|
return SCE_OK;
|
||||||
|
@ -1095,6 +1111,9 @@ int PS4_SYSV_ABI scePthreadMutexTrylock(ScePthreadMutex* mutex) {
|
||||||
if (result != 0) {
|
if (result != 0) {
|
||||||
LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
|
LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(*mutex)->tracy_lock->AfterTryLock(result == 0);
|
||||||
|
|
||||||
switch (result) {
|
switch (result) {
|
||||||
case 0:
|
case 0:
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
|
#include "common/debug.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
namespace Core::Loader {
|
namespace Core::Loader {
|
||||||
|
@ -72,6 +73,7 @@ struct PthreadMutexInternal {
|
||||||
u8 reserved[256];
|
u8 reserved[256];
|
||||||
std::string name;
|
std::string name;
|
||||||
pthread_mutex_t pth_mutex;
|
pthread_mutex_t pth_mutex;
|
||||||
|
std::unique_ptr<tracy::LockableCtx> tracy_lock;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PthreadMutexattrInternal {
|
struct PthreadMutexattrInternal {
|
||||||
|
|
|
@ -134,6 +134,7 @@ int VideoOutDriver::RegisterBuffers(VideoOutPort* port, s32 startIndex, void* co
|
||||||
.address_right = 0,
|
.address_right = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
renderer->RegisterVideoOutSurface(group, address);
|
||||||
LOG_INFO(Lib_VideoOut, "buffers[{}] = {:#x}", i + startIndex, address);
|
LOG_INFO(Lib_VideoOut, "buffers[{}] = {:#x}", i + startIndex, address);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,9 @@ struct Liverpool {
|
||||||
using UserData = std::array<u32, NumShaderUserData>;
|
using UserData = std::array<u32, NumShaderUserData>;
|
||||||
|
|
||||||
struct BinaryInfo {
|
struct BinaryInfo {
|
||||||
u8 signature[7];
|
static constexpr u8 signature_ref[] = {0x4f, 0x72, 0x62, 0x53, 0x68, 0x64, 0x72}; // OrbShdr
|
||||||
|
|
||||||
|
std::array<u8, sizeof(signature_ref)> signature;
|
||||||
u8 version;
|
u8 version;
|
||||||
u32 pssl_or_cg : 1;
|
u32 pssl_or_cg : 1;
|
||||||
u32 cached : 1;
|
u32 cached : 1;
|
||||||
|
@ -65,6 +67,11 @@ struct Liverpool {
|
||||||
u8 reserved3;
|
u8 reserved3;
|
||||||
u64 shader_hash;
|
u64 shader_hash;
|
||||||
u32 crc32;
|
u32 crc32;
|
||||||
|
|
||||||
|
bool Valid() const {
|
||||||
|
return shader_hash && crc32 &&
|
||||||
|
(std::memcmp(signature.data(), signature_ref, sizeof(signature_ref)) == 0);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ShaderProgram {
|
struct ShaderProgram {
|
||||||
|
@ -134,6 +141,14 @@ struct Liverpool {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Shader>
|
||||||
|
static constexpr auto* GetBinaryInfo(const Shader& sh) {
|
||||||
|
const auto* code = sh.template Address<u32>();
|
||||||
|
const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
|
||||||
|
ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
|
||||||
|
return bininfo;
|
||||||
|
}
|
||||||
|
|
||||||
union PsInputControl {
|
union PsInputControl {
|
||||||
u32 raw;
|
u32 raw;
|
||||||
BitField<0, 5, u32> input_offset;
|
BitField<0, 5, u32> input_offset;
|
||||||
|
|
|
@ -277,6 +277,7 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color) {
|
||||||
}
|
}
|
||||||
|
|
||||||
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
||||||
|
|
||||||
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
|
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
|
||||||
num_format == AmdGpu::NumberFormat::Float) {
|
num_format == AmdGpu::NumberFormat::Float) {
|
||||||
return vk::Format::eR32G32B32A32Sfloat;
|
return vk::Format::eR32G32B32A32Sfloat;
|
||||||
|
@ -291,7 +292,7 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
}
|
}
|
||||||
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||||
num_format == AmdGpu::NumberFormat::Srgb) {
|
num_format == AmdGpu::NumberFormat::Srgb) {
|
||||||
return vk::Format::eB8G8R8A8Srgb;
|
return vk::Format::eR8G8B8A8Srgb;
|
||||||
}
|
}
|
||||||
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
|
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
|
||||||
num_format == AmdGpu::NumberFormat::Float) {
|
num_format == AmdGpu::NumberFormat::Float) {
|
||||||
|
@ -353,6 +354,31 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vk::Format AdjustColorBufferFormat(vk::Format base_format,
|
||||||
|
Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface) {
|
||||||
|
ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard ||
|
||||||
|
comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate,
|
||||||
|
"Unsupported component swap mode {}", static_cast<u32>(comp_swap));
|
||||||
|
|
||||||
|
const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate;
|
||||||
|
|
||||||
|
switch (base_format) {
|
||||||
|
case vk::Format::eR8G8B8A8Unorm:
|
||||||
|
return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm : base_format;
|
||||||
|
case vk::Format::eB8G8R8A8Unorm:
|
||||||
|
return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm : base_format;
|
||||||
|
case vk::Format::eR8G8B8A8Srgb:
|
||||||
|
return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm
|
||||||
|
: is_vo_surface ? vk::Format::eR8G8B8A8Unorm
|
||||||
|
: base_format;
|
||||||
|
case vk::Format::eB8G8R8A8Srgb:
|
||||||
|
return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm
|
||||||
|
: is_vo_surface ? vk::Format::eB8G8R8A8Unorm
|
||||||
|
: base_format;
|
||||||
|
}
|
||||||
|
UNREACHABLE_MSG("Unsupported base format {}", vk::to_string(base_format));
|
||||||
|
}
|
||||||
|
|
||||||
vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) {
|
vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) {
|
||||||
if (z_format == DepthBuffer::ZFormat::Z32Float &&
|
if (z_format == DepthBuffer::ZFormat::Z32Float &&
|
||||||
stencil_format == DepthBuffer::StencilFormat::Stencil8) {
|
stencil_format == DepthBuffer::StencilFormat::Stencil8) {
|
||||||
|
|
|
@ -40,6 +40,9 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color);
|
||||||
|
|
||||||
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
|
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
|
||||||
|
|
||||||
|
vk::Format AdjustColorBufferFormat(vk::Format base_format,
|
||||||
|
Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface);
|
||||||
|
|
||||||
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
|
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
|
||||||
Liverpool::DepthBuffer::StencilFormat stencil_format);
|
Liverpool::DepthBuffer::StencilFormat stencil_format);
|
||||||
|
|
||||||
|
|
|
@ -192,19 +192,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
|
|
||||||
VAddr cpu_address) {
|
|
||||||
// Request presentation image from the texture cache.
|
|
||||||
const auto info = VideoCore::ImageInfo{attribute};
|
|
||||||
auto& image = texture_cache.FindImage(info, cpu_address);
|
|
||||||
return PrepareFrameInternal(image);
|
|
||||||
}
|
|
||||||
|
|
||||||
Frame* RendererVulkan::PrepareBlankFrame() {
|
|
||||||
auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
|
|
||||||
return PrepareFrameInternal(image);
|
|
||||||
}
|
|
||||||
|
|
||||||
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
|
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
|
||||||
// Request a free presentation frame.
|
// Request a free presentation frame.
|
||||||
Frame* frame = GetRenderFrame();
|
Frame* frame = GetRenderFrame();
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||||
|
@ -38,8 +39,28 @@ public:
|
||||||
~RendererVulkan();
|
~RendererVulkan();
|
||||||
|
|
||||||
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
|
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
|
||||||
VAddr cpu_address);
|
VAddr cpu_address) {
|
||||||
Frame* PrepareBlankFrame();
|
auto& image = RegisterVideoOutSurface(attribute, cpu_address);
|
||||||
|
return PrepareFrameInternal(image);
|
||||||
|
}
|
||||||
|
|
||||||
|
Frame* PrepareBlankFrame() {
|
||||||
|
auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
|
||||||
|
return PrepareFrameInternal(image);
|
||||||
|
}
|
||||||
|
|
||||||
|
VideoCore::Image& RegisterVideoOutSurface(
|
||||||
|
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
|
||||||
|
vo_buffers_addr.emplace_back(cpu_address);
|
||||||
|
const auto info = VideoCore::ImageInfo{attribute};
|
||||||
|
return texture_cache.FindImage(info, cpu_address);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
|
||||||
|
return std::find_if(vo_buffers_addr.cbegin(), vo_buffers_addr.cend(), [&](VAddr vo_buffer) {
|
||||||
|
return vo_buffer == color_buffer.Address();
|
||||||
|
}) != vo_buffers_addr.cend();
|
||||||
|
}
|
||||||
|
|
||||||
bool ShowSplash(Frame* frame = nullptr);
|
bool ShowSplash(Frame* frame = nullptr);
|
||||||
void Present(Frame* frame);
|
void Present(Frame* frame);
|
||||||
|
@ -63,6 +84,7 @@ private:
|
||||||
std::condition_variable free_cv;
|
std::condition_variable free_cv;
|
||||||
std::condition_variable_any frame_cv;
|
std::condition_variable_any frame_cv;
|
||||||
std::optional<VideoCore::Image> splash_img;
|
std::optional<VideoCore::Image> splash_img;
|
||||||
|
std::vector<VAddr> vo_buffers_addr;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -95,8 +95,9 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
||||||
const u32 size = vsharp.GetSize();
|
const u32 size = vsharp.GetSize();
|
||||||
const VAddr addr = vsharp.base_address.Value();
|
const VAddr addr = vsharp.base_address.Value();
|
||||||
texture_cache.OnCpuWrite(addr);
|
texture_cache.OnCpuWrite(addr);
|
||||||
const u32 offset =
|
const u32 offset = staging.Copy(addr, size,
|
||||||
staging.Copy(addr, size, buffer.is_storage ? 4 : instance.UniformMinAlignment());
|
buffer.is_storage ? instance.StorageMinAlignment()
|
||||||
|
: instance.UniformMinAlignment());
|
||||||
// const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
|
// const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
|
||||||
buffer_infos.emplace_back(staging.Handle(), offset, size);
|
buffer_infos.emplace_back(staging.Handle(), offset, size);
|
||||||
set_writes.push_back({
|
set_writes.push_back({
|
||||||
|
|
|
@ -327,7 +327,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
||||||
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
||||||
const u32 size = vsharp.GetSize();
|
const u32 size = vsharp.GetSize();
|
||||||
const u32 offset = staging.Copy(vsharp.base_address.Value(), size,
|
const u32 offset = staging.Copy(vsharp.base_address.Value(), size,
|
||||||
buffer.is_storage ? 4 : instance.UniformMinAlignment());
|
buffer.is_storage ? instance.StorageMinAlignment()
|
||||||
|
: instance.UniformMinAlignment());
|
||||||
buffer_infos.emplace_back(staging.Handle(), offset, size);
|
buffer_infos.emplace_back(staging.Handle(), offset, size);
|
||||||
set_writes.push_back({
|
set_writes.push_back({
|
||||||
.dstSet = VK_NULL_HANDLE,
|
.dstSet = VK_NULL_HANDLE,
|
||||||
|
@ -399,7 +400,7 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Calculate buffers memory overlaps
|
// Calculate buffers memory overlaps
|
||||||
std::vector<BufferRange> ranges{};
|
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
|
||||||
for (const auto& input : vs_info.vs_inputs) {
|
for (const auto& input : vs_info.vs_inputs) {
|
||||||
const auto& buffer = guest_buffers.emplace_back(
|
const auto& buffer = guest_buffers.emplace_back(
|
||||||
vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset));
|
vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset));
|
||||||
|
|
|
@ -71,7 +71,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool IsEmbeddedVs() const noexcept {
|
[[nodiscard]] bool IsEmbeddedVs() const noexcept {
|
||||||
static constexpr size_t EmbeddedVsHash = 0x59c556606a027efd;
|
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
||||||
return key.stage_hashes[0] == EmbeddedVsHash;
|
return key.stage_hashes[0] == EmbeddedVsHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -213,6 +213,7 @@ bool Instance::CreateDevice() {
|
||||||
},
|
},
|
||||||
vk::PhysicalDeviceVulkan12Features{
|
vk::PhysicalDeviceVulkan12Features{
|
||||||
.scalarBlockLayout = true,
|
.scalarBlockLayout = true,
|
||||||
|
.uniformBufferStandardLayout = true,
|
||||||
.hostQueryReset = true,
|
.hostQueryReset = true,
|
||||||
.timelineSemaphore = true,
|
.timelineSemaphore = true,
|
||||||
},
|
},
|
||||||
|
|
|
@ -169,6 +169,11 @@ public:
|
||||||
return properties.limits.minUniformBufferOffsetAlignment;
|
return properties.limits.minUniformBufferOffsetAlignment;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the minimum required alignment for storage buffers
|
||||||
|
vk::DeviceSize StorageMinAlignment() const {
|
||||||
|
return properties.limits.minStorageBufferOffsetAlignment;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the minimum alignemt required for accessing host-mapped device memory
|
/// Returns the minimum alignemt required for accessing host-mapped device memory
|
||||||
vk::DeviceSize NonCoherentAtomSize() const {
|
vk::DeviceSize NonCoherentAtomSize() const {
|
||||||
return properties.limits.nonCoherentAtomSize;
|
return properties.limits.nonCoherentAtomSize;
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <xxhash.h>
|
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
#include "common/io_file.h"
|
#include "common/io_file.h"
|
||||||
#include "common/path_util.h"
|
#include "common/path_util.h"
|
||||||
|
@ -9,11 +8,14 @@
|
||||||
#include "shader_recompiler/exception.h"
|
#include "shader_recompiler/exception.h"
|
||||||
#include "shader_recompiler/recompiler.h"
|
#include "shader_recompiler/recompiler.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||||
|
|
||||||
|
extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
|
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
|
||||||
|
@ -74,8 +76,8 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
|
||||||
const ComputePipeline* PipelineCache::GetComputePipeline() {
|
const ComputePipeline* PipelineCache::GetComputePipeline() {
|
||||||
const auto& cs_pgm = liverpool->regs.cs_program;
|
const auto& cs_pgm = liverpool->regs.cs_program;
|
||||||
ASSERT(cs_pgm.Address() != nullptr);
|
ASSERT(cs_pgm.Address() != nullptr);
|
||||||
const auto code = cs_pgm.Code();
|
const auto* bininfo = Liverpool::GetBinaryInfo(cs_pgm);
|
||||||
compute_key = XXH3_64bits(code.data(), code.size_bytes());
|
compute_key = bininfo->shader_hash;
|
||||||
const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
|
const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
|
||||||
if (is_new) {
|
if (is_new) {
|
||||||
it.value() = CreateComputePipeline();
|
it.value() = CreateComputePipeline();
|
||||||
|
@ -130,8 +132,11 @@ void PipelineCache::RefreshGraphicsKey() {
|
||||||
if (!col_buf) {
|
if (!col_buf) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
key.color_formats[remapped_cb] =
|
const auto base_format =
|
||||||
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
|
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
|
||||||
|
const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf);
|
||||||
|
key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat(
|
||||||
|
base_format, col_buf.info.comp_swap.Value(), is_vo_surface);
|
||||||
key.blend_controls[remapped_cb] = regs.blend_control[cb];
|
key.blend_controls[remapped_cb] = regs.blend_control[cb];
|
||||||
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
|
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
|
||||||
!col_buf.info.blend_bypass);
|
!col_buf.info.blend_bypass);
|
||||||
|
@ -147,8 +152,8 @@ void PipelineCache::RefreshGraphicsKey() {
|
||||||
key.stage_hashes[i] = 0;
|
key.stage_hashes[i] = 0;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto code = pgm->Code();
|
const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
|
||||||
key.stage_hashes[i] = XXH3_64bits(code.data(), code.size_bytes());
|
key.stage_hashes[i] = bininfo->shader_hash;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,7 +248,7 @@ void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stag
|
||||||
if (!std::filesystem::exists(dump_dir)) {
|
if (!std::filesystem::exists(dump_dir)) {
|
||||||
std::filesystem::create_directories(dump_dir);
|
std::filesystem::create_directories(dump_dir);
|
||||||
}
|
}
|
||||||
const auto filename = fmt::format("{}_{:#X}.{}", stage, hash, ext);
|
const auto filename = fmt::format("{}_{:#018x}.{}", stage, hash, ext);
|
||||||
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
||||||
file.WriteSpan(code);
|
file.WriteSpan(code);
|
||||||
}
|
}
|
||||||
|
|
|
@ -124,9 +124,9 @@ void Swapchain::FindPresentFormat() {
|
||||||
const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface);
|
const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface);
|
||||||
|
|
||||||
// If there is a single undefined surface format, the device doesn't care, so we'll just use
|
// If there is a single undefined surface format, the device doesn't care, so we'll just use
|
||||||
// RGBA.
|
// RGBA sRGB.
|
||||||
if (formats[0].format == vk::Format::eUndefined) {
|
if (formats[0].format == vk::Format::eUndefined) {
|
||||||
surface_format.format = vk::Format::eR8G8B8A8Unorm;
|
surface_format.format = vk::Format::eR8G8B8A8Srgb;
|
||||||
surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
|
surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -134,7 +134,7 @@ void Swapchain::FindPresentFormat() {
|
||||||
// Try to find a suitable format.
|
// Try to find a suitable format.
|
||||||
for (const vk::SurfaceFormatKHR& sformat : formats) {
|
for (const vk::SurfaceFormatKHR& sformat : formats) {
|
||||||
vk::Format format = sformat.format;
|
vk::Format format = sformat.format;
|
||||||
if (format != vk::Format::eR8G8B8A8Unorm && format != vk::Format::eB8G8R8A8Unorm) {
|
if (format != vk::Format::eR8G8B8A8Srgb && format != vk::Format::eB8G8R8A8Srgb) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ static vk::Format ConvertPixelFormat(const VideoOutFormat format) {
|
||||||
case VideoOutFormat::A8R8G8B8Srgb:
|
case VideoOutFormat::A8R8G8B8Srgb:
|
||||||
return vk::Format::eB8G8R8A8Srgb;
|
return vk::Format::eB8G8R8A8Srgb;
|
||||||
case VideoOutFormat::A8B8G8R8Srgb:
|
case VideoOutFormat::A8B8G8R8Srgb:
|
||||||
return vk::Format::eA8B8G8R8SrgbPack32;
|
return vk::Format::eR8G8B8A8Srgb;
|
||||||
case VideoOutFormat::A2R10G10B10:
|
case VideoOutFormat::A2R10G10B10:
|
||||||
case VideoOutFormat::A2R10G10B10Srgb:
|
case VideoOutFormat::A2R10G10B10Srgb:
|
||||||
return vk::Format::eA2R10G10B10UnormPack32;
|
return vk::Format::eA2R10G10B10UnormPack32;
|
||||||
|
@ -57,6 +57,17 @@ bool ImageInfo::IsBlockCoded() const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ImageInfo::IsPacked() const {
|
||||||
|
switch (pixel_format) {
|
||||||
|
case vk::Format::eB5G5R5A1UnormPack16:
|
||||||
|
[[fallthrough]];
|
||||||
|
case vk::Format::eB5G6R5UnormPack16:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool ImageInfo::IsDepthStencil() const {
|
bool ImageInfo::IsDepthStencil() const {
|
||||||
switch (pixel_format) {
|
switch (pixel_format) {
|
||||||
case vk::Format::eD16Unorm:
|
case vk::Format::eD16Unorm:
|
||||||
|
@ -76,7 +87,7 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
|
||||||
if (info.IsDepthStencil()) {
|
if (info.IsDepthStencil()) {
|
||||||
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
||||||
} else {
|
} else {
|
||||||
if (!info.IsBlockCoded()) {
|
if (!info.IsBlockCoded() && !info.IsPacked()) {
|
||||||
usage |= vk::ImageUsageFlagBits::eColorAttachment;
|
usage |= vk::ImageUsageFlagBits::eColorAttachment;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -110,8 +121,7 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
|
||||||
size.width = attrib.width;
|
size.width = attrib.width;
|
||||||
size.height = attrib.height;
|
size.height = attrib.height;
|
||||||
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) >> 7;
|
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) >> 7;
|
||||||
const bool is_32bpp = pixel_format == vk::Format::eB8G8R8A8Srgb ||
|
const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float;
|
||||||
pixel_format == vk::Format::eA8B8G8R8SrgbPack32;
|
|
||||||
ASSERT(is_32bpp);
|
ASSERT(is_32bpp);
|
||||||
if (!is_tiled) {
|
if (!is_tiled) {
|
||||||
guest_size_bytes = pitch * size.height * 4;
|
guest_size_bytes = pitch * size.height * 4;
|
||||||
|
@ -122,6 +132,7 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
|
||||||
} else {
|
} else {
|
||||||
guest_size_bytes = pitch * 128 * ((size.height + 63) & (~63)) * 4;
|
guest_size_bytes = pitch * 128 * ((size.height + 63) & (~63)) * 4;
|
||||||
}
|
}
|
||||||
|
is_vo_surface = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||||
|
|
|
@ -43,10 +43,12 @@ struct ImageInfo {
|
||||||
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
|
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
|
||||||
|
|
||||||
bool IsBlockCoded() const;
|
bool IsBlockCoded() const;
|
||||||
|
bool IsPacked() const;
|
||||||
bool IsDepthStencil() const;
|
bool IsDepthStencil() const;
|
||||||
|
|
||||||
bool is_tiled = false;
|
bool is_tiled = false;
|
||||||
bool is_storage = false;
|
bool is_storage = false;
|
||||||
|
bool is_vo_surface = false;
|
||||||
vk::Format pixel_format = vk::Format::eUndefined;
|
vk::Format pixel_format = vk::Format::eUndefined;
|
||||||
vk::ImageType type = vk::ImageType::e1D;
|
vk::ImageType type = vk::ImageType::e1D;
|
||||||
vk::ImageUsageFlags usage;
|
vk::ImageUsageFlags usage;
|
||||||
|
|
|
@ -62,6 +62,14 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexce
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
|
||||||
|
bool is_vo_surface) noexcept {
|
||||||
|
const auto base_format =
|
||||||
|
Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat());
|
||||||
|
format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(
|
||||||
|
base_format, col_buffer.info.comp_swap.Value(), is_vo_surface);
|
||||||
|
}
|
||||||
|
|
||||||
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
|
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
|
||||||
std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
|
std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
|
||||||
: info{info_} {
|
: info{info_} {
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/amdgpu/resource.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
#include "video_core/renderer_vulkan/vk_common.h"
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
#include "video_core/texture_cache/types.h"
|
#include "video_core/texture_cache/types.h"
|
||||||
|
@ -19,6 +20,8 @@ namespace VideoCore {
|
||||||
struct ImageViewInfo {
|
struct ImageViewInfo {
|
||||||
explicit ImageViewInfo() = default;
|
explicit ImageViewInfo() = default;
|
||||||
explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept;
|
explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept;
|
||||||
|
explicit ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
|
||||||
|
bool is_vo_surface) noexcept;
|
||||||
|
|
||||||
vk::ImageViewType type = vk::ImageViewType::e2D;
|
vk::ImageViewType type = vk::ImageViewType::e2D;
|
||||||
vk::Format format = vk::Format::eR8G8B8A8Unorm;
|
vk::Format format = vk::Format::eR8G8B8A8Unorm;
|
||||||
|
|
|
@ -183,8 +183,7 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
|
||||||
vk::AccessFlagBits::eColorAttachmentWrite |
|
vk::AccessFlagBits::eColorAttachmentWrite |
|
||||||
vk::AccessFlagBits::eColorAttachmentRead);
|
vk::AccessFlagBits::eColorAttachmentRead);
|
||||||
|
|
||||||
ImageViewInfo view_info;
|
ImageViewInfo view_info{buffer, image.info.is_vo_surface};
|
||||||
view_info.format = info.pixel_format;
|
|
||||||
return RegisterImageView(image, view_info);
|
return RegisterImageView(image, view_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -178,8 +178,12 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case vk::Format::eR8Unorm:
|
case vk::Format::eR8Unorm:
|
||||||
return vk::Format::eR8Uint;
|
return vk::Format::eR8Uint;
|
||||||
|
case vk::Format::eR8G8B8A8Srgb:
|
||||||
|
[[fallthrough]];
|
||||||
case vk::Format::eB8G8R8A8Srgb:
|
case vk::Format::eB8G8R8A8Srgb:
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
|
case vk::Format::eB8G8R8A8Unorm:
|
||||||
|
[[fallthrough]];
|
||||||
case vk::Format::eR8G8B8A8Unorm:
|
case vk::Format::eR8G8B8A8Unorm:
|
||||||
return vk::Format::eR32Uint;
|
return vk::Format::eR32Uint;
|
||||||
case vk::Format::eBc1RgbaUnormBlock:
|
case vk::Format::eBc1RgbaUnormBlock:
|
||||||
|
@ -315,7 +319,8 @@ bool TileManager::TryDetile(Image& image) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto offset = staging.Copy(image.cpu_addr, image.info.guest_size_bytes, 4);
|
const auto offset =
|
||||||
|
staging.Copy(image.cpu_addr, image.info.guest_size_bytes, instance.StorageMinAlignment());
|
||||||
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
|
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
|
||||||
|
|
||||||
auto cmdbuf = scheduler.CommandBuffer();
|
auto cmdbuf = scheduler.CommandBuffer();
|
||||||
|
|
Loading…
Reference in a new issue