mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-21 05:51:39 +00:00
Merge pull request #225 from shadps4-emu/stabilization/10
Various fixes and improvements
This commit is contained in:
commit
1f83824a8a
|
@ -8,6 +8,7 @@
|
||||||
#include "common/slot_vector.h"
|
#include "common/slot_vector.h"
|
||||||
#include "core/libraries/error_codes.h"
|
#include "core/libraries/error_codes.h"
|
||||||
#include "core/libraries/gnmdriver/gnmdriver.h"
|
#include "core/libraries/gnmdriver/gnmdriver.h"
|
||||||
|
#include "core/libraries/kernel/libkernel.h"
|
||||||
#include "core/libraries/libs.h"
|
#include "core/libraries/libs.h"
|
||||||
#include "core/libraries/videoout/video_out.h"
|
#include "core/libraries/videoout/video_out.h"
|
||||||
#include "core/platform.h"
|
#include "core/platform.h"
|
||||||
|
@ -212,12 +213,61 @@ static constexpr std::array InitSequence350{
|
||||||
0xc0016900u, 0x2aau, 0xffu,
|
0xc0016900u, 0x2aau, 0xffu,
|
||||||
};
|
};
|
||||||
static_assert(InitSequence350.size() == 0x7c);
|
static_assert(InitSequence350.size() == 0x7c);
|
||||||
|
|
||||||
|
static constexpr std::array CtxInitSequence{
|
||||||
|
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||||
|
0xc0001200u, 0u,
|
||||||
|
0xc0002f00u, 1u,
|
||||||
|
0xc0016900u, 0x102u, 0u,
|
||||||
|
0xc0016900u, 0x202u, 0xcc0010u,
|
||||||
|
0xc0111000u, 0u
|
||||||
|
};
|
||||||
|
static_assert(CtxInitSequence.size() == 0x0f);
|
||||||
|
|
||||||
|
static constexpr std::array CtxInitSequence400{
|
||||||
|
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||||
|
0xc0001200u, 0u,
|
||||||
|
0xc0016900u, 0x2f9u, 0x2du,
|
||||||
|
0xc0016900u, 0x282u, 8u,
|
||||||
|
0xc0016900u, 0x280u, 0x80008u,
|
||||||
|
0xc0016900u, 0x281u, 0xffff0000u,
|
||||||
|
0xc0016900u, 0x204u, 0u,
|
||||||
|
0xc0016900u, 0x206u, 0x43fu,
|
||||||
|
0xc0016900u, 0x83u, 0xffffu,
|
||||||
|
0xc0016900u, 0x317u, 0x10u,
|
||||||
|
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||||
|
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||||
|
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||||
|
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||||
|
0xc0016900u, 0x202u, 0xcc0010u,
|
||||||
|
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||||
|
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||||
|
0xc0002f00u, 1u,
|
||||||
|
0xc0016900u, 0x1b1u, 2u,
|
||||||
|
0xc0016900u, 0x101u, 0u,
|
||||||
|
0xc0016900u, 0x100u, 0xffffffffu,
|
||||||
|
0xc0016900u, 0x103u, 0u,
|
||||||
|
0xc0016900u, 0x284u, 0u,
|
||||||
|
0xc0016900u, 0x290u, 0u,
|
||||||
|
0xc0016900u, 0x2aeu, 0u,
|
||||||
|
0xc0016900u, 0x102u, 0u,
|
||||||
|
0xc0016900u, 0x292u, 0u,
|
||||||
|
0xc0016900u, 0x293u, 0x6020000u,
|
||||||
|
0xc0016900u, 0x2f8u, 0u,
|
||||||
|
0xc0016900u, 0x2deu, 0x1e9u,
|
||||||
|
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||||
|
0xc0016900u, 0x2aau, 0xffu,
|
||||||
|
0xc09e1000u,
|
||||||
|
};
|
||||||
|
static_assert(CtxInitSequence400.size() == 0x61);
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
// If `submitDone` is issued, we need to block submissions until the GPU is idle
|
// If `submitDone` is issued, we need to block submissions until the GPU is idle
|
||||||
static u32 submission_lock{};
|
static u32 submission_lock{};
|
||||||
static std::mutex m_submission{};
|
static std::mutex m_submission{};
|
||||||
static u64 frames_submitted{}; // frame counter
|
static u64 frames_submitted{}; // frame counter
|
||||||
|
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
|
||||||
|
static int sdk_version{0};
|
||||||
|
|
||||||
struct AscQueueInfo {
|
struct AscQueueInfo {
|
||||||
VAddr map_addr;
|
VAddr map_addr;
|
||||||
|
@ -664,9 +714,10 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) {
|
||||||
|
|
||||||
cmdbuf = ClearContextState(cmdbuf);
|
cmdbuf = ClearContextState(cmdbuf);
|
||||||
std::memcpy(cmdbuf, InitSequence175.data(), InitSequence175.size() * 4);
|
std::memcpy(cmdbuf, InitSequence175.data(), InitSequence175.size() * 4);
|
||||||
|
cmdbuf += InitSequence175.size();
|
||||||
|
|
||||||
cmdbuf[0x7f] = 0xc07f1000;
|
constexpr auto cmdbuf_left = HwInitPacketSize - InitSequence175.size() - 0xc - 1;
|
||||||
cmdbuf[0x80] = 0;
|
WriteTrailingNop<cmdbuf_left>(cmdbuf);
|
||||||
|
|
||||||
return HwInitPacketSize;
|
return HwInitPacketSize;
|
||||||
}
|
}
|
||||||
|
@ -723,14 +774,28 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
|
||||||
return SetupContext350(cmdbuf, size, true);
|
return SetupContext350(cmdbuf, size, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState() {
|
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
return ORBIS_OK;
|
|
||||||
|
constexpr auto CtxInitPacketSize = 0x20u;
|
||||||
|
if (size != CtxInitPacketSize) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4);
|
||||||
|
return CtxInitPacketSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400() {
|
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
return ORBIS_OK;
|
|
||||||
|
constexpr auto CtxInitPacketSize = 0x100u;
|
||||||
|
if (size != CtxInitPacketSize) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4);
|
||||||
|
return CtxInitPacketSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmDrawOpaqueAuto() {
|
int PS4_SYSV_ABI sceGnmDrawOpaqueAuto() {
|
||||||
|
@ -1873,6 +1938,17 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
|
||||||
submission_lock = 0;
|
submission_lock = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (send_init_packet) {
|
||||||
|
if (sdk_version <= 0x1ffffffu) {
|
||||||
|
liverpool->SubmitGfx(InitSequence, {});
|
||||||
|
} else if (sdk_version <= 0x3ffffffu) {
|
||||||
|
liverpool->SubmitGfx(InitSequence200, {});
|
||||||
|
} else {
|
||||||
|
liverpool->SubmitGfx(InitSequence350, {});
|
||||||
|
}
|
||||||
|
send_init_packet = false;
|
||||||
|
}
|
||||||
|
|
||||||
for (auto cbpair = 0u; cbpair < count; ++cbpair) {
|
for (auto cbpair = 0u; cbpair < count; ++cbpair) {
|
||||||
const auto* ccb = ccb_gpu_addrs ? ccb_gpu_addrs[cbpair] : nullptr;
|
const auto* ccb = ccb_gpu_addrs ? ccb_gpu_addrs[cbpair] : nullptr;
|
||||||
const auto ccb_size_in_bytes = ccb_sizes_in_bytes ? ccb_sizes_in_bytes[cbpair] : 0;
|
const auto ccb_size_in_bytes = ccb_sizes_in_bytes ? ccb_sizes_in_bytes[cbpair] : 0;
|
||||||
|
@ -1915,6 +1991,7 @@ int PS4_SYSV_ABI sceGnmSubmitDone() {
|
||||||
submission_lock = true;
|
submission_lock = true;
|
||||||
}
|
}
|
||||||
liverpool->NotifySubmitDone();
|
liverpool->NotifySubmitDone();
|
||||||
|
send_init_packet = true;
|
||||||
++frames_submitted;
|
++frames_submitted;
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
@ -2388,6 +2465,11 @@ void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) {
|
||||||
liverpool = std::make_unique<AmdGpu::Liverpool>();
|
liverpool = std::make_unique<AmdGpu::Liverpool>();
|
||||||
renderer = std::make_unique<Vulkan::RendererVulkan>(*g_window, liverpool.get());
|
renderer = std::make_unique<Vulkan::RendererVulkan>(*g_window, liverpool.get());
|
||||||
|
|
||||||
|
const int result = sceKernelGetCompiledSdkVersion(&sdk_version);
|
||||||
|
if (result != ORBIS_OK) {
|
||||||
|
sdk_version = 0;
|
||||||
|
}
|
||||||
|
|
||||||
LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent);
|
LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent);
|
||||||
LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1,
|
LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1,
|
||||||
sceGnmAreSubmitsAllowed);
|
sceGnmAreSubmitsAllowed);
|
||||||
|
|
|
@ -60,8 +60,8 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size);
|
||||||
u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size);
|
u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size);
|
||||||
u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size);
|
u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size);
|
||||||
u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size);
|
u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size);
|
||||||
int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState();
|
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size);
|
||||||
int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400();
|
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size);
|
||||||
int PS4_SYSV_ABI sceGnmDrawOpaqueAuto();
|
int PS4_SYSV_ABI sceGnmDrawOpaqueAuto();
|
||||||
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
|
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
|
||||||
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface();
|
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface();
|
||||||
|
|
|
@ -161,7 +161,7 @@ int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) {
|
||||||
int version = param_sfo->GetInteger("SYSTEM_VER");
|
int version = param_sfo->GetInteger("SYSTEM_VER");
|
||||||
LOG_INFO(Kernel, "returned system version = {:#x}", version);
|
LOG_INFO(Kernel, "returned system version = {:#x}", version);
|
||||||
*ver = version;
|
*ver = version;
|
||||||
return ORBIS_OK;
|
return (version > 0) ? ORBIS_OK : ORBIS_KERNEL_ERROR_EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
s64 PS4_SYSV_ABI ps4__read(int d, void* buf, u64 nbytes) {
|
s64 PS4_SYSV_ABI ps4__read(int d, void* buf, u64 nbytes) {
|
||||||
|
|
|
@ -30,6 +30,7 @@ typedef struct {
|
||||||
} OrbisKernelUuid;
|
} OrbisKernelUuid;
|
||||||
|
|
||||||
int* PS4_SYSV_ABI __Error();
|
int* PS4_SYSV_ABI __Error();
|
||||||
|
int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver);
|
||||||
|
|
||||||
void LibKernel_Register(Core::Loader::SymbolsResolver* sym);
|
void LibKernel_Register(Core::Loader::SymbolsResolver* sym);
|
||||||
|
|
||||||
|
|
|
@ -42,10 +42,6 @@ Emulator::Emulator() : window{WindowWidth, WindowHeight, controller} {
|
||||||
// Start logger.
|
// Start logger.
|
||||||
Common::Log::Initialize();
|
Common::Log::Initialize();
|
||||||
Common::Log::Start();
|
Common::Log::Start();
|
||||||
|
|
||||||
// Initialize kernel and library facilities.
|
|
||||||
Libraries::Kernel::init_pthreads();
|
|
||||||
Libraries::InitHLELibs(&linker->GetHLESymbols());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Emulator::~Emulator() {
|
Emulator::~Emulator() {
|
||||||
|
@ -93,6 +89,10 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||||
const auto& mount_temp_dir = Common::FS::GetUserPath(Common::FS::PathType::TempDataDir) / id;
|
const auto& mount_temp_dir = Common::FS::GetUserPath(Common::FS::PathType::TempDataDir) / id;
|
||||||
mnt->Mount(mount_temp_dir, "/temp0"); // called in app_content ==> stat/mkdir
|
mnt->Mount(mount_temp_dir, "/temp0"); // called in app_content ==> stat/mkdir
|
||||||
|
|
||||||
|
// Initialize kernel and library facilities.
|
||||||
|
Libraries::Kernel::init_pthreads();
|
||||||
|
Libraries::InitHLELibs(&linker->GetHLESymbols());
|
||||||
|
|
||||||
// Load the module with the linker
|
// Load the module with the linker
|
||||||
linker->LoadModule(file);
|
linker->LoadModule(file);
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,9 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <deque>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
|
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/ir/ir_emitter.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
|
@ -250,11 +252,25 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
||||||
}
|
}
|
||||||
|
|
||||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
std::deque<IR::Inst*> insts{&inst};
|
||||||
ASSERT(producer->GetOpcode() ==
|
const auto& pred = [](auto opcode) -> bool {
|
||||||
IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||||
producer->GetOpcode() == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||||
producer->GetOpcode() == IR::Opcode::GetUserData);
|
opcode == IR::Opcode::GetUserData);
|
||||||
|
};
|
||||||
|
|
||||||
|
IR::Inst* producer{};
|
||||||
|
while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) {
|
||||||
|
for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) {
|
||||||
|
const auto arg = producer->Arg(arg_idx);
|
||||||
|
if (arg.TryInstRecursive()) {
|
||||||
|
insts.push_back(arg.InstRecursive());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
insts.pop_front();
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT(pred(producer->GetOpcode()));
|
||||||
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
||||||
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
||||||
return std::make_pair(producer->Arg(0).InstRecursive(),
|
return std::make_pair(producer->Arg(0).InstRecursive(),
|
||||||
|
|
|
@ -518,13 +518,14 @@ struct PM4CmdEventWriteEos {
|
||||||
struct PM4WriteConstRam {
|
struct PM4WriteConstRam {
|
||||||
PM4Type3Header header;
|
PM4Type3Header header;
|
||||||
union {
|
union {
|
||||||
BitField<0, 16, u32> offset; // in DWs
|
BitField<0, 16, u32> offset; ///< Starting DW granularity offset into the constant RAM.
|
||||||
|
///< Thus, bits[1:0] are zero.
|
||||||
u32 dw1;
|
u32 dw1;
|
||||||
};
|
};
|
||||||
u32 data[0];
|
u32 data[0];
|
||||||
|
|
||||||
[[nodiscard]] u32 Offset() const {
|
[[nodiscard]] u32 Offset() const {
|
||||||
return offset.Value() << 2u;
|
return offset.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] u32 Size() const {
|
[[nodiscard]] u32 Size() const {
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
set(SHADER_FILES
|
set(SHADER_FILES
|
||||||
detile_m8x1.comp
|
detile_m8x1.comp
|
||||||
|
detile_m8x2.comp
|
||||||
detile_m32x1.comp
|
detile_m32x1.comp
|
||||||
detile_m32x2.comp
|
detile_m32x2.comp
|
||||||
detile_m32x4.comp
|
detile_m32x4.comp
|
||||||
|
|
61
src/video_core/host_shaders/detile_m8x2.comp
Normal file
61
src/video_core/host_shaders/detile_m8x2.comp
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
// NOTE: Current subgroup utilization is suboptimal on most GPUs, so
|
||||||
|
// it would be nice to process two tiles at once here.
|
||||||
|
layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(std430, binding = 0) buffer input_buf {
|
||||||
|
uint in_data[];
|
||||||
|
};
|
||||||
|
layout(rg8ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||||
|
|
||||||
|
layout(push_constant) uniform image_info {
|
||||||
|
uint pitch;
|
||||||
|
} info;
|
||||||
|
|
||||||
|
#define MICRO_TILE_DIM 8
|
||||||
|
#define TEXELS_PER_ELEMENT 2
|
||||||
|
|
||||||
|
// Inverse morton LUT, small enough to fit into K$
|
||||||
|
uint rmort[16] = {
|
||||||
|
0x11011000, 0x31213020,
|
||||||
|
0x13031202, 0x33233222,
|
||||||
|
0x51415040, 0x71617060,
|
||||||
|
0x53435242, 0x73637262,
|
||||||
|
|
||||||
|
0x15051404, 0x35253424,
|
||||||
|
0x17071606, 0x37273626,
|
||||||
|
0x55455444, 0x75657464,
|
||||||
|
0x57475646, 0x77677666,
|
||||||
|
};
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
uint src_tx = in_data[gl_GlobalInvocationID.x];
|
||||||
|
uint p[TEXELS_PER_ELEMENT] = {
|
||||||
|
(src_tx >> 16) & 0xffff,
|
||||||
|
src_tx & 0xffff
|
||||||
|
};
|
||||||
|
|
||||||
|
uint bit_ofs = 8 * TEXELS_PER_ELEMENT * (gl_LocalInvocationID.x % 4);
|
||||||
|
uint packed_pos = rmort[gl_LocalInvocationID.x >> 1] >> bit_ofs;
|
||||||
|
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||||
|
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||||
|
|
||||||
|
uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
|
||||||
|
uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
|
||||||
|
uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;
|
||||||
|
uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col;
|
||||||
|
uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row;
|
||||||
|
|
||||||
|
ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y);
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) {
|
||||||
|
uint p0 = (p[ofs] >> 8) & 0xff;
|
||||||
|
uint p1 = p[ofs] & 0xff;
|
||||||
|
imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p0, p1, 0, 0));
|
||||||
|
}
|
||||||
|
}
|
|
@ -325,6 +325,18 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
num_format == AmdGpu::NumberFormat::Sint) {
|
num_format == AmdGpu::NumberFormat::Sint) {
|
||||||
return vk::Format::eR16G16B16A16Sint;
|
return vk::Format::eR16G16B16A16Sint;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format16_16 &&
|
||||||
|
num_format == AmdGpu::NumberFormat::Float) {
|
||||||
|
return vk::Format::eR16G16Sfloat;
|
||||||
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format10_11_11 &&
|
||||||
|
num_format == AmdGpu::NumberFormat::Float) {
|
||||||
|
return vk::Format::eB10G11R11UfloatPack32;
|
||||||
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format2_10_10_10 &&
|
||||||
|
num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
|
return vk::Format::eA2B10G10R10UnormPack32;
|
||||||
|
}
|
||||||
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Srgb) {
|
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Srgb) {
|
||||||
return vk::Format::eBc7SrgbBlock;
|
return vk::Format::eBc7SrgbBlock;
|
||||||
}
|
}
|
||||||
|
@ -490,6 +502,8 @@ vk::SampleCountFlagBits NumSamples(u32 num_samples) {
|
||||||
return vk::SampleCountFlagBits::e2;
|
return vk::SampleCountFlagBits::e2;
|
||||||
case 4:
|
case 4:
|
||||||
return vk::SampleCountFlagBits::e4;
|
return vk::SampleCountFlagBits::e4;
|
||||||
|
case 8:
|
||||||
|
return vk::SampleCountFlagBits::e8;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,9 +122,8 @@ void PipelineCache::RefreshGraphicsKey() {
|
||||||
key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
|
key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Should be a check for `OperationMode::Disable` once we emulate HW state init packet
|
const auto skip_cb_binding =
|
||||||
// sent by system software.
|
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
|
||||||
const auto skip_cb_binding = false;
|
|
||||||
|
|
||||||
// `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
|
// `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
|
||||||
// attachments. This might not be the case, as HW color buffers can be bound in an arbitrary order.
|
// attachments. This might not be the case, as HW color buffers can be bound in an arbitrary order.
|
||||||
|
|
|
@ -91,9 +91,13 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
|
||||||
usage |= vk::ImageUsageFlagBits::eColorAttachment;
|
usage |= vk::ImageUsageFlagBits::eColorAttachment;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (info.is_tiled || info.usage.storage) {
|
|
||||||
|
// In cases where an image is created as a render/depth target and cleared with compute,
|
||||||
|
// we cannot predict whether it will be used as a storage image. A proper solution would
|
||||||
|
// involve re-creating the resource with a new configuration and copying previous content into
|
||||||
|
// it. However, for now, we will set storage usage for all images (if the format allows),
|
||||||
|
// sacrificing a bit of performance. Note use of ExtendedUsage flag set by default.
|
||||||
usage |= vk::ImageUsageFlagBits::eStorage;
|
usage |= vk::ImageUsageFlagBits::eStorage;
|
||||||
}
|
|
||||||
return usage;
|
return usage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -217,7 +221,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
|
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
|
||||||
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
|
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
|
||||||
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
||||||
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat};
|
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
|
||||||
|
vk::ImageCreateFlagBits::eExtendedUsage};
|
||||||
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
|
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
|
||||||
info.size.width == info.size.height) {
|
info.size.width == info.size.height) {
|
||||||
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
|
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
|
||||||
|
@ -225,12 +230,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
if (info.type == vk::ImageType::e3D) {
|
if (info.type == vk::ImageType::e3D) {
|
||||||
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
|
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
|
||||||
}
|
}
|
||||||
if (info.is_tiled) {
|
|
||||||
flags |= vk::ImageCreateFlagBits::eExtendedUsage;
|
|
||||||
if (info.IsBlockCoded()) {
|
if (info.IsBlockCoded()) {
|
||||||
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
|
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
usage = ImageUsageFlags(info);
|
usage = ImageUsageFlags(info);
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include "video_core/host_shaders/detile_m32x2_comp.h"
|
#include "video_core/host_shaders/detile_m32x2_comp.h"
|
||||||
#include "video_core/host_shaders/detile_m32x4_comp.h"
|
#include "video_core/host_shaders/detile_m32x4_comp.h"
|
||||||
#include "video_core/host_shaders/detile_m8x1_comp.h"
|
#include "video_core/host_shaders/detile_m8x1_comp.h"
|
||||||
|
#include "video_core/host_shaders/detile_m8x2_comp.h"
|
||||||
|
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
#include <magic_enum.hpp>
|
#include <magic_enum.hpp>
|
||||||
|
@ -177,6 +178,8 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case vk::Format::eR8Unorm:
|
case vk::Format::eR8Unorm:
|
||||||
return vk::Format::eR8Uint;
|
return vk::Format::eR8Uint;
|
||||||
|
case vk::Format::eR8G8Unorm:
|
||||||
|
return vk::Format::eR8G8Uint;
|
||||||
case vk::Format::eR8G8B8A8Srgb:
|
case vk::Format::eR8G8B8A8Srgb:
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case vk::Format::eB8G8R8A8Srgb:
|
case vk::Format::eB8G8R8A8Srgb:
|
||||||
|
@ -207,6 +210,8 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case vk::Format::eR8Uint:
|
case vk::Format::eR8Uint:
|
||||||
return &detilers[DetilerType::Micro8x1];
|
return &detilers[DetilerType::Micro8x1];
|
||||||
|
case vk::Format::eR8G8Uint:
|
||||||
|
return &detilers[DetilerType::Micro8x2];
|
||||||
case vk::Format::eR32Uint:
|
case vk::Format::eR32Uint:
|
||||||
return &detilers[DetilerType::Micro32x1];
|
return &detilers[DetilerType::Micro32x1];
|
||||||
case vk::Format::eR32G32Uint:
|
case vk::Format::eR32G32Uint:
|
||||||
|
@ -229,9 +234,8 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc
|
||||||
staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
|
staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
|
||||||
|
|
||||||
static const std::array detiler_shaders{
|
static const std::array detiler_shaders{
|
||||||
HostShaders::DETILE_M8X1_COMP,
|
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
|
||||||
HostShaders::DETILE_M32X1_COMP,
|
HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP,
|
||||||
HostShaders::DETILE_M32X2_COMP,
|
|
||||||
HostShaders::DETILE_M32X4_COMP,
|
HostShaders::DETILE_M32X4_COMP,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format);
|
||||||
|
|
||||||
enum DetilerType : u32 {
|
enum DetilerType : u32 {
|
||||||
Micro8x1,
|
Micro8x1,
|
||||||
|
Micro8x2,
|
||||||
Micro32x1,
|
Micro32x1,
|
||||||
Micro32x2,
|
Micro32x2,
|
||||||
Micro32x4,
|
Micro32x4,
|
||||||
|
|
Loading…
Reference in a new issue