shadPS4/src/core/debug_state.cpp

216 lines
7.2 KiB
C++
Raw Normal View History

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <imgui.h>
#include "common/assert.h"
#include "common/native_clock.h"
#include "common/singleton.h"
#include "debug_state.h"
#include "devtools/widget/common.h"
kernel: Rewrite pthread emulation (#1440) * libkernel: Cleanup some function places * kernel: Refactor thread functions * kernel: It builds * kernel: Fix a bunch of bugs, kernel thread heap * kernel: File cleanup pt1 * File cleanup pt2 * File cleanup pt3 * File cleanup pt4 * kernel: Add missing funcs * kernel: Add basic exceptions for linux * gnmdriver: Add workload functions * kernel: Fix new pthreads code on macOS. (#1441) * kernel: Downgrade edeadlk to log * gnmdriver: Add sceGnmSubmitCommandBuffersForWorkload * exception: Add context register population for macOS. (#1444) * kernel: Pthread rewrite touchups for Windows * kernel: Multiplatform thread implementation * mutex: Remove spamming log * pthread_spec: Make assert into a log * pthread_spec: Zero initialize array * Attempt to fix non-Windows builds * hotfix: change incorrect NID for scePthreadAttrSetaffinity * scePthreadAttrSetaffinity implementation * Attempt to fix Linux * windows: Address a bunch of address space problems * address_space: Fix unmap of region surrounded by placeholders * libs: Reduce logging * pthread: Implement condvar with waitable atomics and sleepqueue * sleepq: Separate and make faster * time: Remove delay execution * Causes high cpu usage in Tohou Luna Nights * kernel: Cleanup files again * pthread: Add missing include * semaphore: Use binary_semaphore instead of condvar * Seems more reliable * libraries/sysmodule: log module on `sceSysmoduleIsLoaded` * libraries/kernel: implement `scePthreadSetPrio` --------- Co-authored-by: squidbus <175574877+squidbus@users.noreply.github.com> Co-authored-by: Daniel R. <47796739+polybiusproxy@users.noreply.github.com>
2024-11-21 20:59:38 +00:00
#include "libraries/kernel/time.h"
#include "libraries/system/msgdialog.h"
#include "video_core/amdgpu/pm4_cmds.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
// Make the names declared in DebugStateType usable unqualified in this file.
using namespace DebugStateType;
// File-scope reference bound to the object that
// Common::Singleton<DebugStateImpl>::Instance() points at.
DebugStateImpl& DebugState = *Common::Singleton<DebugStateImpl>::Instance();
/// Returns the native identifier of the calling thread.
/// Windows builds use GetCurrentThreadId(); all other builds use pthread_self().
static ThreadID ThisThreadID() {
#ifndef _WIN32
    return pthread_self();
#else
    return GetCurrentThreadId();
#endif
}
/// Requests that the thread identified by `id` stop running.
/// Windows: opens a handle with THREAD_SUSPEND_RESUME access, calls SuspendThread on it and
/// closes the handle again.
/// Other platforms: sends SIGUSR1 to the thread. The signal handler lives outside this
/// section — presumably it parks the thread; confirm against the handler.
static void PauseThread(ThreadID id) {
#ifndef _WIN32
    pthread_kill(id, SIGUSR1);
#else
    auto thread_handle = OpenThread(THREAD_SUSPEND_RESUME, FALSE, id);
    SuspendThread(thread_handle);
    CloseHandle(thread_handle);
#endif
}
/// Requests that the thread identified by `id` continue running.
/// Windows: opens a handle with THREAD_SUSPEND_RESUME access, calls the Win32
/// ResumeThread(HANDLE) overload on it and closes the handle again.
/// Other platforms: sends SIGUSR1, the same signal PauseThread sends — presumably the
/// handler toggles between paused and running; confirm against the handler.
static void ResumeThread(ThreadID id) {
#ifndef _WIN32
    pthread_kill(id, SIGUSR1);
#else
    auto thread_handle = OpenThread(THREAD_SUSPEND_RESUME, FALSE, id);
    ResumeThread(thread_handle);
    CloseHandle(thread_handle);
#endif
}
void DebugStateImpl::AddCurrentThreadToGuestList() {
std::lock_guard lock{guest_threads_mutex};
const ThreadID id = ThisThreadID();
guest_threads.push_back(id);
}
void DebugStateImpl::RemoveCurrentThreadFromGuestList() {
std::lock_guard lock{guest_threads_mutex};
const ThreadID id = ThisThreadID();
std::erase_if(guest_threads, [&](const ThreadID& v) { return v == id; });
}
void DebugStateImpl::PauseGuestThreads() {
using namespace Libraries::MsgDialog;
std::unique_lock lock{guest_threads_mutex};
if (is_guest_threads_paused) {
return;
}
if (ShouldPauseInSubmit()) {
waiting_submit_pause = false;
should_show_frame_dump = true;
}
bool self_guest = false;
ThreadID self_id = ThisThreadID();
for (const auto& id : guest_threads) {
if (id == self_id) {
self_guest = true;
} else {
PauseThread(id);
}
}
pause_time = Libraries::Kernel::Dev::GetClock()->GetUptime();
is_guest_threads_paused = true;
lock.unlock();
if (self_guest) {
PauseThread(self_id);
}
}
void DebugStateImpl::ResumeGuestThreads() {
std::lock_guard lock{guest_threads_mutex};
if (!is_guest_threads_paused) {
return;
}
u64 delta_time = Libraries::Kernel::Dev::GetClock()->GetUptime() - pause_time;
Libraries::Kernel::Dev::GetInitialPtc() += delta_time;
for (const auto& id : guest_threads) {
ResumeThread(id);
}
is_guest_threads_paused = false;
}
/// Arms a dump of the next `count` frames.
///
/// Asserts that no frame dump is currently in progress, stores the request count, rebuilds
/// `frame_dump_list` with `count` fresh entries whose frame ids run consecutively starting
/// at the current `gnm_frame_count` plus one, and finally sets `waiting_submit_pause`.
///
/// @param count Number of frames to dump.
void DebugStateImpl::RequestFrameDump(s32 count) {
    ASSERT(!DumpingCurrentFrame());
    gnm_frame_dump_request_count = count;

    frame_dump_list.clear();
    frame_dump_list.resize(count);

    const auto first_frame_id = gnm_frame_count.load() + 1;
    const auto total = static_cast<size_t>(count);
    for (size_t slot = 0; slot < total; ++slot) {
        frame_dump_list[slot].frame_id = first_frame_id + slot;
    }

    waiting_submit_pause = true;
}
/// Records a submitted command queue in the frame dump that is currently being captured.
///
/// Asserts that a frame dump is in progress. While holding `frame_dump_list_mutex`, the
/// queue's command words are walked packet by packet; for every packet whose opcode is a
/// draw call (per Core::Devtools::Widget::IsDrawCall) the packet's address
/// (`dump.base_addr` plus its offset from the start of `dump.data`) is registered in
/// `waiting_reg_dumps` / `waiting_reg_dumps_dbg`. Finally the queue is moved into
/// `frame.queues`.
///
/// @param dump Queue to record; taken by value and moved into the frame dump.
void DebugStateImpl::PushQueueDump(QueueDump dump) {
    ASSERT(DumpingCurrentFrame());

    std::unique_lock lock{frame_dump_list_mutex};
    auto& frame = GetFrameDump();

    { // Find draw calls
        // NOTE(review): the address math below treats the offset into dump.data as a byte
        // offset from dump.base_addr — confirm the element type of dump.data.
        auto data = std::span{dump.data};
        const auto initial_data = data.data();
        while (!data.empty()) {
            const auto* header = reinterpret_cast<const AmdGpu::PM4Type3Header*>(data.data());
            const auto type = header->type;
            if (type == 2) {
                // A type-2 header is handled as a one-word packet: step over it and read
                // the next header. Falling through would decode this stale header as a
                // type-3 packet and advance a second time.
                data = data.subspan(1);
                continue;
            } else if (type != 3) {
                UNREACHABLE();
            }

            const AmdGpu::PM4ItOpcode opcode = header->opcode;
            if (Core::Devtools::Widget::IsDrawCall(opcode)) {
                const auto offset =
                    reinterpret_cast<uintptr_t>(header) - reinterpret_cast<uintptr_t>(initial_data);
                const auto addr = dump.base_addr + offset;
                waiting_reg_dumps.emplace(addr, &frame);
                waiting_reg_dumps_dbg.emplace(
                    addr,
                    fmt::format("#{} h({}) queue {} {} {}",
                                frame_dump_list.size() - gnm_frame_dump_request_count, addr,
                                magic_enum::enum_name(dump.type), dump.submit_num, dump.num2));
            }

            // Header word plus the words the header announces. Stop if the packet claims
            // more words than remain, since subspan() requires offset <= size().
            const auto packet_words = static_cast<size_t>(header->NumWords()) + 1;
            if (packet_words > data.size()) {
                break;
            }
            data = data.subspan(packet_words);
        }
    }

    frame.queues.push_back(std::move(dump));
}
/// Looks up the pending register-dump slot registered for `header_addr`.
///
/// If `waiting_reg_dumps` has no entry for `header_addr`, returns std::nullopt. Otherwise
/// the entry is removed from `waiting_reg_dumps` (and from `waiting_reg_dumps_dbg` when
/// present) and a pointer to the frame's `regs` element keyed by
/// `header_addr - base_addr` is returned.
///
/// This function takes no lock itself; the callers visible in this file hold
/// `frame_dump_list_mutex` around it.
///
/// @param base_addr   Base address of the queue the packet belongs to.
/// @param header_addr Address of the packet header that was registered as a draw call.
/// @return Pointer to the RegDump slot, or std::nullopt if nothing was pending.
std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) {
    const auto it = waiting_reg_dumps.find(header_addr);
    if (it == waiting_reg_dumps.end()) {
        return std::nullopt;
    }
    auto& frame = *it->second;
    waiting_reg_dumps.erase(it);

    // Erasing the end iterator is undefined behaviour, so only erase the debug entry when
    // the lookup actually found one.
    if (const auto dbg_it = waiting_reg_dumps_dbg.find(header_addr);
        dbg_it != waiting_reg_dumps_dbg.end()) {
        waiting_reg_dumps_dbg.erase(dbg_it);
    }

    return &frame.regs[header_addr - base_addr];
}
/// Stores a snapshot of the graphics registers for a pending draw call.
///
/// Under `frame_dump_list_mutex`, fetches the pending slot via GetRegDump() and returns
/// early when none is pending. The registers are copied into the slot; then, for every
/// enabled shader stage whose program has a non-zero `address_lo`, the stage's name, hash,
/// user data and a copy of its code words are stored in `stages[i]`.
///
/// @param base_addr   Base address of the queue the packet belongs to.
/// @param header_addr Address of the draw packet header.
/// @param regs        Register state to snapshot.
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
                                  const AmdGpu::Liverpool::Regs& regs) {
    std::scoped_lock lock{frame_dump_list_mutex};

    const auto pending = GetRegDump(base_addr, header_addr);
    if (!pending) {
        return;
    }

    auto& target = **pending;
    target.regs = regs;

    for (int idx = 0; idx < RegDump::MaxShaderStages; ++idx) {
        if (!target.regs.stage_enable.IsStageEnabled(idx)) {
            continue;
        }
        auto stage = target.regs.ProgramForStage(idx);
        if (stage->address_lo == 0) {
            continue;
        }

        const auto& info = AmdGpu::Liverpool::SearchBinaryInfo(stage->Address<u32*>());
        auto code = stage->Code();
        target.stages[idx] = PipelineShaderProgramDump{
            .name = Vulkan::PipelineCache::GetShaderName(Shader::StageFromIndex(idx),
                                                         info.shader_hash),
            .hash = info.shader_hash,
            .user_data = *stage,
            .code = std::vector<u32>{code.begin(), code.end()},
        };
    }
}
/// Stores a snapshot of the compute state for a pending dispatch.
///
/// Under `frame_dump_list_mutex`, fetches the pending slot via GetRegDump() and returns
/// early when none is pending. The slot is flagged as compute, `cs_state` is copied into
/// the slot's `regs.cs_program`, and the program's name, hash, state and a copy of its
/// code words are stored in `cs_data`.
///
/// @param base_addr   Base address of the queue the packet belongs to.
/// @param header_addr Address of the dispatch packet header.
/// @param cs_state    Compute shader state to snapshot.
void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr,
                                         const CsState& cs_state) {
    std::scoped_lock lock{frame_dump_list_mutex};
    auto dump = GetRegDump(base_addr, header_addr);
    if (!dump) {
        return;
    }
    (*dump)->is_compute = true;
    auto& cs = (*dump)->regs.cs_program;
    cs = cs_state;
    const auto& info = AmdGpu::Liverpool::SearchBinaryInfo(cs.Address<u32*>());
    // Take the code range once, as PushRegsDump does, so begin() and end() are guaranteed
    // to come from the same object.
    auto code = cs.Code();
    (*dump)->cs_data = PipelineComputerProgramDump{
        .name = Vulkan::PipelineCache::GetShaderName(Shader::Stage::Compute, info.shader_hash),
        .hash = info.shader_hash,
        .cs_program = cs,
        .code = std::vector<u32>{code.begin(), code.end()},
    };
}
Tessellation (#1528) * shader_recompiler: Tessellation WIP * fix compiler errors after merge DONT MERGE set log file to /dev/null DONT MERGE linux pthread bb fix save work DONT MERGE dump ir save more work fix mistake with ES shader skip list add input patch control points dynamic state random stuff * WIP Tessellation partial implementation. Squash commits * test: make local/tcs use attr arrays * attr arrays in TCS/TES * dont define empty attr arrays * switch to special opcodes for tess tcs/tes reads and tcs writes * impl tcs/tes read attr insts * rebase fix * save some work * save work probably broken and slow * put Vertex LogicalStage after TCS and TES to fix bindings * more refactors * refactor pattern matching and optimize modulos (disabled) * enable modulo opt * copyright * rebase fixes * remove some prints * remove some stuff * Add TCS/TES support for shader patching and use LogicalStage * refactor and handle wider DS instructions * get rid of GetAttributes for special tess constants reads. Immediately replace some upon seeing readconstbuffer. Gets rid of some extra passes over IR * stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs * delete some more stuff * update comments for current implementation * some cleanup * uint error * more cleanup * remove patch control points dynamic state (because runtime_info already depends on it) * fix potential problem with determining passthrough --------- Co-authored-by: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com>
2024-12-14 10:56:17 +00:00
void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,
vk::ShaderModule module, std::span<const u32> spv,
std::span<const u32> raw_code, std::span<const u32> patch_spv,
bool is_patched) {
shader_dump_list.emplace_back(name, l_stage, module, std::vector<u32>{spv.begin(), spv.end()},
std::vector<u32>{raw_code.begin(), raw_code.end()},
std::vector<u32>{patch_spv.begin(), patch_spv.end()}, is_patched);
}