Switch remaining CRLF terminated files to LF

This commit is contained in:
Daniel R. 2024-12-24 13:56:31 +01:00
parent 2c0f986c52
commit c284cf72e1
No known key found for this signature in database
GPG key ID: B8ADC8F57BA18DBA
28 changed files with 4856 additions and 4856 deletions

View file

@ -1,25 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <type_traits>
namespace Common {
/// Ceiled integer division: the smallest integer that is not less than number / divisor.
///
/// The division is carried out in an unsigned type wide enough for both operands, so a
/// divisor type that is narrower than N does not truncate the dividend. The result is built
/// from quotient and remainder, which cannot wrap around near the maximum of the type the
/// way `number + divisor - 1` does.
///
/// @param number  Dividend. A negative value is converted to unsigned before dividing.
/// @param divisor Divisor; must not be zero.
/// @return The rounded-up quotient, converted back to N.
template <typename N, typename D>
    requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeil(N number, D divisor) {
    using Wide = std::make_unsigned_t<std::common_type_t<N, D>>;
    const Wide dividend = static_cast<Wide>(number);
    const Wide wide_divisor = static_cast<Wide>(divisor);
    const Wide quotient = static_cast<Wide>(dividend / wide_divisor);
    // Round up only when the division left a remainder.
    return static_cast<N>(dividend % wide_divisor != 0 ? quotient + 1 : quotient);
}
/// Ceiled integer division by a power of two given as its base-2 logarithm.
///
/// Computes ceil(value / 2^alignment_log2). The arithmetic is carried out in an unsigned
/// type wide enough for both operands, so a narrow shift-count type does not truncate the
/// value, and the result is built from the shifted value plus a remainder test, which
/// cannot wrap around near the maximum of the type.
///
/// @param value          Dividend. A negative value is converted to unsigned first.
/// @param alignment_log2 Shift amount; must be smaller than the bit width of the wider operand.
/// @return The rounded-up quotient, converted back to N.
template <typename N, typename D>
    requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeilLog2(N value, D alignment_log2) {
    using Wide = std::make_unsigned_t<std::common_type_t<N, D>>;
    const Wide dividend = static_cast<Wide>(value);
    const Wide remainder_mask = static_cast<Wide>((Wide{1} << alignment_log2) - 1);
    const Wide quotient = static_cast<Wide>(dividend >> alignment_log2);
    // Round up only when some of the shifted-out bits were set.
    return static_cast<N>((dividend & remainder_mask) != 0 ? quotient + 1 : quotient);
}
} // namespace Common
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <type_traits>
namespace Common {
/// Ceiled integer division: the smallest integer that is not less than number / divisor.
///
/// The division is carried out in an unsigned type wide enough for both operands, so a
/// divisor type that is narrower than N does not truncate the dividend. The result is built
/// from quotient and remainder, which cannot wrap around near the maximum of the type the
/// way `number + divisor - 1` does.
///
/// @param number  Dividend. A negative value is converted to unsigned before dividing.
/// @param divisor Divisor; must not be zero.
/// @return The rounded-up quotient, converted back to N.
template <typename N, typename D>
    requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeil(N number, D divisor) {
    using Wide = std::make_unsigned_t<std::common_type_t<N, D>>;
    const Wide dividend = static_cast<Wide>(number);
    const Wide wide_divisor = static_cast<Wide>(divisor);
    const Wide quotient = static_cast<Wide>(dividend / wide_divisor);
    // Round up only when the division left a remainder.
    return static_cast<N>(dividend % wide_divisor != 0 ? quotient + 1 : quotient);
}
/// Ceiled integer division by a power of two given as its base-2 logarithm.
///
/// Computes ceil(value / 2^alignment_log2). The arithmetic is carried out in an unsigned
/// type wide enough for both operands, so a narrow shift-count type does not truncate the
/// value, and the result is built from the shifted value plus a remainder test, which
/// cannot wrap around near the maximum of the type.
///
/// @param value          Dividend. A negative value is converted to unsigned first.
/// @param alignment_log2 Shift amount; must be smaller than the bit width of the wider operand.
/// @return The rounded-up quotient, converted back to N.
template <typename N, typename D>
    requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeilLog2(N value, D alignment_log2) {
    using Wide = std::make_unsigned_t<std::common_type_t<N, D>>;
    const Wide dividend = static_cast<Wide>(value);
    const Wide remainder_mask = static_cast<Wide>((Wide{1} << alignment_log2) - 1);
    const Wide quotient = static_cast<Wide>(dividend >> alignment_log2);
    // Round up only when some of the shifted-out bits were set.
    return static_cast<N>((dividend & remainder_mask) != 0 ? quotient + 1 : quotient);
}
} // namespace Common

View file

@ -1,30 +1,30 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#ifdef _WIN32
#include "ntapi.h"
// Global pointers to ntdll.dll entry points. They start out null and are filled in by
// Common::NtApi::Initialize() through GetProcAddress.
NtClose_t NtClose = nullptr;
NtSetInformationFile_t NtSetInformationFile = nullptr;
NtCreateThread_t NtCreateThread = nullptr;
NtTerminateThread_t NtTerminateThread = nullptr;
NtQueueApcThreadEx_t NtQueueApcThreadEx = nullptr;
namespace Common::NtApi {
void Initialize() {
HMODULE nt_handle = GetModuleHandleA("ntdll.dll");
// http://stackoverflow.com/a/31411628/4725495
NtClose = (NtClose_t)GetProcAddress(nt_handle, "NtClose");
NtSetInformationFile =
(NtSetInformationFile_t)GetProcAddress(nt_handle, "NtSetInformationFile");
NtCreateThread = (NtCreateThread_t)GetProcAddress(nt_handle, "NtCreateThread");
NtTerminateThread = (NtTerminateThread_t)GetProcAddress(nt_handle, "NtTerminateThread");
NtQueueApcThreadEx = (NtQueueApcThreadEx_t)GetProcAddress(nt_handle, "NtQueueApcThreadEx");
}
} // namespace Common::NtApi
#endif
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#ifdef _WIN32
#include "ntapi.h"
// Global pointers to ntdll.dll entry points. They start out null and are filled in by
// Common::NtApi::Initialize() through GetProcAddress.
NtClose_t NtClose = nullptr;
NtSetInformationFile_t NtSetInformationFile = nullptr;
NtCreateThread_t NtCreateThread = nullptr;
NtTerminateThread_t NtTerminateThread = nullptr;
NtQueueApcThreadEx_t NtQueueApcThreadEx = nullptr;
namespace Common::NtApi {
void Initialize() {
HMODULE nt_handle = GetModuleHandleA("ntdll.dll");
// http://stackoverflow.com/a/31411628/4725495
NtClose = (NtClose_t)GetProcAddress(nt_handle, "NtClose");
NtSetInformationFile =
(NtSetInformationFile_t)GetProcAddress(nt_handle, "NtSetInformationFile");
NtCreateThread = (NtCreateThread_t)GetProcAddress(nt_handle, "NtCreateThread");
NtTerminateThread = (NtTerminateThread_t)GetProcAddress(nt_handle, "NtTerminateThread");
NtQueueApcThreadEx = (NtQueueApcThreadEx_t)GetProcAddress(nt_handle, "NtQueueApcThreadEx");
}
} // namespace Common::NtApi
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,53 +1,53 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/spin_lock.h"
#if _MSC_VER
#include <intrin.h>
#if _M_AMD64
#define __x86_64__ 1
#endif
#if _M_ARM64
#define __aarch64__ 1
#endif
#else
#if __x86_64__
#include <xmmintrin.h>
#endif
#endif
namespace {
/// Emits the CPU's spin-wait hint for the current architecture.
/// Compiles to an empty function on targets that are neither x86-64 nor AArch64.
void ThreadPause() {
#if __x86_64__
    _mm_pause();
#elif __aarch64__
#if _MSC_VER
    __yield();
#else
    asm("yield");
#endif
#endif
}
} // Anonymous namespace
namespace Common {
/// Busy-waits until the flag has been acquired, calling ThreadPause() after every failed attempt.
void SpinLock::lock() {
    for (;;) {
        const bool already_held = lck.test_and_set(std::memory_order_acquire);
        if (!already_held) {
            return;
        }
        ThreadPause();
    }
}
/// Releases the lock by clearing the flag with release ordering.
void SpinLock::unlock() {
    constexpr std::memory_order release_order = std::memory_order_release;
    lck.clear(release_order);
}
/// Makes a single attempt to take the lock.
/// @return true when the flag was clear and is now owned by the caller, false otherwise.
bool SpinLock::try_lock() {
    const bool was_locked = lck.test_and_set(std::memory_order_acquire);
    return !was_locked;
}
} // namespace Common
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/spin_lock.h"
#if _MSC_VER
#include <intrin.h>
#if _M_AMD64
#define __x86_64__ 1
#endif
#if _M_ARM64
#define __aarch64__ 1
#endif
#else
#if __x86_64__
#include <xmmintrin.h>
#endif
#endif
namespace {
/// Emits the CPU's spin-wait hint for the current architecture.
/// Compiles to an empty function on targets that are neither x86-64 nor AArch64.
void ThreadPause() {
#if __x86_64__
    _mm_pause();
#elif __aarch64__
#if _MSC_VER
    __yield();
#else
    asm("yield");
#endif
#endif
}
} // Anonymous namespace
namespace Common {
/// Busy-waits until the flag has been acquired, calling ThreadPause() after every failed attempt.
void SpinLock::lock() {
    for (;;) {
        const bool already_held = lck.test_and_set(std::memory_order_acquire);
        if (!already_held) {
            return;
        }
        ThreadPause();
    }
}
/// Releases the lock by clearing the flag with release ordering.
void SpinLock::unlock() {
    constexpr std::memory_order release_order = std::memory_order_release;
    lck.clear(release_order);
}
/// Makes a single attempt to take the lock.
/// @return true when the flag was clear and is now owned by the caller, false otherwise.
bool SpinLock::try_lock() {
    const bool was_locked = lck.test_and_set(std::memory_order_acquire);
    return !was_locked;
}
} // namespace Common

View file

@ -1,33 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
namespace Common {
/**
 * SpinLock
 *
 * A mutex-like lock whose lock() keeps the calling thread spinning instead of handing
 * control to the supervisor. Meant for short critical sections only.
 * The object can be neither copied nor moved.
 */
class SpinLock {
public:
    SpinLock() = default;

    // Non-copyable.
    SpinLock(const SpinLock&) = delete;
    SpinLock& operator=(const SpinLock&) = delete;

    // Non-movable.
    SpinLock(SpinLock&&) = delete;
    SpinLock& operator=(SpinLock&&) = delete;

    /// Blocks (spinning) until the lock is owned by the caller.
    void lock();

    /// Releases the lock.
    void unlock();

    /// Single acquisition attempt; the result reports whether the lock was taken.
    [[nodiscard]] bool try_lock();

private:
    std::atomic_flag lck = ATOMIC_FLAG_INIT;
};
} // namespace Common
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
namespace Common {
/**
 * SpinLock
 *
 * A mutex-like lock whose lock() keeps the calling thread spinning instead of handing
 * control to the supervisor. Meant for short critical sections only.
 * The object can be neither copied nor moved.
 */
class SpinLock {
public:
    SpinLock() = default;

    // Non-copyable.
    SpinLock(const SpinLock&) = delete;
    SpinLock& operator=(const SpinLock&) = delete;

    // Non-movable.
    SpinLock(SpinLock&&) = delete;
    SpinLock& operator=(SpinLock&&) = delete;

    /// Blocks (spinning) until the lock is owned by the caller.
    void lock();

    /// Releases the lock.
    void unlock();

    /// Single acquisition attempt; the result reports whether the lock was taken.
    [[nodiscard]] bool try_lock();

private:
    std::atomic_flag lck = ATOMIC_FLAG_INIT;
};
} // namespace Common

View file

@ -1,61 +1,61 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <utility>
namespace Common {
/// Move-only, type-erased callable wrapper in the spirit of std::function.
/// The wrapped functor is stored on the heap and only has to be movable, so captured
/// values need not be copyable. The wrapper itself can be moved but never copied.
///
/// Invoking an empty wrapper dereferences a null pointer; test it with `operator bool` first.
/// NOTE(review): the converting constructor always moves from its argument, and an lvalue
/// argument makes Functor deduce to a reference type — pass functors as rvalues.
template <typename ResultType, typename... Args>
class UniqueFunction {
    /// Type-erased call interface.
    struct Invocable {
        virtual ~Invocable() = default;
        virtual ResultType operator()(Args&&...) = 0;
    };

    /// Owns one concrete functor and forwards calls to it.
    template <typename Functor>
    struct Holder final : Invocable {
        Holder(Functor&& functor_) : functor{std::move(functor_)} {}
        ~Holder() override = default;

        ResultType operator()(Args&&... args) override {
            return functor(std::forward<Args>(args)...);
        }

        Functor functor;
    };

public:
    /// Creates an empty wrapper.
    UniqueFunction() = default;

    /// Takes ownership of `functor` by moving it into heap storage.
    template <typename Functor>
    UniqueFunction(Functor&& functor)
        : callable{std::make_unique<Holder<Functor>>(std::move(functor))} {}

    // Movable ...
    UniqueFunction(UniqueFunction&& rhs) noexcept = default;
    UniqueFunction& operator=(UniqueFunction&& rhs) noexcept = default;

    // ... but not copyable.
    UniqueFunction(const UniqueFunction&) = delete;
    UniqueFunction& operator=(const UniqueFunction&) = delete;

    /// Invokes the stored functor with the given arguments.
    ResultType operator()(Args&&... args) const {
        return (*callable)(std::forward<Args>(args)...);
    }

    /// True when a functor is stored.
    explicit operator bool() const noexcept {
        return static_cast<bool>(callable);
    }

private:
    std::unique_ptr<Invocable> callable;
};
} // namespace Common
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <utility>
namespace Common {
/// Move-only, type-erased callable wrapper in the spirit of std::function.
/// The wrapped functor is stored on the heap and only has to be movable, so captured
/// values need not be copyable. The wrapper itself can be moved but never copied.
///
/// Invoking an empty wrapper dereferences a null pointer; test it with `operator bool` first.
/// NOTE(review): the converting constructor always moves from its argument, and an lvalue
/// argument makes Functor deduce to a reference type — pass functors as rvalues.
template <typename ResultType, typename... Args>
class UniqueFunction {
    /// Type-erased call interface.
    struct Invocable {
        virtual ~Invocable() = default;
        virtual ResultType operator()(Args&&...) = 0;
    };

    /// Owns one concrete functor and forwards calls to it.
    template <typename Functor>
    struct Holder final : Invocable {
        Holder(Functor&& functor_) : functor{std::move(functor_)} {}
        ~Holder() override = default;

        ResultType operator()(Args&&... args) override {
            return functor(std::forward<Args>(args)...);
        }

        Functor functor;
    };

public:
    /// Creates an empty wrapper.
    UniqueFunction() = default;

    /// Takes ownership of `functor` by moving it into heap storage.
    template <typename Functor>
    UniqueFunction(Functor&& functor)
        : callable{std::make_unique<Holder<Functor>>(std::move(functor))} {}

    // Movable ...
    UniqueFunction(UniqueFunction&& rhs) noexcept = default;
    UniqueFunction& operator=(UniqueFunction&& rhs) noexcept = default;

    // ... but not copyable.
    UniqueFunction(const UniqueFunction&) = delete;
    UniqueFunction& operator=(const UniqueFunction&) = delete;

    /// Invokes the stored functor with the given arguments.
    ResultType operator()(Args&&... args) const {
        return (*callable)(std::forward<Args>(args)...);
    }

    /// True when a functor is stored.
    explicit operator bool() const noexcept {
        return static_cast<bool>(callable);
    }

private:
    std::unique_ptr<Invocable> callable;
};
} // namespace Common

View file

@ -1,484 +1,484 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "fiber.h"
#include "common/logging/log.h"
#include "core/libraries/fiber/fiber_error.h"
#include "core/libraries/libs.h"
#include "core/tls.h"
namespace Libraries::Fiber {
// Magic values stored in OrbisFiber::magic_start / magic_end by sceFiberInitialize and
// compared by the other entry points before they use a fiber.
static constexpr u32 kFiberSignature0 = 0xdef1649c;
static constexpr u32 kFiberSignature1 = 0xb37592a0;
// Magic value written by sceFiberOptParamInitialize and required by sceFiberInitialize.
static constexpr u32 kFiberOptSignature = 0xbb40e64d;
// Stored in the first u64 of a fiber's context buffer; a different value found there is
// reported as a stack overflow.
static constexpr u64 kFiberStackSignature = 0x7149f2ca7149f2ca;
// Fill pattern for the remaining context buffer while context size checking is enabled.
static constexpr u64 kFiberStackSizeCheck = 0xdeadbeefdeadbeef;
// Non-zero while context size checking is enabled (see sceFiberStartContextSizeCheck and
// sceFiberStopContextSizeCheck). NOTE(review): initialised from `false`, i.e. 0.
static std::atomic<u32> context_size_check = false;
/// Returns the fiber context recorded in the current thread's TCB (null when none is set).
OrbisFiberContext* GetFiberContext() {
    auto* const tcb = Core::GetTcbBase();
    return tcb->tcb_fiber;
}
// Low-level context-switch primitives. Every declaration is bound to an explicit assembler
// symbol name through the trailing asm("...") label.
// NOTE(review): _sceFiberSetJmp, _sceFiberLongJmp and _sceFiberSwitchEntry are only declared
// in this file — presumably implemented in assembly elsewhere; confirm.
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp");
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp");
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data,
bool set_fpu) asm("_sceFiberSwitchEntry");
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit");
// Records `ret` as return_val in the calling thread's fiber context and jumps to that
// context. sceFiberRun treats a non-zero return_val as "the fiber entry function returned".
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) {
OrbisFiberContext* g_ctx = GetFiberContext();
g_ctx->return_val = ret;
_sceFiberLongJmp(g_ctx);
}
/// Verifies that the signature word at the start of the current fiber's context buffer is
/// intact. Fibers without a context buffer are skipped; a damaged signature is fatal.
void PS4_SYSV_ABI _sceFiberCheckStackOverflow(OrbisFiberContext* ctx) {
    const u64* const guard_word = reinterpret_cast<const u64*>(ctx->current_fiber->addr_context);
    if (guard_word == nullptr) {
        return;
    }
    if (*guard_word != kFiberStackSignature) {
        UNREACHABLE_MSG("Stack overflow detected in fiber.");
    }
}
// Transfers control to `fiber`, which owns a context buffer.
// If the fiber has a saved context it is resumed through _sceFiberLongJmp; otherwise it is
// started for the first time through _sceFiberSwitchEntry. Neither path is expected to come
// back here, hence the __builtin_trap() calls.
void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to,
OrbisFiberContext* ctx) {
OrbisFiberContext* fiber_ctx = fiber->context;
if (fiber_ctx) {
// Resume: hand over the argument through the thread context, then jump.
ctx->arg_on_run_to = arg_on_run_to;
_sceFiberLongJmp(fiber_ctx);
__builtin_trap();
}
// First run: describe the entry call for _sceFiberSwitchEntry.
OrbisFiberData data{};
if (ctx->prev_fiber) {
// Pass the address of the previous fiber's state word along and clear prev_fiber.
// NOTE(review): presumably the entry stub updates that state — confirm.
OrbisFiber* prev_fiber = ctx->prev_fiber;
ctx->prev_fiber = nullptr;
data.state = reinterpret_cast<u32*>(&prev_fiber->state);
} else {
data.state = nullptr;
}
data.entry = fiber->entry;
data.arg_on_initialize = fiber->arg_on_initialize;
data.arg_on_run_to = arg_on_run_to;
// Initial stack pointer: one past the end of the context buffer.
data.stack_addr =
reinterpret_cast<void*>(reinterpret_cast<u64>(fiber->addr_context) + fiber->size_context);
if (fiber->flags & FiberFlags::SetFpuRegs) {
// Fixed x87 control word / MXCSR values handed to the entry stub together with set_fpu.
data.fpucw = 0x037f;
data.mxcsr = 0x9fc0;
_sceFiberSwitchEntry(&data, true);
} else {
_sceFiberSwitchEntry(&data, false);
}
__builtin_trap();
}
// Switches from `cur_fiber` to `fiber` and records both in the thread context `ctx`.
// A target without its own context buffer is started directly on the stack described by
// ctx->rsp; any other target goes through _sceFiberSwitchToFiber. Not expected to return.
void PS4_SYSV_ABI _sceFiberSwitch(OrbisFiber* cur_fiber, OrbisFiber* fiber, u64 arg_on_run_to,
OrbisFiberContext* ctx) {
ctx->prev_fiber = cur_fiber;
ctx->current_fiber = fiber;
if (fiber->addr_context == nullptr) {
ctx->prev_fiber = nullptr;
OrbisFiberData data{};
data.entry = fiber->entry;
data.arg_on_initialize = fiber->arg_on_initialize;
data.arg_on_run_to = arg_on_run_to;
// Use the stack pointer stored in the thread context, rounded down to 16 bytes.
data.stack_addr = reinterpret_cast<void*>(ctx->rsp & ~15);
// Address of the outgoing fiber's state word, handed to the entry stub.
data.state = reinterpret_cast<u32*>(&cur_fiber->state);
if (fiber->flags & FiberFlags::SetFpuRegs) {
data.fpucw = 0x037f;
data.mxcsr = 0x9fc0;
_sceFiberSwitchEntry(&data, true);
} else {
_sceFiberSwitchEntry(&data, false);
}
__builtin_trap();
}
_sceFiberSwitchToFiber(fiber, arg_on_run_to, ctx);
__builtin_trap();
}
// Stores `arg_on_return` in the thread context and jumps to it; sceFiberRun later copies the
// value out to its caller. `fiber` is not used. Not expected to return.
void PS4_SYSV_ABI _sceFiberTerminate(OrbisFiber* fiber, u64 arg_on_return, OrbisFiberContext* ctx) {
ctx->arg_on_return = arg_on_return;
_sceFiberLongJmp(ctx);
__builtin_trap();
}
/// Validates the arguments and initialises `fiber` in the Idle state.
///
/// @param fiber             Fiber object to set up; must be 8-byte aligned.
/// @param name              Name copied into the fiber (at most ORBIS_FIBER_MAX_NAME_LENGTH chars).
/// @param entry             Entry function of the fiber.
/// @param arg_on_initialize Value stored for the entry function.
/// @param addr_context      Context buffer, 16-byte aligned, or null together with size_context 0.
/// @param size_context      Buffer size: 0, or a multiple of 16 that is at least
///                          ORBIS_FIBER_CONTEXT_MINIMUM_SIZE.
/// @param opt_param         Optional parameters; when given they must be 8-byte aligned and
///                          carry the magic written by sceFiberOptParamInitialize.
/// @param build_ver         Builds from 0x3500000 on get FiberFlags::SetFpuRegs.
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _RANGE / _INVALID.
s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry,
                                    u64 arg_on_initialize, void* addr_context, u64 size_context,
                                    const OrbisFiberOptParam* opt_param, u32 build_ver) {
    if (fiber == nullptr || name == nullptr || entry == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }

    const bool fiber_misaligned = (reinterpret_cast<u64>(fiber) & 7) != 0;
    const bool context_misaligned = (reinterpret_cast<u64>(addr_context) & 15) != 0;
    const bool opt_misaligned =
        opt_param != nullptr && (reinterpret_cast<u64>(opt_param) & 7) != 0;
    if (fiber_misaligned || context_misaligned || opt_misaligned) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    if (size_context != 0 && size_context < ORBIS_FIBER_CONTEXT_MINIMUM_SIZE) {
        return ORBIS_FIBER_ERROR_RANGE;
    }

    // Buffer and size have to be given together, the size in multiples of 16, and the
    // optional parameter block has to carry its magic.
    const bool has_buffer = addr_context != nullptr;
    const bool has_size = size_context != 0;
    if ((size_context & 15) != 0 || has_buffer != has_size ||
        (opt_param != nullptr && opt_param->magic != kFiberOptSignature)) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    u32 flags = FiberFlags::None;
    if (build_ver >= 0x3500000) {
        flags |= FiberFlags::SetFpuRegs;
    }
    if (context_size_check) {
        flags |= FiberFlags::ContextSizeCheck;
    }

    // NOTE(review): strncpy leaves the name unterminated when `name` has
    // ORBIS_FIBER_MAX_NAME_LENGTH or more characters — confirm the buffer layout.
    strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH);

    fiber->entry = entry;
    fiber->arg_on_initialize = arg_on_initialize;
    fiber->addr_context = addr_context;
    fiber->size_context = size_context;
    fiber->context = nullptr;
    fiber->flags = flags;

    /*
        A low stack area is problematic, as we can easily
        cause a stack overflow with our HLE.
    */
    if (has_size && size_context <= 4096) {
        LOG_WARNING(Lib_Fiber, "Fiber initialized with small stack area.");
    }

    fiber->magic_start = kFiberSignature0;
    fiber->magic_end = kFiberSignature1;

    if (has_buffer) {
        fiber->context_start = addr_context;
        fiber->context_end =
            reinterpret_cast<void*>(reinterpret_cast<u64>(addr_context) + size_context);

        /* Apply signature to start of stack */
        *static_cast<u64*>(addr_context) = kFiberStackSignature;

        if (flags & FiberFlags::ContextSizeCheck) {
            // Fill everything after the signature word with the size-check pattern.
            u64* const fill_begin = reinterpret_cast<u64*>(fiber->context_start) + 1;
            u64* const fill_end = reinterpret_cast<u64*>(fiber->context_end);
            for (u64* slot = fill_begin; slot < fill_end; ++slot) {
                *slot = kFiberStackSizeCheck;
            }
        }
    }

    fiber->state = FiberState::Idle;
    return ORBIS_OK;
}
/// Stamps the option-parameter magic into `opt_param`.
/// @return ORBIS_OK, ORBIS_FIBER_ERROR_NULL for a null pointer, or
///         ORBIS_FIBER_ERROR_ALIGNMENT when the pointer is not 8-byte aligned.
s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param) {
    if (opt_param == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    const u64 address = reinterpret_cast<u64>(opt_param);
    if ((address & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    opt_param->magic = kFiberOptSignature;
    return ORBIS_OK;
}
/// Moves an Idle fiber to the Terminated state.
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID for a bad fiber
///         pointer or header, or ORBIS_FIBER_ERROR_STATE when the fiber is not Idle.
s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber) {
    if (fiber == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((reinterpret_cast<u64>(fiber) & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    const bool header_intact =
        fiber->magic_start == kFiberSignature0 && fiber->magic_end == kFiberSignature1;
    if (!header_intact) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    // Only an Idle fiber may be finalized; the exchange fails for any other state.
    FiberState required = FiberState::Idle;
    const bool finalized = fiber->state.compare_exchange_strong(required, FiberState::Terminated);
    if (finalized) {
        return ORBIS_OK;
    }
    return ORBIS_FIBER_ERROR_STATE;
}
// Starts `fiber` from a thread that is not yet running a fiber and returns once control
// jumps back to the context saved here.
// Returns ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID for a bad fiber pointer or header,
// ORBIS_FIBER_ERROR_PERMISSION when the thread already has a fiber context,
// ORBIS_FIBER_ERROR_STATE when the fiber is not Idle, and ORBIS_OK otherwise.
// On success *arg_on_return (if non-null) receives the value stored in ctx.arg_on_return.
s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) {
if (!fiber) {
return ORBIS_FIBER_ERROR_NULL;
}
if ((u64)fiber & 7) {
return ORBIS_FIBER_ERROR_ALIGNMENT;
}
if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) {
return ORBIS_FIBER_ERROR_INVALID;
}
Core::Tcb* tcb = Core::GetTcbBase();
if (tcb->tcb_fiber) {
return ORBIS_FIBER_ERROR_PERMISSION;
}
// Claim the fiber: Idle -> Run, failing if it is in any other state.
FiberState expected = FiberState::Idle;
if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) {
return ORBIS_FIBER_ERROR_STATE;
}
// The thread context lives in this stack frame and is published through the TCB so that
// GetFiberContext() can find it while fibers run.
OrbisFiberContext ctx{};
ctx.current_fiber = fiber;
ctx.prev_fiber = nullptr;
ctx.return_val = 0;
tcb->tcb_fiber = &ctx;
// NOTE(review): presumably returns 0 on the direct call and non-zero when re-entered via
// _sceFiberLongJmp(&ctx) — confirm against the assembly implementation.
s32 jmp = _sceFiberSetJmp(&ctx);
if (!jmp) {
if (fiber->addr_context) {
_sceFiberSwitchToFiber(fiber, arg_on_run_to, &ctx);
__builtin_trap();
}
// No context buffer: start the fiber on the stack described by ctx.rsp (16-byte aligned).
OrbisFiberData data{};
data.entry = fiber->entry;
data.arg_on_initialize = fiber->arg_on_initialize;
data.arg_on_run_to = arg_on_run_to;
data.stack_addr = reinterpret_cast<void*>(ctx.rsp & ~15);
data.state = nullptr;
if (fiber->flags & FiberFlags::SetFpuRegs) {
data.fpucw = 0x037f;
data.mxcsr = 0x9fc0;
_sceFiberSwitchEntry(&data, true);
} else {
_sceFiberSwitchEntry(&data, false);
}
}
// Back on the thread: whichever fiber was current goes back to Idle.
OrbisFiber* cur_fiber = ctx.current_fiber;
ctx.current_fiber = nullptr;
cur_fiber->state = FiberState::Idle;
if (ctx.return_val != 0) {
/* Fiber entry returned! This should never happen. */
UNREACHABLE_MSG("Fiber entry function returned.");
}
if (arg_on_return) {
*arg_on_return = ctx.arg_on_return;
}
// ctx is about to go out of scope, so unpublish it.
tcb->tcb_fiber = nullptr;
return ORBIS_OK;
}
// Switches from the currently running fiber to `fiber`.
// Returns ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID for a bad fiber pointer or header,
// ORBIS_FIBER_ERROR_PERMISSION when the thread is not running a fiber,
// ORBIS_FIBER_ERROR_STATE when the target is not Idle. When the calling fiber is resumed
// later, *arg_on_run (if non-null) receives the thread context's arg_on_run_to and the call
// returns ORBIS_OK.
s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) {
if (!fiber) {
return ORBIS_FIBER_ERROR_NULL;
}
if ((u64)fiber & 7) {
return ORBIS_FIBER_ERROR_ALIGNMENT;
}
if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) {
return ORBIS_FIBER_ERROR_INVALID;
}
OrbisFiberContext* g_ctx = GetFiberContext();
if (!g_ctx) {
return ORBIS_FIBER_ERROR_PERMISSION;
}
// Claim the target: Idle -> Run, failing if it is in any other state.
FiberState expected = FiberState::Idle;
if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) {
return ORBIS_FIBER_ERROR_STATE;
}
OrbisFiber* cur_fiber = g_ctx->current_fiber;
if (cur_fiber->addr_context == nullptr) {
// The current fiber has no context buffer: no context is saved for it before switching.
_sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx);
__builtin_trap();
}
// Save the resume point of the current fiber in this stack frame.
OrbisFiberContext ctx{};
s32 jmp = _sceFiberSetJmp(&ctx);
if (!jmp) {
cur_fiber->context = &ctx;
_sceFiberCheckStackOverflow(g_ctx);
_sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx);
__builtin_trap();
}
// Resumed: re-read the thread context and put the fiber that switched to us back to Idle.
g_ctx = GetFiberContext();
if (g_ctx->prev_fiber) {
g_ctx->prev_fiber->state = FiberState::Idle;
g_ctx->prev_fiber = nullptr;
}
if (arg_on_run) {
*arg_on_run = g_ctx->arg_on_run_to;
}
return ORBIS_OK;
}
/// Reports the fiber currently running on the calling thread.
/// @param fiber Receives the current fiber pointer.
/// @return ORBIS_OK, ORBIS_FIBER_ERROR_NULL when `fiber` is null, or
///         ORBIS_FIBER_ERROR_PERMISSION when the thread has no fiber context.
s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber) {
    if (fiber == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }

    OrbisFiberContext* const thread_ctx = GetFiberContext();
    if (thread_ctx == nullptr) {
        return ORBIS_FIBER_ERROR_PERMISSION;
    }

    *fiber = thread_ctx->current_fiber;
    return ORBIS_OK;
}
// Leaves the current fiber and jumps back to the thread context, passing `arg_on_return`.
// Returns ORBIS_FIBER_ERROR_PERMISSION when the thread is not running a fiber. For a fiber
// with a context buffer the resume point is saved first; when that fiber is resumed later,
// *arg_on_run (if non-null) receives the thread context's arg_on_run_to and the call
// returns ORBIS_OK.
s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run) {
OrbisFiberContext* g_ctx = GetFiberContext();
if (!g_ctx) {
return ORBIS_FIBER_ERROR_PERMISSION;
}
OrbisFiber* cur_fiber = g_ctx->current_fiber;
if (cur_fiber->addr_context) {
OrbisFiberContext ctx{};
s32 jmp = _sceFiberSetJmp(&ctx);
if (jmp) {
// Resumed: put the fiber that switched to us back to Idle and report the argument.
g_ctx = GetFiberContext();
if (g_ctx->prev_fiber) {
g_ctx->prev_fiber->state = FiberState::Idle;
g_ctx->prev_fiber = nullptr;
}
if (arg_on_run) {
*arg_on_run = g_ctx->arg_on_run_to;
}
return ORBIS_OK;
}
// First pass: remember where to resume and check the stack signature before leaving.
cur_fiber->context = &ctx;
_sceFiberCheckStackOverflow(g_ctx);
}
_sceFiberTerminate(cur_fiber, arg_on_return, g_ctx);
__builtin_trap();
}
/// Copies the descriptive fields of `fiber` into `fiber_info`.
///
/// size_context_margin is set to -1 unless the fiber was initialised with context size
/// checking and owns a context buffer. In that case it is the number of bytes directly
/// after the signature word that still hold the fill pattern, or 0 when the signature word
/// itself has been overwritten.
///
/// @param fiber      Initialised fiber; must be 8-byte aligned.
/// @param fiber_info Output structure; must be 8-byte aligned with `size` preset to
///                   sizeof(OrbisFiberInfo).
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID.
s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info) {
    if (!fiber || !fiber_info) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((u64)fiber & 7 || (u64)fiber_info & 7) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }
    if (fiber_info->size != sizeof(OrbisFiberInfo)) {
        return ORBIS_FIBER_ERROR_INVALID;
    }
    if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    fiber_info->entry = fiber->entry;
    fiber_info->arg_on_initialize = fiber->arg_on_initialize;
    fiber_info->addr_context = fiber->addr_context;
    fiber_info->size_context = fiber->size_context;
    strncpy(fiber_info->name, fiber->name, ORBIS_FIBER_MAX_NAME_LENGTH);

    fiber_info->size_context_margin = -1;
    if (fiber->flags & FiberFlags::ContextSizeCheck && fiber->addr_context != nullptr) {
        u64 stack_margin = 0;
        u64* stack_start = reinterpret_cast<u64*>(fiber->context_start);
        u64* stack_end = reinterpret_cast<u64*>(fiber->context_end);

        if (*stack_start == kFiberStackSignature) {
            u64* stack_ptr = stack_start + 1;
            // Stop at the first word that no longer holds the fill pattern. Without the
            // pattern test in the loop condition the scan would spin forever on that word.
            while (stack_ptr < stack_end && *stack_ptr == kFiberStackSizeCheck) {
                ++stack_ptr;
            }

            stack_margin =
                reinterpret_cast<u64>(stack_ptr) - reinterpret_cast<u64>(stack_start + 1);
        }

        fiber_info->size_context_margin = stack_margin;
    }

    return ORBIS_OK;
}
/// Enables context size checking for fibers initialised from now on.
/// @param flags Must be 0.
/// @return ORBIS_OK, ORBIS_FIBER_ERROR_INVALID for non-zero flags, or
///         ORBIS_FIBER_ERROR_STATE when checking is already enabled.
s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags) {
    if (flags != 0) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    // Flip 0 -> 1 atomically; any other current value means checking is already on.
    u32 disabled = 0;
    const bool switched_on = context_size_check.compare_exchange_strong(disabled, 1u);
    if (switched_on) {
        return ORBIS_OK;
    }
    return ORBIS_FIBER_ERROR_STATE;
}
/// Disables context size checking.
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_STATE when checking was not enabled.
s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck() {
    // Flip 1 -> 0 atomically; any other current value means checking is not on.
    u32 enabled = 1;
    const bool switched_off = context_size_check.compare_exchange_strong(enabled, 0u);
    if (switched_off) {
        return ORBIS_OK;
    }
    return ORBIS_FIBER_ERROR_STATE;
}
/// Replaces the name stored in an initialised fiber.
/// @param fiber Initialised fiber; must be 8-byte aligned.
/// @param name  New name; at most ORBIS_FIBER_MAX_NAME_LENGTH characters are copied.
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID.
s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name) {
    if (fiber == nullptr || name == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((reinterpret_cast<u64>(fiber) & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    const bool header_intact =
        fiber->magic_start == kFiberSignature0 && fiber->magic_end == kFiberSignature1;
    if (!header_intact) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    // NOTE(review): strncpy leaves the name unterminated when `name` has
    // ORBIS_FIBER_MAX_NAME_LENGTH or more characters — confirm the buffer layout.
    strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH);
    return ORBIS_OK;
}
// Registers the libSceFiber entry points through LIB_FUNCTION, one call per exported symbol
// id. Two ids ("hVYD7Ou2pCQ" and "7+OJIpko9RY") both map to sceFiberInitialize.
void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize);
LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize);
LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize);
LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize);
LIB_FUNCTION("a0LLrZWac0M", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRun);
LIB_FUNCTION("PFT2S-tJ7Uk", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberSwitch);
LIB_FUNCTION("p+zLIOg27zU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetSelf);
LIB_FUNCTION("B0ZX2hx9DMw", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberReturnToThread);
LIB_FUNCTION("uq2Y5BFz0PE", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetInfo);
LIB_FUNCTION("Lcqty+QNWFc", "libSceFiber", 1, "libSceFiber", 1, 1,
sceFiberStartContextSizeCheck);
LIB_FUNCTION("Kj4nXMpnM8Y", "libSceFiber", 1, "libSceFiber", 1, 1,
sceFiberStopContextSizeCheck);
LIB_FUNCTION("JzyT91ucGDc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRename);
}
} // namespace Libraries::Fiber
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "fiber.h"
#include "common/logging/log.h"
#include "core/libraries/fiber/fiber_error.h"
#include "core/libraries/libs.h"
#include "core/tls.h"
namespace Libraries::Fiber {
// Magic values stored in OrbisFiber::magic_start / magic_end by sceFiberInitialize and
// compared by the other entry points before they use a fiber.
static constexpr u32 kFiberSignature0 = 0xdef1649c;
static constexpr u32 kFiberSignature1 = 0xb37592a0;
// Magic value written by sceFiberOptParamInitialize and required by sceFiberInitialize.
static constexpr u32 kFiberOptSignature = 0xbb40e64d;
// Stored in the first u64 of a fiber's context buffer; a different value found there is
// reported as a stack overflow.
static constexpr u64 kFiberStackSignature = 0x7149f2ca7149f2ca;
// Fill pattern for the remaining context buffer while context size checking is enabled.
static constexpr u64 kFiberStackSizeCheck = 0xdeadbeefdeadbeef;
// Non-zero while context size checking is enabled (see sceFiberStartContextSizeCheck and
// sceFiberStopContextSizeCheck). NOTE(review): initialised from `false`, i.e. 0.
static std::atomic<u32> context_size_check = false;
/// Returns the fiber context recorded in the current thread's TCB (null when none is set).
OrbisFiberContext* GetFiberContext() {
    auto* const tcb = Core::GetTcbBase();
    return tcb->tcb_fiber;
}
// Low-level context-switch primitives. Every declaration is bound to an explicit assembler
// symbol name through the trailing asm("...") label.
// NOTE(review): _sceFiberSetJmp, _sceFiberLongJmp and _sceFiberSwitchEntry are only declared
// in this file — presumably implemented in assembly elsewhere; confirm.
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp");
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp");
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data,
bool set_fpu) asm("_sceFiberSwitchEntry");
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit");
// Records `ret` as return_val in the calling thread's fiber context and jumps to that
// context. sceFiberRun treats a non-zero return_val as "the fiber entry function returned".
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) {
OrbisFiberContext* g_ctx = GetFiberContext();
g_ctx->return_val = ret;
_sceFiberLongJmp(g_ctx);
}
/// Verifies that the signature word at the start of the current fiber's context buffer is
/// intact. Fibers without a context buffer are skipped; a damaged signature is fatal.
void PS4_SYSV_ABI _sceFiberCheckStackOverflow(OrbisFiberContext* ctx) {
    const u64* const guard_word = reinterpret_cast<const u64*>(ctx->current_fiber->addr_context);
    if (guard_word == nullptr) {
        return;
    }
    if (*guard_word != kFiberStackSignature) {
        UNREACHABLE_MSG("Stack overflow detected in fiber.");
    }
}
// Transfers control to `fiber`, which owns a context buffer.
// If the fiber has a saved context it is resumed through _sceFiberLongJmp; otherwise it is
// started for the first time through _sceFiberSwitchEntry. Neither path is expected to come
// back here, hence the __builtin_trap() calls.
void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to,
OrbisFiberContext* ctx) {
OrbisFiberContext* fiber_ctx = fiber->context;
if (fiber_ctx) {
// Resume: hand over the argument through the thread context, then jump.
ctx->arg_on_run_to = arg_on_run_to;
_sceFiberLongJmp(fiber_ctx);
__builtin_trap();
}
// First run: describe the entry call for _sceFiberSwitchEntry.
OrbisFiberData data{};
if (ctx->prev_fiber) {
// Pass the address of the previous fiber's state word along and clear prev_fiber.
// NOTE(review): presumably the entry stub updates that state — confirm.
OrbisFiber* prev_fiber = ctx->prev_fiber;
ctx->prev_fiber = nullptr;
data.state = reinterpret_cast<u32*>(&prev_fiber->state);
} else {
data.state = nullptr;
}
data.entry = fiber->entry;
data.arg_on_initialize = fiber->arg_on_initialize;
data.arg_on_run_to = arg_on_run_to;
// Initial stack pointer: one past the end of the context buffer.
data.stack_addr =
reinterpret_cast<void*>(reinterpret_cast<u64>(fiber->addr_context) + fiber->size_context);
if (fiber->flags & FiberFlags::SetFpuRegs) {
// Fixed x87 control word / MXCSR values handed to the entry stub together with set_fpu.
data.fpucw = 0x037f;
data.mxcsr = 0x9fc0;
_sceFiberSwitchEntry(&data, true);
} else {
_sceFiberSwitchEntry(&data, false);
}
__builtin_trap();
}
// Switches from `cur_fiber` to `fiber` and records both in the thread context `ctx`.
// A target without its own context buffer is started directly on the stack described by
// ctx->rsp; any other target goes through _sceFiberSwitchToFiber. Not expected to return.
void PS4_SYSV_ABI _sceFiberSwitch(OrbisFiber* cur_fiber, OrbisFiber* fiber, u64 arg_on_run_to,
OrbisFiberContext* ctx) {
ctx->prev_fiber = cur_fiber;
ctx->current_fiber = fiber;
if (fiber->addr_context == nullptr) {
ctx->prev_fiber = nullptr;
OrbisFiberData data{};
data.entry = fiber->entry;
data.arg_on_initialize = fiber->arg_on_initialize;
data.arg_on_run_to = arg_on_run_to;
// Use the stack pointer stored in the thread context, rounded down to 16 bytes.
data.stack_addr = reinterpret_cast<void*>(ctx->rsp & ~15);
// Address of the outgoing fiber's state word, handed to the entry stub.
data.state = reinterpret_cast<u32*>(&cur_fiber->state);
if (fiber->flags & FiberFlags::SetFpuRegs) {
data.fpucw = 0x037f;
data.mxcsr = 0x9fc0;
_sceFiberSwitchEntry(&data, true);
} else {
_sceFiberSwitchEntry(&data, false);
}
__builtin_trap();
}
_sceFiberSwitchToFiber(fiber, arg_on_run_to, ctx);
__builtin_trap();
}
// Stores `arg_on_return` in the thread context and jumps to it; sceFiberRun later copies the
// value out to its caller. `fiber` is not used. Not expected to return.
void PS4_SYSV_ABI _sceFiberTerminate(OrbisFiber* fiber, u64 arg_on_return, OrbisFiberContext* ctx) {
ctx->arg_on_return = arg_on_return;
_sceFiberLongJmp(ctx);
__builtin_trap();
}
/// Validates the arguments and initialises `fiber` in the Idle state.
///
/// @param fiber             Fiber object to set up; must be 8-byte aligned.
/// @param name              Name copied into the fiber (at most ORBIS_FIBER_MAX_NAME_LENGTH chars).
/// @param entry             Entry function of the fiber.
/// @param arg_on_initialize Value stored for the entry function.
/// @param addr_context      Context buffer, 16-byte aligned, or null together with size_context 0.
/// @param size_context      Buffer size: 0, or a multiple of 16 that is at least
///                          ORBIS_FIBER_CONTEXT_MINIMUM_SIZE.
/// @param opt_param         Optional parameters; when given they must be 8-byte aligned and
///                          carry the magic written by sceFiberOptParamInitialize.
/// @param build_ver         Builds from 0x3500000 on get FiberFlags::SetFpuRegs.
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _RANGE / _INVALID.
s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry,
                                    u64 arg_on_initialize, void* addr_context, u64 size_context,
                                    const OrbisFiberOptParam* opt_param, u32 build_ver) {
    if (fiber == nullptr || name == nullptr || entry == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }

    const bool fiber_misaligned = (reinterpret_cast<u64>(fiber) & 7) != 0;
    const bool context_misaligned = (reinterpret_cast<u64>(addr_context) & 15) != 0;
    const bool opt_misaligned =
        opt_param != nullptr && (reinterpret_cast<u64>(opt_param) & 7) != 0;
    if (fiber_misaligned || context_misaligned || opt_misaligned) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    if (size_context != 0 && size_context < ORBIS_FIBER_CONTEXT_MINIMUM_SIZE) {
        return ORBIS_FIBER_ERROR_RANGE;
    }

    // Buffer and size have to be given together, the size in multiples of 16, and the
    // optional parameter block has to carry its magic.
    const bool has_buffer = addr_context != nullptr;
    const bool has_size = size_context != 0;
    if ((size_context & 15) != 0 || has_buffer != has_size ||
        (opt_param != nullptr && opt_param->magic != kFiberOptSignature)) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    u32 flags = FiberFlags::None;
    if (build_ver >= 0x3500000) {
        flags |= FiberFlags::SetFpuRegs;
    }
    if (context_size_check) {
        flags |= FiberFlags::ContextSizeCheck;
    }

    // NOTE(review): strncpy leaves the name unterminated when `name` has
    // ORBIS_FIBER_MAX_NAME_LENGTH or more characters — confirm the buffer layout.
    strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH);

    fiber->entry = entry;
    fiber->arg_on_initialize = arg_on_initialize;
    fiber->addr_context = addr_context;
    fiber->size_context = size_context;
    fiber->context = nullptr;
    fiber->flags = flags;

    /*
        A low stack area is problematic, as we can easily
        cause a stack overflow with our HLE.
    */
    if (has_size && size_context <= 4096) {
        LOG_WARNING(Lib_Fiber, "Fiber initialized with small stack area.");
    }

    fiber->magic_start = kFiberSignature0;
    fiber->magic_end = kFiberSignature1;

    if (has_buffer) {
        fiber->context_start = addr_context;
        fiber->context_end =
            reinterpret_cast<void*>(reinterpret_cast<u64>(addr_context) + size_context);

        /* Apply signature to start of stack */
        *static_cast<u64*>(addr_context) = kFiberStackSignature;

        if (flags & FiberFlags::ContextSizeCheck) {
            // Fill everything after the signature word with the size-check pattern.
            u64* const fill_begin = reinterpret_cast<u64*>(fiber->context_start) + 1;
            u64* const fill_end = reinterpret_cast<u64*>(fiber->context_end);
            for (u64* slot = fill_begin; slot < fill_end; ++slot) {
                *slot = kFiberStackSizeCheck;
            }
        }
    }

    fiber->state = FiberState::Idle;
    return ORBIS_OK;
}
/// Stamps the option-parameter magic into `opt_param`.
/// @return ORBIS_OK, ORBIS_FIBER_ERROR_NULL for a null pointer, or
///         ORBIS_FIBER_ERROR_ALIGNMENT when the pointer is not 8-byte aligned.
s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param) {
    if (opt_param == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    const u64 address = reinterpret_cast<u64>(opt_param);
    if ((address & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    opt_param->magic = kFiberOptSignature;
    return ORBIS_OK;
}
/// Moves an Idle fiber to the Terminated state.
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID for a bad fiber
///         pointer or header, or ORBIS_FIBER_ERROR_STATE when the fiber is not Idle.
s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber) {
    if (fiber == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((reinterpret_cast<u64>(fiber) & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    const bool header_intact =
        fiber->magic_start == kFiberSignature0 && fiber->magic_end == kFiberSignature1;
    if (!header_intact) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    // Only an Idle fiber may be finalized; the exchange fails for any other state.
    FiberState required = FiberState::Idle;
    const bool finalized = fiber->state.compare_exchange_strong(required, FiberState::Terminated);
    if (finalized) {
        return ORBIS_OK;
    }
    return ORBIS_FIBER_ERROR_STATE;
}
// Starts `fiber` from a thread that is not yet running a fiber and returns once control
// jumps back to the context saved here.
// Returns ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID for a bad fiber pointer or header,
// ORBIS_FIBER_ERROR_PERMISSION when the thread already has a fiber context,
// ORBIS_FIBER_ERROR_STATE when the fiber is not Idle, and ORBIS_OK otherwise.
// On success *arg_on_return (if non-null) receives the value stored in ctx.arg_on_return.
s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) {
if (!fiber) {
return ORBIS_FIBER_ERROR_NULL;
}
if ((u64)fiber & 7) {
return ORBIS_FIBER_ERROR_ALIGNMENT;
}
if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) {
return ORBIS_FIBER_ERROR_INVALID;
}
Core::Tcb* tcb = Core::GetTcbBase();
if (tcb->tcb_fiber) {
return ORBIS_FIBER_ERROR_PERMISSION;
}
// Claim the fiber: Idle -> Run, failing if it is in any other state.
FiberState expected = FiberState::Idle;
if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) {
return ORBIS_FIBER_ERROR_STATE;
}
// The thread context lives in this stack frame and is published through the TCB so that
// GetFiberContext() can find it while fibers run.
OrbisFiberContext ctx{};
ctx.current_fiber = fiber;
ctx.prev_fiber = nullptr;
ctx.return_val = 0;
tcb->tcb_fiber = &ctx;
// NOTE(review): presumably returns 0 on the direct call and non-zero when re-entered via
// _sceFiberLongJmp(&ctx) — confirm against the assembly implementation.
s32 jmp = _sceFiberSetJmp(&ctx);
if (!jmp) {
if (fiber->addr_context) {
_sceFiberSwitchToFiber(fiber, arg_on_run_to, &ctx);
__builtin_trap();
}
// No context buffer: start the fiber on the stack described by ctx.rsp (16-byte aligned).
OrbisFiberData data{};
data.entry = fiber->entry;
data.arg_on_initialize = fiber->arg_on_initialize;
data.arg_on_run_to = arg_on_run_to;
data.stack_addr = reinterpret_cast<void*>(ctx.rsp & ~15);
data.state = nullptr;
if (fiber->flags & FiberFlags::SetFpuRegs) {
data.fpucw = 0x037f;
data.mxcsr = 0x9fc0;
_sceFiberSwitchEntry(&data, true);
} else {
_sceFiberSwitchEntry(&data, false);
}
}
// Back on the thread: whichever fiber was current goes back to Idle.
OrbisFiber* cur_fiber = ctx.current_fiber;
ctx.current_fiber = nullptr;
cur_fiber->state = FiberState::Idle;
if (ctx.return_val != 0) {
/* Fiber entry returned! This should never happen. */
UNREACHABLE_MSG("Fiber entry function returned.");
}
if (arg_on_return) {
*arg_on_return = ctx.arg_on_return;
}
// ctx is about to go out of scope, so unpublish it.
tcb->tcb_fiber = nullptr;
return ORBIS_OK;
}
// Switches from the currently running fiber to `fiber`.
// Returns ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID for a bad fiber pointer or header,
// ORBIS_FIBER_ERROR_PERMISSION when the thread is not running a fiber,
// ORBIS_FIBER_ERROR_STATE when the target is not Idle. When the calling fiber is resumed
// later, *arg_on_run (if non-null) receives the thread context's arg_on_run_to and the call
// returns ORBIS_OK.
s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) {
if (!fiber) {
return ORBIS_FIBER_ERROR_NULL;
}
if ((u64)fiber & 7) {
return ORBIS_FIBER_ERROR_ALIGNMENT;
}
if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) {
return ORBIS_FIBER_ERROR_INVALID;
}
OrbisFiberContext* g_ctx = GetFiberContext();
if (!g_ctx) {
return ORBIS_FIBER_ERROR_PERMISSION;
}
// Claim the target: Idle -> Run, failing if it is in any other state.
FiberState expected = FiberState::Idle;
if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) {
return ORBIS_FIBER_ERROR_STATE;
}
OrbisFiber* cur_fiber = g_ctx->current_fiber;
if (cur_fiber->addr_context == nullptr) {
// The current fiber has no context buffer: no context is saved for it before switching.
_sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx);
__builtin_trap();
}
// Save the resume point of the current fiber in this stack frame.
OrbisFiberContext ctx{};
s32 jmp = _sceFiberSetJmp(&ctx);
if (!jmp) {
cur_fiber->context = &ctx;
_sceFiberCheckStackOverflow(g_ctx);
_sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx);
__builtin_trap();
}
// Resumed: re-read the thread context and put the fiber that switched to us back to Idle.
g_ctx = GetFiberContext();
if (g_ctx->prev_fiber) {
g_ctx->prev_fiber->state = FiberState::Idle;
g_ctx->prev_fiber = nullptr;
}
if (arg_on_run) {
*arg_on_run = g_ctx->arg_on_run_to;
}
return ORBIS_OK;
}
/// Reports the fiber currently running on the calling thread.
/// @param fiber Receives the current fiber pointer.
/// @return ORBIS_OK, ORBIS_FIBER_ERROR_NULL when `fiber` is null, or
///         ORBIS_FIBER_ERROR_PERMISSION when the thread has no fiber context.
s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber) {
    if (fiber == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }

    OrbisFiberContext* const thread_ctx = GetFiberContext();
    if (thread_ctx == nullptr) {
        return ORBIS_FIBER_ERROR_PERMISSION;
    }

    *fiber = thread_ctx->current_fiber;
    return ORBIS_OK;
}
// Leaves the current fiber and jumps back to the thread context, passing `arg_on_return`.
// Returns ORBIS_FIBER_ERROR_PERMISSION when the thread is not running a fiber. For a fiber
// with a context buffer the resume point is saved first; when that fiber is resumed later,
// *arg_on_run (if non-null) receives the thread context's arg_on_run_to and the call
// returns ORBIS_OK.
s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run) {
OrbisFiberContext* g_ctx = GetFiberContext();
if (!g_ctx) {
return ORBIS_FIBER_ERROR_PERMISSION;
}
OrbisFiber* cur_fiber = g_ctx->current_fiber;
if (cur_fiber->addr_context) {
OrbisFiberContext ctx{};
s32 jmp = _sceFiberSetJmp(&ctx);
if (jmp) {
// Resumed: put the fiber that switched to us back to Idle and report the argument.
g_ctx = GetFiberContext();
if (g_ctx->prev_fiber) {
g_ctx->prev_fiber->state = FiberState::Idle;
g_ctx->prev_fiber = nullptr;
}
if (arg_on_run) {
*arg_on_run = g_ctx->arg_on_run_to;
}
return ORBIS_OK;
}
// First pass: remember where to resume and check the stack signature before leaving.
cur_fiber->context = &ctx;
_sceFiberCheckStackOverflow(g_ctx);
}
_sceFiberTerminate(cur_fiber, arg_on_return, g_ctx);
__builtin_trap();
}
/// Copies the descriptive fields of `fiber` into `fiber_info`.
///
/// size_context_margin is set to -1 unless the fiber was initialised with context size
/// checking and owns a context buffer. In that case it is the number of bytes directly
/// after the signature word that still hold the fill pattern, or 0 when the signature word
/// itself has been overwritten.
///
/// @param fiber      Initialised fiber; must be 8-byte aligned.
/// @param fiber_info Output structure; must be 8-byte aligned with `size` preset to
///                   sizeof(OrbisFiberInfo).
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID.
s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info) {
    if (!fiber || !fiber_info) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((u64)fiber & 7 || (u64)fiber_info & 7) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }
    if (fiber_info->size != sizeof(OrbisFiberInfo)) {
        return ORBIS_FIBER_ERROR_INVALID;
    }
    if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    fiber_info->entry = fiber->entry;
    fiber_info->arg_on_initialize = fiber->arg_on_initialize;
    fiber_info->addr_context = fiber->addr_context;
    fiber_info->size_context = fiber->size_context;
    strncpy(fiber_info->name, fiber->name, ORBIS_FIBER_MAX_NAME_LENGTH);

    fiber_info->size_context_margin = -1;
    if (fiber->flags & FiberFlags::ContextSizeCheck && fiber->addr_context != nullptr) {
        u64 stack_margin = 0;
        u64* stack_start = reinterpret_cast<u64*>(fiber->context_start);
        u64* stack_end = reinterpret_cast<u64*>(fiber->context_end);

        if (*stack_start == kFiberStackSignature) {
            u64* stack_ptr = stack_start + 1;
            // Stop at the first word that no longer holds the fill pattern. Without the
            // pattern test in the loop condition the scan would spin forever on that word.
            while (stack_ptr < stack_end && *stack_ptr == kFiberStackSizeCheck) {
                ++stack_ptr;
            }

            stack_margin =
                reinterpret_cast<u64>(stack_ptr) - reinterpret_cast<u64>(stack_start + 1);
        }

        fiber_info->size_context_margin = stack_margin;
    }

    return ORBIS_OK;
}
/// Enables context size checking for fibers initialised from now on.
/// @param flags Must be 0.
/// @return ORBIS_OK, ORBIS_FIBER_ERROR_INVALID for non-zero flags, or
///         ORBIS_FIBER_ERROR_STATE when checking is already enabled.
s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags) {
    if (flags != 0) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    // Flip 0 -> 1 atomically; any other current value means checking is already on.
    u32 disabled = 0;
    const bool switched_on = context_size_check.compare_exchange_strong(disabled, 1u);
    if (switched_on) {
        return ORBIS_OK;
    }
    return ORBIS_FIBER_ERROR_STATE;
}
/// Disables context size checking.
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_STATE when checking was not enabled.
s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck() {
    // Flip 1 -> 0 atomically; any other current value means checking is not on.
    u32 enabled = 1;
    const bool switched_off = context_size_check.compare_exchange_strong(enabled, 0u);
    if (switched_off) {
        return ORBIS_OK;
    }
    return ORBIS_FIBER_ERROR_STATE;
}
/// Replaces the name stored in an initialised fiber.
/// @param fiber Initialised fiber; must be 8-byte aligned.
/// @param name  New name; at most ORBIS_FIBER_MAX_NAME_LENGTH characters are copied.
/// @return ORBIS_OK, or ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _INVALID.
s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name) {
    if (fiber == nullptr || name == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((reinterpret_cast<u64>(fiber) & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    const bool header_intact =
        fiber->magic_start == kFiberSignature0 && fiber->magic_end == kFiberSignature1;
    if (!header_intact) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    // NOTE(review): strncpy leaves the name unterminated when `name` has
    // ORBIS_FIBER_MAX_NAME_LENGTH or more characters — confirm the buffer layout.
    strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH);
    return ORBIS_OK;
}
// Registers the libSceFiber entry points through the LIB_FUNCTION macro, one NID string per
// export. sceFiberInitialize is registered under two different NIDs.
// `sym` is not referenced directly in this body; presumably LIB_FUNCTION uses it - confirm
// against the macro definition.
void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize);
LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize);
LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize);
LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize);
LIB_FUNCTION("a0LLrZWac0M", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRun);
LIB_FUNCTION("PFT2S-tJ7Uk", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberSwitch);
LIB_FUNCTION("p+zLIOg27zU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetSelf);
LIB_FUNCTION("B0ZX2hx9DMw", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberReturnToThread);
LIB_FUNCTION("uq2Y5BFz0PE", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetInfo);
LIB_FUNCTION("Lcqty+QNWFc", "libSceFiber", 1, "libSceFiber", 1, 1,
sceFiberStartContextSizeCheck);
LIB_FUNCTION("Kj4nXMpnM8Y", "libSceFiber", 1, "libSceFiber", 1, 1,
sceFiberStopContextSizeCheck);
LIB_FUNCTION("JzyT91ucGDc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRename);
}
} // namespace Libraries::Fiber

View file

@ -1,118 +1,118 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
#include <atomic>
namespace Core::Loader {
class SymbolsResolver;
}
namespace Libraries::Fiber {
#define ORBIS_FIBER_MAX_NAME_LENGTH (31)
#define ORBIS_FIBER_CONTEXT_MINIMUM_SIZE (512)
typedef void PS4_SYSV_ABI (*OrbisFiberEntry)(u64 arg_on_initialize, u64 arg_on_run);
// Execution state stored in OrbisFiber::state. The numeric values matter:
// _sceFiberSwitchEntry writes the literal 2 (Idle) through OrbisFiberData::state.
enum FiberState : u32 {
Run = 1u,
Idle = 2u,
Terminated = 3u,
};
// Bit flags kept in OrbisFiber::flags. sceFiberGetInfo only reports a context margin
// when ContextSizeCheck is set.
enum FiberFlags : u32 {
None = 0x0,
NoUlobjmgr = 0x1,
ContextSizeCheck = 0x10,
SetFpuRegs = 0x100,
};
struct OrbisFiber;
// Saved register context plus switch bookkeeping for a fiber.
// The leading anonymous struct presumably mirrors the buffer written by _sceFiberSetJmp and
// read by _sceFiberLongJmp (rax at 0x00 ... r15 at 0x68, x87 control word at 0x70) - confirm.
// NOTE(review): the assembly stores and loads MXCSR at offset 0x72, but with natural alignment
// the u32 `mxcsr` below lands at 0x74 (2 bytes of padding after `fpucw`). The assembly is
// self-consistent; C code reading `mxcsr` would see a shifted value unless packing is in effect.
struct OrbisFiberContext {
struct {
u64 rax, rcx, rdx, rbx, rsp, rbp, r8, r9, r10, r11, r12, r13, r14, r15;
u16 fpucw;
u32 mxcsr;
};
OrbisFiber* current_fiber;
OrbisFiber* prev_fiber;
u64 arg_on_run_to;
u64 arg_on_return;
u64 return_val;
};
// Argument block consumed by _sceFiberSwitchEntry. The assembly addresses the fields by fixed
// offset, so the layout must not change: entry 0x00, arg_on_initialize 0x08,
// arg_on_run_to 0x10, stack_addr 0x18, state 0x20, fpucw 0x28, mxcsr 0x2c.
struct OrbisFiberData {
OrbisFiberEntry entry;
u64 arg_on_initialize;
u64 arg_on_run_to;
void* stack_addr;
// When non-null, _sceFiberSwitchEntry stores 2 (FiberState::Idle) through this pointer.
u32* state;
u16 fpucw;
// Explicit padding that keeps mxcsr at offset 0x2c.
s8 pad[2];
u32 mxcsr;
};
// Fiber control block. The API functions validate it by checking magic_start / magic_end
// against kFiberSignature0 / kFiberSignature1 and require 8-byte alignment of the pointer.
struct OrbisFiber {
u32 magic_start;
std::atomic<FiberState> state;
OrbisFiberEntry entry;
u64 arg_on_initialize;
void* addr_context;
u64 size_context;
// Room for ORBIS_FIBER_MAX_NAME_LENGTH characters plus a terminator.
char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1];
OrbisFiberContext* context;
u32 flags;
// sceFiberGetInfo scans u64 words between these two pointers to compute the context margin.
void* context_start;
void* context_end;
u32 magic_end;
};
// The structure must fit in 256 bytes.
static_assert(sizeof(OrbisFiber) <= 256);
// Output structure filled by sceFiberGetInfo.
struct OrbisFiberInfo {
// Must equal sizeof(OrbisFiberInfo) on input, otherwise sceFiberGetInfo rejects the call.
u64 size;
OrbisFiberEntry entry;
u64 arg_on_initialize;
void* addr_context;
u64 size_context;
char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1];
// Set to -1 (all bits set) by sceFiberGetInfo unless the context-size check applies.
u64 size_context_margin;
u8 pad[48];
};
// The padding brings the structure to exactly 128 bytes.
static_assert(sizeof(OrbisFiberInfo) == 128);
// Optional parameter block accepted by sceFiberInitialize; only a magic word is modelled here.
struct OrbisFiberOptParam {
u32 magic;
};
// The structure must fit in 128 bytes.
static_assert(sizeof(OrbisFiberOptParam) <= 128);
s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry,
u64 arg_on_initialize, void* addr_context, u64 size_context,
const OrbisFiberOptParam* opt_param, u32 build_version);
s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param);
s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber);
s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return);
s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run);
s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber);
s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run);
s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info);
s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags);
s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck(void);
s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name);
void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym);
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
#include <atomic>
namespace Core::Loader {
class SymbolsResolver;
}
namespace Libraries::Fiber {
#define ORBIS_FIBER_MAX_NAME_LENGTH (31)
#define ORBIS_FIBER_CONTEXT_MINIMUM_SIZE (512)
typedef void PS4_SYSV_ABI (*OrbisFiberEntry)(u64 arg_on_initialize, u64 arg_on_run);
// Execution state stored in OrbisFiber::state. The numeric values matter:
// _sceFiberSwitchEntry writes the literal 2 (Idle) through OrbisFiberData::state.
enum FiberState : u32 {
Run = 1u,
Idle = 2u,
Terminated = 3u,
};
// Bit flags kept in OrbisFiber::flags. sceFiberGetInfo only reports a context margin
// when ContextSizeCheck is set.
enum FiberFlags : u32 {
None = 0x0,
NoUlobjmgr = 0x1,
ContextSizeCheck = 0x10,
SetFpuRegs = 0x100,
};
struct OrbisFiber;
// Saved register context plus switch bookkeeping for a fiber.
// The leading anonymous struct presumably mirrors the buffer written by _sceFiberSetJmp and
// read by _sceFiberLongJmp (rax at 0x00 ... r15 at 0x68, x87 control word at 0x70) - confirm.
// NOTE(review): the assembly stores and loads MXCSR at offset 0x72, but with natural alignment
// the u32 `mxcsr` below lands at 0x74 (2 bytes of padding after `fpucw`). The assembly is
// self-consistent; C code reading `mxcsr` would see a shifted value unless packing is in effect.
struct OrbisFiberContext {
struct {
u64 rax, rcx, rdx, rbx, rsp, rbp, r8, r9, r10, r11, r12, r13, r14, r15;
u16 fpucw;
u32 mxcsr;
};
OrbisFiber* current_fiber;
OrbisFiber* prev_fiber;
u64 arg_on_run_to;
u64 arg_on_return;
u64 return_val;
};
// Argument block consumed by _sceFiberSwitchEntry. The assembly addresses the fields by fixed
// offset, so the layout must not change: entry 0x00, arg_on_initialize 0x08,
// arg_on_run_to 0x10, stack_addr 0x18, state 0x20, fpucw 0x28, mxcsr 0x2c.
struct OrbisFiberData {
OrbisFiberEntry entry;
u64 arg_on_initialize;
u64 arg_on_run_to;
void* stack_addr;
// When non-null, _sceFiberSwitchEntry stores 2 (FiberState::Idle) through this pointer.
u32* state;
u16 fpucw;
// Explicit padding that keeps mxcsr at offset 0x2c.
s8 pad[2];
u32 mxcsr;
};
// Fiber control block. The API functions validate it by checking magic_start / magic_end
// against kFiberSignature0 / kFiberSignature1 and require 8-byte alignment of the pointer.
struct OrbisFiber {
u32 magic_start;
std::atomic<FiberState> state;
OrbisFiberEntry entry;
u64 arg_on_initialize;
void* addr_context;
u64 size_context;
// Room for ORBIS_FIBER_MAX_NAME_LENGTH characters plus a terminator.
char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1];
OrbisFiberContext* context;
u32 flags;
// sceFiberGetInfo scans u64 words between these two pointers to compute the context margin.
void* context_start;
void* context_end;
u32 magic_end;
};
// The structure must fit in 256 bytes.
static_assert(sizeof(OrbisFiber) <= 256);
// Output structure filled by sceFiberGetInfo.
struct OrbisFiberInfo {
// Must equal sizeof(OrbisFiberInfo) on input, otherwise sceFiberGetInfo rejects the call.
u64 size;
OrbisFiberEntry entry;
u64 arg_on_initialize;
void* addr_context;
u64 size_context;
char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1];
// Set to -1 (all bits set) by sceFiberGetInfo unless the context-size check applies.
u64 size_context_margin;
u8 pad[48];
};
// The padding brings the structure to exactly 128 bytes.
static_assert(sizeof(OrbisFiberInfo) == 128);
// Optional parameter block accepted by sceFiberInitialize; only a magic word is modelled here.
struct OrbisFiberOptParam {
u32 magic;
};
// The structure must fit in 128 bytes.
static_assert(sizeof(OrbisFiberOptParam) <= 128);
s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry,
u64 arg_on_initialize, void* addr_context, u64 size_context,
const OrbisFiberOptParam* opt_param, u32 build_version);
s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param);
s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber);
s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return);
s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run);
s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber);
s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run);
s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info);
s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags);
s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck(void);
s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name);
void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::Fiber

View file

@ -1,121 +1,121 @@
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
# Saves the current register context into the buffer pointed to by %rdi and returns 0 in %eax.
# Slots written: 0x00 rax, 0x08 rcx, 0x10 word at (%rsp), 0x18 rbx, 0x20 rsp, 0x28 rbp,
# 0x30-0x68 r8-r15, 0x70 x87 control word, 0x72 MXCSR. %rsi and %rdi are not saved.
.global _sceFiberSetJmp
_sceFiberSetJmp:
movq %rax, 0x0(%rdi)
# The 0x10 slot receives the word at the top of the stack (the return address when this
# routine is reached via call) instead of the caller's %rdx; %rdx is clobbered here.
movq (%rsp), %rdx
movq %rdx, 0x10(%rdi)
movq %rcx, 0x08(%rdi)
movq %rbx, 0x18(%rdi)
movq %rsp, 0x20(%rdi)
movq %rbp, 0x28(%rdi)
movq %r8, 0x30(%rdi)
movq %r9, 0x38(%rdi)
movq %r10, 0x40(%rdi)
movq %r11, 0x48(%rdi)
movq %r12, 0x50(%rdi)
movq %r13, 0x58(%rdi)
movq %r14, 0x60(%rdi)
movq %r15, 0x68(%rdi)
fnstcw 0x70(%rdi)
# NOTE(review): MXCSR goes to 0x72, right after the 2-byte control word. If this buffer is
# the C struct OrbisFiberContext, a naturally aligned u32 would sit at 0x74 - confirm.
stmxcsr 0x72(%rdi)
# The direct call path returns 0; _sceFiberLongJmp resumes here with 1 instead.
xor %eax, %eax
ret
# Restores the register context stored in the buffer at %rdi (same offsets that
# _sceFiberSetJmp writes) and resumes at the saved return address with %eax = 1.
.global _sceFiberLongJmp
_sceFiberLongJmp:
# MXCSR = (MXCSR & 0x3f) ^ (ctx->mxcsr & ~0x3f)
# i.e. the low six bits of the live MXCSR are kept and the remaining bits come from the
# saved value. The 4 bytes just below %rsp are used as scratch space.
stmxcsr -0x4(%rsp)
movl 0x72(%rdi), %eax
andl $0xffffffc0, %eax
movl -0x4(%rsp), %ecx
andl $0x3f, %ecx
xorl %eax, %ecx
movl %ecx, -0x4(%rsp)
ldmxcsr -0x4(%rsp)
movq 0x00(%rdi), %rax
movq 0x08(%rdi), %rcx
# The 0x10 slot holds the return address captured by _sceFiberSetJmp, not a saved %rdx.
movq 0x10(%rdi), %rdx
movq 0x18(%rdi), %rbx
# From here on the stack is the one that was active when the context was saved.
movq 0x20(%rdi), %rsp
movq 0x28(%rdi), %rbp
movq 0x30(%rdi), %r8
movq 0x38(%rdi), %r9
movq 0x40(%rdi), %r10
movq 0x48(%rdi), %r11
movq 0x50(%rdi), %r12
movq 0x58(%rdi), %r13
movq 0x60(%rdi), %r14
movq 0x68(%rdi), %r15
fldcw 0x70(%rdi)
# Make the jump and return 1
# The saved return address is written to the top of the restored stack so that ret pops it;
# the %rax value loaded above is overwritten by the return value.
movq %rdx, 0x00(%rsp)
movl $0x1, %eax
ret
# First entry into a fiber. %rdi points at a data block laid out like OrbisFiberData
# (entry 0x00, arg_on_initialize 0x08, arg_on_run_to 0x10, stack_addr 0x18, state 0x20,
# fpucw 0x28, mxcsr 0x2c); a non-zero %esi requests loading the FPU/SSE control registers.
.global _sceFiberSwitchEntry
_sceFiberSwitchEntry:
# Keep the data pointer in %r11 because %rdi is reloaded with the entry argument below.
mov %rdi, %r11
# Set stack address to provided stack
movq 0x18(%r11), %rsp
xorl %ebp, %ebp
movq 0x20(%r11), %r10 # data->state
# Set previous fiber state to Idle
# (skipped when data->state is null; 2 matches FiberState::Idle)
test %r10, %r10
jz .clear_regs
movl $2, (%r10)
.clear_regs:
test %esi, %esi
jz .skip_fpu_regs
ldmxcsr 0x2c(%r11)
fldcw 0x28(%r11)
.skip_fpu_regs:
movq 0x08(%r11), %rdi # data->arg_on_initialize
movq 0x10(%r11), %rsi # data->arg_on_run_to
movq 0x00(%r11), %r11 # data->entry
# Zero the remaining general-purpose, MMX and vector registers before entering the fiber.
xorl %eax, %eax
xorl %ebx, %ebx
xorl %ecx, %ecx
xorl %edx, %edx
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r10
xorq %r12, %r12
xorq %r13, %r13
xorq %r14, %r14
xorq %r15, %r15
pxor %mm0, %mm0
pxor %mm1, %mm1
pxor %mm2, %mm2
pxor %mm3, %mm3
pxor %mm4, %mm4
pxor %mm5, %mm5
pxor %mm6, %mm6
pxor %mm7, %mm7
emms
vzeroall
# Call the fiber's entry function: entry(arg_on_initialize, arg_on_run_to)
call *%r11
# Fiber returned, not good
# _sceFiberForceQuit is called with 1 as its first argument.
movl $1, %edi
call _sceFiberForceQuit
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
# Saves the current register context into the buffer pointed to by %rdi and returns 0 in %eax.
# Slots written: 0x00 rax, 0x08 rcx, 0x10 word at (%rsp), 0x18 rbx, 0x20 rsp, 0x28 rbp,
# 0x30-0x68 r8-r15, 0x70 x87 control word, 0x72 MXCSR. %rsi and %rdi are not saved.
.global _sceFiberSetJmp
_sceFiberSetJmp:
movq %rax, 0x0(%rdi)
# The 0x10 slot receives the word at the top of the stack (the return address when this
# routine is reached via call) instead of the caller's %rdx; %rdx is clobbered here.
movq (%rsp), %rdx
movq %rdx, 0x10(%rdi)
movq %rcx, 0x08(%rdi)
movq %rbx, 0x18(%rdi)
movq %rsp, 0x20(%rdi)
movq %rbp, 0x28(%rdi)
movq %r8, 0x30(%rdi)
movq %r9, 0x38(%rdi)
movq %r10, 0x40(%rdi)
movq %r11, 0x48(%rdi)
movq %r12, 0x50(%rdi)
movq %r13, 0x58(%rdi)
movq %r14, 0x60(%rdi)
movq %r15, 0x68(%rdi)
fnstcw 0x70(%rdi)
# NOTE(review): MXCSR goes to 0x72, right after the 2-byte control word. If this buffer is
# the C struct OrbisFiberContext, a naturally aligned u32 would sit at 0x74 - confirm.
stmxcsr 0x72(%rdi)
# The direct call path returns 0; _sceFiberLongJmp resumes here with 1 instead.
xor %eax, %eax
ret
# Restores the register context stored in the buffer at %rdi (same offsets that
# _sceFiberSetJmp writes) and resumes at the saved return address with %eax = 1.
.global _sceFiberLongJmp
_sceFiberLongJmp:
# MXCSR = (MXCSR & 0x3f) ^ (ctx->mxcsr & ~0x3f)
# i.e. the low six bits of the live MXCSR are kept and the remaining bits come from the
# saved value. The 4 bytes just below %rsp are used as scratch space.
stmxcsr -0x4(%rsp)
movl 0x72(%rdi), %eax
andl $0xffffffc0, %eax
movl -0x4(%rsp), %ecx
andl $0x3f, %ecx
xorl %eax, %ecx
movl %ecx, -0x4(%rsp)
ldmxcsr -0x4(%rsp)
movq 0x00(%rdi), %rax
movq 0x08(%rdi), %rcx
# The 0x10 slot holds the return address captured by _sceFiberSetJmp, not a saved %rdx.
movq 0x10(%rdi), %rdx
movq 0x18(%rdi), %rbx
# From here on the stack is the one that was active when the context was saved.
movq 0x20(%rdi), %rsp
movq 0x28(%rdi), %rbp
movq 0x30(%rdi), %r8
movq 0x38(%rdi), %r9
movq 0x40(%rdi), %r10
movq 0x48(%rdi), %r11
movq 0x50(%rdi), %r12
movq 0x58(%rdi), %r13
movq 0x60(%rdi), %r14
movq 0x68(%rdi), %r15
fldcw 0x70(%rdi)
# Make the jump and return 1
# The saved return address is written to the top of the restored stack so that ret pops it;
# the %rax value loaded above is overwritten by the return value.
movq %rdx, 0x00(%rsp)
movl $0x1, %eax
ret
# First entry into a fiber. %rdi points at a data block laid out like OrbisFiberData
# (entry 0x00, arg_on_initialize 0x08, arg_on_run_to 0x10, stack_addr 0x18, state 0x20,
# fpucw 0x28, mxcsr 0x2c); a non-zero %esi requests loading the FPU/SSE control registers.
.global _sceFiberSwitchEntry
_sceFiberSwitchEntry:
# Keep the data pointer in %r11 because %rdi is reloaded with the entry argument below.
mov %rdi, %r11
# Set stack address to provided stack
movq 0x18(%r11), %rsp
xorl %ebp, %ebp
movq 0x20(%r11), %r10 # data->state
# Set previous fiber state to Idle
# (skipped when data->state is null; 2 matches FiberState::Idle)
test %r10, %r10
jz .clear_regs
movl $2, (%r10)
.clear_regs:
test %esi, %esi
jz .skip_fpu_regs
ldmxcsr 0x2c(%r11)
fldcw 0x28(%r11)
.skip_fpu_regs:
movq 0x08(%r11), %rdi # data->arg_on_initialize
movq 0x10(%r11), %rsi # data->arg_on_run_to
movq 0x00(%r11), %r11 # data->entry
# Zero the remaining general-purpose, MMX and vector registers before entering the fiber.
xorl %eax, %eax
xorl %ebx, %ebx
xorl %ecx, %ecx
xorl %edx, %edx
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r10
xorq %r12, %r12
xorq %r13, %r13
xorq %r14, %r14
xorq %r15, %r15
pxor %mm0, %mm0
pxor %mm1, %mm1
pxor %mm2, %mm2
pxor %mm3, %mm3
pxor %mm4, %mm4
pxor %mm5, %mm5
pxor %mm6, %mm6
pxor %mm7, %mm7
emms
vzeroall
# Call the fiber's entry function: entry(arg_on_initialize, arg_on_run_to)
call *%r11
# Fiber returned, not good
# _sceFiberForceQuit is called with 1 as its first argument.
movl $1, %edi
call _sceFiberForceQuit
ret

View file

@ -1,183 +1,183 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
#include "core/libraries/rtc/rtc.h"
// Kind of text the IME is asked to accept (32-bit value).
enum class OrbisImeType : u32 {
Default = 0,
BasicLatin = 1,
Url = 2,
Mail = 3,
Number = 4,
};
// Horizontal alignment selector (32-bit value).
enum class OrbisImeHorizontalAlignment : u32 {
Left = 0,
Center = 1,
Right = 2,
};
// Vertical alignment selector (32-bit value).
enum class OrbisImeVerticalAlignment : u32 {
Top = 0,
Center = 1,
Bottom = 2,
};
// Label variants for the enter key (32-bit value).
enum class OrbisImeEnterLabel : u32 {
Default = 0,
Send = 1,
Search = 2,
Go = 3,
};
// Input method selector; only the default value is defined here.
enum class OrbisImeInputMethod : u32 {
Default = 0,
};
// Identifier stored in OrbisImeEvent::id. The numbering is sparse (3, 13 and 15-17 are not
// defined) and keyboard events start at 256.
// NOTE(review): `KeyboardKeycodeDoen` looks like a misspelling of "Down"; it is a public
// enumerator name, so renaming it would have to be coordinated with every user.
enum class OrbisImeEventId : u32 {
Open = 0,
UpdateText = 1,
UpdateCaret = 2,
PressClose = 4,
PressEnter = 5,
Abort = 6,
CandidateListStart = 7,
CandidateListEnd = 8,
CandidateWord = 9,
CandidateIndex = 10,
CandidateDone = 11,
CandidateCancel = 12,
ChangeDevice = 14,
ChangeInputMethodState = 18,
KeyboardOpen = 256,
KeyboardKeycodeDoen = 257,
KeyboardKeycodeUp = 258,
KeyboardKeycodeRepeat = 259,
KeyboardConnection = 260,
KeyboardDisconnection = 261,
KeyboardAbort = 262,
};
// Keyboard layout identifier carried in OrbisImeKeycode::type; values run contiguously 0-37.
enum class OrbisImeKeyboardType : u32 {
NONE = 0,
DANISH = 1,
GERMAN = 2,
GERMAN_SW = 3,
ENGLISH_US = 4,
ENGLISH_GB = 5,
SPANISH = 6,
SPANISH_LA = 7,
FINNISH = 8,
FRENCH = 9,
FRENCH_BR = 10,
FRENCH_CA = 11,
FRENCH_SW = 12,
ITALIAN = 13,
DUTCH = 14,
NORWEGIAN = 15,
POLISH = 16,
PORTUGUESE_BR = 17,
PORTUGUESE_PT = 18,
RUSSIAN = 19,
SWEDISH = 20,
TURKISH = 21,
JAPANESE_ROMAN = 22,
JAPANESE_KANA = 23,
KOREAN = 24,
SM_CHINESE = 25,
TR_CHINESE_ZY = 26,
TR_CHINESE_PY_HK = 27,
TR_CHINESE_PY_TW = 28,
TR_CHINESE_CG = 29,
ARABIC_AR = 30,
THAI = 31,
CZECH = 32,
GREEK = 33,
INDONESIAN = 34,
VIETNAMESE = 35,
ROMANIAN = 36,
HUNGARIAN = 37,
};
// Input device identifier, used as a member of OrbisImeEventParam.
enum class OrbisImeDeviceType : u32 {
None = 0,
Controller = 1,
ExtKeyboard = 2,
RemoteOsk = 3,
};
// Rectangle with a floating-point origin and an unsigned integer extent.
struct OrbisImeRect {
f32 x;
f32 y;
u32 width;
u32 height;
};
// Describes one text area inside OrbisImeEditText::text_area.
struct OrbisImeTextAreaProperty {
u32 mode; // OrbisImeTextAreaMode
u32 index;
s32 length;
};
// Text payload of an IME event: a pointer to UTF-16 text, the caret index and up to four
// text areas, of which the first `area_num` are presumably valid - confirm.
struct OrbisImeEditText {
char16_t* str;
u32 caret_index;
u32 area_num;
OrbisImeTextAreaProperty text_area[4];
};
// Keyboard key payload of an IME event.
struct OrbisImeKeycode {
u16 keycode;
char16_t character;
u32 status;
OrbisImeKeyboardType type;
s32 user_id;
u32 resource_id;
Libraries::Rtc::OrbisRtcTick timestamp;
};
// A user id together with five keyboard resource ids.
struct OrbisImeKeyboardResourceIdArray {
s32 userId;
u32 resourceId[5];
};
// Caret movement reported with UpdateCaret events. ImeUi::InputTextCallback only ever emits
// Left or Right.
enum class OrbisImeCaretMovementDirection : u32 {
Still = 0,
Left = 1,
Right = 2,
Up = 3,
Down = 4,
Home = 5,
End = 6,
PageUp = 7,
PageDown = 8,
Top = 9,
Bottom = 10,
};
// Event payload; which member is meaningful depends on OrbisImeEvent::id. The `reserved`
// member guarantees the union is at least 64 bytes large.
union OrbisImeEventParam {
OrbisImeRect rect;
OrbisImeEditText text;
OrbisImeCaretMovementDirection caret_move;
OrbisImeKeycode keycode;
OrbisImeKeyboardResourceIdArray resource_id_array;
char16_t* candidate_word;
s32 candidate_index;
OrbisImeDeviceType device_type;
u32 input_method_state;
s8 reserved[64];
};
// One IME event: an identifier plus its payload. ImeState queues these by value.
struct OrbisImeEvent {
OrbisImeEventId id;
OrbisImeEventParam param;
};
using OrbisImeTextFilter = PS4_SYSV_ABI int (*)(char16_t* outText, u32* outTextLength,
const char16_t* srcText, u32 srcTextLength);
using OrbisImeEventHandler = PS4_SYSV_ABI void (*)(void* arg, const OrbisImeEvent* e);
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
#include "core/libraries/rtc/rtc.h"
// Kind of text the IME is asked to accept (32-bit value).
enum class OrbisImeType : u32 {
Default = 0,
BasicLatin = 1,
Url = 2,
Mail = 3,
Number = 4,
};
// Horizontal alignment selector (32-bit value).
enum class OrbisImeHorizontalAlignment : u32 {
Left = 0,
Center = 1,
Right = 2,
};
// Vertical alignment selector (32-bit value).
enum class OrbisImeVerticalAlignment : u32 {
Top = 0,
Center = 1,
Bottom = 2,
};
// Label variants for the enter key (32-bit value).
enum class OrbisImeEnterLabel : u32 {
Default = 0,
Send = 1,
Search = 2,
Go = 3,
};
// Input method selector; only the default value is defined here.
enum class OrbisImeInputMethod : u32 {
Default = 0,
};
// Identifier stored in OrbisImeEvent::id. The numbering is sparse (3, 13 and 15-17 are not
// defined) and keyboard events start at 256.
// NOTE(review): `KeyboardKeycodeDoen` looks like a misspelling of "Down"; it is a public
// enumerator name, so renaming it would have to be coordinated with every user.
enum class OrbisImeEventId : u32 {
Open = 0,
UpdateText = 1,
UpdateCaret = 2,
PressClose = 4,
PressEnter = 5,
Abort = 6,
CandidateListStart = 7,
CandidateListEnd = 8,
CandidateWord = 9,
CandidateIndex = 10,
CandidateDone = 11,
CandidateCancel = 12,
ChangeDevice = 14,
ChangeInputMethodState = 18,
KeyboardOpen = 256,
KeyboardKeycodeDoen = 257,
KeyboardKeycodeUp = 258,
KeyboardKeycodeRepeat = 259,
KeyboardConnection = 260,
KeyboardDisconnection = 261,
KeyboardAbort = 262,
};
// Keyboard layout identifier carried in OrbisImeKeycode::type; values run contiguously 0-37.
enum class OrbisImeKeyboardType : u32 {
NONE = 0,
DANISH = 1,
GERMAN = 2,
GERMAN_SW = 3,
ENGLISH_US = 4,
ENGLISH_GB = 5,
SPANISH = 6,
SPANISH_LA = 7,
FINNISH = 8,
FRENCH = 9,
FRENCH_BR = 10,
FRENCH_CA = 11,
FRENCH_SW = 12,
ITALIAN = 13,
DUTCH = 14,
NORWEGIAN = 15,
POLISH = 16,
PORTUGUESE_BR = 17,
PORTUGUESE_PT = 18,
RUSSIAN = 19,
SWEDISH = 20,
TURKISH = 21,
JAPANESE_ROMAN = 22,
JAPANESE_KANA = 23,
KOREAN = 24,
SM_CHINESE = 25,
TR_CHINESE_ZY = 26,
TR_CHINESE_PY_HK = 27,
TR_CHINESE_PY_TW = 28,
TR_CHINESE_CG = 29,
ARABIC_AR = 30,
THAI = 31,
CZECH = 32,
GREEK = 33,
INDONESIAN = 34,
VIETNAMESE = 35,
ROMANIAN = 36,
HUNGARIAN = 37,
};
// Input device identifier, used as a member of OrbisImeEventParam.
enum class OrbisImeDeviceType : u32 {
None = 0,
Controller = 1,
ExtKeyboard = 2,
RemoteOsk = 3,
};
// Rectangle with a floating-point origin and an unsigned integer extent.
struct OrbisImeRect {
f32 x;
f32 y;
u32 width;
u32 height;
};
// Describes one text area inside OrbisImeEditText::text_area.
struct OrbisImeTextAreaProperty {
u32 mode; // OrbisImeTextAreaMode
u32 index;
s32 length;
};
// Text payload of an IME event: a pointer to UTF-16 text, the caret index and up to four
// text areas, of which the first `area_num` are presumably valid - confirm.
struct OrbisImeEditText {
char16_t* str;
u32 caret_index;
u32 area_num;
OrbisImeTextAreaProperty text_area[4];
};
// Keyboard key payload of an IME event.
struct OrbisImeKeycode {
u16 keycode;
char16_t character;
u32 status;
OrbisImeKeyboardType type;
s32 user_id;
u32 resource_id;
Libraries::Rtc::OrbisRtcTick timestamp;
};
// A user id together with five keyboard resource ids.
struct OrbisImeKeyboardResourceIdArray {
s32 userId;
u32 resourceId[5];
};
// Caret movement reported with UpdateCaret events. ImeUi::InputTextCallback only ever emits
// Left or Right.
enum class OrbisImeCaretMovementDirection : u32 {
Still = 0,
Left = 1,
Right = 2,
Up = 3,
Down = 4,
Home = 5,
End = 6,
PageUp = 7,
PageDown = 8,
Top = 9,
Bottom = 10,
};
// Event payload; which member is meaningful depends on OrbisImeEvent::id. The `reserved`
// member guarantees the union is at least 64 bytes large.
union OrbisImeEventParam {
OrbisImeRect rect;
OrbisImeEditText text;
OrbisImeCaretMovementDirection caret_move;
OrbisImeKeycode keycode;
OrbisImeKeyboardResourceIdArray resource_id_array;
char16_t* candidate_word;
s32 candidate_index;
OrbisImeDeviceType device_type;
u32 input_method_state;
s8 reserved[64];
};
// One IME event: an identifier plus its payload. ImeState queues these by value.
struct OrbisImeEvent {
OrbisImeEventId id;
OrbisImeEventParam param;
};
using OrbisImeTextFilter = PS4_SYSV_ABI int (*)(char16_t* outText, u32* outTextLength,
const char16_t* srcText, u32 srcTextLength);
using OrbisImeEventHandler = PS4_SYSV_ABI void (*)(void* arg, const OrbisImeEvent* e);

View file

@ -1,253 +1,253 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "ime_ui.h"
#include "imgui/imgui_std.h"
namespace Libraries::Ime {
using namespace ImGui;
static constexpr ImVec2 BUTTON_SIZE{100.0f, 30.0f};
/// Builds the IME state from the parameters supplied by the game.
/// With a null `param` the state stays empty. Otherwise the work and text buffers are
/// remembered and any initial text is imported into `current_text` as UTF-8.
ImeState::ImeState(const OrbisImeParam* param) {
    if (!param) {
        return;
    }

    work_buffer = param->work;
    text_buffer = param->inputTextBuffer;
    if (!text_buffer) {
        // No initial text to import; char_traits::length must not be given a null pointer.
        return;
    }

    const std::size_t text_len = std::char_traits<char16_t>::length(text_buffer);
    if (!ConvertOrbisToUTF8(text_buffer, text_len, current_text.begin(),
                            ORBIS_IME_MAX_TEXT_LENGTH * 4)) {
        LOG_ERROR(Lib_ImeDialog, "Failed to convert text to utf8 encoding");
    }
}
/// Move constructor: takes over the buffers, text and queued events of `other` and clears
/// its text buffer pointer. The mutex is not transferred; this object gets a fresh one.
ImeState::ImeState(ImeState&& other) noexcept
    : work_buffer(other.work_buffer), text_buffer(std::exchange(other.text_buffer, nullptr)),
      current_text(std::move(other.current_text)), event_queue(std::move(other.event_queue)) {}
/// Move assignment: takes over the buffers, text and queued events of `other` and clears
/// its text buffer pointer. Self-assignment is a no-op.
ImeState& ImeState::operator=(ImeState&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    work_buffer = other.work_buffer;
    text_buffer = std::exchange(other.text_buffer, nullptr);
    current_text = std::move(other.current_text);
    event_queue = std::move(other.event_queue);
    return *this;
}
/// Appends a copy of `*event` to the event queue while holding the queue mutex.
void ImeState::SendEvent(OrbisImeEvent* event) {
    const std::scoped_lock guard{queue_mutex};
    event_queue.push(*event);
}
void ImeState::SendEnterEvent() {
OrbisImeEvent enterEvent{};
enterEvent.id = OrbisImeEventId::PressEnter;
SendEvent(&enterEvent);
}
void ImeState::SendCloseEvent() {
OrbisImeEvent closeEvent{};
closeEvent.id = OrbisImeEventId::PressClose;
closeEvent.param.text.str = reinterpret_cast<char16_t*>(work_buffer);
SendEvent(&closeEvent);
}
// Not implemented: the body is empty, so both arguments are currently ignored.
void ImeState::SetText(const char16_t* text, u32 length) {}
// Not implemented: the body is empty, so the requested position is currently ignored.
void ImeState::SetCaret(u32 position) {}
/// Converts UTF-16 text from the game into a zero-filled, NUL-terminated UTF-8 buffer.
///
/// @param orbis_text      Source text.
/// @param orbis_text_len  Number of source code units to convert.
/// @param utf8_text       Destination buffer.
/// @param utf8_text_len   Size of the destination buffer in bytes.
/// @return false when a pointer is null or the destination has no room; true otherwise.
bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len,
                                  char* utf8_text, std::size_t utf8_text_len) {
    // ImTextStrToUtf8 always writes a terminator, so a zero-sized buffer would be overrun.
    if (!orbis_text || !utf8_text || utf8_text_len == 0) {
        return false;
    }

    std::fill(utf8_text, utf8_text + utf8_text_len, '\0');
    // NOTE(review): this cast assumes ImWchar is a 16-bit type; if ImGui is built with
    // IMGUI_USE_WCHAR32 the element size no longer matches char16_t - confirm.
    const ImWchar* orbis_text_ptr = reinterpret_cast<const ImWchar*>(orbis_text);
    ImTextStrToUtf8(utf8_text, static_cast<int>(utf8_text_len), orbis_text_ptr,
                    orbis_text_ptr + orbis_text_len);
    return true;
}
/// Converts UTF-8 text into a zero-filled, NUL-terminated UTF-16 buffer owned by the game.
///
/// @param utf8_text       Source text.
/// @param utf8_text_len   Number of source bytes to convert (conversion also stops at a NUL).
/// @param orbis_text      Destination buffer.
/// @param orbis_text_len  Size of the destination buffer in code units.
/// @return false when a pointer is null or the destination has no room; true otherwise.
bool ImeState::ConvertUTF8ToOrbis(const char* utf8_text, std::size_t utf8_text_len,
                                  char16_t* orbis_text, std::size_t orbis_text_len) {
    // ImTextStrFromUtf8 always writes a terminator, so a zero-sized buffer would be overrun.
    if (!utf8_text || !orbis_text || orbis_text_len == 0) {
        return false;
    }

    std::fill(orbis_text, orbis_text + orbis_text_len, u'\0');
    // NOTE(review): this cast assumes ImWchar is a 16-bit type; if ImGui is built with
    // IMGUI_USE_WCHAR32 the element size no longer matches char16_t - confirm.
    // Passing an explicit end pointer honours utf8_text_len instead of relying on a NUL.
    ImTextStrFromUtf8(reinterpret_cast<ImWchar*>(orbis_text), static_cast<int>(orbis_text_len),
                      utf8_text, utf8_text + utf8_text_len);
    return true;
}
/// Stores the state and parameters and registers this object as an ImGui layer, but only
/// when parameters were actually supplied.
ImeUi::ImeUi(ImeState* state, const OrbisImeParam* param) : state(state), ime_param(param) {
    if (!param) {
        return;
    }
    AddLayer(this);
}
/// Unregisters the layer while holding the draw mutex.
ImeUi::~ImeUi() {
    const std::scoped_lock guard{draw_mutex};
    Free();
}
/// Move assignment: unregisters this layer, takes over the state and parameter pointers of
/// `other` (leaving it with null pointers) and registers this object as a layer again.
/// Self-assignment is a no-op.
ImeUi& ImeUi::operator=(ImeUi&& other) {
    if (this == &other) {
        // Passing the same mutex twice to scoped_lock below would be undefined behaviour.
        return *this;
    }

    std::scoped_lock lock(draw_mutex, other.draw_mutex);
    Free();
    state = other.state;
    ime_param = other.ime_param;
    first_render = other.first_render;
    other.state = nullptr;
    other.ime_param = nullptr;
    // The layer is registered unconditionally here, unlike in the constructor.
    AddLayer(this);
    return *this;
}
void ImeUi::Draw() {
std::unique_lock lock{draw_mutex};
if (!state) {
return;
}
const auto& ctx = *GetCurrentContext();
const auto& io = ctx.IO;
// TODO: Figure out how to properly translate the positions -
// for example, if a game wants to center the IME panel,
// we have to translate the panel position in a way that it
// still becomes centered, as the game normally calculates
// the position assuming a it's running on a 1920x1080 screen,
// whereas we are running on a 1280x720 window size (by default).
//
// e.g. Panel position calculation from a game:
// param.posx = (1920 / 2) - (panelWidth / 2);
// param.posy = (1080 / 2) - (panelHeight / 2);
const auto size = GetIO().DisplaySize;
f32 pos_x = (ime_param->posx / 1920.0f * (float)size.x);
f32 pos_y = (ime_param->posy / 1080.0f * (float)size.y);
ImVec2 window_pos = {pos_x, pos_y};
ImVec2 window_size = {500.0f, 100.0f};
// SetNextWindowPos(window_pos);
SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, io.DisplaySize.y * 0.5f),
ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f));
SetNextWindowSize(window_size);
SetNextWindowCollapsed(false);
if (first_render || !io.NavActive) {
SetNextWindowFocus();
}
if (Begin("IME##Ime", nullptr,
ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize |
ImGuiWindowFlags_NoSavedSettings)) {
DrawPrettyBackground();
DrawInputText();
SetCursorPosY(GetCursorPosY() + 10.0f);
const char* button_text;
button_text = "Done##ImeDone";
float button_spacing = 10.0f;
float total_button_width = BUTTON_SIZE.x * 2 + button_spacing;
float button_start_pos = (window_size.x - total_button_width) / 2.0f;
SetCursorPosX(button_start_pos);
if (Button(button_text, BUTTON_SIZE) || (IsKeyPressed(ImGuiKey_Enter))) {
state->SendEnterEvent();
}
SameLine(0.0f, button_spacing);
if (Button("Close##ImeClose", BUTTON_SIZE)) {
state->SendCloseEvent();
}
}
End();
first_render = false;
}
void ImeUi::DrawInputText() {
ImVec2 input_size = {GetWindowWidth() - 40.0f, 0.0f};
SetCursorPosX(20.0f);
if (first_render) {
SetKeyboardFocusHere();
}
if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->maxTextLength,
input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) {
}
}
/// ImGui input callback. Emits an UpdateText event when the buffer content changed and an
/// UpdateCaret event when the cursor moved since the previous invocation. Always returns 0.
int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
    ImeUi* ui = static_cast<ImeUi*>(data->UserData);
    ASSERT(ui);

    // NOTE(review): these function-local statics are shared by every ImeUi instance and
    // keep their values after an IME session ends.
    static std::string lastText;
    std::string currentText(data->Buf, data->BufTextLen);
    if (currentText != lastText) {
        OrbisImeEditText eventParam{};
        eventParam.str = reinterpret_cast<char16_t*>(ui->ime_param->work);
        // NOTE(review): CursorPos and BufTextLen are taken from the UTF-8 buffer; for
        // non-ASCII text they may differ from UTF-16 character counts - confirm.
        eventParam.caret_index = data->CursorPos;
        eventParam.area_num = 1;
        eventParam.text_area[0].mode = 1; // Edit mode
        eventParam.text_area[0].index = data->CursorPos;
        eventParam.text_area[0].length = data->BufTextLen;

        // Mirror the new text into the work buffer referenced by the event ...
        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str,
                                           ui->ime_param->maxTextLength)) {
            LOG_ERROR(Lib_ImeDialog, "Failed to convert UTF-8 text to Orbis char");
            return 0;
        }

        // ... and into the game's input text buffer.
        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen,
                                           ui->ime_param->inputTextBuffer,
                                           ui->ime_param->maxTextLength)) {
            LOG_ERROR(Lib_ImeDialog, "Failed to convert UTF-8 text to Orbis char");
            return 0;
        }

        OrbisImeEvent event{};
        event.id = OrbisImeEventId::UpdateText;
        event.param.text = eventParam;

        lastText = currentText;
        ui->state->SendEvent(&event);
    }

    static int lastCaretPos = -1;
    if (lastCaretPos == -1) {
        // First invocation: just remember the position.
        lastCaretPos = data->CursorPos;
    } else if (data->CursorPos != lastCaretPos) {
        // The positions differ, so the caret moved either left or right.
        const OrbisImeCaretMovementDirection caretDirection =
            data->CursorPos < lastCaretPos ? OrbisImeCaretMovementDirection::Left
                                           : OrbisImeCaretMovementDirection::Right;

        OrbisImeEvent event{};
        event.id = OrbisImeEventId::UpdateCaret;
        event.param.caret_move = caretDirection;

        lastCaretPos = data->CursorPos;
        ui->state->SendEvent(&event);
    }

    return 0;
}
// Unregisters this object as a layer. Callers in this file hold draw_mutex when calling it.
void ImeUi::Free() {
RemoveLayer(this);
}
}; // namespace Libraries::Ime
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "ime_ui.h"
#include "imgui/imgui_std.h"
namespace Libraries::Ime {
using namespace ImGui;
static constexpr ImVec2 BUTTON_SIZE{100.0f, 30.0f};
/// Builds the IME state from the parameters supplied by the game.
/// With a null `param` the state stays empty. Otherwise the work and text buffers are
/// remembered and any initial text is imported into `current_text` as UTF-8.
ImeState::ImeState(const OrbisImeParam* param) {
    if (!param) {
        return;
    }

    work_buffer = param->work;
    text_buffer = param->inputTextBuffer;
    if (!text_buffer) {
        // No initial text to import; char_traits::length must not be given a null pointer.
        return;
    }

    const std::size_t text_len = std::char_traits<char16_t>::length(text_buffer);
    if (!ConvertOrbisToUTF8(text_buffer, text_len, current_text.begin(),
                            ORBIS_IME_MAX_TEXT_LENGTH * 4)) {
        LOG_ERROR(Lib_ImeDialog, "Failed to convert text to utf8 encoding");
    }
}
/// Move constructor: takes over the buffers, text and queued events of `other` and clears
/// its text buffer pointer. The mutex is not transferred; this object gets a fresh one.
ImeState::ImeState(ImeState&& other) noexcept
    : work_buffer(other.work_buffer), text_buffer(std::exchange(other.text_buffer, nullptr)),
      current_text(std::move(other.current_text)), event_queue(std::move(other.event_queue)) {}
/// Move assignment: takes over the buffers, text and queued events of `other` and clears
/// its text buffer pointer. Self-assignment is a no-op.
ImeState& ImeState::operator=(ImeState&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    work_buffer = other.work_buffer;
    text_buffer = std::exchange(other.text_buffer, nullptr);
    current_text = std::move(other.current_text);
    event_queue = std::move(other.event_queue);
    return *this;
}
/// Appends a copy of `*event` to the event queue while holding the queue mutex.
void ImeState::SendEvent(OrbisImeEvent* event) {
    const std::scoped_lock guard{queue_mutex};
    event_queue.push(*event);
}
void ImeState::SendEnterEvent() {
OrbisImeEvent enterEvent{};
enterEvent.id = OrbisImeEventId::PressEnter;
SendEvent(&enterEvent);
}
void ImeState::SendCloseEvent() {
OrbisImeEvent closeEvent{};
closeEvent.id = OrbisImeEventId::PressClose;
closeEvent.param.text.str = reinterpret_cast<char16_t*>(work_buffer);
SendEvent(&closeEvent);
}
// Not implemented: the body is empty, so both arguments are currently ignored.
void ImeState::SetText(const char16_t* text, u32 length) {}
// Not implemented: the body is empty, so the requested position is currently ignored.
void ImeState::SetCaret(u32 position) {}
/// Converts UTF-16 text from the game into a zero-filled, NUL-terminated UTF-8 buffer.
///
/// @param orbis_text      Source text.
/// @param orbis_text_len  Number of source code units to convert.
/// @param utf8_text       Destination buffer.
/// @param utf8_text_len   Size of the destination buffer in bytes.
/// @return false when a pointer is null or the destination has no room; true otherwise.
bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len,
                                  char* utf8_text, std::size_t utf8_text_len) {
    // ImTextStrToUtf8 always writes a terminator, so a zero-sized buffer would be overrun.
    if (!orbis_text || !utf8_text || utf8_text_len == 0) {
        return false;
    }

    std::fill(utf8_text, utf8_text + utf8_text_len, '\0');
    // NOTE(review): this cast assumes ImWchar is a 16-bit type; if ImGui is built with
    // IMGUI_USE_WCHAR32 the element size no longer matches char16_t - confirm.
    const ImWchar* orbis_text_ptr = reinterpret_cast<const ImWchar*>(orbis_text);
    ImTextStrToUtf8(utf8_text, static_cast<int>(utf8_text_len), orbis_text_ptr,
                    orbis_text_ptr + orbis_text_len);
    return true;
}
/// Converts UTF-8 text into a zero-filled, NUL-terminated UTF-16 buffer owned by the game.
///
/// @param utf8_text       Source text.
/// @param utf8_text_len   Number of source bytes to convert (conversion also stops at a NUL).
/// @param orbis_text      Destination buffer.
/// @param orbis_text_len  Size of the destination buffer in code units.
/// @return false when a pointer is null or the destination has no room; true otherwise.
bool ImeState::ConvertUTF8ToOrbis(const char* utf8_text, std::size_t utf8_text_len,
                                  char16_t* orbis_text, std::size_t orbis_text_len) {
    // ImTextStrFromUtf8 always writes a terminator, so a zero-sized buffer would be overrun.
    if (!utf8_text || !orbis_text || orbis_text_len == 0) {
        return false;
    }

    std::fill(orbis_text, orbis_text + orbis_text_len, u'\0');
    // NOTE(review): this cast assumes ImWchar is a 16-bit type; if ImGui is built with
    // IMGUI_USE_WCHAR32 the element size no longer matches char16_t - confirm.
    // Passing an explicit end pointer honours utf8_text_len instead of relying on a NUL.
    ImTextStrFromUtf8(reinterpret_cast<ImWchar*>(orbis_text), static_cast<int>(orbis_text_len),
                      utf8_text, utf8_text + utf8_text_len);
    return true;
}
/// Stores the state and parameters and registers this object as an ImGui layer, but only
/// when parameters were actually supplied.
ImeUi::ImeUi(ImeState* state, const OrbisImeParam* param) : state(state), ime_param(param) {
    if (!param) {
        return;
    }
    AddLayer(this);
}
/// Unregisters the layer while holding the draw mutex.
ImeUi::~ImeUi() {
    const std::scoped_lock guard{draw_mutex};
    Free();
}
/// Move assignment: unregisters this layer, takes over the state and parameter pointers of
/// `other` (leaving it with null pointers) and registers this object as a layer again.
/// Self-assignment is a no-op.
ImeUi& ImeUi::operator=(ImeUi&& other) {
    if (this == &other) {
        // Passing the same mutex twice to scoped_lock below would be undefined behaviour.
        return *this;
    }

    std::scoped_lock lock(draw_mutex, other.draw_mutex);
    Free();
    state = other.state;
    ime_param = other.ime_param;
    first_render = other.first_render;
    other.state = nullptr;
    other.ime_param = nullptr;
    // The layer is registered unconditionally here, unlike in the constructor.
    AddLayer(this);
    return *this;
}
void ImeUi::Draw() {
std::unique_lock lock{draw_mutex};
if (!state) {
return;
}
const auto& ctx = *GetCurrentContext();
const auto& io = ctx.IO;
// TODO: Figure out how to properly translate the positions -
// for example, if a game wants to center the IME panel,
// we have to translate the panel position in a way that it
// still becomes centered, as the game normally calculates
// the position assuming a it's running on a 1920x1080 screen,
// whereas we are running on a 1280x720 window size (by default).
//
// e.g. Panel position calculation from a game:
// param.posx = (1920 / 2) - (panelWidth / 2);
// param.posy = (1080 / 2) - (panelHeight / 2);
const auto size = GetIO().DisplaySize;
f32 pos_x = (ime_param->posx / 1920.0f * (float)size.x);
f32 pos_y = (ime_param->posy / 1080.0f * (float)size.y);
ImVec2 window_pos = {pos_x, pos_y};
ImVec2 window_size = {500.0f, 100.0f};
// SetNextWindowPos(window_pos);
SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, io.DisplaySize.y * 0.5f),
ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f));
SetNextWindowSize(window_size);
SetNextWindowCollapsed(false);
if (first_render || !io.NavActive) {
SetNextWindowFocus();
}
if (Begin("IME##Ime", nullptr,
ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize |
ImGuiWindowFlags_NoSavedSettings)) {
DrawPrettyBackground();
DrawInputText();
SetCursorPosY(GetCursorPosY() + 10.0f);
const char* button_text;
button_text = "Done##ImeDone";
float button_spacing = 10.0f;
float total_button_width = BUTTON_SIZE.x * 2 + button_spacing;
float button_start_pos = (window_size.x - total_button_width) / 2.0f;
SetCursorPosX(button_start_pos);
if (Button(button_text, BUTTON_SIZE) || (IsKeyPressed(ImGuiKey_Enter))) {
state->SendEnterEvent();
}
SameLine(0.0f, button_spacing);
if (Button("Close##ImeClose", BUTTON_SIZE)) {
state->SendCloseEvent();
}
}
End();
first_render = false;
}
void ImeUi::DrawInputText() {
ImVec2 input_size = {GetWindowWidth() - 40.0f, 0.0f};
SetCursorPosX(20.0f);
if (first_render) {
SetKeyboardFocusHere();
}
if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->maxTextLength,
input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) {
}
}
/// ImGui input callback. Emits an UpdateText event when the buffer content changed and an
/// UpdateCaret event when the cursor moved since the previous invocation. Always returns 0.
int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
    ImeUi* ui = static_cast<ImeUi*>(data->UserData);
    ASSERT(ui);

    // NOTE(review): these function-local statics are shared by every ImeUi instance and
    // keep their values after an IME session ends.
    static std::string lastText;
    std::string currentText(data->Buf, data->BufTextLen);
    if (currentText != lastText) {
        OrbisImeEditText eventParam{};
        eventParam.str = reinterpret_cast<char16_t*>(ui->ime_param->work);
        // NOTE(review): CursorPos and BufTextLen are taken from the UTF-8 buffer; for
        // non-ASCII text they may differ from UTF-16 character counts - confirm.
        eventParam.caret_index = data->CursorPos;
        eventParam.area_num = 1;
        eventParam.text_area[0].mode = 1; // Edit mode
        eventParam.text_area[0].index = data->CursorPos;
        eventParam.text_area[0].length = data->BufTextLen;

        // Mirror the new text into the work buffer referenced by the event ...
        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str,
                                           ui->ime_param->maxTextLength)) {
            LOG_ERROR(Lib_ImeDialog, "Failed to convert UTF-8 text to Orbis char");
            return 0;
        }

        // ... and into the game's input text buffer.
        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen,
                                           ui->ime_param->inputTextBuffer,
                                           ui->ime_param->maxTextLength)) {
            LOG_ERROR(Lib_ImeDialog, "Failed to convert UTF-8 text to Orbis char");
            return 0;
        }

        OrbisImeEvent event{};
        event.id = OrbisImeEventId::UpdateText;
        event.param.text = eventParam;

        lastText = currentText;
        ui->state->SendEvent(&event);
    }

    static int lastCaretPos = -1;
    if (lastCaretPos == -1) {
        // First invocation: just remember the position.
        lastCaretPos = data->CursorPos;
    } else if (data->CursorPos != lastCaretPos) {
        // The positions differ, so the caret moved either left or right.
        const OrbisImeCaretMovementDirection caretDirection =
            data->CursorPos < lastCaretPos ? OrbisImeCaretMovementDirection::Left
                                           : OrbisImeCaretMovementDirection::Right;

        OrbisImeEvent event{};
        event.id = OrbisImeEventId::UpdateCaret;
        event.param.caret_move = caretDirection;

        lastCaretPos = data->CursorPos;
        ui->state->SendEvent(&event);
    }

    return 0;
}
// Unregisters this object as a layer. Callers in this file hold draw_mutex when calling it.
void ImeUi::Free() {
RemoveLayer(this);
}
}; // namespace Libraries::Ime

View file

@ -1,76 +1,76 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <imgui.h>
#include <queue>
#include "imgui/imgui_layer.h"
#include "common/cstring.h"
#include "common/types.h"
#include "ime.h"
namespace Libraries::Ime {
class ImeHandler;
class ImeUi;
// Holds the data of one IME session: the game's work and text buffers, the UTF-8 copy of
// the text being edited and a mutex-protected queue of pending events.
// Movable via the declared move operations; the mutex itself is never moved.
class ImeState {
friend class ImeHandler;
friend class ImeUi;
// Buffers taken from OrbisImeParam (`work` and `inputTextBuffer`); not owned here.
void* work_buffer{};
char16_t* text_buffer{};
// A character can hold up to 4 bytes in UTF-8
Common::CString<ORBIS_IME_MAX_TEXT_LENGTH * 4> current_text;
// Pending events; SendEvent takes queue_mutex before pushing.
std::queue<OrbisImeEvent> event_queue;
std::mutex queue_mutex;
public:
ImeState(const OrbisImeParam* param = nullptr);
ImeState(ImeState&& other) noexcept;
ImeState& operator=(ImeState&& other) noexcept;
void SendEvent(OrbisImeEvent* event);
void SendEnterEvent();
void SendCloseEvent();
// Both setters are currently empty stubs in ime_ui.cpp.
void SetText(const char16_t* text, u32 length);
void SetCaret(u32 position);
private:
// Conversion helpers between the game's UTF-16 text and the UTF-8 text used by ImGui.
bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text,
std::size_t native_text_len);
bool ConvertUTF8ToOrbis(const char* native_text, std::size_t utf8_text_len,
char16_t* orbis_text, std::size_t orbis_text_len);
};
// ImGui layer that renders the IME panel. It does not own `state` or `ime_param`.
// Copy construction is deleted and only move assignment is declared.
class ImeUi : public ImGui::Layer {
ImeState* state{};
const OrbisImeParam* ime_param{};
// True until the first Draw() that gets past its early-return check has completed.
bool first_render = true;
// Held during Draw(), destruction and move assignment.
std::mutex draw_mutex;
public:
explicit ImeUi(ImeState* state = nullptr, const OrbisImeParam* param = nullptr);
~ImeUi() override;
ImeUi(const ImeUi& other) = delete;
ImeUi& operator=(ImeUi&& other);
void Draw() override;
private:
void Free();
void DrawInputText();
static int InputTextCallback(ImGuiInputTextCallbackData* data);
};
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <imgui.h>
#include <queue>
#include "imgui/imgui_layer.h"
#include "common/cstring.h"
#include "common/types.h"
#include "ime.h"
namespace Libraries::Ime {
class ImeHandler;
class ImeUi;
// Holds the data of one IME session: the work/text buffer pointers, the current text stored as
// UTF-8 and a queue of OrbisImeEvent values with an accompanying mutex. The data members are
// private; ImeHandler and ImeUi reach them through the friend declarations.
class ImeState {
friend class ImeHandler;
friend class ImeUi;
void* work_buffer{};
char16_t* text_buffer{};
// A character can hold up to 4 bytes in UTF-8
Common::CString<ORBIS_IME_MAX_TEXT_LENGTH * 4> current_text;
// Queued events; presumably protected by queue_mutex -- confirm in the implementation file.
std::queue<OrbisImeEvent> event_queue;
std::mutex queue_mutex;
public:
// param defaults to nullptr, so the class is default-constructible.
ImeState(const OrbisImeParam* param = nullptr);
// Move construction and move assignment are both declared noexcept.
ImeState(ImeState&& other) noexcept;
ImeState& operator=(ImeState&& other) noexcept;
void SendEvent(OrbisImeEvent* event);
void SendEnterEvent();
void SendCloseEvent();
void SetText(const char16_t* text, u32 length);
void SetCaret(u32 position);
private:
// Conversion helpers between the char16_t ("Orbis") text and UTF-8 text. The bool result
// presumably signals success -- TODO confirm in the implementation.
bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text,
std::size_t native_text_len);
bool ConvertUTF8ToOrbis(const char* native_text, std::size_t utf8_text_len,
char16_t* orbis_text, std::size_t orbis_text_len);
};
// ImGui layer that draws the IME. It stores raw pointers to the ImeState and OrbisImeParam it was
// constructed with; who owns those objects is not visible here -- confirm at the call sites.
class ImeUi : public ImGui::Layer {
ImeState* state{};
const OrbisImeParam* ime_param{};
bool first_render = true;
std::mutex draw_mutex;
public:
explicit ImeUi(ImeState* state = nullptr, const OrbisImeParam* param = nullptr);
~ImeUi() override;
// Copy construction is disabled; move assignment is the only transfer operation declared.
ImeUi(const ImeUi& other) = delete;
ImeUi& operator=(ImeUi&& other);
void Draw() override;
private:
void Free();
void DrawInputText();
// Static so that it can be handed over as a plain function pointer; presumably used as the
// ImGui input-text callback -- confirm in the implementation.
static int InputTextCallback(ImGuiInputTextCallbackData* data);
};
}; // namespace Libraries::Ime

View file

@ -1,52 +1,52 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "mutex.h"
#include "common/assert.h"
namespace Libraries::Kernel {
// On Windows, creates an unnamed mutex object that is not initially owned and asserts that a
// handle was returned. On other platforms the body is empty.
TimedMutex::TimedMutex() {
#ifdef _WIN64
mtx = CreateMutex(nullptr, false, nullptr);
ASSERT(mtx);
#endif
}
// On Windows, closes the mutex handle. On other platforms the body is empty.
TimedMutex::~TimedMutex() {
#ifdef _WIN64
CloseHandle(mtx);
#endif
}
/// Blocks until the calling thread owns the mutex.
///
/// On Windows the wait is alertable, so it is restarted whenever it is interrupted by an APC
/// (WAIT_IO_COMPLETION). On other platforms this forwards to the wrapped mutex.
void TimedMutex::lock() {
#ifdef _WIN64
    for (;;) {
        const DWORD res = WaitForSingleObjectEx(mtx, INFINITE, true);
        // WAIT_ABANDONED also hands ownership to this thread (the previous owner exited without
        // releasing). Waiting again would acquire the mutex a second time recursively, and a
        // single unlock() would then leave it locked forever.
        if (res == WAIT_OBJECT_0 || res == WAIT_ABANDONED) {
            return;
        }
        // A failed wait cannot succeed by retrying; stop instead of spinning at full CPU.
        ASSERT(res != WAIT_FAILED);
    }
#else
    mtx.lock();
#endif
}
/// Attempts to take the mutex without blocking.
///
/// @return true when the calling thread now owns the mutex, false otherwise. On Windows the
///         zero-timeout wait is alertable, so the attempt may fail even if the mutex is free when
///         the wait is interrupted by an APC.
bool TimedMutex::try_lock() {
#ifdef _WIN64
    const DWORD res = WaitForSingleObjectEx(mtx, 0, true);
    // WAIT_ABANDONED means ownership was granted; reporting failure here would leave the mutex
    // held by a caller that believes it does not own it.
    return res == WAIT_OBJECT_0 || res == WAIT_ABANDONED;
#else
    return mtx.try_lock();
#endif
}
// Releases the mutex: ReleaseMutex() on Windows, the wrapped mutex's unlock() elsewhere.
void TimedMutex::unlock() {
#ifdef _WIN64
ReleaseMutex(mtx);
#else
mtx.unlock();
#endif
}
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "mutex.h"
#include "common/assert.h"
namespace Libraries::Kernel {
// On Windows, creates an unnamed mutex object that is not initially owned and asserts that a
// handle was returned. On other platforms the body is empty.
TimedMutex::TimedMutex() {
#ifdef _WIN64
mtx = CreateMutex(nullptr, false, nullptr);
ASSERT(mtx);
#endif
}
// On Windows, closes the mutex handle. On other platforms the body is empty.
TimedMutex::~TimedMutex() {
#ifdef _WIN64
CloseHandle(mtx);
#endif
}
/// Blocks until the calling thread owns the mutex.
///
/// On Windows the wait is alertable, so it is restarted whenever it is interrupted by an APC
/// (WAIT_IO_COMPLETION). On other platforms this forwards to the wrapped mutex.
void TimedMutex::lock() {
#ifdef _WIN64
    for (;;) {
        const DWORD res = WaitForSingleObjectEx(mtx, INFINITE, true);
        // WAIT_ABANDONED also hands ownership to this thread (the previous owner exited without
        // releasing). Waiting again would acquire the mutex a second time recursively, and a
        // single unlock() would then leave it locked forever.
        if (res == WAIT_OBJECT_0 || res == WAIT_ABANDONED) {
            return;
        }
        // A failed wait cannot succeed by retrying; stop instead of spinning at full CPU.
        ASSERT(res != WAIT_FAILED);
    }
#else
    mtx.lock();
#endif
}
/// Attempts to take the mutex without blocking.
///
/// @return true when the calling thread now owns the mutex, false otherwise. On Windows the
///         zero-timeout wait is alertable, so the attempt may fail even if the mutex is free when
///         the wait is interrupted by an APC.
bool TimedMutex::try_lock() {
#ifdef _WIN64
    const DWORD res = WaitForSingleObjectEx(mtx, 0, true);
    // WAIT_ABANDONED means ownership was granted; reporting failure here would leave the mutex
    // held by a caller that believes it does not own it.
    return res == WAIT_OBJECT_0 || res == WAIT_ABANDONED;
#else
    return mtx.try_lock();
#endif
}
// Releases the mutex: ReleaseMutex() on Windows, the wrapped mutex's unlock() elsewhere.
void TimedMutex::unlock() {
#ifdef _WIN64
ReleaseMutex(mtx);
#else
mtx.unlock();
#endif
}
} // namespace Libraries::Kernel

View file

@ -1,80 +1,80 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <chrono>
#include "common/types.h"
#ifdef _WIN64
#include <windows.h>
#else
#include <mutex>
#endif
namespace Libraries::Kernel {
/// Mutex with timed acquisition.
///
/// On Windows it wraps a Win32 mutex HANDLE and performs alertable waits; on every other platform
/// it forwards to std::timed_mutex.
class TimedMutex {
public:
    TimedMutex();
    ~TimedMutex();

    // The destructor closes the Windows handle, so a copy would close the same handle twice.
    // std::timed_mutex is not copyable either, so deleting these keeps all platforms consistent.
    TimedMutex(const TimedMutex&) = delete;
    TimedMutex& operator=(const TimedMutex&) = delete;

    void lock();
    bool try_lock();
    void unlock();

    /// Tries to take the mutex, giving up once rel_time has elapsed.
    /// @return true when the mutex was acquired.
    template <class Rep, class Period>
    bool try_lock_for(const std::chrono::duration<Rep, Period>& rel_time) {
#ifdef _WIN64
        constexpr auto zero = std::chrono::duration<Rep, Period>::zero();
        const auto now = std::chrono::steady_clock::now();
        std::chrono::steady_clock::time_point abs_time = now;
        if (rel_time > zero) {
            // Saturate at the largest representable time point instead of overflowing.
            constexpr auto max = (std::chrono::steady_clock::time_point::max)();
            if (abs_time < max - rel_time) {
                abs_time += rel_time;
            } else {
                abs_time = max;
            }
        }
        return try_lock_until(abs_time);
#else
        return mtx.try_lock_for(rel_time);
#endif
    }

    /// Tries to take the mutex, giving up once Clock reaches abs_time.
    /// @return true when the mutex was acquired, false on timeout or (Windows) wait failure.
    template <class Clock, class Duration>
    bool try_lock_until(const std::chrono::time_point<Clock, Duration>& abs_time) {
#ifdef _WIN64
        // The Win32 timeout is a 32-bit DWORD and INFINITE (0xFFFFFFFF) means "no timeout", so a
        // longer remaining time is waited for in chunks instead of being truncated.
        constexpr DWORD max_wait_ms = INFINITE - 1;
        for (;;) {
            const auto now = Clock::now();
            if (abs_time <= now) {
                return false;
            }
            const auto rel_ms = std::chrono::ceil<std::chrono::milliseconds>(abs_time - now);
            const bool clamped =
                rel_ms.count() > static_cast<std::chrono::milliseconds::rep>(max_wait_ms);
            const DWORD wait_ms = clamped ? max_wait_ms : static_cast<DWORD>(rel_ms.count());

            const DWORD res = WaitForSingleObjectEx(mtx, wait_ms, true);
            if (res == WAIT_OBJECT_0 || res == WAIT_ABANDONED) {
                // WAIT_ABANDONED also grants ownership to the calling thread.
                return true;
            }
            if (res == WAIT_IO_COMPLETION || (res == WAIT_TIMEOUT && clamped)) {
                // Interrupted by an APC, or only one chunk elapsed: re-evaluate the deadline.
                continue;
            }
            // Genuine timeout, or a failed wait that retrying cannot repair.
            return false;
        }
#else
        return mtx.try_lock_until(abs_time);
#endif
    }

private:
#ifdef _WIN64
    HANDLE mtx;
#else
    std::timed_mutex mtx;
#endif
};
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <chrono>
#include "common/types.h"
#ifdef _WIN64
#include <windows.h>
#else
#include <mutex>
#endif
namespace Libraries::Kernel {
/// Mutex with timed acquisition.
///
/// On Windows it wraps a Win32 mutex HANDLE and performs alertable waits; on every other platform
/// it forwards to std::timed_mutex.
class TimedMutex {
public:
    TimedMutex();
    ~TimedMutex();

    // The destructor closes the Windows handle, so a copy would close the same handle twice.
    // std::timed_mutex is not copyable either, so deleting these keeps all platforms consistent.
    TimedMutex(const TimedMutex&) = delete;
    TimedMutex& operator=(const TimedMutex&) = delete;

    void lock();
    bool try_lock();
    void unlock();

    /// Tries to take the mutex, giving up once rel_time has elapsed.
    /// @return true when the mutex was acquired.
    template <class Rep, class Period>
    bool try_lock_for(const std::chrono::duration<Rep, Period>& rel_time) {
#ifdef _WIN64
        constexpr auto zero = std::chrono::duration<Rep, Period>::zero();
        const auto now = std::chrono::steady_clock::now();
        std::chrono::steady_clock::time_point abs_time = now;
        if (rel_time > zero) {
            // Saturate at the largest representable time point instead of overflowing.
            constexpr auto max = (std::chrono::steady_clock::time_point::max)();
            if (abs_time < max - rel_time) {
                abs_time += rel_time;
            } else {
                abs_time = max;
            }
        }
        return try_lock_until(abs_time);
#else
        return mtx.try_lock_for(rel_time);
#endif
    }

    /// Tries to take the mutex, giving up once Clock reaches abs_time.
    /// @return true when the mutex was acquired, false on timeout or (Windows) wait failure.
    template <class Clock, class Duration>
    bool try_lock_until(const std::chrono::time_point<Clock, Duration>& abs_time) {
#ifdef _WIN64
        // The Win32 timeout is a 32-bit DWORD and INFINITE (0xFFFFFFFF) means "no timeout", so a
        // longer remaining time is waited for in chunks instead of being truncated.
        constexpr DWORD max_wait_ms = INFINITE - 1;
        for (;;) {
            const auto now = Clock::now();
            if (abs_time <= now) {
                return false;
            }
            const auto rel_ms = std::chrono::ceil<std::chrono::milliseconds>(abs_time - now);
            const bool clamped =
                rel_ms.count() > static_cast<std::chrono::milliseconds::rep>(max_wait_ms);
            const DWORD wait_ms = clamped ? max_wait_ms : static_cast<DWORD>(rel_ms.count());

            const DWORD res = WaitForSingleObjectEx(mtx, wait_ms, true);
            if (res == WAIT_OBJECT_0 || res == WAIT_ABANDONED) {
                // WAIT_ABANDONED also grants ownership to the calling thread.
                return true;
            }
            if (res == WAIT_IO_COMPLETION || (res == WAIT_TIMEOUT && clamped)) {
                // Interrupted by an APC, or only one chunk elapsed: re-evaluate the deadline.
                continue;
            }
            // Genuine timeout, or a failed wait that retrying cannot repair.
            return false;
        }
#else
        return mtx.try_lock_until(abs_time);
#endif
    }

private:
#ifdef _WIN64
    HANDLE mtx;
#else
    std::timed_mutex mtx;
#endif
};
} // namespace Libraries::Kernel

View file

@ -1,167 +1,167 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <chrono>
#include "common/assert.h"
#include "common/types.h"
#ifdef _WIN64
#include <windows.h>
#elif defined(__APPLE__)
#include <dispatch/dispatch.h>
#else
#include <semaphore>
#endif
namespace Libraries::Kernel {
/// Counting semaphore with the std::counting_semaphore-style interface.
///
/// Backed by a Win32 semaphore on Windows, a libdispatch semaphore on Apple platforms and
/// std::counting_semaphore<max> elsewhere. `max` is forwarded to CreateSemaphore and to
/// std::counting_semaphore; the libdispatch backend is created without a maximum.
template <s64 max>
class Semaphore {
public:
    /// Creates the semaphore with initialCount available permits.
    Semaphore(s32 initialCount)
#if !defined(_WIN64) && !defined(__APPLE__)
        : sem{initialCount}
#endif
    {
#ifdef _WIN64
        sem = CreateSemaphore(nullptr, initialCount, max, nullptr);
        ASSERT(sem);
#elif defined(__APPLE__)
        sem = dispatch_semaphore_create(initialCount);
        ASSERT(sem);
#endif
    }

    ~Semaphore() {
#ifdef _WIN64
        CloseHandle(sem);
#elif defined(__APPLE__)
        dispatch_release(sem);
#endif
    }

    // The destructor releases the native handle, so a copy would release it twice.
    // std::counting_semaphore is not copyable either, which keeps all platforms consistent.
    Semaphore(const Semaphore&) = delete;
    Semaphore& operator=(const Semaphore&) = delete;

    /// Adds one permit.
    void release() {
#ifdef _WIN64
        ReleaseSemaphore(sem, 1, nullptr);
#elif defined(__APPLE__)
        dispatch_semaphore_signal(sem);
#else
        sem.release();
#endif
    }

    /// Blocks until a permit has been taken.
    void acquire() {
#ifdef _WIN64
        for (;;) {
            // Alertable wait: WAIT_IO_COMPLETION means an APC ran, so simply wait again.
            const DWORD res = WaitForSingleObjectEx(sem, INFINITE, true);
            if (res == WAIT_OBJECT_0) {
                return;
            }
            // A failed wait cannot succeed by retrying; stop instead of spinning at full CPU.
            ASSERT(res != WAIT_FAILED);
        }
#elif defined(__APPLE__)
        for (;;) {
            const auto res = dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
            if (res == 0) {
                return;
            }
        }
#else
        sem.acquire();
#endif
    }

    /// Takes a permit if one is available right now. @return true when a permit was taken.
    bool try_acquire() {
#ifdef _WIN64
        return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0;
#elif defined(__APPLE__)
        return dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW) == 0;
#else
        return sem.try_acquire();
#endif
    }

    /// Waits up to rel_time for a permit. @return true when a permit was taken.
    template <class Rep, class Period>
    bool try_acquire_for(const std::chrono::duration<Rep, Period>& rel_time) {
#ifdef _WIN64
        const auto start_time = std::chrono::steady_clock::now();
        const auto total_ms = std::chrono::ceil<std::chrono::milliseconds>(rel_time);
        auto remaining_ms = total_ms;
        while (remaining_ms.count() > 0) {
            bool clamped = false;
            const DWORD res = WaitClamped(remaining_ms, clamped);
            if (res == WAIT_OBJECT_0) {
                return true;
            }
            const bool retry = res == WAIT_IO_COMPLETION || (res == WAIT_TIMEOUT && clamped);
            if (!retry) {
                return false;
            }
            // Recompute from the fixed start point. Subtracting the total elapsed time from the
            // previous remainder would count earlier interruptions more than once.
            const auto elapsed = std::chrono::steady_clock::now() - start_time;
            remaining_ms =
                total_ms - std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
        }
        return false;
#elif defined(__APPLE__)
        const auto rel_time_ns = std::chrono::ceil<std::chrono::nanoseconds>(rel_time).count();
        const auto timeout = dispatch_time(DISPATCH_TIME_NOW, rel_time_ns);
        return dispatch_semaphore_wait(sem, timeout) == 0;
#else
        return sem.try_acquire_for(rel_time);
#endif
    }

    /// Waits for a permit until Clock reaches abs_time. @return true when a permit was taken.
    template <class Clock, class Duration>
    bool try_acquire_until(const std::chrono::time_point<Clock, Duration>& abs_time) {
#ifdef _WIN64
        for (;;) {
            // The remaining time is always derived from the deadline, so repeated APC
            // interruptions cannot shorten the overall timeout.
            const auto now = Clock::now();
            if (now >= abs_time) {
                return false;
            }
            const auto remaining_ms =
                std::chrono::ceil<std::chrono::milliseconds>(abs_time - now);
            bool clamped = false;
            const DWORD res = WaitClamped(remaining_ms, clamped);
            if (res == WAIT_OBJECT_0) {
                return true;
            }
            const bool retry = res == WAIT_IO_COMPLETION || (res == WAIT_TIMEOUT && clamped);
            if (!retry) {
                return false;
            }
        }
#elif defined(__APPLE__)
        // Convert to a relative wait. dispatch_walltime() interprets its timespec as wall-clock
        // time, which is wrong for time points of clocks with another epoch (e.g. steady_clock).
        const auto now = Clock::now();
        if (now >= abs_time) {
            return try_acquire();
        }
        return try_acquire_for(abs_time - now);
#else
        return sem.try_acquire_until(abs_time);
#endif
    }

private:
#ifdef _WIN64
    /// Performs one alertable wait of at most `remaining`.
    ///
    /// The Win32 timeout is a 32-bit DWORD and INFINITE (0xFFFFFFFF) means "no timeout", so longer
    /// durations are clamped to the largest finite value; `clamped` tells the caller that a
    /// WAIT_TIMEOUT result does not yet mean the requested time has passed.
    DWORD WaitClamped(std::chrono::milliseconds remaining, bool& clamped) {
        constexpr DWORD max_wait_ms = INFINITE - 1;
        clamped = remaining.count() > static_cast<std::chrono::milliseconds::rep>(max_wait_ms);
        const DWORD wait_ms = clamped ? max_wait_ms : static_cast<DWORD>(remaining.count());
        return WaitForSingleObjectEx(sem, wait_ms, true);
    }

    HANDLE sem;
#elif defined(__APPLE__)
    dispatch_semaphore_t sem;
#else
    std::counting_semaphore<max> sem;
#endif
};
// Semaphore instantiated with a maximum count of 1.
using BinarySemaphore = Semaphore<1>;
// Semaphore instantiated with a maximum count of 0x7FFFFFFF (named ORBIS_KERNEL_SEM_VALUE_MAX in
// the inline note).
using CountingSemaphore = Semaphore<0x7FFFFFFF /*ORBIS_KERNEL_SEM_VALUE_MAX*/>;
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <chrono>
#include "common/assert.h"
#include "common/types.h"
#ifdef _WIN64
#include <windows.h>
#elif defined(__APPLE__)
#include <dispatch/dispatch.h>
#else
#include <semaphore>
#endif
namespace Libraries::Kernel {
/// Counting semaphore with the std::counting_semaphore-style interface.
///
/// Backed by a Win32 semaphore on Windows, a libdispatch semaphore on Apple platforms and
/// std::counting_semaphore<max> elsewhere. `max` is forwarded to CreateSemaphore and to
/// std::counting_semaphore; the libdispatch backend is created without a maximum.
template <s64 max>
class Semaphore {
public:
    /// Creates the semaphore with initialCount available permits.
    Semaphore(s32 initialCount)
#if !defined(_WIN64) && !defined(__APPLE__)
        : sem{initialCount}
#endif
    {
#ifdef _WIN64
        sem = CreateSemaphore(nullptr, initialCount, max, nullptr);
        ASSERT(sem);
#elif defined(__APPLE__)
        sem = dispatch_semaphore_create(initialCount);
        ASSERT(sem);
#endif
    }

    ~Semaphore() {
#ifdef _WIN64
        CloseHandle(sem);
#elif defined(__APPLE__)
        dispatch_release(sem);
#endif
    }

    // The destructor releases the native handle, so a copy would release it twice.
    // std::counting_semaphore is not copyable either, which keeps all platforms consistent.
    Semaphore(const Semaphore&) = delete;
    Semaphore& operator=(const Semaphore&) = delete;

    /// Adds one permit.
    void release() {
#ifdef _WIN64
        ReleaseSemaphore(sem, 1, nullptr);
#elif defined(__APPLE__)
        dispatch_semaphore_signal(sem);
#else
        sem.release();
#endif
    }

    /// Blocks until a permit has been taken.
    void acquire() {
#ifdef _WIN64
        for (;;) {
            // Alertable wait: WAIT_IO_COMPLETION means an APC ran, so simply wait again.
            const DWORD res = WaitForSingleObjectEx(sem, INFINITE, true);
            if (res == WAIT_OBJECT_0) {
                return;
            }
            // A failed wait cannot succeed by retrying; stop instead of spinning at full CPU.
            ASSERT(res != WAIT_FAILED);
        }
#elif defined(__APPLE__)
        for (;;) {
            const auto res = dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
            if (res == 0) {
                return;
            }
        }
#else
        sem.acquire();
#endif
    }

    /// Takes a permit if one is available right now. @return true when a permit was taken.
    bool try_acquire() {
#ifdef _WIN64
        return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0;
#elif defined(__APPLE__)
        return dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW) == 0;
#else
        return sem.try_acquire();
#endif
    }

    /// Waits up to rel_time for a permit. @return true when a permit was taken.
    template <class Rep, class Period>
    bool try_acquire_for(const std::chrono::duration<Rep, Period>& rel_time) {
#ifdef _WIN64
        const auto start_time = std::chrono::steady_clock::now();
        const auto total_ms = std::chrono::ceil<std::chrono::milliseconds>(rel_time);
        auto remaining_ms = total_ms;
        while (remaining_ms.count() > 0) {
            bool clamped = false;
            const DWORD res = WaitClamped(remaining_ms, clamped);
            if (res == WAIT_OBJECT_0) {
                return true;
            }
            const bool retry = res == WAIT_IO_COMPLETION || (res == WAIT_TIMEOUT && clamped);
            if (!retry) {
                return false;
            }
            // Recompute from the fixed start point. Subtracting the total elapsed time from the
            // previous remainder would count earlier interruptions more than once.
            const auto elapsed = std::chrono::steady_clock::now() - start_time;
            remaining_ms =
                total_ms - std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
        }
        return false;
#elif defined(__APPLE__)
        const auto rel_time_ns = std::chrono::ceil<std::chrono::nanoseconds>(rel_time).count();
        const auto timeout = dispatch_time(DISPATCH_TIME_NOW, rel_time_ns);
        return dispatch_semaphore_wait(sem, timeout) == 0;
#else
        return sem.try_acquire_for(rel_time);
#endif
    }

    /// Waits for a permit until Clock reaches abs_time. @return true when a permit was taken.
    template <class Clock, class Duration>
    bool try_acquire_until(const std::chrono::time_point<Clock, Duration>& abs_time) {
#ifdef _WIN64
        for (;;) {
            // The remaining time is always derived from the deadline, so repeated APC
            // interruptions cannot shorten the overall timeout.
            const auto now = Clock::now();
            if (now >= abs_time) {
                return false;
            }
            const auto remaining_ms =
                std::chrono::ceil<std::chrono::milliseconds>(abs_time - now);
            bool clamped = false;
            const DWORD res = WaitClamped(remaining_ms, clamped);
            if (res == WAIT_OBJECT_0) {
                return true;
            }
            const bool retry = res == WAIT_IO_COMPLETION || (res == WAIT_TIMEOUT && clamped);
            if (!retry) {
                return false;
            }
        }
#elif defined(__APPLE__)
        // Convert to a relative wait. dispatch_walltime() interprets its timespec as wall-clock
        // time, which is wrong for time points of clocks with another epoch (e.g. steady_clock).
        const auto now = Clock::now();
        if (now >= abs_time) {
            return try_acquire();
        }
        return try_acquire_for(abs_time - now);
#else
        return sem.try_acquire_until(abs_time);
#endif
    }

private:
#ifdef _WIN64
    /// Performs one alertable wait of at most `remaining`.
    ///
    /// The Win32 timeout is a 32-bit DWORD and INFINITE (0xFFFFFFFF) means "no timeout", so longer
    /// durations are clamped to the largest finite value; `clamped` tells the caller that a
    /// WAIT_TIMEOUT result does not yet mean the requested time has passed.
    DWORD WaitClamped(std::chrono::milliseconds remaining, bool& clamped) {
        constexpr DWORD max_wait_ms = INFINITE - 1;
        clamped = remaining.count() > static_cast<std::chrono::milliseconds::rep>(max_wait_ms);
        const DWORD wait_ms = clamped ? max_wait_ms : static_cast<DWORD>(remaining.count());
        return WaitForSingleObjectEx(sem, wait_ms, true);
    }

    HANDLE sem;
#elif defined(__APPLE__)
    dispatch_semaphore_t sem;
#else
    std::counting_semaphore<max> sem;
#endif
};
// Semaphore instantiated with a maximum count of 1.
using BinarySemaphore = Semaphore<1>;
// Semaphore instantiated with a maximum count of 0x7FFFFFFF (named ORBIS_KERNEL_SEM_VALUE_MAX in
// the inline note).
using CountingSemaphore = Semaphore<0x7FFFFFFF /*ORBIS_KERNEL_SEM_VALUE_MAX*/>;
} // namespace Libraries::Kernel

View file

@ -1,199 +1,199 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/libraries/libs.h"
#include "core/libraries/videodec/videodec2.h"
#include "core/libraries/videodec/videodec2_impl.h"
#include "core/libraries/videodec/videodec_error.h"
namespace Libraries::Vdec2 {
static constexpr u64 kMinimumMemorySize = 32_MB; ///< Fake minimum memory size for querying
/// Reports the compute memory requirements to the caller.
///
/// No memory is provided here: the pointer field is cleared and the size field is set to the fake
/// kMinimumMemorySize.
///
/// @return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when computeMemInfo is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when its thisSize does not match the structure size,
///         ORBIS_OK otherwise.
s32 PS4_SYSV_ABI
sceVideodec2QueryComputeMemoryInfo(OrbisVideodec2ComputeMemoryInfo* computeMemInfo) {
    LOG_INFO(Lib_Vdec2, "called");

    if (computeMemInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    const bool size_matches = computeMemInfo->thisSize == sizeof(OrbisVideodec2ComputeMemoryInfo);
    if (!size_matches) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    auto& info = *computeMemInfo;
    info.cpuGpuMemorySize = kMinimumMemorySize;
    info.cpuGpuMemory = nullptr;
    return ORBIS_OK;
}
// Stub: logs the call and returns ORBIS_OK. None of the arguments are read, and nothing is
// written through computeQueue.
s32 PS4_SYSV_ABI
sceVideodec2AllocateComputeQueue(const OrbisVideodec2ComputeConfigInfo* computeCfgInfo,
const OrbisVideodec2ComputeMemoryInfo* computeMemInfo,
OrbisVideodec2ComputeQueue* computeQueue) {
LOG_INFO(Lib_Vdec2, "called");
return ORBIS_OK;
}
// Stub: logs the call and returns ORBIS_OK without touching computeQueue.
s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue) {
LOG_INFO(Lib_Vdec2, "called");
return ORBIS_OK;
}
/// Reports the decoder memory requirements to the caller.
///
/// The configuration is only validated, not interpreted: every size field is set to the fake
/// kMinimumMemorySize, every pointer field is cleared and the frame buffer alignment is 0x100.
///
/// @return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when either pointer is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when either thisSize does not match its structure
///         size, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI
sceVideodec2QueryDecoderMemoryInfo(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
                                   OrbisVideodec2DecoderMemoryInfo* decoderMemInfo) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoderCfgInfo == nullptr || decoderMemInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    const bool cfg_size_ok = decoderCfgInfo->thisSize == sizeof(OrbisVideodec2DecoderConfigInfo);
    const bool mem_size_ok = decoderMemInfo->thisSize == sizeof(OrbisVideodec2DecoderMemoryInfo);
    if (!(cfg_size_ok && mem_size_ok)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    auto& info = *decoderMemInfo;
    info.cpuMemorySize = kMinimumMemorySize;
    info.cpuMemory = nullptr;
    info.gpuMemorySize = kMinimumMemorySize;
    info.gpuMemory = nullptr;
    info.cpuGpuMemorySize = kMinimumMemorySize;
    info.cpuGpuMemory = nullptr;
    info.maxFrameBufferSize = kMinimumMemorySize;
    info.frameBufferAlignment = 0x100;
    return ORBIS_OK;
}
/// Creates a decoder instance and stores its handle in *decoder.
///
/// The handle is a heap-allocated VdecDecoder; sceVideodec2DeleteDecoder deletes it.
///
/// @return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when any pointer is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when either thisSize does not match its structure
///         size, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
                                           const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo,
                                           OrbisVideodec2Decoder* decoder) {
    LOG_INFO(Lib_Vdec2, "called");

    const bool all_present =
        decoderCfgInfo != nullptr && decoderMemInfo != nullptr && decoder != nullptr;
    if (!all_present) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    const bool cfg_size_ok = decoderCfgInfo->thisSize == sizeof(OrbisVideodec2DecoderConfigInfo);
    const bool mem_size_ok = decoderMemInfo->thisSize == sizeof(OrbisVideodec2DecoderMemoryInfo);
    if (!(cfg_size_ok && mem_size_ok)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    // Ownership passes to the caller through the opaque handle.
    *decoder = new VdecDecoder(*decoderCfgInfo, *decoderMemInfo);
    return ORBIS_OK;
}
/// Destroys a decoder instance.
///
/// @return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE when decoder is null, ORBIS_OK after the
///         instance has been deleted.
s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoder != nullptr) {
        delete decoder;
        return ORBIS_OK;
    }
    return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
}
/// Validates the arguments and forwards the request to the decoder's Decode().
///
/// @return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE when decoder is null,
///         ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when any other pointer is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when inputData or frameBuffer carries an unexpected
///         thisSize, otherwise whatever Decode() returns.
s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder,
                                    const OrbisVideodec2InputData* inputData,
                                    OrbisVideodec2FrameBuffer* frameBuffer,
                                    OrbisVideodec2OutputInfo* outputInfo) {
    LOG_TRACE(Lib_Vdec2, "called");

    if (decoder == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
    }

    const bool all_present =
        inputData != nullptr && frameBuffer != nullptr && outputInfo != nullptr;
    if (!all_present) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    // NOTE(review): outputInfo->thisSize is not validated here, unlike in sceVideodec2Flush --
    // confirm whether that is intentional.
    const bool input_size_ok = inputData->thisSize == sizeof(OrbisVideodec2InputData);
    const bool frame_size_ok = frameBuffer->thisSize == sizeof(OrbisVideodec2FrameBuffer);
    if (!(input_size_ok && frame_size_ok)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    return decoder->Decode(*inputData, *frameBuffer, *outputInfo);
}
/// Validates the arguments and forwards the request to the decoder's Flush().
///
/// @return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE when decoder is null,
///         ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when frameBuffer or outputInfo is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when either thisSize does not match its structure
///         size, otherwise whatever Flush() returns.
s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder,
                                   OrbisVideodec2FrameBuffer* frameBuffer,
                                   OrbisVideodec2OutputInfo* outputInfo) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoder == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
    }
    if (frameBuffer == nullptr || outputInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    const bool frame_size_ok = frameBuffer->thisSize == sizeof(OrbisVideodec2FrameBuffer);
    const bool output_size_ok = outputInfo->thisSize == sizeof(OrbisVideodec2OutputInfo);
    if (!(frame_size_ok && output_size_ok)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    return decoder->Flush(*frameBuffer, *outputInfo);
}
/// Forwards to the decoder's Reset().
///
/// @return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE when decoder is null, otherwise whatever
///         Reset() returns.
s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoder != nullptr) {
        return decoder->Reset();
    }
    return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
}
/// Copies the most recent entry of gPictureInfos into the first picture-info output.
///
/// Nothing is written when outputInfo reports zero pictures, when gPictureInfos is empty or when
/// p1stPictureInfoOut is null. p2ndPictureInfoOut is never written; an output with more than one
/// picture ends in UNREACHABLE().
///
/// @return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when outputInfo is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when outputInfo or the first picture-info structure
///         carries an unexpected thisSize, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo,
                                            void* p1stPictureInfoOut, void* p2ndPictureInfoOut) {
    LOG_TRACE(Lib_Vdec2, "called");

    if (outputInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }
    if (outputInfo->thisSize != sizeof(OrbisVideodec2OutputInfo)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    const bool nothing_to_report = outputInfo->pictureCount == 0 || gPictureInfos.empty();
    if (nothing_to_report) {
        return ORBIS_OK;
    }

    if (p1stPictureInfoOut != nullptr) {
        auto* const first_info = static_cast<OrbisVideodec2AvcPictureInfo*>(p1stPictureInfoOut);
        if (first_info->thisSize != sizeof(OrbisVideodec2AvcPictureInfo)) {
            return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
        }
        *first_info = gPictureInfos.back();
    }

    // Outputs carrying a second picture are not handled.
    if (outputInfo->pictureCount > 1) {
        UNREACHABLE();
    }
    return ORBIS_OK;
}
// Registers every sceVideodec2* entry point of this file through LIB_FUNCTION, each under the
// library and module name "libSceVideodec2". `sym` is not named in the body; presumably the
// macro refers to it -- confirm against the LIB_FUNCTION definition.
void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("RnDibcGCPKw", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2QueryComputeMemoryInfo);
LIB_FUNCTION("eD+X2SmxUt4", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2AllocateComputeQueue);
LIB_FUNCTION("UvtA3FAiF4Y", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2ReleaseComputeQueue);
LIB_FUNCTION("qqMCwlULR+E", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2QueryDecoderMemoryInfo);
LIB_FUNCTION("CNNRoRYd8XI", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2CreateDecoder);
LIB_FUNCTION("jwImxXRGSKA", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2DeleteDecoder);
LIB_FUNCTION("852F5+q6+iM", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Decode);
LIB_FUNCTION("l1hXwscLuCY", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Flush);
LIB_FUNCTION("wJXikG6QFN8", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Reset);
LIB_FUNCTION("NtXRa3dRzU0", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2GetPictureInfo);
}
} // namespace Libraries::Vdec2
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/libraries/libs.h"
#include "core/libraries/videodec/videodec2.h"
#include "core/libraries/videodec/videodec2_impl.h"
#include "core/libraries/videodec/videodec_error.h"
namespace Libraries::Vdec2 {
static constexpr u64 kMinimumMemorySize = 32_MB; ///< Fake minimum memory size for querying
/// Reports the compute memory requirements to the caller.
///
/// No memory is provided here: the pointer field is cleared and the size field is set to the fake
/// kMinimumMemorySize.
///
/// @return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when computeMemInfo is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when its thisSize does not match the structure size,
///         ORBIS_OK otherwise.
s32 PS4_SYSV_ABI
sceVideodec2QueryComputeMemoryInfo(OrbisVideodec2ComputeMemoryInfo* computeMemInfo) {
    LOG_INFO(Lib_Vdec2, "called");

    if (computeMemInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    const bool size_matches = computeMemInfo->thisSize == sizeof(OrbisVideodec2ComputeMemoryInfo);
    if (!size_matches) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    auto& info = *computeMemInfo;
    info.cpuGpuMemorySize = kMinimumMemorySize;
    info.cpuGpuMemory = nullptr;
    return ORBIS_OK;
}
// Stub: logs the call and returns ORBIS_OK. None of the arguments are read, and nothing is
// written through computeQueue.
s32 PS4_SYSV_ABI
sceVideodec2AllocateComputeQueue(const OrbisVideodec2ComputeConfigInfo* computeCfgInfo,
const OrbisVideodec2ComputeMemoryInfo* computeMemInfo,
OrbisVideodec2ComputeQueue* computeQueue) {
LOG_INFO(Lib_Vdec2, "called");
return ORBIS_OK;
}
// Stub: logs the call and returns ORBIS_OK without touching computeQueue.
s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue) {
LOG_INFO(Lib_Vdec2, "called");
return ORBIS_OK;
}
/// Reports the decoder memory requirements to the caller.
///
/// The configuration is only validated, not interpreted: every size field is set to the fake
/// kMinimumMemorySize, every pointer field is cleared and the frame buffer alignment is 0x100.
///
/// @return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when either pointer is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when either thisSize does not match its structure
///         size, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI
sceVideodec2QueryDecoderMemoryInfo(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
                                   OrbisVideodec2DecoderMemoryInfo* decoderMemInfo) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoderCfgInfo == nullptr || decoderMemInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    const bool cfg_size_ok = decoderCfgInfo->thisSize == sizeof(OrbisVideodec2DecoderConfigInfo);
    const bool mem_size_ok = decoderMemInfo->thisSize == sizeof(OrbisVideodec2DecoderMemoryInfo);
    if (!(cfg_size_ok && mem_size_ok)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    auto& info = *decoderMemInfo;
    info.cpuMemorySize = kMinimumMemorySize;
    info.cpuMemory = nullptr;
    info.gpuMemorySize = kMinimumMemorySize;
    info.gpuMemory = nullptr;
    info.cpuGpuMemorySize = kMinimumMemorySize;
    info.cpuGpuMemory = nullptr;
    info.maxFrameBufferSize = kMinimumMemorySize;
    info.frameBufferAlignment = 0x100;
    return ORBIS_OK;
}
/// Creates a decoder instance and stores its handle in *decoder.
///
/// The handle is a heap-allocated VdecDecoder; sceVideodec2DeleteDecoder deletes it.
///
/// @return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when any pointer is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when either thisSize does not match its structure
///         size, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
                                           const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo,
                                           OrbisVideodec2Decoder* decoder) {
    LOG_INFO(Lib_Vdec2, "called");

    const bool all_present =
        decoderCfgInfo != nullptr && decoderMemInfo != nullptr && decoder != nullptr;
    if (!all_present) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    const bool cfg_size_ok = decoderCfgInfo->thisSize == sizeof(OrbisVideodec2DecoderConfigInfo);
    const bool mem_size_ok = decoderMemInfo->thisSize == sizeof(OrbisVideodec2DecoderMemoryInfo);
    if (!(cfg_size_ok && mem_size_ok)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    // Ownership passes to the caller through the opaque handle.
    *decoder = new VdecDecoder(*decoderCfgInfo, *decoderMemInfo);
    return ORBIS_OK;
}
/// Destroys a decoder instance.
///
/// @return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE when decoder is null, ORBIS_OK after the
///         instance has been deleted.
s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoder != nullptr) {
        delete decoder;
        return ORBIS_OK;
    }
    return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
}
/// Validates the arguments and forwards the request to the decoder's Decode().
///
/// @return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE when decoder is null,
///         ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when any other pointer is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when inputData or frameBuffer carries an unexpected
///         thisSize, otherwise whatever Decode() returns.
s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder,
                                    const OrbisVideodec2InputData* inputData,
                                    OrbisVideodec2FrameBuffer* frameBuffer,
                                    OrbisVideodec2OutputInfo* outputInfo) {
    LOG_TRACE(Lib_Vdec2, "called");

    if (decoder == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
    }

    const bool all_present =
        inputData != nullptr && frameBuffer != nullptr && outputInfo != nullptr;
    if (!all_present) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    // NOTE(review): outputInfo->thisSize is not validated here, unlike in sceVideodec2Flush --
    // confirm whether that is intentional.
    const bool input_size_ok = inputData->thisSize == sizeof(OrbisVideodec2InputData);
    const bool frame_size_ok = frameBuffer->thisSize == sizeof(OrbisVideodec2FrameBuffer);
    if (!(input_size_ok && frame_size_ok)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    return decoder->Decode(*inputData, *frameBuffer, *outputInfo);
}
/// Validates the arguments and forwards the request to the decoder's Flush().
///
/// @return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE when decoder is null,
///         ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when frameBuffer or outputInfo is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when either thisSize does not match its structure
///         size, otherwise whatever Flush() returns.
s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder,
                                   OrbisVideodec2FrameBuffer* frameBuffer,
                                   OrbisVideodec2OutputInfo* outputInfo) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoder == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
    }
    if (frameBuffer == nullptr || outputInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    const bool frame_size_ok = frameBuffer->thisSize == sizeof(OrbisVideodec2FrameBuffer);
    const bool output_size_ok = outputInfo->thisSize == sizeof(OrbisVideodec2OutputInfo);
    if (!(frame_size_ok && output_size_ok)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    return decoder->Flush(*frameBuffer, *outputInfo);
}
/// Forwards to the decoder's Reset().
///
/// @return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE when decoder is null, otherwise whatever
///         Reset() returns.
s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoder != nullptr) {
        return decoder->Reset();
    }
    return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
}
/// Copies the most recent entry of gPictureInfos into the first picture-info output.
///
/// Nothing is written when outputInfo reports zero pictures, when gPictureInfos is empty or when
/// p1stPictureInfoOut is null. p2ndPictureInfoOut is never written; an output with more than one
/// picture ends in UNREACHABLE().
///
/// @return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when outputInfo is null,
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when outputInfo or the first picture-info structure
///         carries an unexpected thisSize, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo,
                                            void* p1stPictureInfoOut, void* p2ndPictureInfoOut) {
    LOG_TRACE(Lib_Vdec2, "called");

    if (outputInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }
    if (outputInfo->thisSize != sizeof(OrbisVideodec2OutputInfo)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    const bool nothing_to_report = outputInfo->pictureCount == 0 || gPictureInfos.empty();
    if (nothing_to_report) {
        return ORBIS_OK;
    }

    if (p1stPictureInfoOut != nullptr) {
        auto* const first_info = static_cast<OrbisVideodec2AvcPictureInfo*>(p1stPictureInfoOut);
        if (first_info->thisSize != sizeof(OrbisVideodec2AvcPictureInfo)) {
            return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
        }
        *first_info = gPictureInfos.back();
    }

    // Outputs carrying a second picture are not handled.
    if (outputInfo->pictureCount > 1) {
        UNREACHABLE();
    }
    return ORBIS_OK;
}
// Registers every sceVideodec2* entry point of this file through LIB_FUNCTION, each under the
// library and module name "libSceVideodec2". `sym` is not named in the body; presumably the
// macro refers to it -- confirm against the LIB_FUNCTION definition.
void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("RnDibcGCPKw", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2QueryComputeMemoryInfo);
LIB_FUNCTION("eD+X2SmxUt4", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2AllocateComputeQueue);
LIB_FUNCTION("UvtA3FAiF4Y", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2ReleaseComputeQueue);
LIB_FUNCTION("qqMCwlULR+E", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2QueryDecoderMemoryInfo);
LIB_FUNCTION("CNNRoRYd8XI", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2CreateDecoder);
LIB_FUNCTION("jwImxXRGSKA", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2DeleteDecoder);
LIB_FUNCTION("852F5+q6+iM", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Decode);
LIB_FUNCTION("l1hXwscLuCY", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Flush);
LIB_FUNCTION("wJXikG6QFN8", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Reset);
LIB_FUNCTION("NtXRa3dRzU0", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
sceVideodec2GetPictureInfo);
}
} // namespace Libraries::Vdec2

View file

@ -1,139 +1,139 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
#include "videodec2_avc.h"
namespace Core::Loader {
class SymbolsResolver;
}
namespace Libraries::Vdec2 {
// VdecDecoder is only forward-declared here; the decoder handle type is a raw pointer to it.
class VdecDecoder;
using OrbisVideodec2Decoder = VdecDecoder*;
// Compute queue handle; an untyped pointer.
using OrbisVideodec2ComputeQueue = void*;
// Decoder configuration structure. Its size is pinned to 0x48 bytes by the static_assert that
// follows, so fields must not be added, removed or reordered.
struct OrbisVideodec2DecoderConfigInfo {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
u32 resourceType;
u32 codecType;
u32 profile;
u32 maxLevel;
s32 maxFrameWidth;
s32 maxFrameHeight;
s32 maxDpbFrameCount;
u32 decodePipelineDepth;
OrbisVideodec2ComputeQueue computeQueue;
u64 cpuAffinityMask;
s32 cpuThreadPriority;
bool optimizeProgressiveVideo;
bool checkMemoryType;
u8 reserved0;
u8 reserved1;
void* extraConfigInfo;
};
static_assert(sizeof(OrbisVideodec2DecoderConfigInfo) == 0x48);
// Decoder memory description: a size/pointer pair for each of the cpu, gpu and cpuGpu regions plus
// frame buffer size and alignment. Its size is pinned to 0x48 bytes by the static_assert that
// follows.
struct OrbisVideodec2DecoderMemoryInfo {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
u64 cpuMemorySize;
void* cpuMemory;
u64 gpuMemorySize;
void* gpuMemory;
u64 cpuGpuMemorySize;
void* cpuGpuMemory;
u64 maxFrameBufferSize;
u32 frameBufferAlignment;
u32 reserved0;
};
static_assert(sizeof(OrbisVideodec2DecoderMemoryInfo) == 0x48);
// Input passed to the decode call. Its size is pinned to 0x30 bytes by the static_assert that
// follows.
struct OrbisVideodec2InputData {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
// auData/auSize presumably describe one access unit of the bitstream -- TODO confirm.
void* auData;
u64 auSize;
u64 ptsData;
u64 dtsData;
u64 attachedData;
};
static_assert(sizeof(OrbisVideodec2InputData) == 0x30);
// Result description filled in for a decode/flush call. Its size is pinned to 0x30 bytes by the
// static_assert that follows.
struct OrbisVideodec2OutputInfo {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
bool isValid;
bool isErrorFrame;
u8 pictureCount;
u32 codecType;
u32 frameWidth;
u32 framePitch;
u32 frameHeight;
void* frameBuffer;
u64 frameBufferSize;
};
static_assert(sizeof(OrbisVideodec2OutputInfo) == 0x30);
// Frame buffer descriptor: a pointer/size pair plus an acceptance flag. Its size is pinned to
// 0x20 bytes by the static_assert that follows.
struct OrbisVideodec2FrameBuffer {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
void* frameBuffer;
u64 frameBufferSize;
bool isAccepted;
};
static_assert(sizeof(OrbisVideodec2FrameBuffer) == 0x20);
// Compute memory description: one size/pointer pair. Its size is pinned to 0x18 bytes by the
// static_assert that follows.
struct OrbisVideodec2ComputeMemoryInfo {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
u64 cpuGpuMemorySize;
void* cpuGpuMemory;
};
static_assert(sizeof(OrbisVideodec2ComputeMemoryInfo) == 0x18);
// Compute queue configuration. Its size is pinned to 0x10 bytes by the static_assert that
// follows.
struct OrbisVideodec2ComputeConfigInfo {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
u16 computePipeId;
u16 computeQueueId;
bool checkMemoryType;
u8 reserved0;
u16 reserved1;
};
static_assert(sizeof(OrbisVideodec2ComputeConfigInfo) == 0x10);
// Declarations of the sceVideodec2* entry points. All of them return s32 and are marked
// PS4_SYSV_ABI; the definitions are expected in the corresponding source file.

// Compute queue handling.
s32 PS4_SYSV_ABI
sceVideodec2QueryComputeMemoryInfo(OrbisVideodec2ComputeMemoryInfo* computeMemInfo);
s32 PS4_SYSV_ABI
sceVideodec2AllocateComputeQueue(const OrbisVideodec2ComputeConfigInfo* computeCfgInfo,
const OrbisVideodec2ComputeMemoryInfo* computeMemInfo,
OrbisVideodec2ComputeQueue* computeQueue);
s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue);
// Decoder lifetime.
s32 PS4_SYSV_ABI
sceVideodec2QueryDecoderMemoryInfo(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
OrbisVideodec2DecoderMemoryInfo* decoderMemInfo);
s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo,
OrbisVideodec2Decoder* decoder);
s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder);
// Decoding operations on an existing decoder handle.
s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder,
const OrbisVideodec2InputData* inputData,
OrbisVideodec2FrameBuffer* frameBuffer,
OrbisVideodec2OutputInfo* outputInfo);
s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder,
OrbisVideodec2FrameBuffer* frameBuffer,
OrbisVideodec2OutputInfo* outputInfo);
s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder);
s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo,
void* p1stPictureInfo, void* p2ndPictureInfo);
// Presumably hooks the functions above into the given symbol resolver -- confirm in the
// definition.
void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym);
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
#include "videodec2_avc.h"
namespace Core::Loader {
class SymbolsResolver;
}
namespace Libraries::Vdec2 {
// VdecDecoder is only forward-declared here; the decoder handle type is a raw pointer to it.
class VdecDecoder;
using OrbisVideodec2Decoder = VdecDecoder*;
// Compute queue handle; an untyped pointer.
using OrbisVideodec2ComputeQueue = void*;
// Decoder configuration structure. Its size is pinned to 0x48 bytes by the static_assert that
// follows, so fields must not be added, removed or reordered.
struct OrbisVideodec2DecoderConfigInfo {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
u32 resourceType;
u32 codecType;
u32 profile;
u32 maxLevel;
s32 maxFrameWidth;
s32 maxFrameHeight;
s32 maxDpbFrameCount;
u32 decodePipelineDepth;
OrbisVideodec2ComputeQueue computeQueue;
u64 cpuAffinityMask;
s32 cpuThreadPriority;
bool optimizeProgressiveVideo;
bool checkMemoryType;
u8 reserved0;
u8 reserved1;
void* extraConfigInfo;
};
static_assert(sizeof(OrbisVideodec2DecoderConfigInfo) == 0x48);
// Decoder memory description: a size/pointer pair for each of the cpu, gpu and cpuGpu regions plus
// frame buffer size and alignment. Its size is pinned to 0x48 bytes by the static_assert that
// follows.
struct OrbisVideodec2DecoderMemoryInfo {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
u64 cpuMemorySize;
void* cpuMemory;
u64 gpuMemorySize;
void* gpuMemory;
u64 cpuGpuMemorySize;
void* cpuGpuMemory;
u64 maxFrameBufferSize;
u32 frameBufferAlignment;
u32 reserved0;
};
static_assert(sizeof(OrbisVideodec2DecoderMemoryInfo) == 0x48);
// Input passed to the decode call. Its size is pinned to 0x30 bytes by the static_assert that
// follows.
struct OrbisVideodec2InputData {
// Presumably the caller-provided size of this structure -- confirm against the API users.
u64 thisSize;
// auData/auSize presumably describe one access unit of the bitstream -- TODO confirm.
void* auData;
u64 auSize;
u64 ptsData;
u64 dtsData;
u64 attachedData;
};
static_assert(sizeof(OrbisVideodec2InputData) == 0x30);
/// Result of a decode or flush call, filled in by VdecDecoder::Decode / VdecDecoder::Flush.
/// Fixed binary layout, pinned to 0x30 bytes below.
struct OrbisVideodec2OutputInfo {
// Set to sizeof(OrbisVideodec2OutputInfo) by Decode/Flush.
u64 thisSize;
// Decode/Flush reset these to isValid=false, isErrorFrame=true, pictureCount=0 on entry and
// switch them to true/false/1 once a frame has been copied out.
bool isValid;
bool isErrorFrame;
u8 pictureCount;
// Currently always written as 1 (AVC) by Decode/Flush.
u32 codecType;
// Taken from the decoded AVFrame: width, linesize[0] and height respectively.
u32 frameWidth;
u32 framePitch;
u32 frameHeight;
// Echo of the caller's OrbisVideodec2FrameBuffer pointer and size.
void* frameBuffer;
u64 frameBufferSize;
};
static_assert(sizeof(OrbisVideodec2OutputInfo) == 0x30);
/// Caller-provided destination for decoded pictures. Fixed binary layout, pinned to 0x20 bytes
/// below.
struct OrbisVideodec2FrameBuffer {
// presumably sizeof(this struct) — TODO confirm
u64 thisSize;
// Destination that VdecDecoder::Decode/Flush copy the NV12 picture into.
void* frameBuffer;
u64 frameBufferSize;
// Cleared on entry to Decode/Flush and set to true once a frame has been copied.
bool isAccepted;
};
static_assert(sizeof(OrbisVideodec2FrameBuffer) == 0x20);
/// Memory description for a compute queue, used by sceVideodec2QueryComputeMemoryInfo and
/// sceVideodec2AllocateComputeQueue. Fixed binary layout, pinned to 0x18 bytes below.
struct OrbisVideodec2ComputeMemoryInfo {
// presumably sizeof(this struct) — TODO confirm
u64 thisSize;
u64 cpuGpuMemorySize;
void* cpuGpuMemory;
};
static_assert(sizeof(OrbisVideodec2ComputeMemoryInfo) == 0x18);
/// Compute queue configuration taken by sceVideodec2AllocateComputeQueue. Fixed binary layout,
/// pinned to 0x10 bytes below.
struct OrbisVideodec2ComputeConfigInfo {
// presumably sizeof(this struct) — TODO confirm
u64 thisSize;
u16 computePipeId;
u16 computeQueueId;
bool checkMemoryType;
u8 reserved0;
u16 reserved1;
};
static_assert(sizeof(OrbisVideodec2ComputeConfigInfo) == 0x10);
// Prototypes of the sceVideodec2* entry points. All of them use the PS4_SYSV_ABI calling
// convention and return an s32 status code. Their definitions are not in this header, so the
// notes below are inferred from names and signatures only — confirm against the implementation.

// Compute queue management: query memory needs, allocate a queue, release it.
s32 PS4_SYSV_ABI
sceVideodec2QueryComputeMemoryInfo(OrbisVideodec2ComputeMemoryInfo* computeMemInfo);
s32 PS4_SYSV_ABI
sceVideodec2AllocateComputeQueue(const OrbisVideodec2ComputeConfigInfo* computeCfgInfo,
const OrbisVideodec2ComputeMemoryInfo* computeMemInfo,
OrbisVideodec2ComputeQueue* computeQueue);
s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue);
// Decoder lifetime: query memory needs for a configuration, create a decoder handle, delete it.
s32 PS4_SYSV_ABI
sceVideodec2QueryDecoderMemoryInfo(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
OrbisVideodec2DecoderMemoryInfo* decoderMemInfo);
s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo,
OrbisVideodec2Decoder* decoder);
s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder);
// Decoding: the parameter lists mirror VdecDecoder::Decode, ::Flush and ::Reset.
s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder,
const OrbisVideodec2InputData* inputData,
OrbisVideodec2FrameBuffer* frameBuffer,
OrbisVideodec2OutputInfo* outputInfo);
s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder,
OrbisVideodec2FrameBuffer* frameBuffer,
OrbisVideodec2OutputInfo* outputInfo);
s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder);
// The two picture info outputs are untyped here; presumably they point to
// OrbisVideodec2AvcPictureInfo for AVC streams — TODO confirm.
s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo,
void* p1stPictureInfo, void* p2ndPictureInfo);
// Registers this library with the given symbol resolver.
void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::Vdec2

View file

@ -1,60 +1,60 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace Libraries::Vdec2 {
struct OrbisVideodec2AvcPictureInfo {
u64 thisSize;
bool isValid;
u64 ptsData;
u64 dtsData;
u64 attachedData;
u8 idrPictureflag;
u8 profile_idc;
u8 level_idc;
u32 pic_width_in_mbs_minus1;
u32 pic_height_in_map_units_minus1;
u8 frame_mbs_only_flag;
u8 frame_cropping_flag;
u32 frameCropLeftOffset;
u32 frameCropRightOffset;
u32 frameCropTopOffset;
u32 frameCropBottomOffset;
u8 aspect_ratio_info_present_flag;
u8 aspect_ratio_idc;
u16 sar_width;
u16 sar_height;
u8 video_signal_type_present_flag;
u8 video_format;
u8 video_full_range_flag;
u8 colour_description_present_flag;
u8 colour_primaries;
u8 transfer_characteristics;
u8 matrix_coefficients;
u8 timing_info_present_flag;
u32 num_units_in_tick;
u32 time_scale;
u8 fixed_frame_rate_flag;
u8 bitstream_restriction_flag;
u8 max_dec_frame_buffering;
u8 pic_struct_present_flag;
u8 pic_struct;
u8 field_pic_flag;
u8 bottom_field_flag;
};
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace Libraries::Vdec2 {
/// Per-picture metadata for an AVC stream.
///
/// VdecDecoder::Decode value-initializes one of these per decoded frame and then fills in only
/// thisSize, isValid, ptsData, dtsData, attachedData and the four frameCrop*Offset fields;
/// every other field is left at zero by that code path.
/// NOTE(review): the snake_case field names look like H.264 SPS/VUI syntax element names —
/// confirm before relying on that. The field order is presumably a guest-visible layout, so do
/// not reorder fields.
struct OrbisVideodec2AvcPictureInfo {
// Set to sizeof(OrbisVideodec2AvcPictureInfo) by VdecDecoder::Decode.
u64 thisSize;
bool isValid;
// Copied from the OrbisVideodec2InputData that produced the picture.
u64 ptsData;
u64 dtsData;
u64 attachedData;
u8 idrPictureflag;
u8 profile_idc;
u8 level_idc;
u32 pic_width_in_mbs_minus1;
u32 pic_height_in_map_units_minus1;
u8 frame_mbs_only_flag;
u8 frame_cropping_flag;
// Filled from AVFrame::crop_left / crop_right / crop_top / crop_bottom.
u32 frameCropLeftOffset;
u32 frameCropRightOffset;
u32 frameCropTopOffset;
u32 frameCropBottomOffset;
u8 aspect_ratio_info_present_flag;
u8 aspect_ratio_idc;
u16 sar_width;
u16 sar_height;
u8 video_signal_type_present_flag;
u8 video_format;
u8 video_full_range_flag;
u8 colour_description_present_flag;
u8 colour_primaries;
u8 transfer_characteristics;
u8 matrix_coefficients;
u8 timing_info_present_flag;
u32 num_units_in_tick;
u32 time_scale;
u8 fixed_frame_rate_flag;
u8 bitstream_restriction_flag;
u8 max_dec_frame_buffering;
u8 pic_struct_present_flag;
u8 pic_struct;
u8 field_pic_flag;
u8 bottom_field_flag;
};
} // namespace Libraries::Vdec2

View file

@ -1,229 +1,229 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "videodec2_impl.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/libraries/videodec/videodec_error.h"
#include "common/support/avdec.h"
namespace Libraries::Vdec2 {
std::vector<OrbisVideodec2AvcPictureInfo> gPictureInfos;
/// Copies an NV12 frame into a tightly packed destination buffer.
///
/// The destination receives `width * height` luma bytes immediately followed by the
/// interleaved chroma plane, with a row pitch equal to the frame width.
///
/// FFmpeg planes may be padded (`linesize` larger than `width`). When they are, the planes are
/// copied row by row so the padding never leaks into the output; when they are not, the two
/// planes are copied with one memcpy each.
///
/// @param dst Destination buffer; must hold at least `width * height * 3 / 2` bytes.
/// @param src Source frame, expected to be in AV_PIX_FMT_NV12 layout.
///
/// NOTE(review): callers report `linesize[0]` as the frame pitch while this function writes
/// rows `width` bytes apart — confirm which pitch the consumer expects.
static inline void CopyNV12Data(u8* dst, const AVFrame& src) {
    // Sizes are computed in size_t so large frames cannot overflow int arithmetic.
    const auto width = static_cast<std::size_t>(src.width);
    const auto height = static_cast<std::size_t>(src.height);
    const std::size_t luma_size = width * height;

    // Fast path: both planes are already tightly packed.
    if (src.linesize[0] == src.width && src.linesize[1] == src.width) {
        std::memcpy(dst, src.data[0], luma_size);
        std::memcpy(dst + luma_size, src.data[1], luma_size / 2);
        return;
    }

    // Strided luma plane: one row at a time.
    const u8* luma_row = src.data[0];
    u8* out = dst;
    for (std::size_t row = 0; row < height; ++row) {
        std::memcpy(out, luma_row, width);
        luma_row += src.linesize[0];
        out += width;
    }

    // Strided chroma plane: half as many rows, each `width` bytes of interleaved U/V.
    const u8* chroma_row = src.data[1];
    out = dst + luma_size;
    for (std::size_t row = 0; row < height / 2; ++row) {
        std::memcpy(out, chroma_row, width);
        chroma_row += src.linesize[1];
        out += width;
    }
}
/// Creates an FFmpeg H.264 decoder context sized from the guest configuration.
///
/// Only AVC (codecType == 1) is supported; any setup failure is fatal (assert), including a
/// failure to open the codec, which previously went unnoticed and left a context that could
/// never decode.
///
/// @param configInfo Guest decoder configuration; codecType and the maximum frame
///                   dimensions are read.
/// @param memoryInfo Guest memory description; currently unused.
VdecDecoder::VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo,
                         const OrbisVideodec2DecoderMemoryInfo& memoryInfo) {
    ASSERT(configInfo.codecType == 1); /* AVC */

    const AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    ASSERT(codec);

    mCodecContext = avcodec_alloc_context3(codec);
    ASSERT(mCodecContext);
    mCodecContext->width = configInfo.maxFrameWidth;
    mCodecContext->height = configInfo.maxFrameHeight;

    // avcodec_open2 returns a negative AVERROR code on failure.
    const int open_result = avcodec_open2(mCodecContext, codec, nullptr);
    if (open_result < 0) {
        LOG_ERROR(Lib_Vdec2, "Failed to open H.264 decoder: {}", open_result);
    }
    ASSERT(open_result >= 0);
}
VdecDecoder::~VdecDecoder() {
avcodec_free_context(&mCodecContext);
sws_freeContext(mSwsContext);
gPictureInfos.clear();
}
s32 VdecDecoder::Decode(const OrbisVideodec2InputData& inputData,
OrbisVideodec2FrameBuffer& frameBuffer,
OrbisVideodec2OutputInfo& outputInfo) {
frameBuffer.isAccepted = false;
outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo);
outputInfo.isValid = false;
outputInfo.isErrorFrame = true;
outputInfo.pictureCount = 0;
if (!inputData.auData) {
return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_POINTER;
}
if (inputData.auSize == 0) {
return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_SIZE;
}
AVPacket* packet = av_packet_alloc();
if (!packet) {
LOG_ERROR(Lib_Vdec2, "Failed to allocate packet");
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
packet->data = (u8*)inputData.auData;
packet->size = inputData.auSize;
packet->pts = inputData.ptsData;
packet->dts = inputData.dtsData;
int ret = avcodec_send_packet(mCodecContext, packet);
if (ret < 0) {
LOG_ERROR(Lib_Vdec2, "Error sending packet to decoder: {}", ret);
av_packet_free(&packet);
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
AVFrame* frame = av_frame_alloc();
if (frame == nullptr) {
LOG_ERROR(Lib_Vdec2, "Failed to allocate frame");
av_packet_free(&packet);
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
while (true) {
ret = avcodec_receive_frame(mCodecContext, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", ret);
av_packet_free(&packet);
av_frame_free(&frame);
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
if (frame->format != AV_PIX_FMT_NV12) {
AVFrame* nv12_frame = ConvertNV12Frame(*frame);
ASSERT(nv12_frame);
av_frame_free(&frame);
frame = nv12_frame;
}
CopyNV12Data((u8*)frameBuffer.frameBuffer, *frame);
frameBuffer.isAccepted = true;
outputInfo.codecType = 1; // FIXME: Hardcoded to AVC
outputInfo.frameWidth = frame->width;
outputInfo.frameHeight = frame->height;
outputInfo.framePitch = frame->linesize[0];
outputInfo.frameBufferSize = frameBuffer.frameBufferSize;
outputInfo.frameBuffer = frameBuffer.frameBuffer;
outputInfo.isValid = true;
outputInfo.isErrorFrame = false;
outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video
if (outputInfo.isValid) {
OrbisVideodec2AvcPictureInfo pictureInfo = {};
pictureInfo.thisSize = sizeof(OrbisVideodec2AvcPictureInfo);
pictureInfo.isValid = true;
pictureInfo.ptsData = inputData.ptsData;
pictureInfo.dtsData = inputData.dtsData;
pictureInfo.attachedData = inputData.attachedData;
pictureInfo.frameCropLeftOffset = frame->crop_left;
pictureInfo.frameCropRightOffset = frame->crop_right;
pictureInfo.frameCropTopOffset = frame->crop_top;
pictureInfo.frameCropBottomOffset = frame->crop_bottom;
gPictureInfos.push_back(pictureInfo);
}
}
av_packet_free(&packet);
av_frame_free(&frame);
return ORBIS_OK;
}
s32 VdecDecoder::Flush(OrbisVideodec2FrameBuffer& frameBuffer,
OrbisVideodec2OutputInfo& outputInfo) {
frameBuffer.isAccepted = false;
outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo);
outputInfo.isValid = false;
outputInfo.isErrorFrame = true;
outputInfo.pictureCount = 0;
AVFrame* frame = av_frame_alloc();
if (!frame) {
LOG_ERROR(Lib_Vdec2, "Failed to allocate frame");
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
while (true) {
int ret = avcodec_receive_frame(mCodecContext, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", ret);
av_frame_free(&frame);
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
if (frame->format != AV_PIX_FMT_NV12) {
AVFrame* nv12_frame = ConvertNV12Frame(*frame);
ASSERT(nv12_frame);
av_frame_free(&frame);
frame = nv12_frame;
}
CopyNV12Data((u8*)frameBuffer.frameBuffer, *frame);
frameBuffer.isAccepted = true;
outputInfo.codecType = 1; // FIXME: Hardcoded to AVC
outputInfo.frameWidth = frame->width;
outputInfo.frameHeight = frame->height;
outputInfo.framePitch = frame->linesize[0];
outputInfo.frameBufferSize = frameBuffer.frameBufferSize;
outputInfo.frameBuffer = frameBuffer.frameBuffer;
outputInfo.isValid = true;
outputInfo.isErrorFrame = false;
outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video
// FIXME: Should we add picture info here too?
}
av_frame_free(&frame);
return ORBIS_OK;
}
s32 VdecDecoder::Reset() {
avcodec_flush_buffers(mCodecContext);
gPictureInfos.clear();
return ORBIS_OK;
}
/// Converts a decoded frame of any pixel format into a newly allocated NV12 frame.
///
/// Timestamps, dimensions, sample aspect ratio and crop values are carried over from the
/// source frame (a negative pkt_dts is clamped to 0).
///
/// @param frame Source frame as produced by the decoder.
/// @return A new AVFrame owned by the caller (release with av_frame_free), or nullptr if an
///         allocation or the conversion failed. Nothing is leaked on failure.
AVFrame* VdecDecoder::ConvertNV12Frame(AVFrame& frame) {
    AVFrame* nv12_frame = av_frame_alloc();
    if (nv12_frame == nullptr) {
        LOG_ERROR(Lib_Vdec2, "Failed to allocate NV12 frame");
        return nullptr;
    }

    nv12_frame->pts = frame.pts;
    nv12_frame->pkt_dts = frame.pkt_dts < 0 ? 0 : frame.pkt_dts;
    nv12_frame->format = AV_PIX_FMT_NV12;
    nv12_frame->width = frame.width;
    nv12_frame->height = frame.height;
    nv12_frame->sample_aspect_ratio = frame.sample_aspect_ratio;
    nv12_frame->crop_top = frame.crop_top;
    nv12_frame->crop_bottom = frame.crop_bottom;
    nv12_frame->crop_left = frame.crop_left;
    nv12_frame->crop_right = frame.crop_right;

    if (const int alloc_result = av_frame_get_buffer(nv12_frame, 0); alloc_result < 0) {
        LOG_ERROR(Lib_Vdec2, "Failed to allocate NV12 frame buffer: {}", alloc_result);
        av_frame_free(&nv12_frame);
        return nullptr;
    }

    // sws_getCachedContext reuses the existing context while the source/destination geometry
    // and formats are unchanged, and transparently replaces it when they differ (for example
    // after a mid-stream resolution change). It also handles the initial nullptr case.
    mSwsContext = sws_getCachedContext(mSwsContext, frame.width, frame.height,
                                       AVPixelFormat(frame.format), nv12_frame->width,
                                       nv12_frame->height, AV_PIX_FMT_NV12, SWS_FAST_BILINEAR,
                                       nullptr, nullptr, nullptr);
    if (mSwsContext == nullptr) {
        LOG_ERROR(Lib_Vdec2, "Failed to create NV12 conversion context");
        av_frame_free(&nv12_frame);
        return nullptr;
    }

    const auto res = sws_scale(mSwsContext, frame.data, frame.linesize, 0, frame.height,
                               nv12_frame->data, nv12_frame->linesize);
    if (res < 0) {
        LOG_ERROR(Lib_Vdec2, "Could not convert to NV12: {}", av_err2str(res));
        av_frame_free(&nv12_frame);
        return nullptr;
    }
    return nv12_frame;
}
} // namespace Libraries::Vdec2
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "videodec2_impl.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/libraries/videodec/videodec_error.h"
#include "common/support/avdec.h"
namespace Libraries::Vdec2 {
/// Picture infos recorded by VdecDecoder::Decode, one per decoded frame. This is a single
/// list at namespace scope (not per decoder); it is cleared by VdecDecoder::Reset and by the
/// VdecDecoder destructor.
std::vector<OrbisVideodec2AvcPictureInfo> gPictureInfos;
/// Copies an NV12 frame into a tightly packed destination buffer.
///
/// The destination receives `width * height` luma bytes immediately followed by the
/// interleaved chroma plane, with a row pitch equal to the frame width.
///
/// FFmpeg planes may be padded (`linesize` larger than `width`). When they are, the planes are
/// copied row by row so the padding never leaks into the output; when they are not, the two
/// planes are copied with one memcpy each.
///
/// @param dst Destination buffer; must hold at least `width * height * 3 / 2` bytes.
/// @param src Source frame, expected to be in AV_PIX_FMT_NV12 layout.
///
/// NOTE(review): callers report `linesize[0]` as the frame pitch while this function writes
/// rows `width` bytes apart — confirm which pitch the consumer expects.
static inline void CopyNV12Data(u8* dst, const AVFrame& src) {
    // Sizes are computed in size_t so large frames cannot overflow int arithmetic.
    const auto width = static_cast<std::size_t>(src.width);
    const auto height = static_cast<std::size_t>(src.height);
    const std::size_t luma_size = width * height;

    // Fast path: both planes are already tightly packed.
    if (src.linesize[0] == src.width && src.linesize[1] == src.width) {
        std::memcpy(dst, src.data[0], luma_size);
        std::memcpy(dst + luma_size, src.data[1], luma_size / 2);
        return;
    }

    // Strided luma plane: one row at a time.
    const u8* luma_row = src.data[0];
    u8* out = dst;
    for (std::size_t row = 0; row < height; ++row) {
        std::memcpy(out, luma_row, width);
        luma_row += src.linesize[0];
        out += width;
    }

    // Strided chroma plane: half as many rows, each `width` bytes of interleaved U/V.
    const u8* chroma_row = src.data[1];
    out = dst + luma_size;
    for (std::size_t row = 0; row < height / 2; ++row) {
        std::memcpy(out, chroma_row, width);
        chroma_row += src.linesize[1];
        out += width;
    }
}
/// Creates an FFmpeg H.264 decoder context sized from the guest configuration.
///
/// Only AVC (codecType == 1) is supported; any setup failure is fatal (assert), including a
/// failure to open the codec, which previously went unnoticed and left a context that could
/// never decode.
///
/// @param configInfo Guest decoder configuration; codecType and the maximum frame
///                   dimensions are read.
/// @param memoryInfo Guest memory description; currently unused.
VdecDecoder::VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo,
                         const OrbisVideodec2DecoderMemoryInfo& memoryInfo) {
    ASSERT(configInfo.codecType == 1); /* AVC */

    const AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    ASSERT(codec);

    mCodecContext = avcodec_alloc_context3(codec);
    ASSERT(mCodecContext);
    mCodecContext->width = configInfo.maxFrameWidth;
    mCodecContext->height = configInfo.maxFrameHeight;

    // avcodec_open2 returns a negative AVERROR code on failure.
    const int open_result = avcodec_open2(mCodecContext, codec, nullptr);
    if (open_result < 0) {
        LOG_ERROR(Lib_Vdec2, "Failed to open H.264 decoder: {}", open_result);
    }
    ASSERT(open_result >= 0);
}
/// Tears the decoder down: drops the recorded picture infos and releases the scaler and
/// codec contexts.
VdecDecoder::~VdecDecoder() {
    gPictureInfos.clear();
    sws_freeContext(mSwsContext);
    avcodec_free_context(&mCodecContext);
}
/// Feeds one access unit to the FFmpeg decoder and copies every frame it yields into the
/// caller's frame buffer as NV12.
///
/// @param inputData   Access unit plus timestamps/attached data.
/// @param frameBuffer Destination buffer; isAccepted is set once a frame was copied into it.
/// @param outputInfo  Receives the description of the copied frame.
/// @return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_POINTER for a null access unit,
///         ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_SIZE for an empty one,
///         ORBIS_VIDEODEC2_ERROR_API_FAIL on allocation or FFmpeg errors, otherwise ORBIS_OK —
///         including when the decoder produced no frame for this access unit.
///
/// NOTE(review): frameBuffer.frameBuffer is neither null-checked nor size-checked against
/// frameBuffer.frameBufferSize before the copy — confirm the caller validates it.
s32 VdecDecoder::Decode(const OrbisVideodec2InputData& inputData,
OrbisVideodec2FrameBuffer& frameBuffer,
OrbisVideodec2OutputInfo& outputInfo) {
// Start from the "no picture" state so that every early return leaves consistent outputs.
frameBuffer.isAccepted = false;
outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo);
outputInfo.isValid = false;
outputInfo.isErrorFrame = true;
outputInfo.pictureCount = 0;
if (!inputData.auData) {
return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_POINTER;
}
if (inputData.auSize == 0) {
return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_SIZE;
}
AVPacket* packet = av_packet_alloc();
if (!packet) {
LOG_ERROR(Lib_Vdec2, "Failed to allocate packet");
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
// The packet only borrows the guest's access unit memory; it is not copied here.
packet->data = (u8*)inputData.auData;
packet->size = inputData.auSize;
packet->pts = inputData.ptsData;
packet->dts = inputData.dtsData;
int ret = avcodec_send_packet(mCodecContext, packet);
if (ret < 0) {
LOG_ERROR(Lib_Vdec2, "Error sending packet to decoder: {}", ret);
av_packet_free(&packet);
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
AVFrame* frame = av_frame_alloc();
if (frame == nullptr) {
LOG_ERROR(Lib_Vdec2, "Failed to allocate frame");
av_packet_free(&packet);
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
// Drain every frame that is ready. Each iteration overwrites the same frameBuffer and
// outputInfo, so only the last frame remains visible to the caller, while a picture info
// entry is recorded for each one.
while (true) {
ret = avcodec_receive_frame(mCodecContext, frame);
// EAGAIN: the decoder needs more input; EOF: it has been fully drained. Neither is an error.
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", ret);
av_packet_free(&packet);
av_frame_free(&frame);
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
// The output is always NV12; convert and swap in the converted frame, which takes over
// ownership of `frame` for the remainder of the loop.
if (frame->format != AV_PIX_FMT_NV12) {
AVFrame* nv12_frame = ConvertNV12Frame(*frame);
ASSERT(nv12_frame);
av_frame_free(&frame);
frame = nv12_frame;
}
CopyNV12Data((u8*)frameBuffer.frameBuffer, *frame);
frameBuffer.isAccepted = true;
outputInfo.codecType = 1; // FIXME: Hardcoded to AVC
outputInfo.frameWidth = frame->width;
outputInfo.frameHeight = frame->height;
outputInfo.framePitch = frame->linesize[0];
outputInfo.frameBufferSize = frameBuffer.frameBufferSize;
outputInfo.frameBuffer = frameBuffer.frameBuffer;
outputInfo.isValid = true;
outputInfo.isErrorFrame = false;
outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video
// isValid was set to true just above, so this branch is always taken at present.
if (outputInfo.isValid) {
OrbisVideodec2AvcPictureInfo pictureInfo = {};
pictureInfo.thisSize = sizeof(OrbisVideodec2AvcPictureInfo);
pictureInfo.isValid = true;
// Timestamps and attached data come from the current input, not from the decoded frame.
pictureInfo.ptsData = inputData.ptsData;
pictureInfo.dtsData = inputData.dtsData;
pictureInfo.attachedData = inputData.attachedData;
pictureInfo.frameCropLeftOffset = frame->crop_left;
pictureInfo.frameCropRightOffset = frame->crop_right;
pictureInfo.frameCropTopOffset = frame->crop_top;
pictureInfo.frameCropBottomOffset = frame->crop_bottom;
gPictureInfos.push_back(pictureInfo);
}
}
av_packet_free(&packet);
av_frame_free(&frame);
return ORBIS_OK;
}
/// Retrieves frames that are already available from the decoder, without submitting new
/// input, and copies them into the caller's frame buffer as NV12.
///
/// @param frameBuffer Destination buffer; isAccepted is set once a frame was copied into it.
/// @param outputInfo  Receives the description of the copied frame.
/// @return ORBIS_VIDEODEC2_ERROR_API_FAIL on allocation or FFmpeg errors, otherwise ORBIS_OK —
///         including when no frame was available.
///
/// NOTE(review): no null packet is sent to the codec before receiving, so the decoder is
/// presumably never switched into draining mode and frames it still buffers may not be
/// returned here — confirm against the FFmpeg send/receive API.
s32 VdecDecoder::Flush(OrbisVideodec2FrameBuffer& frameBuffer,
OrbisVideodec2OutputInfo& outputInfo) {
// Start from the "no picture" state so that every early return leaves consistent outputs.
frameBuffer.isAccepted = false;
outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo);
outputInfo.isValid = false;
outputInfo.isErrorFrame = true;
outputInfo.pictureCount = 0;
AVFrame* frame = av_frame_alloc();
if (!frame) {
LOG_ERROR(Lib_Vdec2, "Failed to allocate frame");
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
// Each iteration overwrites the same frameBuffer and outputInfo, so only the last frame
// received remains visible to the caller.
while (true) {
int ret = avcodec_receive_frame(mCodecContext, frame);
// EAGAIN / EOF mean there is nothing (more) to fetch; neither is an error.
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", ret);
av_frame_free(&frame);
return ORBIS_VIDEODEC2_ERROR_API_FAIL;
}
// The output is always NV12; convert and swap in the converted frame.
if (frame->format != AV_PIX_FMT_NV12) {
AVFrame* nv12_frame = ConvertNV12Frame(*frame);
ASSERT(nv12_frame);
av_frame_free(&frame);
frame = nv12_frame;
}
CopyNV12Data((u8*)frameBuffer.frameBuffer, *frame);
frameBuffer.isAccepted = true;
outputInfo.codecType = 1; // FIXME: Hardcoded to AVC
outputInfo.frameWidth = frame->width;
outputInfo.frameHeight = frame->height;
outputInfo.framePitch = frame->linesize[0];
outputInfo.frameBufferSize = frameBuffer.frameBufferSize;
outputInfo.frameBuffer = frameBuffer.frameBuffer;
outputInfo.isValid = true;
outputInfo.isErrorFrame = false;
outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video
// Unlike Decode, no entry is appended to gPictureInfos on this path.
// FIXME: Should we add picture info here too?
}
av_frame_free(&frame);
return ORBIS_OK;
}
/// Discards all decoder state: forgets the recorded picture infos and flushes the codec's
/// internal buffers. Always returns ORBIS_OK.
s32 VdecDecoder::Reset() {
    gPictureInfos.clear();
    avcodec_flush_buffers(mCodecContext);
    return ORBIS_OK;
}
/// Converts a decoded frame of any pixel format into a newly allocated NV12 frame.
///
/// Timestamps, dimensions, sample aspect ratio and crop values are carried over from the
/// source frame (a negative pkt_dts is clamped to 0).
///
/// @param frame Source frame as produced by the decoder.
/// @return A new AVFrame owned by the caller (release with av_frame_free), or nullptr if an
///         allocation or the conversion failed. Nothing is leaked on failure.
AVFrame* VdecDecoder::ConvertNV12Frame(AVFrame& frame) {
    AVFrame* nv12_frame = av_frame_alloc();
    if (nv12_frame == nullptr) {
        LOG_ERROR(Lib_Vdec2, "Failed to allocate NV12 frame");
        return nullptr;
    }

    nv12_frame->pts = frame.pts;
    nv12_frame->pkt_dts = frame.pkt_dts < 0 ? 0 : frame.pkt_dts;
    nv12_frame->format = AV_PIX_FMT_NV12;
    nv12_frame->width = frame.width;
    nv12_frame->height = frame.height;
    nv12_frame->sample_aspect_ratio = frame.sample_aspect_ratio;
    nv12_frame->crop_top = frame.crop_top;
    nv12_frame->crop_bottom = frame.crop_bottom;
    nv12_frame->crop_left = frame.crop_left;
    nv12_frame->crop_right = frame.crop_right;

    if (const int alloc_result = av_frame_get_buffer(nv12_frame, 0); alloc_result < 0) {
        LOG_ERROR(Lib_Vdec2, "Failed to allocate NV12 frame buffer: {}", alloc_result);
        av_frame_free(&nv12_frame);
        return nullptr;
    }

    // sws_getCachedContext reuses the existing context while the source/destination geometry
    // and formats are unchanged, and transparently replaces it when they differ (for example
    // after a mid-stream resolution change). It also handles the initial nullptr case.
    mSwsContext = sws_getCachedContext(mSwsContext, frame.width, frame.height,
                                       AVPixelFormat(frame.format), nv12_frame->width,
                                       nv12_frame->height, AV_PIX_FMT_NV12, SWS_FAST_BILINEAR,
                                       nullptr, nullptr, nullptr);
    if (mSwsContext == nullptr) {
        LOG_ERROR(Lib_Vdec2, "Failed to create NV12 conversion context");
        av_frame_free(&nv12_frame);
        return nullptr;
    }

    const auto res = sws_scale(mSwsContext, frame.data, frame.linesize, 0, frame.height,
                               nv12_frame->data, nv12_frame->linesize);
    if (res < 0) {
        LOG_ERROR(Lib_Vdec2, "Could not convert to NV12: {}", av_err2str(res));
        av_frame_free(&nv12_frame);
        return nullptr;
    }
    return nv12_frame;
}
} // namespace Libraries::Vdec2

View file

@ -1,39 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "videodec2.h"
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
namespace Libraries::Vdec2 {
extern std::vector<OrbisVideodec2AvcPictureInfo> gPictureInfos;
/// AVC decoder behind an OrbisVideodec2Decoder handle, implemented on top of FFmpeg
/// (libavcodec for decoding, libswscale for conversion to NV12).
///
/// The object exclusively owns its FFmpeg contexts and releases them in the destructor, so it
/// can be neither copied nor moved.
class VdecDecoder {
public:
    VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo,
                const OrbisVideodec2DecoderMemoryInfo& memoryInfo);
    ~VdecDecoder();

    // A copy would share mCodecContext/mSwsContext and free them twice on destruction.
    VdecDecoder(const VdecDecoder&) = delete;
    VdecDecoder& operator=(const VdecDecoder&) = delete;
    VdecDecoder(VdecDecoder&&) = delete;
    VdecDecoder& operator=(VdecDecoder&&) = delete;

    /// Decodes one access unit; see the definition for the returned status codes.
    s32 Decode(const OrbisVideodec2InputData& inputData, OrbisVideodec2FrameBuffer& frameBuffer,
               OrbisVideodec2OutputInfo& outputInfo);
    /// Fetches frames already available from the decoder without submitting new input.
    s32 Flush(OrbisVideodec2FrameBuffer& frameBuffer, OrbisVideodec2OutputInfo& outputInfo);
    /// Flushes the codec's buffers and clears the recorded picture infos.
    s32 Reset();

private:
    /// Returns a newly allocated NV12 copy of `frame`, or nullptr on failure.
    AVFrame* ConvertNV12Frame(AVFrame& frame);

    AVCodecContext* mCodecContext = nullptr; ///< Created in the constructor.
    SwsContext* mSwsContext = nullptr;       ///< Created lazily by ConvertNV12Frame.
};
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "videodec2.h"
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
namespace Libraries::Vdec2 {
extern std::vector<OrbisVideodec2AvcPictureInfo> gPictureInfos;
/// AVC decoder behind an OrbisVideodec2Decoder handle, implemented on top of FFmpeg
/// (libavcodec for decoding, libswscale for conversion to NV12).
///
/// The object exclusively owns its FFmpeg contexts and releases them in the destructor, so it
/// can be neither copied nor moved.
class VdecDecoder {
public:
    VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo,
                const OrbisVideodec2DecoderMemoryInfo& memoryInfo);
    ~VdecDecoder();

    // A copy would share mCodecContext/mSwsContext and free them twice on destruction.
    VdecDecoder(const VdecDecoder&) = delete;
    VdecDecoder& operator=(const VdecDecoder&) = delete;
    VdecDecoder(VdecDecoder&&) = delete;
    VdecDecoder& operator=(VdecDecoder&&) = delete;

    /// Decodes one access unit; see the definition for the returned status codes.
    s32 Decode(const OrbisVideodec2InputData& inputData, OrbisVideodec2FrameBuffer& frameBuffer,
               OrbisVideodec2OutputInfo& outputInfo);
    /// Fetches frames already available from the decoder without submitting new input.
    s32 Flush(OrbisVideodec2FrameBuffer& frameBuffer, OrbisVideodec2OutputInfo& outputInfo);
    /// Flushes the codec's buffers and clears the recorded picture infos.
    s32 Reset();

private:
    /// Returns a newly allocated NV12 copy of `frame`, or nullptr on failure.
    AVFrame* ConvertNV12Frame(AVFrame& frame);

    AVCodecContext* mCodecContext = nullptr; ///< Created in the constructor.
    SwsContext* mSwsContext = nullptr;       ///< Created lazily by ConvertNV12Frame.
};
} // namespace Libraries::Vdec2

View file

@ -1,151 +1,151 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/alignment.h"
#include "core/libraries/kernel/threads/pthread.h"
#include "thread.h"
#ifdef _WIN64
#include <windows.h>
#include "common/ntapi.h"
#else
#include <csignal>
#include <pthread.h>
#endif
namespace Core {
#ifdef _WIN64
#define KGDT64_R3_DATA (0x28)
#define KGDT64_R3_CODE (0x30)
#define KGDT64_R3_CMTEB (0x50)
#define RPL_MASK (0x03)
#define INITIAL_FPUCW (0x037f)
#define INITIAL_MXCSR_MASK (0xffbf)
#define EFLAGS_INTERRUPT_MASK (0x200)
void InitializeTeb(INITIAL_TEB* teb, const ::Libraries::Kernel::PthreadAttr* attr) {
teb->StackBase = (void*)((u64)attr->stackaddr_attr + attr->stacksize_attr);
teb->StackLimit = nullptr;
teb->StackAllocationBase = attr->stackaddr_attr;
}
void InitializeContext(CONTEXT* ctx, ThreadFunc func, void* arg,
const ::Libraries::Kernel::PthreadAttr* attr) {
/* Note: The stack has to be reversed */
ctx->Rsp = (u64)attr->stackaddr_attr + attr->stacksize_attr;
ctx->Rbp = (u64)attr->stackaddr_attr + attr->stacksize_attr;
ctx->Rcx = (u64)arg;
ctx->Rip = (u64)func;
ctx->SegGs = KGDT64_R3_DATA | RPL_MASK;
ctx->SegEs = KGDT64_R3_DATA | RPL_MASK;
ctx->SegDs = KGDT64_R3_DATA | RPL_MASK;
ctx->SegCs = KGDT64_R3_CODE | RPL_MASK;
ctx->SegSs = KGDT64_R3_DATA | RPL_MASK;
ctx->SegFs = KGDT64_R3_CMTEB | RPL_MASK;
ctx->EFlags = 0x3000 | EFLAGS_INTERRUPT_MASK;
ctx->MxCsr = INITIAL_MXCSR;
ctx->FltSave.ControlWord = INITIAL_FPUCW;
ctx->FltSave.MxCsr = INITIAL_MXCSR;
ctx->FltSave.MxCsr_Mask = INITIAL_MXCSR_MASK;
ctx->ContextFlags =
CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT;
}
#endif
NativeThread::NativeThread() : native_handle{0} {}
NativeThread::~NativeThread() {}
/// Starts a native thread that runs `func(arg)` on the stack described by `attr`.
///
/// On POSIX hosts the thread is created with pthread_create on the supplied stack; on Windows
/// it is created with NtCreateThread from a hand-built TEB and CONTEXT.
///
/// @param func Thread entry point.
/// @param arg  Opaque argument passed to `func`.
/// @param attr Guest thread attributes; the stack address and stack size are read.
/// @return The pthread_create result on POSIX hosts, the NtCreateThread result on Windows.
int NativeThread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) {
#ifndef _WIN64
    pthread_t* pthr = reinterpret_cast<pthread_t*>(&native_handle);
    pthread_attr_t pattr;
    pthread_attr_init(&pattr);
    pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr);
    const int result = pthread_create(pthr, &pattr, (PthreadFunc)func, arg);
    // The attribute object is only consulted while the thread is being created; destroying it
    // afterwards does not affect the new thread and releases whatever it holds.
    pthread_attr_destroy(&pattr);
    return result;
#else
    CLIENT_ID clientId{};
    INITIAL_TEB teb{};
    CONTEXT ctx{};

    clientId.UniqueProcess = GetCurrentProcess();
    clientId.UniqueThread = GetCurrentThread();

    InitializeTeb(&teb, attr);
    InitializeContext(&ctx, func, arg, attr);

    return NtCreateThread(&native_handle, THREAD_ALL_ACCESS, nullptr, GetCurrentProcess(),
                          &clientId, &ctx, &teb, false);
#endif
}
void NativeThread::Exit() {
if (!native_handle) {
return;
}
tid = 0;
#ifdef _WIN64
NtClose(native_handle);
native_handle = nullptr;
/* The Windows kernel will free the stack
given at thread creation via INITIAL_TEB
(StackAllocationBase) upon thread termination.
In earlier Windows versions (NT4 to Windows Server 2003),
you could get around this via disabling FreeStackOnTermination
on the TEB. This has been removed since then.
To avoid this, we must forcefully set the TEB
deallocation stack pointer to NULL so ZwFreeVirtualMemory fails
in the kernel and our stack is not freed.
*/
auto* teb = reinterpret_cast<TEB*>(NtCurrentTeb());
teb->DeallocationStack = nullptr;
NtTerminateThread(nullptr, 0);
#else
// Disable and free the signal stack.
constexpr stack_t sig_stack = {
.ss_flags = SS_DISABLE,
};
sigaltstack(&sig_stack, nullptr);
if (sig_stack_ptr) {
free(sig_stack_ptr);
sig_stack_ptr = nullptr;
}
pthread_exit(nullptr);
#endif
}
/// Records the id of the calling thread and, on POSIX hosts, installs an alternate signal
/// stack for it. Must run on the thread this object represents.
void NativeThread::Initialize() {
#ifdef _WIN64
    tid = GetCurrentThreadId();
#else
    tid = (u64)pthread_self();

    // Set up an alternate signal handler stack to avoid overflowing small thread stacks.
    const size_t page_size = getpagesize();
    const size_t sig_stack_size = Common::AlignUp(std::max<size_t>(64_KB, MINSIGSTKSZ), page_size);

    // posix_memalign reports failure through its return value and does not set errno, so the
    // returned code is what gets logged. The call is kept out of the assertion expression so
    // the allocation never depends on how the assertion macro is compiled.
    const int alloc_result = posix_memalign(&sig_stack_ptr, page_size, sig_stack_size);
    ASSERT_MSG(alloc_result == 0, "Failed to allocate signal stack: {}", alloc_result);

    stack_t sig_stack;
    sig_stack.ss_sp = sig_stack_ptr;
    sig_stack.ss_size = sig_stack_size;
    sig_stack.ss_flags = 0;

    // sigaltstack, unlike posix_memalign, does report its error through errno.
    const int sigaltstack_result = sigaltstack(&sig_stack, nullptr);
    ASSERT_MSG(sigaltstack_result == 0, "Failed to set signal stack: {}", errno);
#endif
}
} // namespace Core
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/alignment.h"
#include "core/libraries/kernel/threads/pthread.h"
#include "thread.h"
#ifdef _WIN64
#include <windows.h>
#include "common/ntapi.h"
#else
#include <csignal>
#include <pthread.h>
#endif
namespace Core {
#ifdef _WIN64
// Constants used by InitializeContext to seed the CONTEXT of a newly created thread:
// segment selector bases (combined with RPL_MASK), the initial x87 control word, the MXCSR
// mask and the EFLAGS bit that is OR-ed into the initial flags.
#define KGDT64_R3_DATA (0x28)
#define KGDT64_R3_CODE (0x30)
#define KGDT64_R3_CMTEB (0x50)
#define RPL_MASK (0x03)
#define INITIAL_FPUCW (0x037f)
#define INITIAL_MXCSR_MASK (0xffbf)
#define EFLAGS_INTERRUPT_MASK (0x200)
/// Describes the guest-provided stack to the Windows kernel.
///
/// The allocation base is the start of the stack region and the stack base is its end
/// (start + size); the stack limit is left null.
void InitializeTeb(INITIAL_TEB* teb, const ::Libraries::Kernel::PthreadAttr* attr) {
    const auto stack_start = attr->stackaddr_attr;
    const auto stack_bytes = attr->stacksize_attr;

    teb->StackAllocationBase = stack_start;
    teb->StackLimit = nullptr;
    teb->StackBase = (void*)((u64)stack_start + stack_bytes);
}
/// Fills in the initial register state of a thread that will start executing `func(arg)` on
/// the stack described by `attr`.
///
/// @param ctx  Context to populate; control, integer, segment and floating point state are set.
/// @param func Entry point, loaded into RIP.
/// @param arg  Entry point argument, loaded into RCX.
/// @param attr Guest thread attributes providing the stack address and size.
void InitializeContext(CONTEXT* ctx, ThreadFunc func, void* arg,
                       const ::Libraries::Kernel::PthreadAttr* attr) {
    ctx->ContextFlags =
        CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT;

    // Entry point and its single argument.
    ctx->Rip = (u64)func;
    ctx->Rcx = (u64)arg;

    /* Note: The stack has to be reversed */
    // Both the stack and the frame pointer start at the end of the region (address + size).
    const u64 stack_end = (u64)attr->stackaddr_attr + attr->stacksize_attr;
    ctx->Rsp = stack_end;
    ctx->Rbp = stack_end;

    // Segment selectors: data for DS/ES/GS/SS, code for CS and the TEB selector for FS.
    constexpr auto data_selector = KGDT64_R3_DATA | RPL_MASK;
    ctx->SegDs = data_selector;
    ctx->SegEs = data_selector;
    ctx->SegGs = data_selector;
    ctx->SegSs = data_selector;
    ctx->SegCs = KGDT64_R3_CODE | RPL_MASK;
    ctx->SegFs = KGDT64_R3_CMTEB | RPL_MASK;

    ctx->EFlags = 0x3000 | EFLAGS_INTERRUPT_MASK;

    // Default floating point / SSE control state.
    ctx->MxCsr = INITIAL_MXCSR;
    ctx->FltSave.MxCsr = INITIAL_MXCSR;
    ctx->FltSave.MxCsr_Mask = INITIAL_MXCSR_MASK;
    ctx->FltSave.ControlWord = INITIAL_FPUCW;
}
#endif
NativeThread::NativeThread() : native_handle{0} {}
/// Nothing to release here; thread resources are torn down in Exit().
NativeThread::~NativeThread() = default;
/// Starts a native thread that runs `func(arg)` on the stack described by `attr`.
///
/// On POSIX hosts the thread is created with pthread_create on the supplied stack; on Windows
/// it is created with NtCreateThread from a hand-built TEB and CONTEXT.
///
/// @param func Thread entry point.
/// @param arg  Opaque argument passed to `func`.
/// @param attr Guest thread attributes; the stack address and stack size are read.
/// @return The pthread_create result on POSIX hosts, the NtCreateThread result on Windows.
int NativeThread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) {
#ifndef _WIN64
    pthread_t* pthr = reinterpret_cast<pthread_t*>(&native_handle);
    pthread_attr_t pattr;
    pthread_attr_init(&pattr);
    pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr);
    const int result = pthread_create(pthr, &pattr, (PthreadFunc)func, arg);
    // The attribute object is only consulted while the thread is being created; destroying it
    // afterwards does not affect the new thread and releases whatever it holds.
    pthread_attr_destroy(&pattr);
    return result;
#else
    CLIENT_ID clientId{};
    INITIAL_TEB teb{};
    CONTEXT ctx{};

    clientId.UniqueProcess = GetCurrentProcess();
    clientId.UniqueThread = GetCurrentThread();

    InitializeTeb(&teb, attr);
    InitializeContext(&ctx, func, arg, attr);

    return NtCreateThread(&native_handle, THREAD_ALL_ACCESS, nullptr, GetCurrentProcess(),
                          &clientId, &ctx, &teb, false);
#endif
}
/// Terminates the calling thread and releases the per-thread resources held by this object.
///
/// Does nothing when no native handle is set. Otherwise the cached thread id is cleared and
/// the thread is ended with NtTerminateThread (Windows) or pthread_exit (elsewhere), so on
/// that path the function does not return to its caller.
void NativeThread::Exit() {
if (!native_handle) {
return;
}
tid = 0;
#ifdef _WIN64
// Close our handle first; the thread itself is terminated further down.
NtClose(native_handle);
native_handle = nullptr;
/* The Windows kernel will free the stack
given at thread creation via INITIAL_TEB
(StackAllocationBase) upon thread termination.
In earlier Windows versions (NT4 to Windows Server 2003),
you could get around this via disabling FreeStackOnTermination
on the TEB. This has been removed since then.
To avoid this, we must forcefully set the TEB
deallocation stack pointer to NULL so ZwFreeVirtualMemory fails
in the kernel and our stack is not freed.
*/
auto* teb = reinterpret_cast<TEB*>(NtCurrentTeb());
teb->DeallocationStack = nullptr;
NtTerminateThread(nullptr, 0);
#else
// Disable and free the signal stack.
// The alternate stack is disabled before its memory is freed.
constexpr stack_t sig_stack = {
.ss_flags = SS_DISABLE,
};
sigaltstack(&sig_stack, nullptr);
if (sig_stack_ptr) {
free(sig_stack_ptr);
sig_stack_ptr = nullptr;
}
pthread_exit(nullptr);
#endif
}
/// Records the id of the calling thread and, on POSIX hosts, installs an alternate signal
/// stack for it. Must run on the thread this object represents.
void NativeThread::Initialize() {
#ifdef _WIN64
    tid = GetCurrentThreadId();
#else
    tid = (u64)pthread_self();

    // Set up an alternate signal handler stack to avoid overflowing small thread stacks.
    const size_t page_size = getpagesize();
    const size_t sig_stack_size = Common::AlignUp(std::max<size_t>(64_KB, MINSIGSTKSZ), page_size);

    // posix_memalign reports failure through its return value and does not set errno, so the
    // returned code is what gets logged. The call is kept out of the assertion expression so
    // the allocation never depends on how the assertion macro is compiled.
    const int alloc_result = posix_memalign(&sig_stack_ptr, page_size, sig_stack_size);
    ASSERT_MSG(alloc_result == 0, "Failed to allocate signal stack: {}", alloc_result);

    stack_t sig_stack;
    sig_stack.ss_sp = sig_stack_ptr;
    sig_stack.ss_size = sig_stack_size;
    sig_stack.ss_flags = 0;

    // sigaltstack, unlike posix_memalign, does report its error through errno.
    const int sigaltstack_result = sigaltstack(&sig_stack, nullptr);
    ASSERT_MSG(sigaltstack_result == 0, "Failed to set signal stack: {}", errno);
#endif
}
} // namespace Core

View file

@ -1,45 +1,45 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace Libraries::Kernel {
struct PthreadAttr;
} // namespace Libraries::Kernel
namespace Core {
using ThreadFunc = void (*)(void*);
using PthreadFunc = void* (*)(void*);
/// Thin wrapper around a host OS thread that runs on a caller-supplied stack.
///
/// Create() starts the thread, Initialize() is meant to run on the new thread to record its
/// id (and, on POSIX hosts, set up a signal stack), and Exit() terminates the calling thread.
class NativeThread {
public:
    NativeThread();
    ~NativeThread();

    /// Starts a thread running `func(arg)` on the stack described by `attr`; returns the
    /// result of the underlying OS thread creation call.
    int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr);
    /// Terminates the calling thread; does nothing when no thread was created.
    void Exit();
    /// Records the calling thread's id and prepares per-thread state.
    void Initialize();

    /// Native handle as an integer; zero when no thread has been created.
    uintptr_t GetHandle() const {
        return reinterpret_cast<uintptr_t>(native_handle);
    }

    /// Thread id recorded by Initialize(); zero before that and after Exit().
    u64 GetTid() const {
        return tid;
    }

private:
    // Default member initializers keep every member well-defined regardless of which
    // constructor initializer list is used.
#ifdef _WIN64
    void* native_handle = nullptr;
#else
    uintptr_t native_handle = 0;
    void* sig_stack_ptr = nullptr; ///< Alternate signal stack; freed in Exit().
#endif
    u64 tid = 0;
};
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace Libraries::Kernel {
struct PthreadAttr;
} // namespace Libraries::Kernel
namespace Core {
using ThreadFunc = void (*)(void*);
using PthreadFunc = void* (*)(void*);
/// Thin wrapper around a host OS thread that runs on a caller-supplied stack.
///
/// Create() starts the thread, Initialize() is meant to run on the new thread to record its
/// id (and, on POSIX hosts, set up a signal stack), and Exit() terminates the calling thread.
class NativeThread {
public:
    NativeThread();
    ~NativeThread();

    /// Starts a thread running `func(arg)` on the stack described by `attr`; returns the
    /// result of the underlying OS thread creation call.
    int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr);
    /// Terminates the calling thread; does nothing when no thread was created.
    void Exit();
    /// Records the calling thread's id and prepares per-thread state.
    void Initialize();

    /// Native handle as an integer; zero when no thread has been created.
    uintptr_t GetHandle() const {
        return reinterpret_cast<uintptr_t>(native_handle);
    }

    /// Thread id recorded by Initialize(); zero before that and after Exit().
    u64 GetTid() const {
        return tid;
    }

private:
    // Default member initializers keep every member well-defined regardless of which
    // constructor initializer list is used.
#ifdef _WIN64
    void* native_handle = nullptr;
#else
    uintptr_t native_handle = 0;
    void* sig_stack_ptr = nullptr; ///< Alternate signal stack; freed in Exit().
#endif
    u64 tid = 0;
};
} // namespace Core

View file

@ -1,74 +1,74 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <optional>
#include <type_traits>
#include <boost/container/small_vector.hpp>
#include <queue>
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
// Use typename Instruction so the function can be used to return either const or mutable
// Insts depending on the context.
template <typename Instruction, typename Pred>
auto BreadthFirstSearch(Instruction* inst,
Pred&& pred) -> std::invoke_result_t<Pred, Instruction*> {
// Most often case the instruction is the desired already.
if (std::optional result = pred(inst)) {
return result;
}
// Breadth-first search visiting the right most arguments first
boost::container::small_vector<Instruction*, 2> visited;
std::queue<Instruction*> queue;
queue.push(inst);
while (!queue.empty()) {
// Pop one instruction from the queue
Instruction* inst{queue.front()};
queue.pop();
if (std::optional result = pred(inst)) {
// This is the instruction we were looking for
return result;
}
// Visit the right most arguments first
for (size_t arg = inst->NumArgs(); arg--;) {
Value arg_value{inst->Arg(arg)};
if (arg_value.IsImmediate()) {
continue;
}
// Queue instruction if it hasn't been visited
Instruction* arg_inst{arg_value.InstRecursive()};
if (std::ranges::find(visited, arg_inst) == visited.end()) {
visited.push_back(arg_inst);
queue.push(arg_inst);
}
}
}
// SSA tree has been traversed and the result hasn't been found
return std::nullopt;
}
template <typename Pred>
auto BreadthFirstSearch(const Value& value,
Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> {
if (value.IsImmediate()) {
// Nothing to do with immediates
return std::nullopt;
}
return BreadthFirstSearch(value.InstRecursive(), pred);
}
template <typename Pred>
auto BreadthFirstSearch(Value value, Pred&& pred) -> std::invoke_result_t<Pred, Inst*> {
if (value.IsImmediate()) {
// Nothing to do with immediates
return std::nullopt;
}
return BreadthFirstSearch(value.InstRecursive(), pred);
}
} // namespace Shader::IR
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <optional>
#include <type_traits>
#include <boost/container/small_vector.hpp>
#include <queue>
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
// Use typename Instruction so the function can be used to return either const or mutable
// Insts depending on the context.
template <typename Instruction, typename Pred>
auto BreadthFirstSearch(Instruction* inst,
Pred&& pred) -> std::invoke_result_t<Pred, Instruction*> {
// Most often case the instruction is the desired already.
if (std::optional result = pred(inst)) {
return result;
}
// Breadth-first search visiting the right most arguments first
boost::container::small_vector<Instruction*, 2> visited;
std::queue<Instruction*> queue;
queue.push(inst);
while (!queue.empty()) {
// Pop one instruction from the queue
Instruction* inst{queue.front()};
queue.pop();
if (std::optional result = pred(inst)) {
// This is the instruction we were looking for
return result;
}
// Visit the right most arguments first
for (size_t arg = inst->NumArgs(); arg--;) {
Value arg_value{inst->Arg(arg)};
if (arg_value.IsImmediate()) {
continue;
}
// Queue instruction if it hasn't been visited
Instruction* arg_inst{arg_value.InstRecursive()};
if (std::ranges::find(visited, arg_inst) == visited.end()) {
visited.push_back(arg_inst);
queue.push(arg_inst);
}
}
}
// SSA tree has been traversed and the result hasn't been found
return std::nullopt;
}
/// Breadth-first search entry point for a read-only value.
/// Immediates yield std::nullopt; any other value is searched starting from the
/// instruction returned by InstRecursive().
template <typename Pred>
auto BreadthFirstSearch(const Value& value,
                        Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> {
    if (!value.IsImmediate()) {
        // Delegate to the instruction overload
        return BreadthFirstSearch(value.InstRecursive(), pred);
    }
    // Immediates have no instruction to inspect
    return std::nullopt;
}
/// Breadth-first search entry point for a value taken by copy (mutable Inst predicate).
/// Immediates yield std::nullopt; any other value is searched starting from the
/// instruction returned by InstRecursive().
template <typename Pred>
auto BreadthFirstSearch(Value value, Pred&& pred) -> std::invoke_result_t<Pred, Inst*> {
    if (!value.IsImmediate()) {
        // Delegate to the instruction overload
        return BreadthFirstSearch(value.InstRecursive(), pred);
    }
    // Immediates have no instruction to inspect
    return std::nullopt;
}
} // namespace Shader::IR

View file

@ -1,203 +1,203 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <optional>
#include <utility>
#include <vector>
#include "common/types.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class Scheduler;
} // namespace Vulkan
VK_DEFINE_HANDLE(VmaAllocation)
VK_DEFINE_HANDLE(VmaAllocator)
struct VmaAllocationInfo;
namespace VideoCore {
/// Hints and requirements for the backing memory type of a commit
enum class MemoryUsage {
    DeviceLocal, ///< Requests device local buffer.
    Upload,      ///< Requires a host visible memory type optimized for CPU to GPU uploads
    Download,    ///< Requires a host visible memory type optimized for GPU to CPU readbacks
    Stream,      ///< Requests device local host visible buffer, falling back to host memory.
};
/// Usage mask combining transfer source, uniform texel buffer, uniform buffer, index buffer,
/// vertex buffer and indirect buffer usage.
constexpr vk::BufferUsageFlags ReadFlags =
    vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eUniformTexelBuffer |
    vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
    vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer;
/// ReadFlags extended with transfer destination, storage texel buffer and storage buffer usage.
constexpr vk::BufferUsageFlags AllFlags = ReadFlags | vk::BufferUsageFlagBits::eTransferDst |
                                          vk::BufferUsageFlagBits::eStorageTexelBuffer |
                                          vk::BufferUsageFlagBits::eStorageBuffer;
struct UniqueBuffer {
explicit UniqueBuffer(vk::Device device, VmaAllocator allocator);
~UniqueBuffer();
UniqueBuffer(const UniqueBuffer&) = delete;
UniqueBuffer& operator=(const UniqueBuffer&) = delete;
UniqueBuffer(UniqueBuffer&& other)
: allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)},
buffer{std::exchange(other.buffer, VK_NULL_HANDLE)} {}
UniqueBuffer& operator=(UniqueBuffer&& other) {
buffer = std::exchange(other.buffer, VK_NULL_HANDLE);
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
allocation = std::exchange(other.allocation, VK_NULL_HANDLE);
return *this;
}
void Create(const vk::BufferCreateInfo& image_ci, MemoryUsage usage,
VmaAllocationInfo* out_alloc_info);
operator vk::Buffer() const {
return buffer;
}
vk::Device device;
VmaAllocator allocator;
VmaAllocation allocation;
vk::Buffer buffer{};
};
class Buffer {
public:
explicit Buffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
MemoryUsage usage, VAddr cpu_addr_, vk::BufferUsageFlags flags,
u64 size_bytes_);
Buffer& operator=(const Buffer&) = delete;
Buffer(const Buffer&) = delete;
Buffer& operator=(Buffer&&) = default;
Buffer(Buffer&&) = default;
vk::BufferView View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
AmdGpu::NumberFormat nfmt);
/// Increases the likeliness of this being a stream buffer
void IncreaseStreamScore(int score) noexcept {
stream_score += score;
}
/// Returns the likeliness of this being a stream buffer
[[nodiscard]] int StreamScore() const noexcept {
return stream_score;
}
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
[[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
}
/// Returns the base CPU address of the buffer
[[nodiscard]] VAddr CpuAddr() const noexcept {
return cpu_addr;
}
/// Returns the offset relative to the given CPU address
[[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept {
return static_cast<u32>(other_cpu_addr - cpu_addr);
}
size_t SizeBytes() const {
return size_bytes;
}
vk::Buffer Handle() const noexcept {
return buffer;
}
std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask,
vk::PipelineStageFlagBits2 dst_stage) {
if (dst_acess_mask == access_mask && stage == dst_stage) {
return {};
}
auto barrier = vk::BufferMemoryBarrier2{
.srcStageMask = stage,
.srcAccessMask = access_mask,
.dstStageMask = dst_stage,
.dstAccessMask = dst_acess_mask,
.buffer = buffer.buffer,
.size = size_bytes,
};
access_mask = dst_acess_mask;
stage = dst_stage;
return barrier;
}
public:
VAddr cpu_addr = 0;
bool is_picked{};
bool is_coherent{};
bool is_deleted{};
int stream_score = 0;
size_t size_bytes = 0;
std::span<u8> mapped_data;
const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler;
MemoryUsage usage;
UniqueBuffer buffer;
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
};
/// Buffer specialization exposing a map / commit interface for handing out regions of
/// its memory, plus bookkeeping ("watches") for pending operations.
class StreamBuffer : public Buffer {
public:
    explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
                          MemoryUsage usage, u64 size_bytes_);

    /// Reserves a region of memory from the stream buffer.
    /// Returns the pointer to write to and the offset of the region.
    std::pair<u8*, u64> Map(u64 size, u64 alignment = 0);

    /// Ensures that reserved bytes of memory are available to the GPU.
    void Commit();

    /// Maps a region, fills it with `size` bytes read from address `src`, commits it and
    /// returns the offset reported by Map().
    u64 Copy(VAddr src, size_t size, size_t alignment = 0) {
        const auto mapping = Map(size, alignment);
        std::memcpy(mapping.first, reinterpret_cast<const void*>(src), size);
        Commit();
        return mapping.second;
    }

    /// Returns the buffer size minus the current offset and the currently mapped size
    u64 GetFreeSize() const {
        const u64 consumed = offset + mapped_size;
        return size_bytes - consumed;
    }

private:
    struct Watch {
        u64 tick{};
        u64 upper_bound{};
    };

    /// Increases the amount of watches available.
    void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);

    /// Waits pending watches until requested upper bound.
    void WaitPendingOperations(u64 requested_upper_bound);

private:
    u64 offset{};
    u64 mapped_size{};
    std::vector<Watch> current_watches;
    std::size_t current_watch_cursor{};
    std::optional<size_t> invalidation_mark;
    std::vector<Watch> previous_watches;
    std::size_t wait_cursor{};
    u64 wait_bound{};
};
} // namespace VideoCore
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <optional>
#include <utility>
#include <vector>
#include "common/types.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class Scheduler;
} // namespace Vulkan
VK_DEFINE_HANDLE(VmaAllocation)
VK_DEFINE_HANDLE(VmaAllocator)
struct VmaAllocationInfo;
namespace VideoCore {
/// Hints and requirements for the backing memory type of a commit
enum class MemoryUsage {
    DeviceLocal, ///< Requests device local buffer.
    Upload,      ///< Requires a host visible memory type optimized for CPU to GPU uploads
    Download,    ///< Requires a host visible memory type optimized for GPU to CPU readbacks
    Stream,      ///< Requests device local host visible buffer, falling back to host memory.
};
/// Usage mask combining transfer source, uniform texel buffer, uniform buffer, index buffer,
/// vertex buffer and indirect buffer usage.
constexpr vk::BufferUsageFlags ReadFlags =
    vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eUniformTexelBuffer |
    vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
    vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer;
/// ReadFlags extended with transfer destination, storage texel buffer and storage buffer usage.
constexpr vk::BufferUsageFlags AllFlags = ReadFlags | vk::BufferUsageFlagBits::eTransferDst |
                                          vk::BufferUsageFlagBits::eStorageTexelBuffer |
                                          vk::BufferUsageFlagBits::eStorageBuffer;
struct UniqueBuffer {
explicit UniqueBuffer(vk::Device device, VmaAllocator allocator);
~UniqueBuffer();
UniqueBuffer(const UniqueBuffer&) = delete;
UniqueBuffer& operator=(const UniqueBuffer&) = delete;
UniqueBuffer(UniqueBuffer&& other)
: allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)},
buffer{std::exchange(other.buffer, VK_NULL_HANDLE)} {}
UniqueBuffer& operator=(UniqueBuffer&& other) {
buffer = std::exchange(other.buffer, VK_NULL_HANDLE);
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
allocation = std::exchange(other.allocation, VK_NULL_HANDLE);
return *this;
}
void Create(const vk::BufferCreateInfo& image_ci, MemoryUsage usage,
VmaAllocationInfo* out_alloc_info);
operator vk::Buffer() const {
return buffer;
}
vk::Device device;
VmaAllocator allocator;
VmaAllocation allocation;
vk::Buffer buffer{};
};
class Buffer {
public:
explicit Buffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
MemoryUsage usage, VAddr cpu_addr_, vk::BufferUsageFlags flags,
u64 size_bytes_);
Buffer& operator=(const Buffer&) = delete;
Buffer(const Buffer&) = delete;
Buffer& operator=(Buffer&&) = default;
Buffer(Buffer&&) = default;
vk::BufferView View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
AmdGpu::NumberFormat nfmt);
/// Increases the likeliness of this being a stream buffer
void IncreaseStreamScore(int score) noexcept {
stream_score += score;
}
/// Returns the likeliness of this being a stream buffer
[[nodiscard]] int StreamScore() const noexcept {
return stream_score;
}
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
[[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
}
/// Returns the base CPU address of the buffer
[[nodiscard]] VAddr CpuAddr() const noexcept {
return cpu_addr;
}
/// Returns the offset relative to the given CPU address
[[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept {
return static_cast<u32>(other_cpu_addr - cpu_addr);
}
size_t SizeBytes() const {
return size_bytes;
}
vk::Buffer Handle() const noexcept {
return buffer;
}
std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask,
vk::PipelineStageFlagBits2 dst_stage) {
if (dst_acess_mask == access_mask && stage == dst_stage) {
return {};
}
auto barrier = vk::BufferMemoryBarrier2{
.srcStageMask = stage,
.srcAccessMask = access_mask,
.dstStageMask = dst_stage,
.dstAccessMask = dst_acess_mask,
.buffer = buffer.buffer,
.size = size_bytes,
};
access_mask = dst_acess_mask;
stage = dst_stage;
return barrier;
}
public:
VAddr cpu_addr = 0;
bool is_picked{};
bool is_coherent{};
bool is_deleted{};
int stream_score = 0;
size_t size_bytes = 0;
std::span<u8> mapped_data;
const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler;
MemoryUsage usage;
UniqueBuffer buffer;
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
};
/// Buffer specialization exposing a map / commit interface for handing out regions of
/// its memory, plus bookkeeping ("watches") for pending operations.
class StreamBuffer : public Buffer {
public:
    explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
                          MemoryUsage usage, u64 size_bytes_);

    /// Reserves a region of memory from the stream buffer.
    /// Returns the pointer to write to and the offset of the region.
    std::pair<u8*, u64> Map(u64 size, u64 alignment = 0);

    /// Ensures that reserved bytes of memory are available to the GPU.
    void Commit();

    /// Maps a region, fills it with `size` bytes read from address `src`, commits it and
    /// returns the offset reported by Map().
    u64 Copy(VAddr src, size_t size, size_t alignment = 0) {
        const auto mapping = Map(size, alignment);
        std::memcpy(mapping.first, reinterpret_cast<const void*>(src), size);
        Commit();
        return mapping.second;
    }

    /// Returns the buffer size minus the current offset and the currently mapped size
    u64 GetFreeSize() const {
        const u64 consumed = offset + mapped_size;
        return size_bytes - consumed;
    }

private:
    struct Watch {
        u64 tick{};
        u64 upper_bound{};
    };

    /// Increases the amount of watches available.
    void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);

    /// Waits pending watches until requested upper bound.
    void WaitPendingOperations(u64 requested_upper_bound);

private:
    u64 offset{};
    u64 mapped_size{};
    std::vector<Watch> current_watches;
    std::size_t current_watch_cursor{};
    std::optional<size_t> invalidation_mark;
    std::vector<Watch> previous_watches;
    std::size_t wait_cursor{};
    u64 wait_bound{};
};
} // namespace VideoCore

File diff suppressed because it is too large Load diff

View file

@ -1,168 +1,168 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "common/div_ceil.h"
#include "common/slot_vector.h"
#include "common/types.h"
#include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/range_set.h"
#include "video_core/multi_level_page_table.h"
namespace AmdGpu {
struct Liverpool;
}
namespace Shader {
namespace Gcn {
struct FetchShaderData;
}
struct Info;
} // namespace Shader
namespace VideoCore {
/// Identifier of a buffer inside the cache; alias of Common::SlotId.
using BufferId = Common::SlotId;
/// Buffer id constructed from 0.
/// NOTE(review): presumably reserved to denote "no buffer" - confirm against SlotVector.
static constexpr BufferId NULL_BUFFER_ID{0};
class TextureCache;
/// Cache of Buffer objects addressed through a page table keyed by device address pages.
class BufferCache {
public:
    /// log2 of the caching page size
    static constexpr u32 CACHING_PAGEBITS = 12;
    /// Caching page size in bytes (1 << CACHING_PAGEBITS)
    static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
    static constexpr u64 DEVICE_PAGESIZE = 4_KB;

    /// Parameters of the page table that maps pages to buffer ids
    struct Traits {
        using Entry = BufferId;
        static constexpr size_t AddressSpaceBits = 40;
        static constexpr size_t FirstLevelBits = 14;
        static constexpr size_t PageBits = CACHING_PAGEBITS;
    };
    using PageTable = MultiLevelPageTable<Traits>;

    /// Result type of ResolveOverlaps(): a list of buffer ids plus an address range.
    /// NOTE(review): presumably the overlapping buffers and their merged extent - confirm.
    struct OverlapResult {
        boost::container::small_vector<BufferId, 16> ids;
        VAddr begin;
        VAddr end;
        bool has_stream_leap = false;
    };

public:
    explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
                         PageManager& tracker);
    ~BufferCache();

    /// Returns a pointer to GDS device local buffer.
    [[nodiscard]] const Buffer* GetGdsBuffer() const noexcept {
        return &gds_buffer;
    }

    /// Retrieves the buffer with the specified id.
    [[nodiscard]] Buffer& GetBuffer(BufferId id) {
        return slot_buffers[id];
    }

    /// Returns a reference to the null buffer view member.
    [[nodiscard]] vk::BufferView& NullBufferView() {
        return null_buffer_view;
    }

    /// Invalidates any buffer in the logical page range.
    void InvalidateMemory(VAddr device_addr, u64 size);

    /// Binds host vertex buffers for the current draw.
    bool BindVertexBuffers(const Shader::Info& vs_info,
                           const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);

    /// Bind host index buffer for the current draw.
    u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);

    /// Writes a value to GPU buffer.
    void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);

    /// Obtains a buffer and offset for the given host data.
    /// NOTE(review): name suggests a uniform buffer upload; definition not visible here.
    [[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);

    /// Obtains a buffer for the specified region.
    [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
                                                       bool is_texel_buffer = false,
                                                       BufferId buffer_id = {});

    /// Attempts to obtain a buffer without modifying the cache contents.
    [[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size,
                                                           bool prefer_gpu);

    /// Return true when a region is registered on the cache
    [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);

    /// Return true when a CPU region is modified from the CPU
    [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);

    /// Return true when a CPU region is modified from the GPU
    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);

    /// Looks up (or, presumably, creates) the buffer id for the region.
    /// NOTE(review): creation on miss is an assumption; definition not visible here.
    [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);

private:
    /// Calls func(buffer_id, buffer) for each buffer found in the page table for the pages
    /// covering [device_addr, device_addr + size).
    template <typename Func>
    void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
        // One past the last page touched by the range
        const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
        for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) {
            const BufferId buffer_id = page_table[page];
            if (!buffer_id) {
                // No buffer on this page, try the next one
                ++page;
                continue;
            }
            Buffer& buffer = slot_buffers[buffer_id];
            func(buffer_id, buffer);

            // Continue at the first page past the end of this buffer so that it is
            // reported only once
            const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
            page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
        }
    }

    void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);

    [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);

    void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);

    [[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size);

    void Register(BufferId buffer_id);

    void Unregister(BufferId buffer_id);

    /// Shared implementation hook for Register / Unregister, selected by `insert`.
    /// NOTE(review): inferred from the names; definition not visible here.
    template <bool insert>
    void ChangeRegister(BufferId buffer_id);

    void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer);

    bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);

    void DeleteBuffer(BufferId buffer_id);

    // Members are initialized in declaration order; keep the order stable.
    const Vulkan::Instance& instance;
    Vulkan::Scheduler& scheduler;
    AmdGpu::Liverpool* liverpool;
    TextureCache& texture_cache;
    PageManager& tracker;
    StreamBuffer staging_buffer;
    StreamBuffer stream_buffer;
    Buffer gds_buffer;
    std::mutex mutex;
    Common::SlotVector<Buffer> slot_buffers;
    RangeSet gpu_modified_ranges;
    vk::BufferView null_buffer_view;
    MemoryTracker memory_tracker;
    PageTable page_table;
};
} // namespace VideoCore
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "common/div_ceil.h"
#include "common/slot_vector.h"
#include "common/types.h"
#include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/range_set.h"
#include "video_core/multi_level_page_table.h"
namespace AmdGpu {
struct Liverpool;
}
namespace Shader {
namespace Gcn {
struct FetchShaderData;
}
struct Info;
} // namespace Shader
namespace VideoCore {
/// Identifier of a buffer inside the cache; alias of Common::SlotId.
using BufferId = Common::SlotId;
/// Buffer id constructed from 0.
/// NOTE(review): presumably reserved to denote "no buffer" - confirm against SlotVector.
static constexpr BufferId NULL_BUFFER_ID{0};
class TextureCache;
/// Cache of Buffer objects addressed through a page table keyed by device address pages.
class BufferCache {
public:
    /// log2 of the caching page size
    static constexpr u32 CACHING_PAGEBITS = 12;
    /// Caching page size in bytes (1 << CACHING_PAGEBITS)
    static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
    static constexpr u64 DEVICE_PAGESIZE = 4_KB;

    /// Parameters of the page table that maps pages to buffer ids
    struct Traits {
        using Entry = BufferId;
        static constexpr size_t AddressSpaceBits = 40;
        static constexpr size_t FirstLevelBits = 14;
        static constexpr size_t PageBits = CACHING_PAGEBITS;
    };
    using PageTable = MultiLevelPageTable<Traits>;

    /// Result type of ResolveOverlaps(): a list of buffer ids plus an address range.
    /// NOTE(review): presumably the overlapping buffers and their merged extent - confirm.
    struct OverlapResult {
        boost::container::small_vector<BufferId, 16> ids;
        VAddr begin;
        VAddr end;
        bool has_stream_leap = false;
    };

public:
    explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
                         PageManager& tracker);
    ~BufferCache();

    /// Returns a pointer to GDS device local buffer.
    [[nodiscard]] const Buffer* GetGdsBuffer() const noexcept {
        return &gds_buffer;
    }

    /// Retrieves the buffer with the specified id.
    [[nodiscard]] Buffer& GetBuffer(BufferId id) {
        return slot_buffers[id];
    }

    /// Returns a reference to the null buffer view member.
    [[nodiscard]] vk::BufferView& NullBufferView() {
        return null_buffer_view;
    }

    /// Invalidates any buffer in the logical page range.
    void InvalidateMemory(VAddr device_addr, u64 size);

    /// Binds host vertex buffers for the current draw.
    bool BindVertexBuffers(const Shader::Info& vs_info,
                           const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);

    /// Bind host index buffer for the current draw.
    u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);

    /// Writes a value to GPU buffer.
    void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);

    /// Obtains a buffer and offset for the given host data.
    /// NOTE(review): name suggests a uniform buffer upload; definition not visible here.
    [[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);

    /// Obtains a buffer for the specified region.
    [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
                                                       bool is_texel_buffer = false,
                                                       BufferId buffer_id = {});

    /// Attempts to obtain a buffer without modifying the cache contents.
    [[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size,
                                                           bool prefer_gpu);

    /// Return true when a region is registered on the cache
    [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);

    /// Return true when a CPU region is modified from the CPU
    [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);

    /// Return true when a CPU region is modified from the GPU
    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);

    /// Looks up (or, presumably, creates) the buffer id for the region.
    /// NOTE(review): creation on miss is an assumption; definition not visible here.
    [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);

private:
    /// Calls func(buffer_id, buffer) for each buffer found in the page table for the pages
    /// covering [device_addr, device_addr + size).
    template <typename Func>
    void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
        // One past the last page touched by the range
        const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
        for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) {
            const BufferId buffer_id = page_table[page];
            if (!buffer_id) {
                // No buffer on this page, try the next one
                ++page;
                continue;
            }
            Buffer& buffer = slot_buffers[buffer_id];
            func(buffer_id, buffer);

            // Continue at the first page past the end of this buffer so that it is
            // reported only once
            const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
            page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
        }
    }

    void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);

    [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);

    void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);

    [[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size);

    void Register(BufferId buffer_id);

    void Unregister(BufferId buffer_id);

    /// Shared implementation hook for Register / Unregister, selected by `insert`.
    /// NOTE(review): inferred from the names; definition not visible here.
    template <bool insert>
    void ChangeRegister(BufferId buffer_id);

    void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer);

    bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);

    void DeleteBuffer(BufferId buffer_id);

    // Members are initialized in declaration order; keep the order stable.
    const Vulkan::Instance& instance;
    Vulkan::Scheduler& scheduler;
    AmdGpu::Liverpool* liverpool;
    TextureCache& texture_cache;
    PageManager& tracker;
    StreamBuffer staging_buffer;
    StreamBuffer stream_buffer;
    Buffer gds_buffer;
    std::mutex mutex;
    Common::SlotVector<Buffer> slot_buffers;
    RangeSet gpu_modified_ranges;
    vk::BufferView null_buffer_view;
    MemoryTracker memory_tracker;
    PageTable page_table;
};
} // namespace VideoCore

View file

@ -1,175 +1,175 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <deque>
#include <type_traits>
#include <vector>
#include "common/types.h"
#include "video_core/buffer_cache/word_manager.h"
namespace VideoCore {
class MemoryTracker {
public:
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
static constexpr size_t HIGHER_PAGE_BITS = 22;
static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
static constexpr size_t MANAGER_POOL_SIZE = 32;
static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
using Manager = WordManager<WORDS_STACK_NEEDED>;
public:
explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
~MemoryTracker() = default;
/// Returns true if a region has been modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<true>(
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::CPU>(offset, size);
});
}
/// Returns true if a region has been modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<false>(
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::GPU>(offset, size);
});
}
/// Mark region as CPU modified, notifying the device_tracker about this change
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::CPU, true>(
manager->GetCpuAddr() + offset, size);
});
}
/// Unmark region as CPU modified, notifying the device_tracker about this change
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::CPU, false>(
manager->GetCpuAddr() + offset, size);
});
}
/// Mark region as modified from the host GPU
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::GPU, true>(
manager->GetCpuAddr() + offset, size);
});
}
/// Unmark region as modified from the host GPU
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::GPU, false>(
manager->GetCpuAddr() + offset, size);
});
}
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
IteratePages<true>(query_cpu_range, query_size,
[&func](Manager* manager, u64 offset, size_t size) {
manager->template ForEachModifiedRange<Type::CPU, true>(
manager->GetCpuAddr() + offset, size, func);
});
}
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <bool clear, typename Func>
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
IteratePages<false>(query_cpu_range, query_size,
[&func](Manager* manager, u64 offset, size_t size) {
if constexpr (clear) {
manager->template ForEachModifiedRange<Type::GPU, true>(
manager->GetCpuAddr() + offset, size, func);
} else {
manager->template ForEachModifiedRange<Type::GPU, false>(
manager->GetCpuAddr() + offset, size, func);
}
});
}
private:
/**
* @brief IteratePages Iterates L2 word manager page table.
* @param cpu_address Start byte cpu address
* @param size Size in bytes of the region of iterate.
* @param func Callback for each word manager.
* @return
*/
template <bool create_region_on_fail, typename Func>
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
std::size_t remaining_size{size};
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
auto* manager{top_tier[page_index]};
if (manager) {
if constexpr (BOOL_BREAK) {
if (func(manager, page_offset, copy_amount)) {
return true;
}
} else {
func(manager, page_offset, copy_amount);
}
} else if constexpr (create_region_on_fail) {
CreateRegion(page_index);
manager = top_tier[page_index];
if constexpr (BOOL_BREAK) {
if (func(manager, page_offset, copy_amount)) {
return true;
}
} else {
func(manager, page_offset, copy_amount);
}
}
page_index++;
page_offset = 0;
remaining_size -= copy_amount;
}
return false;
}
void CreateRegion(std::size_t page_index) {
const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS;
if (free_managers.empty()) {
manager_pool.emplace_back();
auto& last_pool = manager_pool.back();
for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE);
free_managers.push_back(&last_pool[i]);
}
}
// Each manager tracks a 4_MB virtual address space.
auto* new_manager = free_managers.back();
new_manager->SetCpuAddress(base_cpu_addr);
free_managers.pop_back();
top_tier[page_index] = new_manager;
}
PageManager* tracker;
std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
std::vector<Manager*> free_managers;
std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
};
} // namespace VideoCore
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <deque>
#include <type_traits>
#include <vector>
#include "common/types.h"
#include "video_core/buffer_cache/word_manager.h"
namespace VideoCore {
class MemoryTracker {
public:
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
static constexpr size_t HIGHER_PAGE_BITS = 22;
static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
static constexpr size_t MANAGER_POOL_SIZE = 32;
static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
using Manager = WordManager<WORDS_STACK_NEEDED>;
public:
explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
~MemoryTracker() = default;
/// Returns true if a region has been modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<true>(
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::CPU>(offset, size);
});
}
/// Returns true if a region has been modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<false>(
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::GPU>(offset, size);
});
}
/// Mark region as CPU modified, notifying the device_tracker about this change
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::CPU, true>(
manager->GetCpuAddr() + offset, size);
});
}
/// Unmark region as CPU modified, notifying the device_tracker about this change
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::CPU, false>(
manager->GetCpuAddr() + offset, size);
});
}
/// Mark region as modified from the host GPU
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::GPU, true>(
manager->GetCpuAddr() + offset, size);
});
}
/// Unmark region as modified from the host GPU
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::GPU, false>(
manager->GetCpuAddr() + offset, size);
});
}
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
IteratePages<true>(query_cpu_range, query_size,
[&func](Manager* manager, u64 offset, size_t size) {
manager->template ForEachModifiedRange<Type::CPU, true>(
manager->GetCpuAddr() + offset, size, func);
});
}
/// Invokes 'func' for every GPU modified range inside
/// [query_cpu_range, query_cpu_range + query_size). The visited pages are unmarked as GPU
/// modified only when 'clear' is true. Regions without a manager are skipped, not created.
template <bool clear, typename Func>
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
    const auto visit_manager = [&func](Manager* manager, u64 offset, size_t size) {
        // ForEachModifiedRange expects an absolute address, so rebase the in-region offset.
        const auto range_start = manager->GetCpuAddr() + offset;
        manager->template ForEachModifiedRange<Type::GPU, clear>(range_start, size, func);
    };
    IteratePages<false>(query_cpu_range, query_size, visit_manager);
}
private:
/**
 * @brief IteratePages Walks the top tier page table over a byte range, one region at a time.
 * @param cpu_address Start byte cpu address
 * @param size Size in bytes of the region to iterate.
 * @param func Callback invoked as func(manager, offset_in_region, size_in_region) for every
 *             region that has a manager. When it returns bool, a true result stops the walk.
 * @tparam create_region_on_fail When true, a missing manager is created before calling func;
 *                               when false, regions without a manager are skipped.
 * @return True if a bool-returning func returned true for some region, false otherwise.
 */
template <bool create_region_on_fail, typename Func>
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
    using FuncReturn = std::invoke_result_t<Func, Manager*, u64, size_t>;
    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
    std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
    u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
    std::size_t bytes_left{size};
    while (bytes_left != 0) {
        // Never cross a region boundary within a single callback invocation.
        const std::size_t chunk_size{
            std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, bytes_left)};
        if constexpr (create_region_on_fail) {
            if (top_tier[page_index] == nullptr) {
                CreateRegion(page_index);
            }
        }
        if (Manager* const manager = top_tier[page_index]; manager != nullptr) {
            if constexpr (BOOL_BREAK) {
                if (func(manager, page_offset, chunk_size)) {
                    return true;
                }
            } else {
                func(manager, page_offset, chunk_size);
            }
        }
        // Only the first region can start at a non-zero offset.
        ++page_index;
        page_offset = 0;
        bytes_left -= chunk_size;
    }
    return false;
}
void CreateRegion(std::size_t page_index) {
const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS;
if (free_managers.empty()) {
manager_pool.emplace_back();
auto& last_pool = manager_pool.back();
for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE);
free_managers.push_back(&last_pool[i]);
}
}
// Each manager tracks a 4_MB virtual address space.
auto* new_manager = free_managers.back();
new_manager->SetCpuAddress(base_cpu_addr);
free_managers.pop_back();
top_tier[page_index] = new_manager;
}
PageManager* tracker;
std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
std::vector<Manager*> free_managers;
std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
};
} // namespace VideoCore

View file

@ -1,398 +1,398 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <span>
#include <utility>
#include "common/div_ceil.h"
#include "common/types.h"
#include "video_core/page_manager.h"
namespace VideoCore {
/// Number of pages described by one state word (one bit per page in a u64)
constexpr u64 PAGES_PER_WORD = 64;
/// Tracking granularity: size in bytes of a single page
constexpr u64 BYTES_PER_PAGE = 4_KB;
/// Bytes of address space covered by one state word
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
/// Selects which per-page bit array an operation reads or writes
enum class Type {
CPU, ///< Pages flagged as modified by the CPU
GPU, ///< Pages flagged as modified by the GPU
Untracked, ///< Pages flagged as untracked; updated together with the CPU bits
};
/// Vector tracking modified pages tightly packed with small vector optimization.
/// The owner decides which storage is active and passes that choice to Pointer(); this struct
/// never allocates or frees 'heap' itself.
template <size_t stack_words = 1>
struct WordsArray {
    /// Returns the pointer to the words state
    [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
        return is_short ? stack.data() : heap;
    }

    /// Returns the pointer to the words state
    [[nodiscard]] u64* Pointer(bool is_short) noexcept {
        return is_short ? stack.data() : heap;
    }

    std::array<u64, stack_words> stack{}; ///< Small buffers storage
    /// Not-small buffers pointer to the storage. Defaults to null so that copying a
    /// small-buffer instance never reads an indeterminate pointer value.
    u64* heap{};
};
/**
 * Per-page state bits (CPU modified, GPU modified, untracked) of one buffer.
 *
 * Buffers needing at most 'stack_words' words keep their bits inline; larger buffers use a
 * single heap allocation shared by the three arrays, owned through 'cpu.heap'.
 *
 * @tparam stack_words Number of words stored inline before falling back to the heap
 */
template <size_t stack_words = 1>
struct Words {
    explicit Words() = default;

    /// Creates the state for a buffer of 'size_bytes_' bytes: every page starts as CPU modified
    /// and untracked, and no page starts as GPU modified.
    explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
        num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
        if (num_words == 0) {
            // A zero sized buffer has no words; the tail cleanup below would otherwise index
            // NumWords() - 1 and write out of bounds.
            return;
        }
        if (IsShort()) {
            cpu.stack.fill(~u64{0});
            gpu.stack.fill(0);
            untracked.stack.fill(~u64{0});
        } else {
            // Share allocation between CPU and GPU pages and set their default values
            u64* const alloc = new u64[num_words * 3];
            cpu.heap = alloc;
            gpu.heap = alloc + num_words;
            untracked.heap = alloc + num_words * 2;
            std::fill_n(cpu.heap, num_words, ~u64{0});
            std::fill_n(gpu.heap, num_words, 0);
            std::fill_n(untracked.heap, num_words, ~u64{0});
        }
        // Clean up trailing bits: pages past the end of the buffer must not be flagged.
        const u64 last_word_size = size_bytes % BYTES_PER_WORD;
        const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
        // The modulo keeps the shift at 0 when the last word is completely in use.
        const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
        const u64 last_word = (~u64{0} << shift) >> shift;
        cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
        untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
    }

    ~Words() {
        Release();
    }

    /// Takes over the state of 'rhs'. Self-assignment is a no-op.
    Words& operator=(Words&& rhs) noexcept {
        if (this == &rhs) {
            // Releasing first and then copying from ourselves would leave dangling pointers.
            return *this;
        }
        Release();
        size_bytes = rhs.size_bytes;
        num_words = rhs.num_words;
        cpu = rhs.cpu;
        gpu = rhs.gpu;
        untracked = rhs.untracked;
        rhs.ResetAfterMove();
        return *this;
    }

    /// Takes over the state of 'rhs'.
    Words(Words&& rhs) noexcept
        : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
          untracked{rhs.untracked} {
        rhs.ResetAfterMove();
    }

    Words& operator=(const Words&) = delete;
    Words(const Words&) = delete;

    /// Returns true when the buffer fits in the small vector optimization
    [[nodiscard]] bool IsShort() const noexcept {
        return num_words <= stack_words;
    }

    /// Returns the number of words of the buffer
    [[nodiscard]] size_t NumWords() const noexcept {
        return num_words;
    }

    /// Release buffer resources. Calling it more than once is harmless.
    void Release() {
        if (!IsShort()) {
            // CPU written words is the base for the heap allocation
            delete[] cpu.heap;
        }
        // Drop every alias of the allocation so a later Release() cannot free it again.
        cpu.heap = nullptr;
        gpu.heap = nullptr;
        untracked.heap = nullptr;
    }

    /// Returns a mutable view over the words of the array selected by 'type'
    template <Type type>
    std::span<u64> Span() noexcept {
        if constexpr (type == Type::CPU) {
            return std::span<u64>(cpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::GPU) {
            return std::span<u64>(gpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::Untracked) {
            return std::span<u64>(untracked.Pointer(IsShort()), num_words);
        }
    }

    /// Returns a read-only view over the words of the array selected by 'type'
    template <Type type>
    std::span<const u64> Span() const noexcept {
        if constexpr (type == Type::CPU) {
            return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::GPU) {
            return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::Untracked) {
            return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
        }
    }

    u64 size_bytes = 0;   ///< Size in bytes of the tracked buffer
    size_t num_words = 0; ///< Number of state words in each array
    // Value-initialized so the heap pointers are null until an allocation is made.
    WordsArray<stack_words> cpu{};
    WordsArray<stack_words> gpu{};
    WordsArray<stack_words> untracked{};

private:
    /// Puts a moved-from instance into a state that owns nothing. A heap backed source becomes
    /// empty; a small-buffer source keeps its inline copy of the bits.
    void ResetAfterMove() noexcept {
        if (!IsShort()) {
            size_bytes = 0;
            num_words = 0;
        }
        cpu.heap = nullptr;
        gpu.heap = nullptr;
        untracked.heap = nullptr;
    }
};
/**
 * Tracks at page granularity which parts of a buffer are CPU modified, GPU modified and
 * untracked, using one bit per page, and forwards CPU tracking changes to the page tracker.
 *
 * @tparam stack_words Number of state words stored inline before falling back to the heap
 */
template <size_t stack_words = 1>
class WordManager {
public:
    explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes)
        : tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {}

    explicit WordManager() = default;

    /// Rebinds the manager to a new base CPU address; the state bits are left untouched
    void SetCpuAddress(VAddr new_cpu_addr) {
        cpu_addr = new_cpu_addr;
    }

    /// Returns the base CPU address of the managed buffer
    VAddr GetCpuAddr() const {
        return cpu_addr;
    }

    /**
     * Keeps only the bits of 'word' in the range [page_start, min(page_end, 64)).
     *
     * @param word       Word to filter
     * @param page_start Index of the first bit to keep
     * @param page_end   Index one past the last bit to keep; values above 64 are clamped
     * @return The filtered word, or 0 when the range is empty
     */
    static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
        constexpr size_t number_bits = sizeof(u64) * 8;
        const size_t clamped_end = std::min(page_end, number_bits);
        if (page_start >= clamped_end) {
            // Empty range. Also avoids shifting a 64-bit value by 64 or more bits, which is
            // undefined behaviour (page_end == 0 or page_start >= 64).
            return 0;
        }
        const size_t limit_page_end = number_bits - clamped_end;
        u64 bits = (word >> page_start) << page_start;
        bits = (bits << limit_page_end) >> limit_page_end;
        return bits;
    }

    /// Splits a byte offset into {index of its word, index of its page within that word}
    static std::pair<size_t, size_t> GetWordPage(VAddr address) {
        const size_t converted_address = static_cast<size_t>(address);
        const size_t word_number = converted_address / BYTES_PER_WORD;
        const size_t amount_pages = converted_address % BYTES_PER_WORD;
        return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE);
    }

    /**
     * Calls 'func(word_index, mask)' for every state word touched by a byte range, where 'mask'
     * selects the pages of that word that fall inside the range.
     *
     * @param offset Offset in bytes from the start of the buffer
     * @param size   Size in bytes of the range
     * @param func   Callback; when it returns bool, a true result stops the iteration
     */
    template <typename Func>
    void IterateWords(size_t offset, size_t size, Func&& func) const {
        using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>;
        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
        // Values that are negative when reinterpreted as signed are clamped to zero.
        const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
        const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
        if (start >= SizeBytes() || end <= start) {
            return;
        }
        auto [start_word, start_page] = GetWordPage(start);
        // Round the end up so a partially covered last page is included.
        auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
        const size_t num_words = NumWords();
        start_word = std::min(start_word, num_words);
        end_word = std::min(end_word, num_words);
        const size_t diff = end_word - start_word;
        // Include the last word when the range ends part-way through it.
        end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD;
        end_word = std::min(end_word, num_words);
        // Express end_page relative to start_word; it is rebased by one word per iteration.
        end_page += diff * PAGES_PER_WORD;
        constexpr u64 base_mask{~0ULL};
        for (size_t word_index = start_word; word_index < end_word; word_index++) {
            const u64 mask = ExtractBits(base_mask, start_page, end_page);
            // Only the first word can start at a non-zero page.
            start_page = 0;
            end_page -= PAGES_PER_WORD;
            if constexpr (BOOL_BREAK) {
                if (func(word_index, mask)) {
                    return;
                }
            } else {
                func(word_index, mask);
            }
        }
    }

    /// Calls 'func(first_page, page_count)' for every run of consecutive set bits in 'mask',
    /// from the lowest bit to the highest
    template <typename Func>
    void IteratePages(u64 mask, Func&& func) const {
        size_t offset = 0;
        while (mask != 0) {
            const size_t empty_bits = std::countr_zero(mask);
            offset += empty_bits;
            mask = mask >> empty_bits;
            const size_t continuous_bits = std::countr_one(mask);
            func(offset, continuous_bits);
            // A full run of 64 bits cannot be shifted out with a single shift.
            mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0;
            offset += continuous_bits;
        }
    }

    /**
     * Change the state of a range of pages
     *
     * For Type::CPU the untracked bits are updated alongside and the page tracker is notified
     * about pages whose tracking state changes.
     *
     * @param dirty_addr    Base address to mark or unmark as modified
     * @param size          Size in bytes to mark or unmark as modified
     */
    template <Type type, bool enable>
    void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
        std::span<u64> state_words = words.template Span<type>();
        [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
        IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
            if constexpr (type == Type::CPU) {
                // Notify before the untracked bits change so the tracker sees the old state.
                NotifyPageTracker<!enable>(index, untracked_words[index], mask);
            }
            if constexpr (enable) {
                state_words[index] |= mask;
                if constexpr (type == Type::CPU) {
                    untracked_words[index] |= mask;
                }
            } else {
                state_words[index] &= ~mask;
                if constexpr (type == Type::CPU) {
                    untracked_words[index] &= ~mask;
                }
            }
        });
    }

    /**
     * Loop over each page in the given range and call the given function on each modified
     * range, merging runs that continue across word boundaries. When 'clear' is true the
     * visited bits are turned off and, for Type::CPU, the tracker is notified.
     *
     * @param query_cpu_range Base CPU address to loop over
     * @param size            Size in bytes of the CPU range to loop over
     * @param func            Function called as func(cpu_address, size_bytes) per modified range
     */
    template <Type type, bool clear, typename Func>
    void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
        static_assert(type != Type::Untracked);

        std::span<u64> state_words = words.template Span<type>();
        [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
        const size_t offset = query_cpu_range - cpu_addr;
        // Run being accumulated, as page indices [pending_offset, pending_pointer).
        bool pending = false;
        size_t pending_offset{};
        size_t pending_pointer{};
        const auto release = [&]() {
            func(cpu_addr + pending_offset * BYTES_PER_PAGE,
                 (pending_pointer - pending_offset) * BYTES_PER_PAGE);
        };
        IterateWords(offset, size, [&](size_t index, u64 mask) {
            if constexpr (type == Type::GPU) {
                // GPU queries ignore pages that are currently untracked.
                mask &= ~untracked_words[index];
            }
            const u64 word = state_words[index] & mask;
            if constexpr (clear) {
                if constexpr (type == Type::CPU) {
                    NotifyPageTracker<true>(index, untracked_words[index], mask);
                }
                state_words[index] &= ~mask;
                if constexpr (type == Type::CPU) {
                    untracked_words[index] &= ~mask;
                }
            }
            const size_t base_offset = index * PAGES_PER_WORD;
            IteratePages(word, [&](size_t pages_offset, size_t pages_size) {
                const auto reset = [&]() {
                    pending_offset = base_offset + pages_offset;
                    pending_pointer = base_offset + pages_offset + pages_size;
                };
                if (!pending) {
                    reset();
                    pending = true;
                    return;
                }
                if (pending_pointer == base_offset + pages_offset) {
                    // The new run starts where the pending one ends: extend it.
                    pending_pointer += pages_size;
                    return;
                }
                release();
                reset();
            });
        });
        if (pending) {
            release();
        }
    }

    /**
     * Returns true when a region has been modified
     *
     * @param offset Offset in bytes from the start of the buffer
     * @param size   Size in bytes of the region to query for modifications
     */
    template <Type type>
    [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
        static_assert(type != Type::Untracked);

        const std::span<const u64> state_words = words.template Span<type>();
        [[maybe_unused]] const std::span<const u64> untracked_words =
            words.template Span<Type::Untracked>();
        bool result = false;
        IterateWords(offset, size, [&](size_t index, u64 mask) {
            if constexpr (type == Type::GPU) {
                // GPU queries ignore pages that are currently untracked.
                mask &= ~untracked_words[index];
            }
            const u64 word = state_words[index] & mask;
            if (word != 0) {
                result = true;
                return true;
            }
            return false;
        });
        return result;
    }

    /// Returns the number of words of the manager
    [[nodiscard]] size_t NumWords() const noexcept {
        return words.NumWords();
    }

    /// Returns the size in bytes of the manager
    [[nodiscard]] u64 SizeBytes() const noexcept {
        return words.size_bytes;
    }

    /// Returns true when the buffer fits in the small vector optimization
    [[nodiscard]] bool IsShort() const noexcept {
        return words.IsShort();
    }

private:
    /// Returns a mutable pointer to the first word of the array selected by 'type'
    template <Type type>
    u64* Array() noexcept {
        if constexpr (type == Type::CPU) {
            return words.cpu.Pointer(IsShort());
        } else if constexpr (type == Type::GPU) {
            return words.gpu.Pointer(IsShort());
        } else if constexpr (type == Type::Untracked) {
            return words.untracked.Pointer(IsShort());
        }
    }

    /// Returns a read-only pointer to the first word of the array selected by 'type'
    template <Type type>
    const u64* Array() const noexcept {
        if constexpr (type == Type::CPU) {
            return words.cpu.Pointer(IsShort());
        } else if constexpr (type == Type::GPU) {
            return words.gpu.Pointer(IsShort());
        } else if constexpr (type == Type::Untracked) {
            return words.untracked.Pointer(IsShort());
        }
    }

    /**
     * Notify tracker about changes in the CPU tracking state of a word in the buffer
     *
     * Only pages selected by 'new_bits' whose bit in 'current_bits' would actually change are
     * reported: set bits when adding to the tracker, cleared bits when removing from it.
     *
     * @param word_index   Index to the word to notify to the tracker
     * @param current_bits Current untracked state of the word
     * @param new_bits     Mask of the pages being changed
     *
     * @tparam add_to_tracker True when the tracker should start tracking the new pages
     */
    template <bool add_to_tracker>
    void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const {
        u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
        VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
        IteratePages(changed_bits, [&](size_t offset, size_t size) {
            tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE,
                                            add_to_tracker ? 1 : -1);
        });
    }

    PageManager* tracker = nullptr; ///< Null after default construction instead of indeterminate
    VAddr cpu_addr = 0;             ///< Base CPU address of the managed buffer
    Words<stack_words> words;       ///< Per-page state bits
};
} // namespace VideoCore
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <span>
#include <utility>
#include "common/div_ceil.h"
#include "common/types.h"
#include "video_core/page_manager.h"
namespace VideoCore {
/// Number of pages described by one state word (one bit per page in a u64)
constexpr u64 PAGES_PER_WORD = 64;
/// Tracking granularity: size in bytes of a single page
constexpr u64 BYTES_PER_PAGE = 4_KB;
/// Bytes of address space covered by one state word
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
/// Selects which per-page bit array an operation reads or writes
enum class Type {
CPU, ///< Pages flagged as modified by the CPU
GPU, ///< Pages flagged as modified by the GPU
Untracked, ///< Pages flagged as untracked; updated together with the CPU bits
};
/// Vector tracking modified pages tightly packed with small vector optimization.
/// The owner decides which storage is active and passes that choice to Pointer(); this struct
/// never allocates or frees 'heap' itself.
template <size_t stack_words = 1>
struct WordsArray {
    /// Returns the pointer to the words state
    [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
        return is_short ? stack.data() : heap;
    }

    /// Returns the pointer to the words state
    [[nodiscard]] u64* Pointer(bool is_short) noexcept {
        return is_short ? stack.data() : heap;
    }

    std::array<u64, stack_words> stack{}; ///< Small buffers storage
    /// Not-small buffers pointer to the storage. Defaults to null so that copying a
    /// small-buffer instance never reads an indeterminate pointer value.
    u64* heap{};
};
/**
 * Per-page state bits (CPU modified, GPU modified, untracked) of one buffer.
 *
 * Buffers needing at most 'stack_words' words keep their bits inline; larger buffers use a
 * single heap allocation shared by the three arrays, owned through 'cpu.heap'.
 *
 * @tparam stack_words Number of words stored inline before falling back to the heap
 */
template <size_t stack_words = 1>
struct Words {
    explicit Words() = default;

    /// Creates the state for a buffer of 'size_bytes_' bytes: every page starts as CPU modified
    /// and untracked, and no page starts as GPU modified.
    explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
        num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
        if (num_words == 0) {
            // A zero sized buffer has no words; the tail cleanup below would otherwise index
            // NumWords() - 1 and write out of bounds.
            return;
        }
        if (IsShort()) {
            cpu.stack.fill(~u64{0});
            gpu.stack.fill(0);
            untracked.stack.fill(~u64{0});
        } else {
            // Share allocation between CPU and GPU pages and set their default values
            u64* const alloc = new u64[num_words * 3];
            cpu.heap = alloc;
            gpu.heap = alloc + num_words;
            untracked.heap = alloc + num_words * 2;
            std::fill_n(cpu.heap, num_words, ~u64{0});
            std::fill_n(gpu.heap, num_words, 0);
            std::fill_n(untracked.heap, num_words, ~u64{0});
        }
        // Clean up trailing bits: pages past the end of the buffer must not be flagged.
        const u64 last_word_size = size_bytes % BYTES_PER_WORD;
        const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
        // The modulo keeps the shift at 0 when the last word is completely in use.
        const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
        const u64 last_word = (~u64{0} << shift) >> shift;
        cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
        untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
    }

    ~Words() {
        Release();
    }

    /// Takes over the state of 'rhs'. Self-assignment is a no-op.
    Words& operator=(Words&& rhs) noexcept {
        if (this == &rhs) {
            // Releasing first and then copying from ourselves would leave dangling pointers.
            return *this;
        }
        Release();
        size_bytes = rhs.size_bytes;
        num_words = rhs.num_words;
        cpu = rhs.cpu;
        gpu = rhs.gpu;
        untracked = rhs.untracked;
        rhs.ResetAfterMove();
        return *this;
    }

    /// Takes over the state of 'rhs'.
    Words(Words&& rhs) noexcept
        : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
          untracked{rhs.untracked} {
        rhs.ResetAfterMove();
    }

    Words& operator=(const Words&) = delete;
    Words(const Words&) = delete;

    /// Returns true when the buffer fits in the small vector optimization
    [[nodiscard]] bool IsShort() const noexcept {
        return num_words <= stack_words;
    }

    /// Returns the number of words of the buffer
    [[nodiscard]] size_t NumWords() const noexcept {
        return num_words;
    }

    /// Release buffer resources. Calling it more than once is harmless.
    void Release() {
        if (!IsShort()) {
            // CPU written words is the base for the heap allocation
            delete[] cpu.heap;
        }
        // Drop every alias of the allocation so a later Release() cannot free it again.
        cpu.heap = nullptr;
        gpu.heap = nullptr;
        untracked.heap = nullptr;
    }

    /// Returns a mutable view over the words of the array selected by 'type'
    template <Type type>
    std::span<u64> Span() noexcept {
        if constexpr (type == Type::CPU) {
            return std::span<u64>(cpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::GPU) {
            return std::span<u64>(gpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::Untracked) {
            return std::span<u64>(untracked.Pointer(IsShort()), num_words);
        }
    }

    /// Returns a read-only view over the words of the array selected by 'type'
    template <Type type>
    std::span<const u64> Span() const noexcept {
        if constexpr (type == Type::CPU) {
            return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::GPU) {
            return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::Untracked) {
            return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
        }
    }

    u64 size_bytes = 0;   ///< Size in bytes of the tracked buffer
    size_t num_words = 0; ///< Number of state words in each array
    // Value-initialized so the heap pointers are null until an allocation is made.
    WordsArray<stack_words> cpu{};
    WordsArray<stack_words> gpu{};
    WordsArray<stack_words> untracked{};

private:
    /// Puts a moved-from instance into a state that owns nothing. A heap backed source becomes
    /// empty; a small-buffer source keeps its inline copy of the bits.
    void ResetAfterMove() noexcept {
        if (!IsShort()) {
            size_bytes = 0;
            num_words = 0;
        }
        cpu.heap = nullptr;
        gpu.heap = nullptr;
        untracked.heap = nullptr;
    }
};
/**
 * Tracks at page granularity which parts of a buffer are CPU modified, GPU modified and
 * untracked, using one bit per page, and forwards CPU tracking changes to the page tracker.
 *
 * @tparam stack_words Number of state words stored inline before falling back to the heap
 */
template <size_t stack_words = 1>
class WordManager {
public:
    explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes)
        : tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {}

    explicit WordManager() = default;

    /// Rebinds the manager to a new base CPU address; the state bits are left untouched
    void SetCpuAddress(VAddr new_cpu_addr) {
        cpu_addr = new_cpu_addr;
    }

    /// Returns the base CPU address of the managed buffer
    VAddr GetCpuAddr() const {
        return cpu_addr;
    }

    /**
     * Keeps only the bits of 'word' in the range [page_start, min(page_end, 64)).
     *
     * @param word       Word to filter
     * @param page_start Index of the first bit to keep
     * @param page_end   Index one past the last bit to keep; values above 64 are clamped
     * @return The filtered word, or 0 when the range is empty
     */
    static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
        constexpr size_t number_bits = sizeof(u64) * 8;
        const size_t clamped_end = std::min(page_end, number_bits);
        if (page_start >= clamped_end) {
            // Empty range. Also avoids shifting a 64-bit value by 64 or more bits, which is
            // undefined behaviour (page_end == 0 or page_start >= 64).
            return 0;
        }
        const size_t limit_page_end = number_bits - clamped_end;
        u64 bits = (word >> page_start) << page_start;
        bits = (bits << limit_page_end) >> limit_page_end;
        return bits;
    }

    /// Splits a byte offset into {index of its word, index of its page within that word}
    static std::pair<size_t, size_t> GetWordPage(VAddr address) {
        const size_t converted_address = static_cast<size_t>(address);
        const size_t word_number = converted_address / BYTES_PER_WORD;
        const size_t amount_pages = converted_address % BYTES_PER_WORD;
        return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE);
    }

    /**
     * Calls 'func(word_index, mask)' for every state word touched by a byte range, where 'mask'
     * selects the pages of that word that fall inside the range.
     *
     * @param offset Offset in bytes from the start of the buffer
     * @param size   Size in bytes of the range
     * @param func   Callback; when it returns bool, a true result stops the iteration
     */
    template <typename Func>
    void IterateWords(size_t offset, size_t size, Func&& func) const {
        using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>;
        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
        // Values that are negative when reinterpreted as signed are clamped to zero.
        const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
        const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
        if (start >= SizeBytes() || end <= start) {
            return;
        }
        auto [start_word, start_page] = GetWordPage(start);
        // Round the end up so a partially covered last page is included.
        auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
        const size_t num_words = NumWords();
        start_word = std::min(start_word, num_words);
        end_word = std::min(end_word, num_words);
        const size_t diff = end_word - start_word;
        // Include the last word when the range ends part-way through it.
        end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD;
        end_word = std::min(end_word, num_words);
        // Express end_page relative to start_word; it is rebased by one word per iteration.
        end_page += diff * PAGES_PER_WORD;
        constexpr u64 base_mask{~0ULL};
        for (size_t word_index = start_word; word_index < end_word; word_index++) {
            const u64 mask = ExtractBits(base_mask, start_page, end_page);
            // Only the first word can start at a non-zero page.
            start_page = 0;
            end_page -= PAGES_PER_WORD;
            if constexpr (BOOL_BREAK) {
                if (func(word_index, mask)) {
                    return;
                }
            } else {
                func(word_index, mask);
            }
        }
    }

    /// Calls 'func(first_page, page_count)' for every run of consecutive set bits in 'mask',
    /// from the lowest bit to the highest
    template <typename Func>
    void IteratePages(u64 mask, Func&& func) const {
        size_t offset = 0;
        while (mask != 0) {
            const size_t empty_bits = std::countr_zero(mask);
            offset += empty_bits;
            mask = mask >> empty_bits;
            const size_t continuous_bits = std::countr_one(mask);
            func(offset, continuous_bits);
            // A full run of 64 bits cannot be shifted out with a single shift.
            mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0;
            offset += continuous_bits;
        }
    }

    /**
     * Change the state of a range of pages
     *
     * For Type::CPU the untracked bits are updated alongside and the page tracker is notified
     * about pages whose tracking state changes.
     *
     * @param dirty_addr    Base address to mark or unmark as modified
     * @param size          Size in bytes to mark or unmark as modified
     */
    template <Type type, bool enable>
    void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
        std::span<u64> state_words = words.template Span<type>();
        [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
        IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
            if constexpr (type == Type::CPU) {
                // Notify before the untracked bits change so the tracker sees the old state.
                NotifyPageTracker<!enable>(index, untracked_words[index], mask);
            }
            if constexpr (enable) {
                state_words[index] |= mask;
                if constexpr (type == Type::CPU) {
                    untracked_words[index] |= mask;
                }
            } else {
                state_words[index] &= ~mask;
                if constexpr (type == Type::CPU) {
                    untracked_words[index] &= ~mask;
                }
            }
        });
    }

    /**
     * Loop over each page in the given range and call the given function on each modified
     * range, merging runs that continue across word boundaries. When 'clear' is true the
     * visited bits are turned off and, for Type::CPU, the tracker is notified.
     *
     * @param query_cpu_range Base CPU address to loop over
     * @param size            Size in bytes of the CPU range to loop over
     * @param func            Function called as func(cpu_address, size_bytes) per modified range
     */
    template <Type type, bool clear, typename Func>
    void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
        static_assert(type != Type::Untracked);

        std::span<u64> state_words = words.template Span<type>();
        [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
        const size_t offset = query_cpu_range - cpu_addr;
        // Run being accumulated, as page indices [pending_offset, pending_pointer).
        bool pending = false;
        size_t pending_offset{};
        size_t pending_pointer{};
        const auto release = [&]() {
            func(cpu_addr + pending_offset * BYTES_PER_PAGE,
                 (pending_pointer - pending_offset) * BYTES_PER_PAGE);
        };
        IterateWords(offset, size, [&](size_t index, u64 mask) {
            if constexpr (type == Type::GPU) {
                // GPU queries ignore pages that are currently untracked.
                mask &= ~untracked_words[index];
            }
            const u64 word = state_words[index] & mask;
            if constexpr (clear) {
                if constexpr (type == Type::CPU) {
                    NotifyPageTracker<true>(index, untracked_words[index], mask);
                }
                state_words[index] &= ~mask;
                if constexpr (type == Type::CPU) {
                    untracked_words[index] &= ~mask;
                }
            }
            const size_t base_offset = index * PAGES_PER_WORD;
            IteratePages(word, [&](size_t pages_offset, size_t pages_size) {
                const auto reset = [&]() {
                    pending_offset = base_offset + pages_offset;
                    pending_pointer = base_offset + pages_offset + pages_size;
                };
                if (!pending) {
                    reset();
                    pending = true;
                    return;
                }
                if (pending_pointer == base_offset + pages_offset) {
                    // The new run starts where the pending one ends: extend it.
                    pending_pointer += pages_size;
                    return;
                }
                release();
                reset();
            });
        });
        if (pending) {
            release();
        }
    }

    /**
     * Returns true when a region has been modified
     *
     * @param offset Offset in bytes from the start of the buffer
     * @param size   Size in bytes of the region to query for modifications
     */
    template <Type type>
    [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
        static_assert(type != Type::Untracked);

        const std::span<const u64> state_words = words.template Span<type>();
        [[maybe_unused]] const std::span<const u64> untracked_words =
            words.template Span<Type::Untracked>();
        bool result = false;
        IterateWords(offset, size, [&](size_t index, u64 mask) {
            if constexpr (type == Type::GPU) {
                // GPU queries ignore pages that are currently untracked.
                mask &= ~untracked_words[index];
            }
            const u64 word = state_words[index] & mask;
            if (word != 0) {
                result = true;
                return true;
            }
            return false;
        });
        return result;
    }

    /// Returns the number of words of the manager
    [[nodiscard]] size_t NumWords() const noexcept {
        return words.NumWords();
    }

    /// Returns the size in bytes of the manager
    [[nodiscard]] u64 SizeBytes() const noexcept {
        return words.size_bytes;
    }

    /// Returns true when the buffer fits in the small vector optimization
    [[nodiscard]] bool IsShort() const noexcept {
        return words.IsShort();
    }

private:
    /// Returns a mutable pointer to the first word of the array selected by 'type'
    template <Type type>
    u64* Array() noexcept {
        if constexpr (type == Type::CPU) {
            return words.cpu.Pointer(IsShort());
        } else if constexpr (type == Type::GPU) {
            return words.gpu.Pointer(IsShort());
        } else if constexpr (type == Type::Untracked) {
            return words.untracked.Pointer(IsShort());
        }
    }

    /// Returns a read-only pointer to the first word of the array selected by 'type'
    template <Type type>
    const u64* Array() const noexcept {
        if constexpr (type == Type::CPU) {
            return words.cpu.Pointer(IsShort());
        } else if constexpr (type == Type::GPU) {
            return words.gpu.Pointer(IsShort());
        } else if constexpr (type == Type::Untracked) {
            return words.untracked.Pointer(IsShort());
        }
    }

    /**
     * Notify tracker about changes in the CPU tracking state of a word in the buffer
     *
     * Only pages selected by 'new_bits' whose bit in 'current_bits' would actually change are
     * reported: set bits when adding to the tracker, cleared bits when removing from it.
     *
     * @param word_index   Index to the word to notify to the tracker
     * @param current_bits Current untracked state of the word
     * @param new_bits     Mask of the pages being changed
     *
     * @tparam add_to_tracker True when the tracker should start tracking the new pages
     */
    template <bool add_to_tracker>
    void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const {
        u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
        VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
        IteratePages(changed_bits, [&](size_t offset, size_t size) {
            tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE,
                                            add_to_tracker ? 1 : -1);
        });
    }

    PageManager* tracker = nullptr; ///< Null after default construction instead of indeterminate
    VAddr cpu_addr = 0;             ///< Base CPU address of the managed buffer
    Words<stack_words> words;       ///< Per-page state bits
};
} // namespace VideoCore