mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-22 14:31:39 +00:00
Merge pull request #80 from shadps4-emu/linux
common: Rework timekeeping with native RDTSC and port to linux
This commit is contained in:
commit
deb3551189
7
.gitmodules
vendored
7
.gitmodules
vendored
|
@ -33,11 +33,10 @@
|
|||
path = third-party/toml11
|
||||
url = https://github.com/ToruNiina/toml11
|
||||
branch = master
|
||||
[submodule "third-party/vulkan"]
|
||||
path = third-party/vulkan
|
||||
url = https://github.com/shadps4/vulkan.git
|
||||
branch = main
|
||||
[submodule "third-party/xxHash"]
|
||||
path = third-party/xxHash
|
||||
url = https://github.com/Cyan4973/xxHash.git
|
||||
branch = dev
|
||||
[submodule "third-party/vulkan"]
|
||||
path = third-party/vulkan
|
||||
url = https://github.com/GPUCode/vulkan
|
||||
|
|
|
@ -30,8 +30,8 @@ endfunction()
|
|||
add_subdirectory(third-party)
|
||||
include_directories(src)
|
||||
|
||||
set(LIBC_SOURCES src/core/hle/libraries/libc/Libc.cpp
|
||||
src/core/hle/libraries/libc/Libc.h
|
||||
set(LIBC_SOURCES src/core/hle/libraries/libc/libc.cpp
|
||||
src/core/hle/libraries/libc/libc.h
|
||||
src/core/hle/libraries/libc/printf.h
|
||||
src/core/hle/libraries/libc/va_ctx.h
|
||||
src/core/hle/libraries/libc/libc_cxa.cpp
|
||||
|
@ -77,12 +77,15 @@ add_executable(shadps4
|
|||
src/common/fs_file.h
|
||||
src/common/log.cpp
|
||||
src/common/log.h
|
||||
src/common/native_clock.cpp
|
||||
src/common/native_clock.h
|
||||
src/common/rdtsc.cpp
|
||||
src/common/rdtsc.h
|
||||
src/common/singleton.h
|
||||
src/common/string_util.cpp
|
||||
src/common/string_util.h
|
||||
src/common/timer.cpp
|
||||
src/common/timer.h
|
||||
src/common/types.h
|
||||
src/common/uint128.h
|
||||
src/common/version.h
|
||||
${LIBC_SOURCES}
|
||||
${USERSERVICE_SOURCES}
|
||||
|
@ -143,8 +146,6 @@ add_executable(shadps4
|
|||
src/core/PS4/HLE/Graphics/graphics_render.h
|
||||
src/core/PS4/GPU/tile_manager.cpp
|
||||
src/core/PS4/GPU/tile_manager.h
|
||||
src/emuTimer.cpp
|
||||
src/emuTimer.h
|
||||
src/core/hle/libraries/libkernel/time_management.cpp
|
||||
src/core/hle/libraries/libkernel/time_management.h
|
||||
"src/common/io_file.cpp" "src/common/io_file.h")
|
||||
|
@ -154,13 +155,15 @@ create_target_directory_groups(shadps4)
|
|||
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt spdlog::spdlog toml11::toml11)
|
||||
target_link_libraries(shadps4 PRIVATE discord-rpc imgui SDL3-shared vulkan-1 xxhash Zydis)
|
||||
if (WIN32)
|
||||
target_link_libraries(shadps4 PRIVATE mincore winpthread)
|
||||
target_link_libraries(shadps4 PRIVATE mincore winpthread clang_rt.builtins-x86_64.lib)
|
||||
endif()
|
||||
|
||||
add_custom_command(TARGET shadps4 POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||
$<TARGET_FILE:SDL3-shared>
|
||||
$<TARGET_FILE_DIR:shadps4>)
|
||||
add_custom_command(TARGET shadps4 POST_BUILD
|
||||
if (WIN32)
|
||||
add_custom_command(TARGET shadps4 POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||
"${PROJECT_SOURCE_DIR}/third-party/winpthread/bin/libwinpthread-1.dll" $<TARGET_FILE_DIR:shadps4>)
|
||||
endif()
|
||||
|
|
|
@ -109,7 +109,11 @@ int Init(bool use_stdout) {
|
|||
if (use_stdout) {
|
||||
sinks.push_back(std::make_shared<spdlog::sinks::stdout_color_sink_mt>());
|
||||
}
|
||||
#ifdef _WIN64
|
||||
sinks.push_back(std::make_shared<spdlog::sinks::basic_file_sink_mt>(L"shadps4.txt", true));
|
||||
#else
|
||||
sinks.push_back(std::make_shared<spdlog::sinks::basic_file_sink_mt>("shadps4.txt", true));
|
||||
#endif
|
||||
spdlog::set_default_logger(std::make_shared<spdlog::logger>("shadps4 logger", begin(sinks), end(sinks)));
|
||||
auto f = std::make_unique<spdlog::pattern_formatter>("%^|%L|: %v%$", spdlog::pattern_time_type::local, std::string("")); // disable eol
|
||||
spdlog::set_formatter(std::move(f));
|
||||
|
|
43
src/common/native_clock.cpp
Normal file
43
src/common/native_clock.cpp
Normal file
|
@ -0,0 +1,43 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/uint128.h"
|
||||
#include "common/native_clock.h"
|
||||
#include "common/rdtsc.h"
|
||||
#ifdef _WIN64
|
||||
#include <pthread_time.h>
|
||||
#else
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
namespace Common {
|
||||
|
||||
/// Measures the host TSC frequency once and precomputes the 64.64 fixed-point
/// factors used to convert raw TSC ticks into nanoseconds, microseconds and
/// milliseconds. The factor initializers read rdtsc_frequency, which is the
/// first member declared in the class and is therefore initialized first.
NativeClock::NativeClock()
    : rdtsc_frequency{EstimateRDTSCFrequency()},
      ns_rdtsc_factor{GetFixedPoint64Factor(std::nano::den, rdtsc_frequency)},
      us_rdtsc_factor{GetFixedPoint64Factor(std::micro::den, rdtsc_frequency)},
      ms_rdtsc_factor{GetFixedPoint64Factor(std::milli::den, rdtsc_frequency)} {}
|
||||
|
||||
/// Returns the current raw TSC reading scaled to nanoseconds: the upper
/// 64 bits of (ticks * ns_rdtsc_factor).
u64 NativeClock::GetTimeNS() const {
    const u64 ticks = GetUptime();
    return MultiplyHigh(ticks, ns_rdtsc_factor);
}
|
||||
|
||||
/// Returns the current raw TSC reading scaled to microseconds: the upper
/// 64 bits of (ticks * us_rdtsc_factor).
u64 NativeClock::GetTimeUS() const {
    const u64 ticks = GetUptime();
    return MultiplyHigh(ticks, us_rdtsc_factor);
}
|
||||
|
||||
/// Returns the current raw TSC reading scaled to milliseconds: the upper
/// 64 bits of (ticks * ms_rdtsc_factor).
u64 NativeClock::GetTimeMS() const {
    const u64 ticks = GetUptime();
    return MultiplyHigh(ticks, ms_rdtsc_factor);
}
|
||||
|
||||
/// Returns the raw, unscaled value of the host time-stamp counter as read by
/// FencedRDTSC().
u64 NativeClock::GetUptime() const {
    const u64 tsc_now = FencedRDTSC();
    return tsc_now;
}
|
||||
|
||||
/// Returns the CPU time reported by CLOCK_PROCESS_CPUTIME_ID, in microseconds.
///
/// @return Seconds and nanoseconds from clock_gettime() combined into
///         microseconds, or 0 if clock_gettime() reports an error.
u64 NativeClock::GetProcessTimeUS() const {
    // Value-initialize so a failed call can never leak indeterminate values.
    timespec ts{};
    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) != 0) {
        return 0;
    }
    // Convert each field to u64 before scaling so the arithmetic is done in
    // unsigned 64-bit regardless of the platform's time_t / long widths.
    const u64 whole_seconds_us = static_cast<u64>(ts.tv_sec) * 1'000'000ULL;
    const u64 fraction_us = static_cast<u64>(ts.tv_nsec) / 1'000ULL;
    return whole_seconds_us + fraction_us;
}
|
||||
|
||||
} // namespace Common
|
32
src/common/native_clock.h
Normal file
32
src/common/native_clock.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
class NativeClock final {
|
||||
public:
|
||||
explicit NativeClock();
|
||||
|
||||
u64 GetTscFrequency() const {
|
||||
return rdtsc_frequency;
|
||||
}
|
||||
|
||||
u64 GetTimeNS() const;
|
||||
u64 GetTimeUS() const;
|
||||
u64 GetTimeMS() const;
|
||||
u64 GetUptime() const;
|
||||
u64 GetProcessTimeUS() const;
|
||||
|
||||
private:
|
||||
u64 rdtsc_frequency;
|
||||
u64 ns_rdtsc_factor;
|
||||
u64 us_rdtsc_factor;
|
||||
u64 ms_rdtsc_factor;
|
||||
};
|
||||
|
||||
} // namespace Common
|
60
src/common/rdtsc.cpp
Normal file
60
src/common/rdtsc.cpp
Normal file
|
@ -0,0 +1,60 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <thread>
|
||||
#include "common/rdtsc.h"
|
||||
#include "common/uint128.h"
|
||||
|
||||
#ifdef _WIN64
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
namespace Common {
|
||||
|
||||
static constexpr size_t SecondToNanoseconds = 1000000000ULL;
|
||||
|
||||
template <u64 Nearest>
|
||||
static u64 RoundToNearest(u64 value) {
|
||||
const auto mod = value % Nearest;
|
||||
return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod);
|
||||
}
|
||||
|
||||
/// Returns the current wall-clock (realtime) timestamp in nanoseconds, using
/// the platform's native API.
static u64 GetTimeNs() {
#ifdef _WIN64
    // GetSystemTimePreciseAsFileTime reports time in 100ns units.
    static constexpr u64 NsPerFiletimeTick = 100;
    // Offset that converts the Windows epoch to the Unix epoch.
    static constexpr u64 UnixEpochInFiletimeTicks = 0x19DB1DED53E8000LL;

    FILETIME now;
    GetSystemTimePreciseAsFileTime(&now);
    const u64 high_part = static_cast<u64>(now.dwHighDateTime) << 32;
    const u64 low_part = static_cast<u64>(now.dwLowDateTime);
    return NsPerFiletimeTick * (high_part + low_part - UnixEpochInFiletimeTicks);
#elif defined(__APPLE__)
    return clock_gettime_nsec_np(CLOCK_REALTIME);
#else
    timespec now;
    clock_gettime(CLOCK_REALTIME, &now);
    const u64 whole_seconds_ns = now.tv_sec * SecondToNanoseconds;
    return whole_seconds_ns + now.tv_nsec;
#endif
}
|
||||
|
||||
/// Estimates the TSC frequency in Hz by counting TSC ticks across a ~100 ms
/// sleep timed with the wall clock, then rounding to the nearest 100 kHz.
u64 EstimateRDTSCFrequency() {
    // Warm-up: the first counter reads are thrown away.
    FencedRDTSC();
    std::this_thread::sleep_for(std::chrono::milliseconds{1});
    FencedRDTSC();

    // Sample wall clock and TSC back to back on both sides of the sleep.
    const u64 wall_begin_ns = GetTimeNs();
    const u64 tsc_begin = FencedRDTSC();
    std::this_thread::sleep_for(std::chrono::milliseconds{100});
    const u64 wall_finish_ns = GetTimeNs();
    const u64 tsc_finish = FencedRDTSC();

    // ticks * (ns per second) / elapsed ns == ticks per second.
    const u64 elapsed_ticks = tsc_finish - tsc_begin;
    const u64 elapsed_ns = wall_finish_ns - wall_begin_ns;
    const u64 ticks_per_second = MultiplyAndDivide64(elapsed_ticks, SecondToNanoseconds, elapsed_ns);
    return RoundToNearest<100'000>(ticks_per_second);
}
|
||||
|
||||
} // namespace Common
|
37
src/common/rdtsc.h
Normal file
37
src/common/rdtsc.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
#ifdef _MSC_VER
|
||||
__forceinline static u64 FencedRDTSC() {
|
||||
_mm_lfence();
|
||||
_ReadWriteBarrier();
|
||||
const u64 result = __rdtsc();
|
||||
_mm_lfence();
|
||||
_ReadWriteBarrier();
|
||||
return result;
|
||||
}
|
||||
#else
|
||||
static inline u64 FencedRDTSC() {
|
||||
u64 eax;
|
||||
u64 edx;
|
||||
asm volatile("lfence\n\t"
|
||||
"rdtsc\n\t"
|
||||
"lfence\n\t"
|
||||
: "=a"(eax), "=d"(edx));
|
||||
return (edx << 32) | eax;
|
||||
}
|
||||
#endif
|
||||
|
||||
u64 EstimateRDTSCFrequency();
|
||||
|
||||
} // namespace Common
|
|
@ -1,108 +0,0 @@
|
|||
#include "common/timer.h"
|
||||
|
||||
#ifdef _WIN64
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
namespace Common {
|
||||
|
||||
Timer::Timer() {
|
||||
#ifdef _WIN64
|
||||
LARGE_INTEGER f;
|
||||
QueryPerformanceFrequency(&f);
|
||||
m_Frequency = f.QuadPart;
|
||||
#else
|
||||
#error Unimplemented Timer constructor
|
||||
#endif
|
||||
}
|
||||
|
||||
void Timer::Start() {
|
||||
#ifdef _WIN64
|
||||
LARGE_INTEGER c;
|
||||
QueryPerformanceCounter(&c);
|
||||
m_StartTime = c.QuadPart;
|
||||
#else
|
||||
#error Unimplemented Timer::Start()
|
||||
#endif
|
||||
m_is_timer_paused = false;
|
||||
}
|
||||
|
||||
void Timer::Pause() {
|
||||
#ifdef _WIN64
|
||||
LARGE_INTEGER c;
|
||||
QueryPerformanceCounter(&c);
|
||||
m_PauseTime = c.QuadPart;
|
||||
#else
|
||||
#error Unimplemented Timer::Pause()
|
||||
#endif
|
||||
m_is_timer_paused = true;
|
||||
}
|
||||
|
||||
void Timer::Resume() {
|
||||
u64 current_time = 0;
|
||||
#ifdef _WIN64
|
||||
LARGE_INTEGER c;
|
||||
QueryPerformanceCounter(&c);
|
||||
current_time = c.QuadPart;
|
||||
#else
|
||||
#error Unimplemented Timer::Resume()
|
||||
#endif
|
||||
m_StartTime += current_time - m_PauseTime;
|
||||
m_is_timer_paused = false;
|
||||
}
|
||||
|
||||
double Timer::GetTimeMsec() const {
|
||||
if (m_is_timer_paused) {
|
||||
return 1000.0 * (static_cast<double>(m_PauseTime - m_StartTime)) / static_cast<double>(m_Frequency);
|
||||
}
|
||||
|
||||
u64 current_time = 0;
|
||||
#ifdef _WIN64
|
||||
LARGE_INTEGER c;
|
||||
QueryPerformanceCounter(&c);
|
||||
current_time = c.QuadPart;
|
||||
#else
|
||||
#error Unimplemented Timer::GetTimeMsec()
|
||||
#endif
|
||||
return 1000.0 * (static_cast<double>(current_time - m_StartTime)) / static_cast<double>(m_Frequency);
|
||||
}
|
||||
|
||||
double Timer::GetTimeSec() const {
|
||||
if (m_is_timer_paused) {
|
||||
return (static_cast<double>(m_PauseTime - m_StartTime)) / static_cast<double>(m_Frequency);
|
||||
}
|
||||
|
||||
u64 current_time = 0;
|
||||
#ifdef _WIN64
|
||||
LARGE_INTEGER c;
|
||||
QueryPerformanceCounter(&c);
|
||||
current_time = c.QuadPart;
|
||||
#else
|
||||
#error Unimplemented Timer::GetTimeSec()
|
||||
#endif
|
||||
return (static_cast<double>(current_time - m_StartTime)) / static_cast<double>(m_Frequency);
|
||||
}
|
||||
|
||||
u64 Timer::GetTicks() const {
|
||||
if (m_is_timer_paused) {
|
||||
return (m_PauseTime - m_StartTime);
|
||||
}
|
||||
|
||||
u64 current_time = 0;
|
||||
#ifdef _WIN64
|
||||
LARGE_INTEGER c;
|
||||
QueryPerformanceCounter(&c);
|
||||
current_time = c.QuadPart;
|
||||
#else
|
||||
#error Unimplemented Timer::GetTicks()
|
||||
#endif
|
||||
return (current_time - m_StartTime);
|
||||
}
|
||||
|
||||
u64 Timer::getQueryPerformanceCounter() {
|
||||
LARGE_INTEGER c;
|
||||
QueryPerformanceCounter(&c);
|
||||
return c.QuadPart;
|
||||
}
|
||||
|
||||
} // namespace Common
|
|
@ -1,43 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
class Timer final {
|
||||
public:
|
||||
Timer();
|
||||
~Timer() = default;
|
||||
|
||||
void Start();
|
||||
void Pause();
|
||||
void Resume();
|
||||
|
||||
bool IsPaused() const {
|
||||
return m_is_timer_paused;
|
||||
}
|
||||
|
||||
u64 GetFrequency() const {
|
||||
return m_Frequency;
|
||||
}
|
||||
|
||||
double GetTimeMsec() const;
|
||||
double GetTimeSec() const;
|
||||
u64 GetTicks() const;
|
||||
|
||||
[[nodiscard]] static u64 getQueryPerformanceCounter();
|
||||
|
||||
public:
|
||||
Timer(const Timer&) = delete;
|
||||
Timer& operator=(const Timer&) = delete;
|
||||
Timer(Timer&&) = delete;
|
||||
Timer& operator=(Timer&&) = delete;
|
||||
|
||||
private:
|
||||
bool m_is_timer_paused = true;
|
||||
u64 m_Frequency{};
|
||||
u64 m_StartTime{};
|
||||
u64 m_PauseTime{};
|
||||
};
|
||||
|
||||
} // namespace Common
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
|
||||
using s08 = std::int8_t;
|
||||
|
@ -15,9 +16,12 @@ using u64 = std::uint64_t;
|
|||
using f32 = float;
|
||||
using f64 = double;
|
||||
|
||||
using u128 = std::array<std::uint64_t, 2>;
|
||||
static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
|
||||
|
||||
#define PS4_SYSV_ABI __attribute__((sysv_abi))
|
||||
|
||||
// UDLs for memory size values
|
||||
constexpr u64 operator""_KB(u64 x) { return 1024ULL * x; }
|
||||
constexpr u64 operator""_MB(u64 x) { return 1024_KB * x; }
|
||||
constexpr u64 operator""_GB(u64 x) { return 1024_MB * x; }
|
||||
constexpr unsigned long long operator""_KB(unsigned long long x) { return 1024ULL * x; }
|
||||
constexpr unsigned long long operator""_MB(unsigned long long x) { return 1024_KB * x; }
|
||||
constexpr unsigned long long operator""_GB(unsigned long long x) { return 1024_MB * x; }
|
||||
|
|
115
src/common/uint128.h
Normal file
115
src/common/uint128.h
Normal file
|
@ -0,0 +1,115 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifndef __clang__
|
||||
#define HAS_INTRINSICS
|
||||
#include <intrin.h>
|
||||
#pragma intrinsic(__umulh)
|
||||
#pragma intrinsic(_umul128)
|
||||
#pragma intrinsic(_udiv128)
|
||||
#else
|
||||
#endif
|
||||
#else
|
||||
#include <cstring>
|
||||
#endif
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
// This function multiplies 2 u64 values and divides it by a u64 value.
|
||||
[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
|
||||
#ifdef HAS_INTRINSICS
|
||||
u128 r{};
|
||||
r[0] = _umul128(a, b, &r[1]);
|
||||
u64 remainder;
|
||||
return _udiv128(r[1], r[0], d, &remainder);
|
||||
#else
|
||||
const u64 diva = a / d;
|
||||
const u64 moda = a % d;
|
||||
const u64 divb = b / d;
|
||||
const u64 modb = b % d;
|
||||
return diva * b + moda * divb + moda * modb / d;
|
||||
#endif
|
||||
}
|
||||
|
||||
// This function multiplies 2 u64 values and produces a u128 value;
|
||||
[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) {
|
||||
u128 result;
|
||||
#ifdef HAS_INTRINSICS
|
||||
result[0] = _umul128(a, b, &result[1]);
|
||||
#else
|
||||
unsigned __int128 tmp = a;
|
||||
tmp *= b;
|
||||
std::memcpy(&result, &tmp, sizeof(u128));
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns floor((numerator * 2^64) / divisor) truncated to 64 bits. This is
/// the fixed-point factor that MultiplyHigh() uses to scale a tick count by
/// numerator / divisor.
[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
#ifdef __SIZEOF_INT128__
    const unsigned __int128 scaled = static_cast<unsigned __int128>(numerator) << 64;
    return static_cast<u64>(scaled / divisor);
#elif defined(_M_X64) || defined(_M_ARM64)
    // 128-bit dividend with numerator in the high word and zero in the low word.
    u64 remainder;
    return _udiv128(numerator, 0, divisor, &remainder);
#else
    // This path is slightly less accurate: it scales by 2^64 - 1 instead of 2^64.
    return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
#endif
}
|
||||
|
||||
/// Returns the upper 64 bits of the 128-bit product a * b.
[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) {
#ifdef __SIZEOF_INT128__
    const unsigned __int128 product = static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b);
    return product >> 64;
#elif defined(_M_X64) || defined(_M_ARM64)
    return __umulh(a, b); // MSVC
#else
    // Generic fallback: schoolbook multiplication on 32-bit halves.
    const u64 a_low = static_cast<u32>(a);
    const u64 a_high = a >> 32;
    const u64 b_low = static_cast<u32>(b);
    const u64 b_high = b >> 32;

    const u64 high_x_high = a_high * b_high;
    const u64 a_high_x_b_low = a_high * b_low;
    const u64 b_high_x_a_low = b_high * a_low;
    const u64 low_x_low = a_low * b_low;

    // Carry out of bits 32..63: sum the low halves of the two cross terms and
    // the high half of the low product, then keep what spills past 32 bits.
    const u64 cross_low_sum = static_cast<u64>(static_cast<u32>(a_high_x_b_low)) +
                              static_cast<u64>(static_cast<u32>(b_high_x_a_low)) + (low_x_low >> 32);
    const u64 carry = cross_low_sum >> 32;

    return high_x_high + (a_high_x_b_low >> 32) + (b_high_x_a_low >> 32) + carry;
#endif
}
|
||||
|
||||
// This function divides a u128 by a u32 value and produces two u64 values:
|
||||
// the result of division and the remainder
|
||||
[[nodiscard]] static inline std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
|
||||
u64 remainder = dividend[0] % divisor;
|
||||
u64 accum = dividend[0] / divisor;
|
||||
if (dividend[1] == 0)
|
||||
return {accum, remainder};
|
||||
// We ignore dividend[1] / divisor as that overflows
|
||||
const u64 first_segment = (dividend[1] % divisor) << 32;
|
||||
accum += (first_segment / divisor) << 32;
|
||||
const u64 second_segment = (first_segment % divisor) << 32;
|
||||
accum += (second_segment / divisor);
|
||||
remainder += second_segment % divisor;
|
||||
if (remainder >= divisor) {
|
||||
accum++;
|
||||
remainder -= divisor;
|
||||
}
|
||||
return {accum, remainder};
|
||||
}
|
||||
|
||||
} // namespace Common
|
|
@ -1,5 +1,5 @@
|
|||
#include "gpu_memory.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <xxh3.h>
|
||||
|
||||
#include "common/singleton.h"
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
|
||||
#include "common/types.h"
|
||||
#include <vector>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
#include <mutex>
|
||||
#include <memory>
|
||||
|
||||
namespace HLE::Libs::Graphics {
|
||||
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
@ -52,4 +54,4 @@ class HandleTable {
|
|||
std::mutex m_mutex;
|
||||
};
|
||||
|
||||
} // namespace Core::FileSys
|
||||
} // namespace Core::FileSys
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#include "common/debug.h"
|
||||
#include "common/timer.h"
|
||||
#include "core/hle/kernel/objects/event_queue.h"
|
||||
#include "core/hle/kernel/Objects/event_queue.h"
|
||||
|
||||
namespace Core::Kernel {
|
||||
|
||||
|
@ -24,28 +23,19 @@ int EqueueInternal::addEvent(const EqueueEvent& event) {
|
|||
|
||||
int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) {
|
||||
std::unique_lock lock{m_mutex};
|
||||
int ret = 0;
|
||||
|
||||
u32 timeElapsed = 0;
|
||||
Common::Timer t;
|
||||
t.Start();
|
||||
const auto predicate = [&] {
|
||||
ret = getTriggeredEvents(ev, num);
|
||||
return ret > 0;
|
||||
};
|
||||
|
||||
for (;;) {
|
||||
int ret = getTriggeredEvents(ev, num);
|
||||
|
||||
if (ret > 0 || (timeElapsed >= micros && micros != 0)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (micros == 0) {
|
||||
m_cond.wait(lock);
|
||||
} else {
|
||||
m_cond.wait_for(lock, std::chrono::microseconds(micros - timeElapsed));
|
||||
}
|
||||
|
||||
timeElapsed = static_cast<uint32_t>(t.GetTimeSec() * 1000000.0);
|
||||
if (micros == 0) {
|
||||
m_cond.wait(lock, predicate);
|
||||
} else {
|
||||
m_cond.wait_for(lock, std::chrono::microseconds(micros), predicate);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool EqueueInternal::triggerEvent(u64 ident, s16 filter, void* trigger_data) {
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <condition_variable>
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Core::Kernel {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#include "core/hle/kernel/objects/physical_memory.h"
|
||||
#include "core/hle/kernel/Objects/physical_memory.h"
|
||||
|
||||
namespace Core::Kernel {
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include "core/hle/kernel/objects/event_queue.h"
|
||||
#include "core/hle/kernel/Objects/event_queue.h"
|
||||
|
||||
namespace Core::Kernel {
|
||||
|
||||
|
|
|
@ -4,35 +4,35 @@
|
|||
namespace Core::Libraries::LibC {
|
||||
|
||||
float PS4_SYSV_ABI ps4_atan2f(float y, float x) {
|
||||
return std::atan2f(y, x);
|
||||
return atan2f(y, x);
|
||||
}
|
||||
|
||||
float PS4_SYSV_ABI ps4_acosf(float num) {
|
||||
return std::acosf(num);
|
||||
return acosf(num);
|
||||
}
|
||||
|
||||
float PS4_SYSV_ABI ps4_tanf(float num) {
|
||||
return std::tanf(num);
|
||||
return tanf(num);
|
||||
}
|
||||
|
||||
float PS4_SYSV_ABI ps4_asinf(float num) {
|
||||
return std::asinf(num);
|
||||
return asinf(num);
|
||||
}
|
||||
|
||||
double PS4_SYSV_ABI ps4_pow(double base, double exponent) {
|
||||
return std::pow(base, exponent);
|
||||
return pow(base, exponent);
|
||||
}
|
||||
|
||||
double PS4_SYSV_ABI ps4__Sin(double x) {
|
||||
return std::sin(x);
|
||||
return sin(x);
|
||||
}
|
||||
|
||||
float PS4_SYSV_ABI ps4__Fsin(float arg) {
|
||||
return std::sinf(arg);
|
||||
return sinf(arg);
|
||||
}
|
||||
|
||||
double PS4_SYSV_ABI ps4_exp2(double arg) {
|
||||
return std::exp2(arg);
|
||||
return exp2(arg);
|
||||
}
|
||||
|
||||
} // namespace Core::Libraries::LibC
|
||||
|
|
|
@ -12,7 +12,7 @@ int PS4_SYSV_ABI ps4_printf(VA_ARGS) {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI ps4_fprintf(FILE* file, VA_ARGS) {
|
||||
int fd = _fileno(file);
|
||||
int fd = fileno(file);
|
||||
if (fd == 1 || fd == 2) { // output stdout and stderr to console
|
||||
VA_CTX(ctx);
|
||||
return printf_ctx(&ctx);
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
#ifdef _WIN64
|
||||
#include <windows.h>
|
||||
#include <io.h>
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
#include "thread_management.h"
|
||||
|
||||
|
@ -56,6 +58,7 @@ int* PS4_SYSV_ABI __Error() { return &libc_error; }
|
|||
#define PROT_WRITE 0x2
|
||||
|
||||
int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, off_t offset, void** res) {
|
||||
#ifdef _WIN64
|
||||
PRINT_FUNCTION_NAME();
|
||||
if (prot > 3) // READ,WRITE or bitwise READ | WRITE supported
|
||||
{
|
||||
|
@ -86,6 +89,14 @@ int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd,
|
|||
}
|
||||
*res = ret;
|
||||
return 0;
|
||||
#else
|
||||
void* result = mmap(addr, len, prot, flags, fd, offset);
|
||||
if (result != MAP_FAILED) {
|
||||
*res = result;
|
||||
return 0;
|
||||
}
|
||||
std::abort();
|
||||
#endif
|
||||
}
|
||||
|
||||
PS4_SYSV_ABI void* posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) {
|
||||
|
|
|
@ -1,27 +1,31 @@
|
|||
#include "common/timer.h"
|
||||
#include "common/native_clock.h"
|
||||
#include "core/hle/libraries/libkernel/time_management.h"
|
||||
#include "core/hle/libraries/libs.h"
|
||||
#include "emuTimer.h"
|
||||
|
||||
namespace Core::Libraries::LibKernel {
|
||||
|
||||
static u64 initial_ptc;
|
||||
static std::unique_ptr<Common::NativeClock> clock;
|
||||
|
||||
u64 PS4_SYSV_ABI sceKernelGetProcessTime() {
|
||||
return static_cast<u64>(Emulator::emuTimer::getTimeMsec() * 1000.0); // return time in microseconds
|
||||
return clock->GetProcessTimeUS();
|
||||
}
|
||||
|
||||
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter() {
|
||||
return Emulator::emuTimer::getTimeCounter();
|
||||
return clock->GetUptime() - initial_ptc;
|
||||
}
|
||||
|
||||
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency() {
|
||||
return Emulator::emuTimer::getTimeFrequency();
|
||||
return clock->GetTscFrequency();
|
||||
}
|
||||
|
||||
u64 PS4_SYSV_ABI sceKernelReadTsc() {
|
||||
return Common::Timer::getQueryPerformanceCounter();
|
||||
return clock->GetUptime();
|
||||
}
|
||||
|
||||
void timeSymbolsRegister(Loader::SymbolsResolver* sym) {
|
||||
clock = std::make_unique<Common::NativeClock>();
|
||||
initial_ptc = clock->GetUptime();
|
||||
LIB_FUNCTION("4J2sUJmuHZQ", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTime);
|
||||
LIB_FUNCTION("fgxnMeTNUtY", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounter);
|
||||
LIB_FUNCTION("BNowx2l588E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounterFrequency);
|
||||
|
|
|
@ -658,12 +658,12 @@ void Linker::Resolve(const std::string& name, int Symtype, Module* m, Loader::Sy
|
|||
}
|
||||
else
|
||||
{
|
||||
__debugbreak();//den tha prepei na ftasoume edo
|
||||
//__debugbreak();//den tha prepei na ftasoume edo
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
__debugbreak();//oute edo mallon
|
||||
//__debugbreak();//oute edo mallon
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
namespace Core::Loader {
|
||||
|
||||
constexpr bool log_file_loader = true; // disable it to disable logging
|
||||
constexpr bool log_file_loader = false; // disable it to disable logging
|
||||
|
||||
static std::string_view getProgramTypeName(program_type_es type) {
|
||||
switch (type) {
|
||||
|
|
|
@ -79,7 +79,8 @@ bool memory_protect(u64 address, u64 size, MemoryMode mode, MemoryMode* old_mode
|
|||
}
|
||||
return true;
|
||||
#else
|
||||
#error Unimplement memory_protect function
|
||||
int ret = mprotect(reinterpret_cast<void*>(address), size, convertMemoryMode(mode));
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -117,6 +118,7 @@ bool memory_patch(u64 vaddr, u64 value) {
|
|||
static u64 AlignUp(u64 pos, u64 align) { return (align != 0 ? (pos + (align - 1)) & ~(align - 1) : pos); }
|
||||
|
||||
u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment) {
|
||||
#ifdef _WIN64
|
||||
// try allocate aligned address inside user area
|
||||
MEM_ADDRESS_REQUIREMENTS req{};
|
||||
MEM_EXTENDED_PARAMETER param{};
|
||||
|
@ -134,5 +136,13 @@ u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment)
|
|||
LOG_ERROR_IF(true, "VirtualAlloc2() failed: 0x{:X}\n", err);
|
||||
}
|
||||
return ptr;
|
||||
#else
|
||||
void* hint_address = reinterpret_cast<void*>(AlignUp(address, alignment));
|
||||
void* ptr = mmap(hint_address, size, convertMemoryMode(mode), MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||
if (ptr == MAP_FAILED) {
|
||||
std::abort();
|
||||
}
|
||||
return reinterpret_cast<u64>(ptr);
|
||||
#endif
|
||||
}
|
||||
} // namespace VirtualMemory
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
#include "common/timer.h"
|
||||
|
||||
namespace Emulator::emuTimer {
|
||||
|
||||
static Common::Timer timer;
|
||||
|
||||
void start() {
|
||||
timer.Start();
|
||||
}
|
||||
|
||||
double getTimeMsec() {
|
||||
return timer.GetTimeMsec();
|
||||
}
|
||||
|
||||
u64 getTimeCounter() {
|
||||
return timer.GetTicks();
|
||||
}
|
||||
|
||||
u64 getTimeFrequency() {
|
||||
return timer.GetFrequency();
|
||||
}
|
||||
|
||||
} // namespace Emulator::emuTimer
|
|
@ -1,10 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Emulator::emuTimer {
|
||||
void start();
|
||||
double getTimeMsec();
|
||||
u64 getTimeCounter();
|
||||
u64 getTimeFrequency();
|
||||
} // namespace Emulator::emuTimer
|
|
@ -1,6 +1,5 @@
|
|||
#include <fmt/core.h>
|
||||
#include <vulkan_util.h>
|
||||
#include "common/timer.h"
|
||||
#include "common/singleton.h"
|
||||
#include "common/version.h"
|
||||
#include "emulator.h"
|
||||
|
@ -90,8 +89,6 @@ static void calculateFps(double game_time_s) {
|
|||
}
|
||||
}
|
||||
void emuRun() {
|
||||
Common::Timer timer;
|
||||
timer.Start();
|
||||
auto window_ctx = Common::Singleton<Emu::WindowCtx>::Instance();
|
||||
{
|
||||
// init window and wait until init finishes
|
||||
|
@ -100,7 +97,7 @@ void emuRun() {
|
|||
Graphics::Vulkan::vulkanCreate(window_ctx);
|
||||
window_ctx->m_is_graphic_initialized = true;
|
||||
window_ctx->m_graphic_initialized_cond.notify_one();
|
||||
calculateFps(timer.GetTimeSec());
|
||||
calculateFps(0); // TODO: Proper fps
|
||||
}
|
||||
|
||||
bool exit_loop = false;
|
||||
|
@ -138,10 +135,6 @@ void emuRun() {
|
|||
continue;
|
||||
}
|
||||
if (m_game_is_paused) {
|
||||
if (!timer.IsPaused()) {
|
||||
timer.Pause();
|
||||
}
|
||||
|
||||
SDL_WaitEvent(&event);
|
||||
|
||||
switch (event.type) {
|
||||
|
@ -171,21 +164,13 @@ void emuRun() {
|
|||
continue;
|
||||
}
|
||||
exit_loop = m_emu_needs_exit;
|
||||
if (m_game_is_paused) {
|
||||
if (!timer.IsPaused()) {
|
||||
timer.Pause();
|
||||
}
|
||||
} else {
|
||||
if (timer.IsPaused()) {
|
||||
timer.Resume();
|
||||
}
|
||||
|
||||
if (!m_game_is_paused) {
|
||||
if (!exit_loop) {
|
||||
update();
|
||||
}
|
||||
if (!exit_loop) {
|
||||
if (HLE::Libs::Graphics::VideoOut::videoOutFlip(100000)) { // flip every 0.1 sec
|
||||
calculateFps(timer.GetTimeSec());
|
||||
calculateFps(0); // TODO: Proper fps
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -215,7 +200,7 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) {
|
|||
window_ctx->swapchain.current_index = static_cast<u32>(-1);
|
||||
|
||||
auto result = vkAcquireNextImageKHR(window_ctx->m_graphic_ctx.m_device, window_ctx->swapchain.swapchain, UINT64_MAX, nullptr,
|
||||
window_ctx->swapchain.present_complete_fence, &window_ctx->swapchain.current_index);
|
||||
VK_NULL_HANDLE, &window_ctx->swapchain.current_index);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
fmt::print("Can't aquireNextImage\n");
|
||||
|
@ -226,16 +211,6 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) {
|
|||
std::exit(0);
|
||||
}
|
||||
|
||||
do {
|
||||
result = vkWaitForFences(window_ctx->m_graphic_ctx.m_device, 1, &window_ctx->swapchain.present_complete_fence, VK_TRUE, 100000000);
|
||||
} while (result == VK_TIMEOUT);
|
||||
if (result != VK_SUCCESS) {
|
||||
fmt::print("vkWaitForFences is not success\n");
|
||||
std::exit(0);
|
||||
}
|
||||
|
||||
vkResetFences(window_ctx->m_graphic_ctx.m_device, 1, &window_ctx->swapchain.present_complete_fence);
|
||||
|
||||
auto blt_src_image = image;
|
||||
auto blt_dst_image = window_ctx->swapchain;
|
||||
|
||||
|
@ -272,6 +247,7 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) {
|
|||
|
||||
buffer.end();
|
||||
buffer.executeWithSemaphore();
|
||||
buffer.waitForFence(); // HACK: The whole vulkan backend needs a rewrite
|
||||
|
||||
VkPresentInfoKHR present{};
|
||||
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
#include "core/PS4/HLE/Graphics/video_out.h"
|
||||
#include "core/hle/libraries/libs.h"
|
||||
#include "core/linker.h"
|
||||
#include "emuTimer.h"
|
||||
#include "emulator.h"
|
||||
#include <core/hle/libraries/libkernel/thread_management.h>
|
||||
#include "core/file_sys/fs.h"
|
||||
|
@ -31,7 +30,6 @@ int main(int argc, char* argv[]) {
|
|||
auto height = Config::getScreenHeight();
|
||||
Emu::emuInit(width, height);
|
||||
HLE::Libs::Graphics::VideoOut::videoOutInit(width, height);
|
||||
Emulator::emuTimer::start();
|
||||
|
||||
// Argument 1 is the path of self file to boot
|
||||
const char* const path = argv[1];
|
||||
|
|
188
src/video_core/gpu_memory.cpp
Normal file
188
src/video_core/gpu_memory.cpp
Normal file
|
@ -0,0 +1,188 @@
|
|||
#include "gpu_memory.h"
|
||||
#include <atomic>
|
||||
#include <xxh3.h>
|
||||
|
||||
#include "common/singleton.h"
|
||||
|
||||
// Single-range convenience wrapper: registers one (virtual_addr, size) range with the
// process-wide GPUMemory singleton and returns the object produced for it
// (nullptr when the range does not belong to any registered heap).
void* GPU::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo /*CommandBuffer?*/, u64 virtual_addr, u64 size,
                           const GPUObject& info) {
    // Exactly one address range is described by this overload.
    constexpr int range_count = 1;

    // NOTE: `todo` is not forwarded; the member overload is always given nullptr.
    return Common::Singleton<GPUMemory>::Instance()->memoryCreateObj(submit_id, ctx, nullptr, &virtual_addr, &size, range_count, info);
}
|
||||
|
||||
// Registers a new heap covering [virtual_addr, virtual_addr + size) with the
// GPUMemory singleton. Objects can later be created inside that heap.
void GPU::memorySetAllocArea(u64 virtual_addr, u64 size) {
    auto* const memory = Common::Singleton<GPUMemory>::Instance();

    // Describe the heap first; only the list insertion needs the lock.
    MemoryHeap new_heap;
    new_heap.allocated_virtual_addr = virtual_addr;
    new_heap.allocated_size = size;

    std::scoped_lock guard{memory->m_mutex};
    memory->m_heaps.push_back(new_heap);
}
|
||||
|
||||
// Returns the XXH3 64-bit hash of `size` bytes at `buf`.
// A null buffer or a zero size hashes to 0.
u64 GPU::calculate_hash(const u08* buf, u64 size) {
    if (buf == nullptr || size == 0) {
        return 0;
    }
    return XXH3_64bits(buf, size);
}
|
||||
|
||||
// Allocates device memory satisfying `mem->requirements` and `mem->property`.
//
// On success fills mem->type, mem->offset, mem->unique_id and mem->memory and returns true.
// Returns false when no memory type of the physical device is both allowed by
// requirements.memoryTypeBits and has all requested property flags (in that case `mem`
// is left untouched), or when vkAllocateMemory itself fails.
bool GPU::vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem) {
    static std::atomic_uint64_t unique_id = 0;

    VkPhysicalDeviceMemoryProperties memory_properties{};
    vkGetPhysicalDeviceMemoryProperties(ctx->m_physical_device, &memory_properties);

    // Pick the first memory type that is permitted for the resource and carries every requested flag.
    u32 index = 0;
    for (; index < memory_properties.memoryTypeCount; index++) {
        const bool type_allowed = (mem->requirements.memoryTypeBits & (static_cast<uint32_t>(1) << index)) != 0;
        const bool has_properties = (memory_properties.memoryTypes[index].propertyFlags & mem->property) == mem->property;
        if (type_allowed && has_properties) {
            break;
        }
    }

    // The loop ran to completion: nothing matched. Passing memoryTypeCount as memoryTypeIndex
    // would be an invalid use of vkAllocateMemory, so report the failure instead.
    if (index == memory_properties.memoryTypeCount) {
        return false;
    }

    mem->type = index;
    mem->offset = 0;

    VkMemoryAllocateInfo alloc_info{};
    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    alloc_info.pNext = nullptr;
    alloc_info.allocationSize = mem->requirements.size;
    alloc_info.memoryTypeIndex = index;

    // Ids start at 1 and are consumed even if the allocation below fails.
    mem->unique_id = ++unique_id;

    const auto result = vkAllocateMemory(ctx->m_device, &alloc_info, nullptr, &mem->memory);
    return result == VK_SUCCESS;
}
|
||||
|
||||
// Re-validates every live object in every heap of the GPUMemory singleton.
void GPU::flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx) {
    Common::Singleton<GPUMemory>::Instance()->flushAllHeaps(ctx);
}
|
||||
|
||||
// Returns the index of the first heap that contains either the first or the last byte of
// [virtual_addr, virtual_addr + size), or -1 when no heap does.
// The caller is expected to hold m_mutex.
int GPU::GPUMemory::getHeapId(u64 virtual_addr, u64 size) {
    const u64 last_addr = virtual_addr + size - 1;

    const int heap_count = static_cast<int>(m_heaps.size());
    for (int id = 0; id < heap_count; id++) {
        const auto& heap = m_heaps[id];
        const u64 heap_begin = heap.allocated_virtual_addr;
        const u64 heap_end = heap.allocated_virtual_addr + heap.allocated_size;

        const bool first_byte_inside = virtual_addr >= heap_begin && virtual_addr < heap_end;
        const bool last_byte_inside = last_addr >= heap_begin && last_addr < heap_end;
        if (first_byte_inside || last_byte_inside) {
            return id;
        }
    }

    return -1;
}
|
||||
|
||||
// Creates a GPU object backed by `virtual_addr_num` guest address ranges and records it in
// the heap that owns the first range.
//
// submit_id        - stored on the object; update() compares later submits against it.
// ctx              - graphics context handed to the object's create function.
// todo             - currently unused (placeholder for a command buffer).
// virtual_addr/size - parallel arrays with `virtual_addr_num` entries each.
// info             - supplies the object parameters, type, hash policy and create/update functions.
//
// Returns the pointer produced by info's create function, or nullptr when
// `virtual_addr_num` is outside 1..3 or the first range is not inside any registered heap.
void* GPU::GPUMemory::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo, const u64* virtual_addr, const u64* size,
                                      int virtual_addr_num, const GPUObject& info) {
    // Per-object hash and block storage holds three ranges (update() also uses a 3-entry
    // array); keep this limit in sync with those. At least one range is required because
    // the owning heap is looked up through virtual_addr[0].
    constexpr int max_ranges = 3;
    if (virtual_addr_num < 1 || virtual_addr_num > max_ranges) {
        return nullptr;
    }

    auto* gpumemory = Common::Singleton<GPUMemory>::Instance();

    std::scoped_lock lock{gpumemory->m_mutex};

    int heap_id = gpumemory->getHeapId(virtual_addr[0], size[0]);

    if (heap_id < 0) {
        return nullptr;
    }
    auto& heap = m_heaps[heap_id];

    ObjInfo objInfo = {};

    // Copy parameters from info to obj
    for (int i = 0; i < 8; i++) {
        objInfo.obj_params[i] = info.obj_params[i];
    }

    objInfo.gpu_object.objectType = info.objectType;
    objInfo.gpu_object.obj = nullptr;

    // Snapshot the content hash of every range so update() can detect later modifications.
    for (int h = 0; h < virtual_addr_num; h++) {
        if (info.check_hash) {
            objInfo.hash[h] = GPU::calculate_hash(reinterpret_cast<const u08*>(virtual_addr[h]), size[h]);
        } else {
            objInfo.hash[h] = 0;
        }
    }
    objInfo.submit_id = submit_id;
    objInfo.check_hash = info.check_hash;

    objInfo.gpu_object.obj = info.getCreateFunc()(ctx, objInfo.obj_params, virtual_addr, size, virtual_addr_num, &objInfo.mem);

    objInfo.update_func = info.getUpdateFunc();

    // The new object is appended, so its id is the current object count.
    int index = static_cast<int>(heap.objects.size());

    HeapObject hobj{};
    hobj.block = createHeapBlock(virtual_addr, size, virtual_addr_num, heap_id, index);
    hobj.info = objInfo;
    hobj.free = false;
    heap.objects.push_back(hobj);

    return objInfo.gpu_object.obj;
}
|
||||
|
||||
// Builds a HeapBlock holding copies of the first `virtual_addr_num` entries of
// `virtual_addr` and `size`.
// `heap_id` and `obj_id` are accepted for interface compatibility but are not used.
GPU::HeapBlock GPU::GPUMemory::createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id) {
    GPU::HeapBlock heapBlock{};
    heapBlock.virtual_addr_num = virtual_addr_num;
    for (int vi = 0; vi < virtual_addr_num; vi++) {
        heapBlock.virtual_addr[vi] = virtual_addr[vi];
        heapBlock.size[vi] = size[vi];
    }
    return heapBlock;
}
|
||||
|
||||
// Refreshes object `obj_id` of heap `heap_id` if its backing memory changed.
//
// Nothing happens unless `submit_id` is newer than the submit recorded on the object.
// Otherwise each range is re-hashed (or treated as hash 0 when hashing is disabled), the
// stored hashes are brought up to date, and the object's update function is invoked when
// at least one hash differed. A submit_id of UINT64_MAX forces the check without being
// recorded on the object.
void GPU::GPUMemory::update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id) {
    auto& heapObj = m_heaps[heap_id].objects[obj_id];
    auto& objInfo = heapObj.info;
    auto& block = heapObj.block;

    // Already processed for this (or a later) submit.
    if (submit_id <= objInfo.submit_id) {
        return;
    }

    bool content_changed = false;
    for (int i = 0; i < block.virtual_addr_num; i++) {
        uint64_t current_hash = 0;
        if (objInfo.check_hash) {
            current_hash = GPU::calculate_hash(reinterpret_cast<const uint8_t*>(block.virtual_addr[i]), block.size[i]);
        }

        if (objInfo.hash[i] != current_hash) {
            objInfo.hash[i] = current_hash;
            content_changed = true;
        }
    }

    if (submit_id != UINT64_MAX) {
        objInfo.submit_id = submit_id;
    }

    if (content_changed) {
        objInfo.update_func(ctx, objInfo.obj_params, objInfo.gpu_object.obj, block.virtual_addr, block.size, block.virtual_addr_num);
    }
}
|
||||
|
||||
// Runs update() on every object that is not marked free, across all heaps, while holding
// m_mutex. UINT64_MAX is passed as the submit id.
void GPU::GPUMemory::flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx) {
    std::scoped_lock lock{m_mutex};

    for (int heap_id = 0; heap_id < static_cast<int>(m_heaps.size()); heap_id++) {
        auto& objects = m_heaps[heap_id].objects;
        for (int obj_id = 0; obj_id < static_cast<int>(objects.size()); obj_id++) {
            if (objects[obj_id].free) {
                continue;
            }
            update(UINT64_MAX, ctx, heap_id, obj_id);
        }
    }
}
|
86
src/video_core/gpu_memory.h
Normal file
86
src/video_core/gpu_memory.h
Normal file
|
@ -0,0 +1,86 @@
|
|||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class GPUObject;
|
||||
|
||||
enum class MemoryMode : u32 {
|
||||
NoAccess = 0,
|
||||
Read = 1,
|
||||
Write = 2,
|
||||
ReadWrite = 3,
|
||||
};
|
||||
|
||||
enum class MemoryObjectType : u64 {
|
||||
Invalid,
|
||||
VideoOutBuffer,
|
||||
};
|
||||
|
||||
struct GpuMemoryObject {
|
||||
MemoryObjectType object_type = MemoryObjectType::Invalid;
|
||||
void* obj = nullptr;
|
||||
};
|
||||
|
||||
struct HeapBlock {
|
||||
std::array<u64, 3> virtual_address{};
|
||||
std::array<u64, 3> size{};
|
||||
u32 virtual_addr_num = 0;
|
||||
};
|
||||
|
||||
class GPUObject {
|
||||
public:
|
||||
GPUObject() = default;
|
||||
virtual ~GPUObject() = default;
|
||||
u64 obj_params[8] = {};
|
||||
bool check_hash = false;
|
||||
bool isReadOnly = false;
|
||||
MemoryObjectType objectType = MemoryObjectType::Invalid;
|
||||
};
|
||||
|
||||
struct ObjInfo {
|
||||
std::array<u64, 8> obj_params{};
|
||||
GpuMemoryObject gpu_object;
|
||||
std::array<u64, 3> hash{};
|
||||
u64 submit_id = 0;
|
||||
bool check_hash = false;
|
||||
};
|
||||
|
||||
struct HeapObject {
|
||||
HeapBlock block;
|
||||
ObjInfo info;
|
||||
bool free = true;
|
||||
};
|
||||
struct MemoryHeap {
|
||||
u64 allocated_virtual_addr = 0;
|
||||
u64 allocated_size = 0;
|
||||
std::vector<HeapObject> objects;
|
||||
};
|
||||
|
||||
class GPUMemory {
|
||||
public:
|
||||
GPUMemory() {}
|
||||
virtual ~GPUMemory() {}
|
||||
int getHeapId(u64 vaddr, u64 size);
|
||||
void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, const u64* virtual_addr,
|
||||
const u64* size, int virtual_addr_num, const GPUObject& info);
|
||||
HeapBlock createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id);
|
||||
void update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id);
|
||||
void flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx);
|
||||
|
||||
private:
|
||||
std::mutex m_mutex;
|
||||
std::vector<MemoryHeap> m_heaps;
|
||||
};
|
||||
|
||||
void memorySetAllocArea(u64 virtual_addr, u64 size);
|
||||
void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, u64 virtual_addr, u64 size,
|
||||
const GPUObject& info);
|
||||
u64 calculate_hash(const u08* buf, u64 size);
|
||||
bool vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem);
|
||||
void flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx);
|
||||
|
||||
} // namespace VideoCore
|
151
src/video_core/tile_manager.cpp
Normal file
151
src/video_core/tile_manager.cpp
Normal file
|
@ -0,0 +1,151 @@
|
|||
#include <bit>
|
||||
#include <cstring>
|
||||
#include "video_core/tile_manager.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class TileManager32 {
|
||||
public:
|
||||
u32 m_macro_tile_height = 0;
|
||||
u32 m_bank_height = 0;
|
||||
u32 m_num_banks = 0;
|
||||
u32 m_num_pipes = 0;
|
||||
u32 m_padded_width = 0;
|
||||
u32 m_padded_height = 0;
|
||||
u32 m_pipe_bits = 0;
|
||||
u32 m_bank_bits = 0;
|
||||
|
||||
TileManager32(u32 width, u32 height, bool is_neo) {
|
||||
m_macro_tile_height = (is_neo ? 128 : 64);
|
||||
m_bank_height = is_neo ? 2 : 1;
|
||||
m_num_banks = is_neo ? 8 : 16;
|
||||
m_num_pipes = is_neo ? 16 : 8;
|
||||
m_padded_width = width;
|
||||
if (height == 1080) {
|
||||
m_padded_height = is_neo ? 1152 : 1088;
|
||||
}
|
||||
if (height == 720) {
|
||||
m_padded_height = 768;
|
||||
}
|
||||
m_pipe_bits = is_neo ? 4 : 3;
|
||||
m_bank_bits = is_neo ? 3 : 4;
|
||||
}
|
||||
|
||||
static u32 GetElementIndex(u32 x, u32 y) {
|
||||
u32 elem = 0;
|
||||
elem |= ((x >> 0u) & 0x1u) << 0u;
|
||||
elem |= ((x >> 1u) & 0x1u) << 1u;
|
||||
elem |= ((y >> 0u) & 0x1u) << 2u;
|
||||
elem |= ((x >> 2u) & 0x1u) << 3u;
|
||||
elem |= ((y >> 1u) & 0x1u) << 4u;
|
||||
elem |= ((y >> 2u) & 0x1u) << 5u;
|
||||
|
||||
return elem;
|
||||
}
|
||||
|
||||
static u32 GetPipeIndex(u32 x, u32 y, bool is_neo) {
|
||||
u32 pipe = 0;
|
||||
|
||||
if (!is_neo) {
|
||||
pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
|
||||
pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
|
||||
pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
|
||||
} else {
|
||||
pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
|
||||
pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
|
||||
pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
|
||||
pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u;
|
||||
}
|
||||
|
||||
return pipe;
|
||||
}
|
||||
|
||||
static u32 GetBankIndex(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks, u32 num_pipes) {
|
||||
const u32 x_shift_offset = std::bit_width(bank_width * num_pipes);
|
||||
const u32 y_shift_offset = std::bit_width(bank_height);
|
||||
const u32 xs = x >> x_shift_offset;
|
||||
const u32 ys = y >> y_shift_offset;
|
||||
u32 bank = 0;
|
||||
switch (num_banks) {
|
||||
case 8:
|
||||
bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u;
|
||||
bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u;
|
||||
bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u;
|
||||
break;
|
||||
case 16:
|
||||
bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u;
|
||||
bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u;
|
||||
bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u;
|
||||
bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u;
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
|
||||
return bank;
|
||||
}
|
||||
|
||||
u64 GetTiledOffset(u32 x, u32 y, bool is_neo) const {
|
||||
u64 element_index = GetElementIndex(x, y);
|
||||
|
||||
u32 xh = x;
|
||||
u32 yh = y;
|
||||
u64 pipe = GetPipeIndex(xh, yh, is_neo);
|
||||
u64 bank = GetBankIndex(xh, yh, 1, m_bank_height, m_num_banks, m_num_pipes);
|
||||
u32 tile_bytes = (8 * 8 * 32 + 7) / 8;
|
||||
u64 element_offset = (element_index * 32);
|
||||
u64 tile_split_slice = 0;
|
||||
|
||||
if (tile_bytes > 512) {
|
||||
tile_split_slice = element_offset / (static_cast<u64>(512) * 8);
|
||||
element_offset %= (static_cast<u64>(512) * 8);
|
||||
tile_bytes = 512;
|
||||
}
|
||||
|
||||
u64 macro_tile_bytes = (128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks);
|
||||
u64 macro_tiles_per_row = m_padded_width / 128;
|
||||
u64 macro_tile_row_index = y / m_macro_tile_height;
|
||||
u64 macro_tile_column_index = x / 128;
|
||||
u64 macro_tile_index = (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
|
||||
u64 macro_tile_offset = macro_tile_index * macro_tile_bytes;
|
||||
u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height);
|
||||
u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
|
||||
u64 slice_offset = tile_split_slice * slice_bytes;
|
||||
u64 tile_row_index = (y / 8) % m_bank_height;
|
||||
u64 tile_index = tile_row_index;
|
||||
u64 tile_offset = tile_index * tile_bytes;
|
||||
|
||||
u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice;
|
||||
bank ^= tile_split_slice_rotation;
|
||||
bank &= (m_num_banks - 1);
|
||||
|
||||
u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
|
||||
u64 bit_offset = total_offset & 0x7u;
|
||||
total_offset /= 8;
|
||||
|
||||
u64 pipe_interleave_offset = total_offset & 0xffu;
|
||||
u64 offset = total_offset >> 8u;
|
||||
u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) | (offset << (8u + m_pipe_bits + m_bank_bits));
|
||||
|
||||
return ((byte_offset << 3u) | bit_offset) / 8;
|
||||
}
|
||||
};
|
||||
|
||||
void ConvertTileToLinear(u08* dst, const u08* src,u32 width, u32 height, bool is_neo) {
|
||||
const TileManager32 t{width, height, is_neo};
|
||||
for (u32 y = 0; y < height; y++) {
|
||||
u32 x = 0;
|
||||
u64 linear_offset = y * width * 4;
|
||||
|
||||
for (; x + 1 < width; x += 2) {
|
||||
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
|
||||
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u64));
|
||||
linear_offset += sizeof(u64);
|
||||
}
|
||||
if (x < width) {
|
||||
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
|
||||
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u32));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
9
src/video_core/tile_manager.h
Normal file
9
src/video_core/tile_manager.h
Normal file
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Converts a tiled image with 32-bit pixels into a linear (row-major) image.
/// @param dst    Destination buffer receiving width * height pixels of 4 bytes each.
/// @param src    Tiled source image.
/// @param width  Image width in pixels.
/// @param height Image height in pixels.
/// @param neo    Selects the alternative ("neo") tiling configuration.
void ConvertTileToLinear(void* dst, const void* src, u32 width, u32 height, bool neo);
|
||||
|
||||
} // namespace VideoCore
|
3
third-party/CMakeLists.txt
vendored
3
third-party/CMakeLists.txt
vendored
|
@ -30,7 +30,6 @@ add_subdirectory(toml11 EXCLUDE_FROM_ALL)
|
|||
|
||||
# Vulkan
|
||||
add_subdirectory(vulkan EXCLUDE_FROM_ALL)
|
||||
target_include_directories(vulkan-1 INTERFACE vulkan/include)
|
||||
|
||||
# Winpthreads
|
||||
if (WIN32)
|
||||
|
@ -66,6 +65,6 @@ target_include_directories(imgui PUBLIC
|
|||
imgui/include
|
||||
)
|
||||
|
||||
target_link_libraries(imgui PRIVATE SDL3-shared ${CMAKE_DL_LIBS} Zydis winpthread discord-rpc)
|
||||
target_link_libraries(imgui PRIVATE SDL3-shared ${CMAKE_DL_LIBS} Zydis discord-rpc)
|
||||
|
||||
|
||||
|
|
2
third-party/fmt
vendored
2
third-party/fmt
vendored
|
@ -1 +1 @@
|
|||
Subproject commit 661b23edeb52d400cf5812e7330f14f05c072fab
|
||||
Subproject commit 8e42eef4950feb5d2b76574a9cd2591dfaae2449
|
2
third-party/spdlog
vendored
2
third-party/spdlog
vendored
|
@ -1 +1 @@
|
|||
Subproject commit 76dfc7e7c0d3c69d3cdaa3399b63545235ccbb02
|
||||
Subproject commit 134f9194bb93072b72b8cfa27ac3bb30a0fb5b57
|
2
third-party/vulkan
vendored
2
third-party/vulkan
vendored
|
@ -1 +1 @@
|
|||
Subproject commit 8c9feb4f480b32f7c7421af546aa6ffb558bdd5e
|
||||
Subproject commit 72b2e740754bc6b86b724fa5b2c90dca6f69462e
|
Loading…
Reference in a new issue