diff --git a/src/common/div_ceil.h b/src/common/div_ceil.h index de275e76..c12477d4 100755 --- a/src/common/div_ceil.h +++ b/src/common/div_ceil.h @@ -1,25 +1,25 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include - -namespace Common { - -/// Ceiled integer division. -template - requires std::is_integral_v && std::is_unsigned_v -[[nodiscard]] constexpr N DivCeil(N number, D divisor) { - return static_cast((static_cast(number) + divisor - 1) / divisor); -} - -/// Ceiled integer division with logarithmic divisor in base 2 -template - requires std::is_integral_v && std::is_unsigned_v -[[nodiscard]] constexpr N DivCeilLog2(N value, D alignment_log2) { - return static_cast((static_cast(value) + (D(1) << alignment_log2) - 1) >> alignment_log2); -} - -} // namespace Common +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +namespace Common { + +/// Ceiled integer division. +template + requires std::is_integral_v && std::is_unsigned_v +[[nodiscard]] constexpr N DivCeil(N number, D divisor) { + return static_cast((static_cast(number) + divisor - 1) / divisor); +} + +/// Ceiled integer division with logarithmic divisor in base 2 +template + requires std::is_integral_v && std::is_unsigned_v +[[nodiscard]] constexpr N DivCeilLog2(N value, D alignment_log2) { + return static_cast((static_cast(value) + (D(1) << alignment_log2) - 1) >> alignment_log2); +} + +} // namespace Common diff --git a/src/common/ntapi.cpp b/src/common/ntapi.cpp index c76c4657..46ec57e0 100644 --- a/src/common/ntapi.cpp +++ b/src/common/ntapi.cpp @@ -1,30 +1,30 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#ifdef _WIN32 - -#include "ntapi.h" - -NtClose_t NtClose = nullptr; -NtSetInformationFile_t NtSetInformationFile = nullptr; -NtCreateThread_t NtCreateThread = nullptr; -NtTerminateThread_t NtTerminateThread = nullptr; -NtQueueApcThreadEx_t NtQueueApcThreadEx = nullptr; - -namespace Common::NtApi { - -void Initialize() { - HMODULE nt_handle = GetModuleHandleA("ntdll.dll"); - - // http://stackoverflow.com/a/31411628/4725495 - NtClose = (NtClose_t)GetProcAddress(nt_handle, "NtClose"); - NtSetInformationFile = - (NtSetInformationFile_t)GetProcAddress(nt_handle, "NtSetInformationFile"); - NtCreateThread = (NtCreateThread_t)GetProcAddress(nt_handle, "NtCreateThread"); - NtTerminateThread = (NtTerminateThread_t)GetProcAddress(nt_handle, "NtTerminateThread"); - NtQueueApcThreadEx = (NtQueueApcThreadEx_t)GetProcAddress(nt_handle, "NtQueueApcThreadEx"); -} - -} // namespace Common::NtApi - -#endif +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#ifdef _WIN32 + +#include "ntapi.h" + +NtClose_t NtClose = nullptr; +NtSetInformationFile_t NtSetInformationFile = nullptr; +NtCreateThread_t NtCreateThread = nullptr; +NtTerminateThread_t NtTerminateThread = nullptr; +NtQueueApcThreadEx_t NtQueueApcThreadEx = nullptr; + +namespace Common::NtApi { + +void Initialize() { + HMODULE nt_handle = GetModuleHandleA("ntdll.dll"); + + // http://stackoverflow.com/a/31411628/4725495 + NtClose = (NtClose_t)GetProcAddress(nt_handle, "NtClose"); + NtSetInformationFile = + (NtSetInformationFile_t)GetProcAddress(nt_handle, "NtSetInformationFile"); + NtCreateThread = (NtCreateThread_t)GetProcAddress(nt_handle, "NtCreateThread"); + NtTerminateThread = (NtTerminateThread_t)GetProcAddress(nt_handle, "NtTerminateThread"); + NtQueueApcThreadEx = (NtQueueApcThreadEx_t)GetProcAddress(nt_handle, "NtQueueApcThreadEx"); +} + +} // namespace Common::NtApi + +#endif diff --git a/src/common/ntapi.h b/src/common/ntapi.h index daab8440..c876af58 100644 --- a/src/common/ntapi.h +++ b/src/common/ntapi.h @@ -1,554 +1,554 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#ifdef _WIN32 - -#include -#include "common/types.h" - -typedef enum _FILE_INFORMATION_CLASS { - FileDirectoryInformation = 1, - FileFullDirectoryInformation = 2, - FileBothDirectoryInformation = 3, - FileBasicInformation = 4, - FileStandardInformation = 5, - FileInternalInformation = 6, - FileEaInformation = 7, - FileAccessInformation = 8, - FileNameInformation = 9, - FileRenameInformation = 10, - FileLinkInformation = 11, - FileNamesInformation = 12, - FileDispositionInformation = 13, - FilePositionInformation = 14, - FileFullEaInformation = 15, - FileModeInformation = 16, - FileAlignmentInformation = 17, - FileAllInformation = 18, - FileAllocationInformation = 19, - FileEndOfFileInformation = 20, - FileAlternateNameInformation = 21, - FileStreamInformation = 22, - FilePipeInformation = 23, - FilePipeLocalInformation = 24, - FilePipeRemoteInformation = 25, - FileMailslotQueryInformation = 26, - FileMailslotSetInformation = 27, - FileCompressionInformation = 28, - FileObjectIdInformation = 29, - FileCompletionInformation = 30, - FileMoveClusterInformation = 31, - FileQuotaInformation = 32, - FileReparsePointInformation = 33, - FileNetworkOpenInformation = 34, - FileAttributeTagInformation = 35, - FileTrackingInformation = 36, - FileIdBothDirectoryInformation = 37, - FileIdFullDirectoryInformation = 38, - FileValidDataLengthInformation = 39, - FileShortNameInformation = 40, - FileIoCompletionNotificationInformation = 41, - FileIoStatusBlockRangeInformation = 42, - FileIoPriorityHintInformation = 43, - FileSfioReserveInformation = 44, - FileSfioVolumeInformation = 45, - FileHardLinkInformation = 46, - FileProcessIdsUsingFileInformation = 47, - FileNormalizedNameInformation = 48, - FileNetworkPhysicalNameInformation = 49, - FileIdGlobalTxDirectoryInformation = 50, - FileIsRemoteDeviceInformation = 51, - FileUnusedInformation = 52, - FileNumaNodeInformation = 53, - FileStandardLinkInformation = 54, - FileRemoteProtocolInformation = 55, - FileRenameInformationBypassAccessCheck = 56, - FileLinkInformationBypassAccessCheck = 57, - FileVolumeNameInformation = 58, - FileIdInformation = 59, - FileIdExtdDirectoryInformation = 60, - FileReplaceCompletionInformation = 61, - FileHardLinkFullIdInformation = 62, - FileIdExtdBothDirectoryInformation = 63, - FileDispositionInformationEx = 64, - FileRenameInformationEx = 65, - FileRenameInformationExBypassAccessCheck = 66, - FileDesiredStorageClassInformation = 67, - FileStatInformation = 68, - FileMemoryPartitionInformation = 69, - FileStatLxInformation = 70, - FileCaseSensitiveInformation = 71, - FileLinkInformationEx = 72, - FileLinkInformationExBypassAccessCheck = 73, - FileStorageReserveIdInformation = 74, - FileCaseSensitiveInformationForceAccessCheck = 75, - FileKnownFolderInformation = 76, - FileStatBasicInformation = 77, - FileId64ExtdDirectoryInformation = 78, - FileId64ExtdBothDirectoryInformation = 79, - FileIdAllExtdDirectoryInformation = 80, - FileIdAllExtdBothDirectoryInformation = 81, - FileStreamReservationInformation, - FileMupProviderInfo, - FileMaximumInformation -} FILE_INFORMATION_CLASS, - *PFILE_INFORMATION_CLASS; - -typedef struct _IO_STATUS_BLOCK { - union { - u32 Status; - PVOID Pointer; - }; - ULONG_PTR Information; -} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK; - -typedef struct _FILE_DISPOSITION_INFORMATION { - BOOLEAN DeleteFile; -} FILE_DISPOSITION_INFORMATION, *PFILE_DISPOSITION_INFORMATION; - -typedef struct _UNICODE_STRING { - USHORT Length; - USHORT MaximumLength; - PWCH Buffer; -} UNICODE_STRING, *PUNICODE_STRING; - -typedef const UNICODE_STRING* PCUNICODE_STRING; - -typedef struct _OBJECT_ATTRIBUTES { - ULONG Length; - HANDLE RootDirectory; - PCUNICODE_STRING ObjectName; - ULONG Attributes; - PVOID SecurityDescriptor; // PSECURITY_DESCRIPTOR; - PVOID SecurityQualityOfService; // PSECURITY_QUALITY_OF_SERVICE -} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES; - -typedef const OBJECT_ATTRIBUTES* PCOBJECT_ATTRIBUTES; - -typedef struct _CLIENT_ID { - HANDLE UniqueProcess; - HANDLE UniqueThread; -} CLIENT_ID, *PCLIENT_ID; - -typedef struct _INITIAL_TEB { - struct { - PVOID OldStackBase; - PVOID OldStackLimit; - } OldInitialTeb; - PVOID StackBase; - PVOID StackLimit; - PVOID StackAllocationBase; -} INITIAL_TEB, *PINITIAL_TEB; - -typedef struct _PEB_LDR_DATA { - ULONG Length; - BOOLEAN Initialized; - PVOID SsHandle; - LIST_ENTRY InLoadOrderModuleList; - LIST_ENTRY InMemoryOrderModuleList; - LIST_ENTRY InInitializationOrderModuleList; - PVOID EntryInProgress; - BOOLEAN ShutdownInProgress; - HANDLE ShutdownThreadId; -} PEB_LDR_DATA, *PPEB_LDR_DATA; - -typedef struct _CURDIR { - UNICODE_STRING DosPath; - PVOID Handle; -} CURDIR, *PCURDIR; - -typedef struct RTL_DRIVE_LETTER_CURDIR { - USHORT Flags; - USHORT Length; - ULONG TimeStamp; - UNICODE_STRING DosPath; -} RTL_DRIVE_LETTER_CURDIR, *PRTL_DRIVE_LETTER_CURDIR; - -typedef struct _RTL_USER_PROCESS_PARAMETERS { - ULONG AllocationSize; - ULONG Size; - ULONG Flags; - ULONG DebugFlags; - HANDLE ConsoleHandle; - ULONG ConsoleFlags; - HANDLE hStdInput; - HANDLE hStdOutput; - HANDLE hStdError; - CURDIR CurrentDirectory; - UNICODE_STRING DllPath; - UNICODE_STRING ImagePathName; - UNICODE_STRING CommandLine; - PWSTR Environment; - ULONG dwX; - ULONG dwY; - ULONG dwXSize; - ULONG dwYSize; - ULONG dwXCountChars; - ULONG dwYCountChars; - ULONG dwFillAttribute; - ULONG dwFlags; - ULONG wShowWindow; - UNICODE_STRING WindowTitle; - UNICODE_STRING Desktop; - UNICODE_STRING ShellInfo; - UNICODE_STRING RuntimeInfo; - RTL_DRIVE_LETTER_CURDIR DLCurrentDirectory[0x20]; - ULONG_PTR EnvironmentSize; - ULONG_PTR EnvironmentVersion; - PVOID PackageDependencyData; - ULONG ProcessGroupId; - ULONG LoaderThreads; -} RTL_USER_PROCESS_PARAMETERS, *PRTL_USER_PROCESS_PARAMETERS; - -typedef struct tagRTL_BITMAP { - ULONG SizeOfBitMap; - PULONG Buffer; -} RTL_BITMAP, *PRTL_BITMAP; - -typedef struct { - UINT next; - UINT id; - ULONGLONG addr; - ULONGLONG size; - UINT args[4]; -} CROSS_PROCESS_WORK_ENTRY; - -typedef union { - struct { - UINT first; - UINT counter; - }; - volatile LONGLONG hdr; -} CROSS_PROCESS_WORK_HDR; - -typedef struct { - CROSS_PROCESS_WORK_HDR free_list; - CROSS_PROCESS_WORK_HDR work_list; - ULONGLONG unknown[4]; - CROSS_PROCESS_WORK_ENTRY entries[1]; -} CROSS_PROCESS_WORK_LIST; - -typedef struct _CHPEV2_PROCESS_INFO { - ULONG Wow64ExecuteFlags; /* 000 */ - USHORT NativeMachineType; /* 004 */ - USHORT EmulatedMachineType; /* 006 */ - HANDLE SectionHandle; /* 008 */ - CROSS_PROCESS_WORK_LIST* CrossProcessWorkList; /* 010 */ - void* unknown; /* 018 */ -} CHPEV2_PROCESS_INFO, *PCHPEV2_PROCESS_INFO; - -typedef u64(__stdcall* KERNEL_CALLBACK_PROC)(void*, ULONG); - -typedef struct _PEB { /* win32/win64 */ - BOOLEAN InheritedAddressSpace; /* 000/000 */ - BOOLEAN ReadImageFileExecOptions; /* 001/001 */ - BOOLEAN BeingDebugged; /* 002/002 */ - UCHAR ImageUsedLargePages : 1; /* 003/003 */ - UCHAR IsProtectedProcess : 1; - UCHAR IsImageDynamicallyRelocated : 1; - UCHAR SkipPatchingUser32Forwarders : 1; - UCHAR IsPackagedProcess : 1; - UCHAR IsAppContainer : 1; - UCHAR IsProtectedProcessLight : 1; - UCHAR IsLongPathAwareProcess : 1; - HANDLE Mutant; /* 004/008 */ - HMODULE ImageBaseAddress; /* 008/010 */ - PPEB_LDR_DATA LdrData; /* 00c/018 */ - RTL_USER_PROCESS_PARAMETERS* ProcessParameters; /* 010/020 */ - PVOID SubSystemData; /* 014/028 */ - HANDLE ProcessHeap; /* 018/030 */ - PRTL_CRITICAL_SECTION FastPebLock; /* 01c/038 */ - PVOID AtlThunkSListPtr; /* 020/040 */ - PVOID IFEOKey; /* 024/048 */ - ULONG ProcessInJob : 1; /* 028/050 */ - ULONG ProcessInitializing : 1; - ULONG ProcessUsingVEH : 1; - ULONG ProcessUsingVCH : 1; - ULONG ProcessUsingFTH : 1; - ULONG ProcessPreviouslyThrottled : 1; - ULONG ProcessCurrentlyThrottled : 1; - ULONG ProcessImagesHotPatched : 1; - ULONG ReservedBits0 : 24; - KERNEL_CALLBACK_PROC* KernelCallbackTable; /* 02c/058 */ - ULONG Reserved; /* 030/060 */ - ULONG AtlThunkSListPtr32; /* 034/064 */ - PVOID ApiSetMap; /* 038/068 */ - ULONG TlsExpansionCounter; /* 03c/070 */ - PRTL_BITMAP TlsBitmap; /* 040/078 */ - ULONG TlsBitmapBits[2]; /* 044/080 */ - PVOID ReadOnlySharedMemoryBase; /* 04c/088 */ - PVOID SharedData; /* 050/090 */ - PVOID* ReadOnlyStaticServerData; /* 054/098 */ - PVOID AnsiCodePageData; /* 058/0a0 */ - PVOID OemCodePageData; /* 05c/0a8 */ - PVOID UnicodeCaseTableData; /* 060/0b0 */ - ULONG NumberOfProcessors; /* 064/0b8 */ - ULONG NtGlobalFlag; /* 068/0bc */ - LARGE_INTEGER CriticalSectionTimeout; /* 070/0c0 */ - SIZE_T HeapSegmentReserve; /* 078/0c8 */ - SIZE_T HeapSegmentCommit; /* 07c/0d0 */ - SIZE_T HeapDeCommitTotalFreeThreshold; /* 080/0d8 */ - SIZE_T HeapDeCommitFreeBlockThreshold; /* 084/0e0 */ - ULONG NumberOfHeaps; /* 088/0e8 */ - ULONG MaximumNumberOfHeaps; /* 08c/0ec */ - PVOID* ProcessHeaps; /* 090/0f0 */ - PVOID GdiSharedHandleTable; /* 094/0f8 */ - PVOID ProcessStarterHelper; /* 098/100 */ - PVOID GdiDCAttributeList; /* 09c/108 */ - PVOID LoaderLock; /* 0a0/110 */ - ULONG OSMajorVersion; /* 0a4/118 */ - ULONG OSMinorVersion; /* 0a8/11c */ - ULONG OSBuildNumber; /* 0ac/120 */ - ULONG OSPlatformId; /* 0b0/124 */ - ULONG ImageSubSystem; /* 0b4/128 */ - ULONG ImageSubSystemMajorVersion; /* 0b8/12c */ - ULONG ImageSubSystemMinorVersion; /* 0bc/130 */ - KAFFINITY ActiveProcessAffinityMask; /* 0c0/138 */ -#ifdef _WIN64 - ULONG GdiHandleBuffer[60]; /* /140 */ -#else - ULONG GdiHandleBuffer[34]; /* 0c4/ */ -#endif - PVOID PostProcessInitRoutine; /* 14c/230 */ - PRTL_BITMAP TlsExpansionBitmap; /* 150/238 */ - ULONG TlsExpansionBitmapBits[32]; /* 154/240 */ - ULONG SessionId; /* 1d4/2c0 */ - ULARGE_INTEGER AppCompatFlags; /* 1d8/2c8 */ - ULARGE_INTEGER AppCompatFlagsUser; /* 1e0/2d0 */ - PVOID ShimData; /* 1e8/2d8 */ - PVOID AppCompatInfo; /* 1ec/2e0 */ - UNICODE_STRING CSDVersion; /* 1f0/2e8 */ - PVOID ActivationContextData; /* 1f8/2f8 */ - PVOID ProcessAssemblyStorageMap; /* 1fc/300 */ - PVOID SystemDefaultActivationData; /* 200/308 */ - PVOID SystemAssemblyStorageMap; /* 204/310 */ - SIZE_T MinimumStackCommit; /* 208/318 */ - PVOID* FlsCallback; /* 20c/320 */ - LIST_ENTRY FlsListHead; /* 210/328 */ - union { - PRTL_BITMAP FlsBitmap; /* 218/338 */ -#ifdef _WIN64 - CHPEV2_PROCESS_INFO* ChpeV2ProcessInfo; /* /338 */ -#endif - }; - ULONG FlsBitmapBits[4]; /* 21c/340 */ - ULONG FlsHighIndex; /* 22c/350 */ - PVOID WerRegistrationData; /* 230/358 */ - PVOID WerShipAssertPtr; /* 234/360 */ - PVOID EcCodeBitMap; /* 238/368 */ - PVOID pImageHeaderHash; /* 23c/370 */ - ULONG HeapTracingEnabled : 1; /* 240/378 */ - ULONG CritSecTracingEnabled : 1; - ULONG LibLoaderTracingEnabled : 1; - ULONG SpareTracingBits : 29; - ULONGLONG CsrServerReadOnlySharedMemoryBase; /* 248/380 */ - ULONG TppWorkerpListLock; /* 250/388 */ - LIST_ENTRY TppWorkerpList; /* 254/390 */ - PVOID WaitOnAddressHashTable[0x80]; /* 25c/3a0 */ - PVOID TelemetryCoverageHeader; /* 45c/7a0 */ - ULONG CloudFileFlags; /* 460/7a8 */ - ULONG CloudFileDiagFlags; /* 464/7ac */ - CHAR PlaceholderCompatibilityMode; /* 468/7b0 */ - CHAR PlaceholderCompatibilityModeReserved[7]; /* 469/7b1 */ - PVOID LeapSecondData; /* 470/7b8 */ - ULONG LeapSecondFlags; /* 474/7c0 */ - ULONG NtGlobalFlag2; /* 478/7c4 */ -} PEB, *PPEB; - -typedef struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME { - struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME* Previous; - struct _ACTIVATION_CONTEXT* ActivationContext; - ULONG Flags; -} RTL_ACTIVATION_CONTEXT_STACK_FRAME, *PRTL_ACTIVATION_CONTEXT_STACK_FRAME; - -typedef struct _ACTIVATION_CONTEXT_STACK { - RTL_ACTIVATION_CONTEXT_STACK_FRAME* ActiveFrame; - LIST_ENTRY FrameListCache; - ULONG Flags; - ULONG NextCookieSequenceNumber; - ULONG_PTR StackId; -} ACTIVATION_CONTEXT_STACK, *PACTIVATION_CONTEXT_STACK; - -typedef struct _GDI_TEB_BATCH { - ULONG Offset; - HANDLE HDC; - ULONG Buffer[0x136]; -} GDI_TEB_BATCH; - -typedef struct _TEB_ACTIVE_FRAME_CONTEXT { - ULONG Flags; - const char* FrameName; -} TEB_ACTIVE_FRAME_CONTEXT, *PTEB_ACTIVE_FRAME_CONTEXT; - -typedef struct _TEB_ACTIVE_FRAME { - ULONG Flags; - struct _TEB_ACTIVE_FRAME* Previous; - TEB_ACTIVE_FRAME_CONTEXT* Context; -} TEB_ACTIVE_FRAME, *PTEB_ACTIVE_FRAME; - -typedef struct _TEB { /* win32/win64 */ - NT_TIB Tib; /* 000/0000 */ - PVOID EnvironmentPointer; /* 01c/0038 */ - CLIENT_ID ClientId; /* 020/0040 */ - PVOID ActiveRpcHandle; /* 028/0050 */ - PVOID ThreadLocalStoragePointer; /* 02c/0058 */ - PPEB Peb; /* 030/0060 */ - ULONG LastErrorValue; /* 034/0068 */ - ULONG CountOfOwnedCriticalSections; /* 038/006c */ - PVOID CsrClientThread; /* 03c/0070 */ - PVOID Win32ThreadInfo; /* 040/0078 */ - ULONG User32Reserved[26]; /* 044/0080 */ - ULONG UserReserved[5]; /* 0ac/00e8 */ - PVOID WOW32Reserved; /* 0c0/0100 */ - ULONG CurrentLocale; /* 0c4/0108 */ - ULONG FpSoftwareStatusRegister; /* 0c8/010c */ - PVOID ReservedForDebuggerInstrumentation[16]; /* 0cc/0110 */ -#ifdef _WIN64 - PVOID SystemReserved1[30]; /* /0190 */ -#else - PVOID SystemReserved1[26]; /* 10c/ */ -#endif - char PlaceholderCompatibilityMode; /* 174/0280 */ - BOOLEAN PlaceholderHydrationAlwaysExplicit; /* 175/0281 */ - char PlaceholderReserved[10]; /* 176/0282 */ - DWORD ProxiedProcessId; /* 180/028c */ - ACTIVATION_CONTEXT_STACK ActivationContextStack; /* 184/0290 */ - UCHAR WorkingOnBehalfOfTicket[8]; /* 19c/02b8 */ - LONG ExceptionCode; /* 1a4/02c0 */ - ACTIVATION_CONTEXT_STACK* ActivationContextStackPointer; /* 1a8/02c8 */ - ULONG_PTR InstrumentationCallbackSp; /* 1ac/02d0 */ - ULONG_PTR InstrumentationCallbackPreviousPc; /* 1b0/02d8 */ - ULONG_PTR InstrumentationCallbackPreviousSp; /* 1b4/02e0 */ -#ifdef _WIN64 - ULONG TxFsContext; /* /02e8 */ - BOOLEAN InstrumentationCallbackDisabled; /* /02ec */ - BOOLEAN UnalignedLoadStoreExceptions; /* /02ed */ -#else - BOOLEAN InstrumentationCallbackDisabled; /* 1b8/ */ - BYTE SpareBytes1[23]; /* 1b9/ */ - ULONG TxFsContext; /* 1d0/ */ -#endif - GDI_TEB_BATCH GdiTebBatch; /* 1d4/02f0 */ - CLIENT_ID RealClientId; /* 6b4/07d8 */ - HANDLE GdiCachedProcessHandle; /* 6bc/07e8 */ - ULONG GdiClientPID; /* 6c0/07f0 */ - ULONG GdiClientTID; /* 6c4/07f4 */ - PVOID GdiThreadLocaleInfo; /* 6c8/07f8 */ - ULONG_PTR Win32ClientInfo[62]; /* 6cc/0800 */ - PVOID glDispatchTable[233]; /* 7c4/09f0 */ - PVOID glReserved1[29]; /* b68/1138 */ - PVOID glReserved2; /* bdc/1220 */ - PVOID glSectionInfo; /* be0/1228 */ - PVOID glSection; /* be4/1230 */ - PVOID glTable; /* be8/1238 */ - PVOID glCurrentRC; /* bec/1240 */ - PVOID glContext; /* bf0/1248 */ - ULONG LastStatusValue; /* bf4/1250 */ - UNICODE_STRING StaticUnicodeString; /* bf8/1258 */ - WCHAR StaticUnicodeBuffer[261]; /* c00/1268 */ - PVOID DeallocationStack; /* e0c/1478 */ - PVOID TlsSlots[64]; /* e10/1480 */ - LIST_ENTRY TlsLinks; /* f10/1680 */ - PVOID Vdm; /* f18/1690 */ - PVOID ReservedForNtRpc; /* f1c/1698 */ - PVOID DbgSsReserved[2]; /* f20/16a0 */ - ULONG HardErrorMode; /* f28/16b0 */ -#ifdef _WIN64 - PVOID Instrumentation[11]; /* /16b8 */ -#else - PVOID Instrumentation[9]; /* f2c/ */ -#endif - GUID ActivityId; /* f50/1710 */ - PVOID SubProcessTag; /* f60/1720 */ - PVOID PerflibData; /* f64/1728 */ - PVOID EtwTraceData; /* f68/1730 */ - PVOID WinSockData; /* f6c/1738 */ - ULONG GdiBatchCount; /* f70/1740 */ - ULONG IdealProcessorValue; /* f74/1744 */ - ULONG GuaranteedStackBytes; /* f78/1748 */ - PVOID ReservedForPerf; /* f7c/1750 */ - PVOID ReservedForOle; /* f80/1758 */ - ULONG WaitingOnLoaderLock; /* f84/1760 */ - PVOID SavedPriorityState; /* f88/1768 */ - ULONG_PTR ReservedForCodeCoverage; /* f8c/1770 */ - PVOID ThreadPoolData; /* f90/1778 */ - PVOID* TlsExpansionSlots; /* f94/1780 */ -#ifdef _WIN64 - union { - PVOID DeallocationBStore; /* /1788 */ - PVOID* ChpeV2CpuAreaInfo; /* /1788 */ - } DUMMYUNIONNAME; - PVOID BStoreLimit; /* /1790 */ -#endif - ULONG MuiGeneration; /* f98/1798 */ - ULONG IsImpersonating; /* f9c/179c */ - PVOID NlsCache; /* fa0/17a0 */ - PVOID ShimData; /* fa4/17a8 */ - ULONG HeapVirtualAffinity; /* fa8/17b0 */ - PVOID CurrentTransactionHandle; /* fac/17b8 */ - TEB_ACTIVE_FRAME* ActiveFrame; /* fb0/17c0 */ - PVOID* FlsSlots; /* fb4/17c8 */ - PVOID PreferredLanguages; /* fb8/17d0 */ - PVOID UserPrefLanguages; /* fbc/17d8 */ - PVOID MergedPrefLanguages; /* fc0/17e0 */ - ULONG MuiImpersonation; /* fc4/17e8 */ - USHORT CrossTebFlags; /* fc8/17ec */ - USHORT SameTebFlags; /* fca/17ee */ - PVOID TxnScopeEnterCallback; /* fcc/17f0 */ - PVOID TxnScopeExitCallback; /* fd0/17f8 */ - PVOID TxnScopeContext; /* fd4/1800 */ - ULONG LockCount; /* fd8/1808 */ - LONG WowTebOffset; /* fdc/180c */ - PVOID ResourceRetValue; /* fe0/1810 */ - PVOID ReservedForWdf; /* fe4/1818 */ - ULONGLONG ReservedForCrt; /* fe8/1820 */ - GUID EffectiveContainerId; /* ff0/1828 */ -} TEB, *PTEB; -static_assert(offsetof(TEB, DeallocationStack) == - 0x1478); /* The only member we care about at the moment */ - -typedef enum _QUEUE_USER_APC_FLAGS { - QueueUserApcFlagsNone, - QueueUserApcFlagsSpecialUserApc, - QueueUserApcFlagsMaxValue -} QUEUE_USER_APC_FLAGS; - -typedef union _USER_APC_OPTION { - ULONG_PTR UserApcFlags; - HANDLE MemoryReserveHandle; -} USER_APC_OPTION, *PUSER_APC_OPTION; - -using PPS_APC_ROUTINE = void (*)(PVOID ApcArgument1, PVOID ApcArgument2, PVOID ApcArgument3, - PCONTEXT Context); - -typedef u64(__stdcall* NtClose_t)(HANDLE Handle); - -typedef u64(__stdcall* NtSetInformationFile_t)(HANDLE FileHandle, PIO_STATUS_BLOCK IoStatusBlock, - PVOID FileInformation, ULONG Length, - FILE_INFORMATION_CLASS FileInformationClass); - -typedef u64(__stdcall* NtCreateThread_t)(PHANDLE ThreadHandle, ACCESS_MASK DesiredAccess, - PCOBJECT_ATTRIBUTES ObjectAttributes, HANDLE ProcessHandle, - PCLIENT_ID ClientId, PCONTEXT ThreadContext, - PINITIAL_TEB InitialTeb, BOOLEAN CreateSuspended); - -typedef u64(__stdcall* NtTerminateThread_t)(HANDLE ThreadHandle, u64 ExitStatus); - -typedef u64(__stdcall* NtQueueApcThreadEx_t)(HANDLE ThreadHandle, - USER_APC_OPTION UserApcReserveHandle, - PPS_APC_ROUTINE ApcRoutine, PVOID ApcArgument1, - PVOID ApcArgument2, PVOID ApcArgument3); - -extern NtClose_t NtClose; -extern NtSetInformationFile_t NtSetInformationFile; -extern NtCreateThread_t NtCreateThread; -extern NtTerminateThread_t NtTerminateThread; -extern NtQueueApcThreadEx_t NtQueueApcThreadEx; - -namespace Common::NtApi { -void Initialize(); -} - -#endif +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#ifdef _WIN32 + +#include +#include "common/types.h" + +typedef enum _FILE_INFORMATION_CLASS { + FileDirectoryInformation = 1, + FileFullDirectoryInformation = 2, + FileBothDirectoryInformation = 3, + FileBasicInformation = 4, + FileStandardInformation = 5, + FileInternalInformation = 6, + FileEaInformation = 7, + FileAccessInformation = 8, + FileNameInformation = 9, + FileRenameInformation = 10, + FileLinkInformation = 11, + FileNamesInformation = 12, + FileDispositionInformation = 13, + FilePositionInformation = 14, + FileFullEaInformation = 15, + FileModeInformation = 16, + FileAlignmentInformation = 17, + FileAllInformation = 18, + FileAllocationInformation = 19, + FileEndOfFileInformation = 20, + FileAlternateNameInformation = 21, + FileStreamInformation = 22, + FilePipeInformation = 23, + FilePipeLocalInformation = 24, + FilePipeRemoteInformation = 25, + FileMailslotQueryInformation = 26, + FileMailslotSetInformation = 27, + FileCompressionInformation = 28, + FileObjectIdInformation = 29, + FileCompletionInformation = 30, + FileMoveClusterInformation = 31, + FileQuotaInformation = 32, + FileReparsePointInformation = 33, + FileNetworkOpenInformation = 34, + FileAttributeTagInformation = 35, + FileTrackingInformation = 36, + FileIdBothDirectoryInformation = 37, + FileIdFullDirectoryInformation = 38, + FileValidDataLengthInformation = 39, + FileShortNameInformation = 40, + FileIoCompletionNotificationInformation = 41, + FileIoStatusBlockRangeInformation = 42, + FileIoPriorityHintInformation = 43, + FileSfioReserveInformation = 44, + FileSfioVolumeInformation = 45, + FileHardLinkInformation = 46, + FileProcessIdsUsingFileInformation = 47, + FileNormalizedNameInformation = 48, + FileNetworkPhysicalNameInformation = 49, + FileIdGlobalTxDirectoryInformation = 50, + FileIsRemoteDeviceInformation = 51, + FileUnusedInformation = 52, + FileNumaNodeInformation = 53, + FileStandardLinkInformation = 54, + FileRemoteProtocolInformation = 55, + FileRenameInformationBypassAccessCheck = 56, + FileLinkInformationBypassAccessCheck = 57, + FileVolumeNameInformation = 58, + FileIdInformation = 59, + FileIdExtdDirectoryInformation = 60, + FileReplaceCompletionInformation = 61, + FileHardLinkFullIdInformation = 62, + FileIdExtdBothDirectoryInformation = 63, + FileDispositionInformationEx = 64, + FileRenameInformationEx = 65, + FileRenameInformationExBypassAccessCheck = 66, + FileDesiredStorageClassInformation = 67, + FileStatInformation = 68, + FileMemoryPartitionInformation = 69, + FileStatLxInformation = 70, + FileCaseSensitiveInformation = 71, + FileLinkInformationEx = 72, + FileLinkInformationExBypassAccessCheck = 73, + FileStorageReserveIdInformation = 74, + FileCaseSensitiveInformationForceAccessCheck = 75, + FileKnownFolderInformation = 76, + FileStatBasicInformation = 77, + FileId64ExtdDirectoryInformation = 78, + FileId64ExtdBothDirectoryInformation = 79, + FileIdAllExtdDirectoryInformation = 80, + FileIdAllExtdBothDirectoryInformation = 81, + FileStreamReservationInformation, + FileMupProviderInfo, + FileMaximumInformation +} FILE_INFORMATION_CLASS, + *PFILE_INFORMATION_CLASS; + +typedef struct _IO_STATUS_BLOCK { + union { + u32 Status; + PVOID Pointer; + }; + ULONG_PTR Information; +} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK; + +typedef struct _FILE_DISPOSITION_INFORMATION { + BOOLEAN DeleteFile; +} FILE_DISPOSITION_INFORMATION, *PFILE_DISPOSITION_INFORMATION; + +typedef struct _UNICODE_STRING { + USHORT Length; + USHORT MaximumLength; + PWCH Buffer; +} UNICODE_STRING, *PUNICODE_STRING; + +typedef const UNICODE_STRING* PCUNICODE_STRING; + +typedef struct _OBJECT_ATTRIBUTES { + ULONG Length; + HANDLE RootDirectory; + PCUNICODE_STRING ObjectName; + ULONG Attributes; + PVOID SecurityDescriptor; // PSECURITY_DESCRIPTOR; + PVOID SecurityQualityOfService; // PSECURITY_QUALITY_OF_SERVICE +} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES; + +typedef const OBJECT_ATTRIBUTES* PCOBJECT_ATTRIBUTES; + +typedef struct _CLIENT_ID { + HANDLE UniqueProcess; + HANDLE UniqueThread; +} CLIENT_ID, *PCLIENT_ID; + +typedef struct _INITIAL_TEB { + struct { + PVOID OldStackBase; + PVOID OldStackLimit; + } OldInitialTeb; + PVOID StackBase; + PVOID StackLimit; + PVOID StackAllocationBase; +} INITIAL_TEB, *PINITIAL_TEB; + +typedef struct _PEB_LDR_DATA { + ULONG Length; + BOOLEAN Initialized; + PVOID SsHandle; + LIST_ENTRY InLoadOrderModuleList; + LIST_ENTRY InMemoryOrderModuleList; + LIST_ENTRY InInitializationOrderModuleList; + PVOID EntryInProgress; + BOOLEAN ShutdownInProgress; + HANDLE ShutdownThreadId; +} PEB_LDR_DATA, *PPEB_LDR_DATA; + +typedef struct _CURDIR { + UNICODE_STRING DosPath; + PVOID Handle; +} CURDIR, *PCURDIR; + +typedef struct RTL_DRIVE_LETTER_CURDIR { + USHORT Flags; + USHORT Length; + ULONG TimeStamp; + UNICODE_STRING DosPath; +} RTL_DRIVE_LETTER_CURDIR, *PRTL_DRIVE_LETTER_CURDIR; + +typedef struct _RTL_USER_PROCESS_PARAMETERS { + ULONG AllocationSize; + ULONG Size; + ULONG Flags; + ULONG DebugFlags; + HANDLE ConsoleHandle; + ULONG ConsoleFlags; + HANDLE hStdInput; + HANDLE hStdOutput; + HANDLE hStdError; + CURDIR CurrentDirectory; + UNICODE_STRING DllPath; + UNICODE_STRING ImagePathName; + UNICODE_STRING CommandLine; + PWSTR Environment; + ULONG dwX; + ULONG dwY; + ULONG dwXSize; + ULONG dwYSize; + ULONG dwXCountChars; + ULONG dwYCountChars; + ULONG dwFillAttribute; + ULONG dwFlags; + ULONG wShowWindow; + UNICODE_STRING WindowTitle; + UNICODE_STRING Desktop; + UNICODE_STRING ShellInfo; + UNICODE_STRING RuntimeInfo; + RTL_DRIVE_LETTER_CURDIR DLCurrentDirectory[0x20]; + ULONG_PTR EnvironmentSize; + ULONG_PTR EnvironmentVersion; + PVOID PackageDependencyData; + ULONG ProcessGroupId; + ULONG LoaderThreads; +} RTL_USER_PROCESS_PARAMETERS, *PRTL_USER_PROCESS_PARAMETERS; + +typedef struct tagRTL_BITMAP { + ULONG SizeOfBitMap; + PULONG Buffer; +} RTL_BITMAP, *PRTL_BITMAP; + +typedef struct { + UINT next; + UINT id; + ULONGLONG addr; + ULONGLONG size; + UINT args[4]; +} CROSS_PROCESS_WORK_ENTRY; + +typedef union { + struct { + UINT first; + UINT counter; + }; + volatile LONGLONG hdr; +} CROSS_PROCESS_WORK_HDR; + +typedef struct { + CROSS_PROCESS_WORK_HDR free_list; + CROSS_PROCESS_WORK_HDR work_list; + ULONGLONG unknown[4]; + CROSS_PROCESS_WORK_ENTRY entries[1]; +} CROSS_PROCESS_WORK_LIST; + +typedef struct _CHPEV2_PROCESS_INFO { + ULONG Wow64ExecuteFlags; /* 000 */ + USHORT NativeMachineType; /* 004 */ + USHORT EmulatedMachineType; /* 006 */ + HANDLE SectionHandle; /* 008 */ + CROSS_PROCESS_WORK_LIST* CrossProcessWorkList; /* 010 */ + void* unknown; /* 018 */ +} CHPEV2_PROCESS_INFO, *PCHPEV2_PROCESS_INFO; + +typedef u64(__stdcall* KERNEL_CALLBACK_PROC)(void*, ULONG); + +typedef struct _PEB { /* win32/win64 */ + BOOLEAN InheritedAddressSpace; /* 000/000 */ + BOOLEAN ReadImageFileExecOptions; /* 001/001 */ + BOOLEAN BeingDebugged; /* 002/002 */ + UCHAR ImageUsedLargePages : 1; /* 003/003 */ + UCHAR IsProtectedProcess : 1; + UCHAR IsImageDynamicallyRelocated : 1; + UCHAR SkipPatchingUser32Forwarders : 1; + UCHAR IsPackagedProcess : 1; + UCHAR IsAppContainer : 1; + UCHAR IsProtectedProcessLight : 1; + UCHAR IsLongPathAwareProcess : 1; + HANDLE Mutant; /* 004/008 */ + HMODULE ImageBaseAddress; /* 008/010 */ + PPEB_LDR_DATA LdrData; /* 00c/018 */ + RTL_USER_PROCESS_PARAMETERS* ProcessParameters; /* 010/020 */ + PVOID SubSystemData; /* 014/028 */ + HANDLE ProcessHeap; /* 018/030 */ + PRTL_CRITICAL_SECTION FastPebLock; /* 01c/038 */ + PVOID AtlThunkSListPtr; /* 020/040 */ + PVOID IFEOKey; /* 024/048 */ + ULONG ProcessInJob : 1; /* 028/050 */ + ULONG ProcessInitializing : 1; + ULONG ProcessUsingVEH : 1; + ULONG ProcessUsingVCH : 1; + ULONG ProcessUsingFTH : 1; + ULONG ProcessPreviouslyThrottled : 1; + ULONG ProcessCurrentlyThrottled : 1; + ULONG ProcessImagesHotPatched : 1; + ULONG ReservedBits0 : 24; + KERNEL_CALLBACK_PROC* KernelCallbackTable; /* 02c/058 */ + ULONG Reserved; /* 030/060 */ + ULONG AtlThunkSListPtr32; /* 034/064 */ + PVOID ApiSetMap; /* 038/068 */ + ULONG TlsExpansionCounter; /* 03c/070 */ + PRTL_BITMAP TlsBitmap; /* 040/078 */ + ULONG TlsBitmapBits[2]; /* 044/080 */ + PVOID ReadOnlySharedMemoryBase; /* 04c/088 */ + PVOID SharedData; /* 050/090 */ + PVOID* ReadOnlyStaticServerData; /* 054/098 */ + PVOID AnsiCodePageData; /* 058/0a0 */ + PVOID OemCodePageData; /* 05c/0a8 */ + PVOID UnicodeCaseTableData; /* 060/0b0 */ + ULONG NumberOfProcessors; /* 064/0b8 */ + ULONG NtGlobalFlag; /* 068/0bc */ + LARGE_INTEGER CriticalSectionTimeout; /* 070/0c0 */ + SIZE_T HeapSegmentReserve; /* 078/0c8 */ + SIZE_T HeapSegmentCommit; /* 07c/0d0 */ + SIZE_T HeapDeCommitTotalFreeThreshold; /* 080/0d8 */ + SIZE_T HeapDeCommitFreeBlockThreshold; /* 084/0e0 */ + ULONG NumberOfHeaps; /* 088/0e8 */ + ULONG MaximumNumberOfHeaps; /* 08c/0ec */ + PVOID* ProcessHeaps; /* 090/0f0 */ + PVOID GdiSharedHandleTable; /* 094/0f8 */ + PVOID ProcessStarterHelper; /* 098/100 */ + PVOID GdiDCAttributeList; /* 09c/108 */ + PVOID LoaderLock; /* 0a0/110 */ + ULONG OSMajorVersion; /* 0a4/118 */ + ULONG OSMinorVersion; /* 0a8/11c */ + ULONG OSBuildNumber; /* 0ac/120 */ + ULONG OSPlatformId; /* 0b0/124 */ + ULONG ImageSubSystem; /* 0b4/128 */ + ULONG ImageSubSystemMajorVersion; /* 0b8/12c */ + ULONG ImageSubSystemMinorVersion; /* 0bc/130 */ + KAFFINITY ActiveProcessAffinityMask; /* 0c0/138 */ +#ifdef _WIN64 + ULONG GdiHandleBuffer[60]; /* /140 */ +#else + ULONG GdiHandleBuffer[34]; /* 0c4/ */ +#endif + PVOID PostProcessInitRoutine; /* 14c/230 */ + PRTL_BITMAP TlsExpansionBitmap; /* 150/238 */ + ULONG TlsExpansionBitmapBits[32]; /* 154/240 */ + ULONG SessionId; /* 1d4/2c0 */ + ULARGE_INTEGER AppCompatFlags; /* 1d8/2c8 */ + ULARGE_INTEGER AppCompatFlagsUser; /* 1e0/2d0 */ + PVOID ShimData; /* 1e8/2d8 */ + PVOID AppCompatInfo; /* 1ec/2e0 */ + UNICODE_STRING CSDVersion; /* 1f0/2e8 */ + PVOID ActivationContextData; /* 1f8/2f8 */ + PVOID ProcessAssemblyStorageMap; /* 1fc/300 */ + PVOID SystemDefaultActivationData; /* 200/308 */ + PVOID SystemAssemblyStorageMap; /* 204/310 */ + SIZE_T MinimumStackCommit; /* 208/318 */ + PVOID* FlsCallback; /* 20c/320 */ + LIST_ENTRY FlsListHead; /* 210/328 */ + union { + PRTL_BITMAP FlsBitmap; /* 218/338 */ +#ifdef _WIN64 + CHPEV2_PROCESS_INFO* ChpeV2ProcessInfo; /* /338 */ +#endif + }; + ULONG FlsBitmapBits[4]; /* 21c/340 */ + ULONG FlsHighIndex; /* 22c/350 */ + PVOID WerRegistrationData; /* 230/358 */ + PVOID WerShipAssertPtr; /* 234/360 */ + PVOID EcCodeBitMap; /* 238/368 */ + PVOID pImageHeaderHash; /* 23c/370 */ + ULONG HeapTracingEnabled : 1; /* 240/378 */ + ULONG CritSecTracingEnabled : 1; + ULONG LibLoaderTracingEnabled : 1; + ULONG SpareTracingBits : 29; + ULONGLONG CsrServerReadOnlySharedMemoryBase; /* 248/380 */ + ULONG TppWorkerpListLock; /* 250/388 */ + LIST_ENTRY TppWorkerpList; /* 254/390 */ + PVOID WaitOnAddressHashTable[0x80]; /* 25c/3a0 */ + PVOID TelemetryCoverageHeader; /* 45c/7a0 */ + ULONG CloudFileFlags; /* 460/7a8 */ + ULONG CloudFileDiagFlags; /* 464/7ac */ + CHAR PlaceholderCompatibilityMode; /* 468/7b0 */ + CHAR PlaceholderCompatibilityModeReserved[7]; /* 469/7b1 */ + PVOID LeapSecondData; /* 470/7b8 */ + ULONG LeapSecondFlags; /* 474/7c0 */ + ULONG NtGlobalFlag2; /* 478/7c4 */ +} PEB, *PPEB; + +typedef struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME { + struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME* Previous; + struct _ACTIVATION_CONTEXT* ActivationContext; + ULONG Flags; +} RTL_ACTIVATION_CONTEXT_STACK_FRAME, *PRTL_ACTIVATION_CONTEXT_STACK_FRAME; + +typedef struct _ACTIVATION_CONTEXT_STACK { + RTL_ACTIVATION_CONTEXT_STACK_FRAME* ActiveFrame; + LIST_ENTRY FrameListCache; + ULONG Flags; + ULONG NextCookieSequenceNumber; + ULONG_PTR StackId; +} ACTIVATION_CONTEXT_STACK, *PACTIVATION_CONTEXT_STACK; + +typedef struct _GDI_TEB_BATCH { + ULONG Offset; + HANDLE HDC; + ULONG Buffer[0x136]; +} GDI_TEB_BATCH; + +typedef struct _TEB_ACTIVE_FRAME_CONTEXT { + ULONG Flags; + const char* FrameName; +} TEB_ACTIVE_FRAME_CONTEXT, *PTEB_ACTIVE_FRAME_CONTEXT; + +typedef struct _TEB_ACTIVE_FRAME { + ULONG Flags; + struct _TEB_ACTIVE_FRAME* Previous; + TEB_ACTIVE_FRAME_CONTEXT* Context; +} TEB_ACTIVE_FRAME, *PTEB_ACTIVE_FRAME; + +typedef struct _TEB { /* win32/win64 */ + NT_TIB Tib; /* 000/0000 */ + PVOID EnvironmentPointer; /* 01c/0038 */ + CLIENT_ID ClientId; /* 020/0040 */ + PVOID ActiveRpcHandle; /* 028/0050 */ + PVOID ThreadLocalStoragePointer; /* 02c/0058 */ + PPEB Peb; /* 030/0060 */ + ULONG LastErrorValue; /* 034/0068 */ + ULONG CountOfOwnedCriticalSections; /* 038/006c */ + PVOID CsrClientThread; /* 03c/0070 */ + PVOID Win32ThreadInfo; /* 040/0078 */ + ULONG User32Reserved[26]; /* 044/0080 */ + ULONG UserReserved[5]; /* 0ac/00e8 */ + PVOID WOW32Reserved; /* 0c0/0100 */ + ULONG CurrentLocale; /* 0c4/0108 */ + ULONG FpSoftwareStatusRegister; /* 0c8/010c */ + PVOID ReservedForDebuggerInstrumentation[16]; /* 0cc/0110 */ +#ifdef _WIN64 + PVOID SystemReserved1[30]; /* /0190 */ +#else + PVOID SystemReserved1[26]; /* 10c/ */ +#endif + char PlaceholderCompatibilityMode; /* 174/0280 */ + BOOLEAN PlaceholderHydrationAlwaysExplicit; /* 175/0281 */ + char PlaceholderReserved[10]; /* 176/0282 */ + DWORD ProxiedProcessId; /* 180/028c */ + ACTIVATION_CONTEXT_STACK ActivationContextStack; /* 184/0290 */ + UCHAR WorkingOnBehalfOfTicket[8]; /* 19c/02b8 */ + LONG ExceptionCode; /* 1a4/02c0 */ + ACTIVATION_CONTEXT_STACK* ActivationContextStackPointer; /* 1a8/02c8 */ + ULONG_PTR InstrumentationCallbackSp; /* 1ac/02d0 */ + ULONG_PTR InstrumentationCallbackPreviousPc; /* 1b0/02d8 */ + ULONG_PTR InstrumentationCallbackPreviousSp; /* 1b4/02e0 */ +#ifdef _WIN64 + ULONG TxFsContext; /* /02e8 */ + BOOLEAN InstrumentationCallbackDisabled; /* /02ec */ + BOOLEAN UnalignedLoadStoreExceptions; /* /02ed */ +#else + BOOLEAN InstrumentationCallbackDisabled; /* 1b8/ */ + BYTE SpareBytes1[23]; /* 1b9/ */ + ULONG TxFsContext; /* 1d0/ */ +#endif + GDI_TEB_BATCH GdiTebBatch; /* 1d4/02f0 */ + CLIENT_ID RealClientId; /* 6b4/07d8 */ + HANDLE GdiCachedProcessHandle; /* 6bc/07e8 */ + ULONG GdiClientPID; /* 6c0/07f0 */ + ULONG GdiClientTID; /* 6c4/07f4 */ + PVOID GdiThreadLocaleInfo; /* 6c8/07f8 */ + ULONG_PTR Win32ClientInfo[62]; /* 6cc/0800 */ + PVOID glDispatchTable[233]; /* 7c4/09f0 */ + PVOID glReserved1[29]; /* b68/1138 */ + PVOID glReserved2; /* bdc/1220 */ + PVOID glSectionInfo; /* be0/1228 */ + PVOID glSection; /* be4/1230 */ + PVOID glTable; /* be8/1238 */ + PVOID glCurrentRC; /* bec/1240 */ + PVOID glContext; /* bf0/1248 */ + ULONG LastStatusValue; /* bf4/1250 */ + UNICODE_STRING StaticUnicodeString; /* bf8/1258 */ + WCHAR StaticUnicodeBuffer[261]; /* c00/1268 */ + PVOID DeallocationStack; /* e0c/1478 */ + PVOID TlsSlots[64]; /* e10/1480 */ + LIST_ENTRY TlsLinks; /* f10/1680 */ + PVOID Vdm; /* f18/1690 */ + PVOID ReservedForNtRpc; /* f1c/1698 */ + PVOID DbgSsReserved[2]; /* f20/16a0 */ + ULONG HardErrorMode; /* f28/16b0 */ +#ifdef _WIN64 + PVOID Instrumentation[11]; /* /16b8 */ +#else + PVOID Instrumentation[9]; /* f2c/ */ +#endif + GUID ActivityId; /* f50/1710 */ + PVOID SubProcessTag; /* f60/1720 */ + PVOID PerflibData; /* f64/1728 */ + PVOID EtwTraceData; /* f68/1730 */ + PVOID WinSockData; /* f6c/1738 */ + ULONG GdiBatchCount; /* f70/1740 */ + ULONG IdealProcessorValue; /* f74/1744 */ + ULONG GuaranteedStackBytes; /* f78/1748 */ + PVOID ReservedForPerf; /* f7c/1750 */ + PVOID ReservedForOle; /* f80/1758 */ + ULONG WaitingOnLoaderLock; /* f84/1760 */ + PVOID SavedPriorityState; /* f88/1768 */ + ULONG_PTR ReservedForCodeCoverage; /* f8c/1770 */ + PVOID ThreadPoolData; /* f90/1778 */ + PVOID* TlsExpansionSlots; /* f94/1780 */ +#ifdef _WIN64 + union { + PVOID DeallocationBStore; /* /1788 */ + PVOID* ChpeV2CpuAreaInfo; /* /1788 */ + } DUMMYUNIONNAME; + PVOID BStoreLimit; /* /1790 */ +#endif + ULONG MuiGeneration; /* f98/1798 */ + ULONG IsImpersonating; /* f9c/179c */ + PVOID NlsCache; /* fa0/17a0 */ + PVOID ShimData; /* fa4/17a8 */ + ULONG HeapVirtualAffinity; /* fa8/17b0 */ + PVOID CurrentTransactionHandle; /* fac/17b8 */ + TEB_ACTIVE_FRAME* ActiveFrame; /* fb0/17c0 */ + PVOID* FlsSlots; /* fb4/17c8 */ + PVOID PreferredLanguages; /* fb8/17d0 */ + PVOID UserPrefLanguages; /* fbc/17d8 */ + PVOID MergedPrefLanguages; /* fc0/17e0 */ + ULONG MuiImpersonation; /* fc4/17e8 */ + USHORT CrossTebFlags; /* fc8/17ec */ + USHORT SameTebFlags; /* fca/17ee */ + PVOID TxnScopeEnterCallback; /* fcc/17f0 */ + PVOID TxnScopeExitCallback; /* fd0/17f8 */ + PVOID TxnScopeContext; /* fd4/1800 */ + ULONG LockCount; /* fd8/1808 */ + LONG WowTebOffset; /* fdc/180c */ + PVOID ResourceRetValue; /* fe0/1810 */ + PVOID ReservedForWdf; /* fe4/1818 */ + ULONGLONG ReservedForCrt; /* fe8/1820 */ + GUID EffectiveContainerId; /* ff0/1828 */ +} TEB, *PTEB; +static_assert(offsetof(TEB, DeallocationStack) == + 0x1478); /* The only member we care about at the moment */ + +typedef enum _QUEUE_USER_APC_FLAGS { + QueueUserApcFlagsNone, + QueueUserApcFlagsSpecialUserApc, + QueueUserApcFlagsMaxValue +} QUEUE_USER_APC_FLAGS; + +typedef union _USER_APC_OPTION { + ULONG_PTR UserApcFlags; + HANDLE MemoryReserveHandle; +} USER_APC_OPTION, *PUSER_APC_OPTION; + +using PPS_APC_ROUTINE = void (*)(PVOID ApcArgument1, PVOID ApcArgument2, PVOID ApcArgument3, + PCONTEXT Context); + +typedef u64(__stdcall* NtClose_t)(HANDLE Handle); + +typedef u64(__stdcall* NtSetInformationFile_t)(HANDLE FileHandle, PIO_STATUS_BLOCK IoStatusBlock, + PVOID FileInformation, ULONG Length, + FILE_INFORMATION_CLASS FileInformationClass); + +typedef u64(__stdcall* NtCreateThread_t)(PHANDLE ThreadHandle, ACCESS_MASK DesiredAccess, + PCOBJECT_ATTRIBUTES ObjectAttributes, HANDLE ProcessHandle, + PCLIENT_ID ClientId, PCONTEXT ThreadContext, + PINITIAL_TEB InitialTeb, BOOLEAN CreateSuspended); + +typedef u64(__stdcall* NtTerminateThread_t)(HANDLE ThreadHandle, u64 ExitStatus); + +typedef u64(__stdcall* NtQueueApcThreadEx_t)(HANDLE ThreadHandle, + USER_APC_OPTION UserApcReserveHandle, + PPS_APC_ROUTINE ApcRoutine, PVOID ApcArgument1, + PVOID ApcArgument2, PVOID ApcArgument3); + +extern NtClose_t NtClose; +extern NtSetInformationFile_t NtSetInformationFile; +extern NtCreateThread_t NtCreateThread; +extern NtTerminateThread_t NtTerminateThread; +extern NtQueueApcThreadEx_t NtQueueApcThreadEx; + +namespace Common::NtApi { +void Initialize(); +} + +#endif diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp index 9d4cfe36..b2ef4ea1 100755 --- a/src/common/spin_lock.cpp +++ b/src/common/spin_lock.cpp @@ -1,53 +1,53 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/spin_lock.h" - -#if _MSC_VER -#include -#if _M_AMD64 -#define __x86_64__ 1 -#endif -#if _M_ARM64 -#define __aarch64__ 1 -#endif -#else -#if __x86_64__ -#include -#endif -#endif - -namespace { - -void ThreadPause() { -#if __x86_64__ - _mm_pause(); -#elif __aarch64__ && _MSC_VER - __yield(); -#elif __aarch64__ - asm("yield"); -#endif -} - -} // Anonymous namespace - -namespace Common { - -void SpinLock::lock() { - while (lck.test_and_set(std::memory_order_acquire)) { - ThreadPause(); - } -} - -void SpinLock::unlock() { - lck.clear(std::memory_order_release); -} - -bool SpinLock::try_lock() { - if (lck.test_and_set(std::memory_order_acquire)) { - return false; - } - return true; -} - -} // namespace Common +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/spin_lock.h" + +#if _MSC_VER +#include +#if _M_AMD64 +#define __x86_64__ 1 +#endif +#if _M_ARM64 +#define __aarch64__ 1 +#endif +#else +#if __x86_64__ +#include +#endif +#endif + +namespace { + +void ThreadPause() { +#if __x86_64__ + _mm_pause(); +#elif __aarch64__ && _MSC_VER + __yield(); +#elif __aarch64__ + asm("yield"); +#endif +} + +} // Anonymous namespace + +namespace Common { + +void SpinLock::lock() { + while (lck.test_and_set(std::memory_order_acquire)) { + ThreadPause(); + } +} + +void SpinLock::unlock() { + lck.clear(std::memory_order_release); +} + +bool SpinLock::try_lock() { + if (lck.test_and_set(std::memory_order_acquire)) { + return false; + } + return true; +} + +} // namespace Common diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h index 3229a8c6..a8327485 100755 --- a/src/common/spin_lock.h +++ b/src/common/spin_lock.h @@ -1,33 +1,33 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include - -namespace Common { - -/** - * SpinLock class - * a lock similar to mutex that forces a thread to spin wait instead calling the - * supervisor. Should be used on short sequences of code. - */ -class SpinLock { -public: - SpinLock() = default; - - SpinLock(const SpinLock&) = delete; - SpinLock& operator=(const SpinLock&) = delete; - - SpinLock(SpinLock&&) = delete; - SpinLock& operator=(SpinLock&&) = delete; - - void lock(); - void unlock(); - [[nodiscard]] bool try_lock(); - -private: - std::atomic_flag lck = ATOMIC_FLAG_INIT; -}; - -} // namespace Common +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +namespace Common { + +/** + * SpinLock class + * a lock similar to mutex that forces a thread to spin wait instead calling the + * supervisor. Should be used on short sequences of code. + */ +class SpinLock { +public: + SpinLock() = default; + + SpinLock(const SpinLock&) = delete; + SpinLock& operator=(const SpinLock&) = delete; + + SpinLock(SpinLock&&) = delete; + SpinLock& operator=(SpinLock&&) = delete; + + void lock(); + void unlock(); + [[nodiscard]] bool try_lock(); + +private: + std::atomic_flag lck = ATOMIC_FLAG_INIT; +}; + +} // namespace Common diff --git a/src/common/unique_function.h b/src/common/unique_function.h index 1891ec3c..c15d8834 100755 --- a/src/common/unique_function.h +++ b/src/common/unique_function.h @@ -1,61 +1,61 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include - -namespace Common { - -/// General purpose function wrapper similar to std::function. -/// Unlike std::function, the captured values don't have to be copyable. -/// This class can be moved but not copied. -template -class UniqueFunction { - class CallableBase { - public: - virtual ~CallableBase() = default; - virtual ResultType operator()(Args&&...) = 0; - }; - - template - class Callable final : public CallableBase { - public: - Callable(Functor&& functor_) : functor{std::move(functor_)} {} - ~Callable() override = default; - - ResultType operator()(Args&&... args) override { - return functor(std::forward(args)...); - } - - private: - Functor functor; - }; - -public: - UniqueFunction() = default; - - template - UniqueFunction(Functor&& functor) - : callable{std::make_unique>(std::move(functor))} {} - - UniqueFunction& operator=(UniqueFunction&& rhs) noexcept = default; - UniqueFunction(UniqueFunction&& rhs) noexcept = default; - - UniqueFunction& operator=(const UniqueFunction&) = delete; - UniqueFunction(const UniqueFunction&) = delete; - - ResultType operator()(Args&&... args) const { - return (*callable)(std::forward(args)...); - } - - explicit operator bool() const noexcept { - return static_cast(callable); - } - -private: - std::unique_ptr callable; -}; - -} // namespace Common +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +namespace Common { + +/// General purpose function wrapper similar to std::function. +/// Unlike std::function, the captured values don't have to be copyable. +/// This class can be moved but not copied. +template +class UniqueFunction { + class CallableBase { + public: + virtual ~CallableBase() = default; + virtual ResultType operator()(Args&&...) = 0; + }; + + template + class Callable final : public CallableBase { + public: + Callable(Functor&& functor_) : functor{std::move(functor_)} {} + ~Callable() override = default; + + ResultType operator()(Args&&... args) override { + return functor(std::forward(args)...); + } + + private: + Functor functor; + }; + +public: + UniqueFunction() = default; + + template + UniqueFunction(Functor&& functor) + : callable{std::make_unique>(std::move(functor))} {} + + UniqueFunction& operator=(UniqueFunction&& rhs) noexcept = default; + UniqueFunction(UniqueFunction&& rhs) noexcept = default; + + UniqueFunction& operator=(const UniqueFunction&) = delete; + UniqueFunction(const UniqueFunction&) = delete; + + ResultType operator()(Args&&... args) const { + return (*callable)(std::forward(args)...); + } + + explicit operator bool() const noexcept { + return static_cast(callable); + } + +private: + std::unique_ptr callable; +}; + +} // namespace Common diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 27809984..7bb81b61 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -1,484 +1,484 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "fiber.h" - -#include "common/logging/log.h" -#include "core/libraries/fiber/fiber_error.h" -#include "core/libraries/libs.h" -#include "core/tls.h" - -namespace Libraries::Fiber { - -static constexpr u32 kFiberSignature0 = 0xdef1649c; -static constexpr u32 kFiberSignature1 = 0xb37592a0; -static constexpr u32 kFiberOptSignature = 0xbb40e64d; -static constexpr u64 kFiberStackSignature = 0x7149f2ca7149f2ca; -static constexpr u64 kFiberStackSizeCheck = 0xdeadbeefdeadbeef; - -static std::atomic context_size_check = false; - -OrbisFiberContext* GetFiberContext() { - return Core::GetTcbBase()->tcb_fiber; -} - -extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp"); -extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp"); -extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, - bool set_fpu) asm("_sceFiberSwitchEntry"); -extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit"); - -extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) { - OrbisFiberContext* g_ctx = GetFiberContext(); - g_ctx->return_val = ret; - _sceFiberLongJmp(g_ctx); -} - -void PS4_SYSV_ABI _sceFiberCheckStackOverflow(OrbisFiberContext* ctx) { - u64* stack_base = reinterpret_cast(ctx->current_fiber->addr_context); - if (stack_base && *stack_base != kFiberStackSignature) { - UNREACHABLE_MSG("Stack overflow detected in fiber."); - } -} - -void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, - OrbisFiberContext* ctx) { - OrbisFiberContext* fiber_ctx = fiber->context; - if (fiber_ctx) { - ctx->arg_on_run_to = arg_on_run_to; - _sceFiberLongJmp(fiber_ctx); - __builtin_trap(); - } - - OrbisFiberData data{}; - if (ctx->prev_fiber) { - OrbisFiber* prev_fiber = ctx->prev_fiber; - ctx->prev_fiber = nullptr; - data.state = reinterpret_cast(&prev_fiber->state); - } else { - data.state = nullptr; - } - - data.entry = fiber->entry; - data.arg_on_initialize = fiber->arg_on_initialize; - data.arg_on_run_to = arg_on_run_to; - data.stack_addr = - reinterpret_cast(reinterpret_cast(fiber->addr_context) + fiber->size_context); - if (fiber->flags & FiberFlags::SetFpuRegs) { - data.fpucw = 0x037f; - data.mxcsr = 0x9fc0; - _sceFiberSwitchEntry(&data, true); - } else { - _sceFiberSwitchEntry(&data, false); - } - - __builtin_trap(); -} - -void PS4_SYSV_ABI _sceFiberSwitch(OrbisFiber* cur_fiber, OrbisFiber* fiber, u64 arg_on_run_to, - OrbisFiberContext* ctx) { - ctx->prev_fiber = cur_fiber; - ctx->current_fiber = fiber; - - if (fiber->addr_context == nullptr) { - ctx->prev_fiber = nullptr; - - OrbisFiberData data{}; - data.entry = fiber->entry; - data.arg_on_initialize = fiber->arg_on_initialize; - data.arg_on_run_to = arg_on_run_to; - data.stack_addr = reinterpret_cast(ctx->rsp & ~15); - data.state = reinterpret_cast(&cur_fiber->state); - - if (fiber->flags & FiberFlags::SetFpuRegs) { - data.fpucw = 0x037f; - data.mxcsr = 0x9fc0; - _sceFiberSwitchEntry(&data, true); - } else { - _sceFiberSwitchEntry(&data, false); - } - - __builtin_trap(); - } - - _sceFiberSwitchToFiber(fiber, arg_on_run_to, ctx); - __builtin_trap(); -} - -void PS4_SYSV_ABI _sceFiberTerminate(OrbisFiber* fiber, u64 arg_on_return, OrbisFiberContext* ctx) { - ctx->arg_on_return = arg_on_return; - _sceFiberLongJmp(ctx); - __builtin_trap(); -} - -s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, - u64 arg_on_initialize, void* addr_context, u64 size_context, - const OrbisFiberOptParam* opt_param, u32 build_ver) { - if (!fiber || !name || !entry) { - return ORBIS_FIBER_ERROR_NULL; - } - if ((u64)fiber & 7 || (u64)addr_context & 15) { - return ORBIS_FIBER_ERROR_ALIGNMENT; - } - if (opt_param && (u64)opt_param & 7) { - return ORBIS_FIBER_ERROR_ALIGNMENT; - } - if (size_context && size_context < ORBIS_FIBER_CONTEXT_MINIMUM_SIZE) { - return ORBIS_FIBER_ERROR_RANGE; - } - if (size_context & 15) { - return ORBIS_FIBER_ERROR_INVALID; - } - if (!addr_context && size_context) { - return ORBIS_FIBER_ERROR_INVALID; - } - if (addr_context && !size_context) { - return ORBIS_FIBER_ERROR_INVALID; - } - if (opt_param && opt_param->magic != kFiberOptSignature) { - return ORBIS_FIBER_ERROR_INVALID; - } - - u32 flags = FiberFlags::None; - if (build_ver >= 0x3500000) { - flags |= FiberFlags::SetFpuRegs; - } - if (context_size_check) { - flags |= FiberFlags::ContextSizeCheck; - } - - strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH); - - fiber->entry = entry; - fiber->arg_on_initialize = arg_on_initialize; - fiber->addr_context = addr_context; - fiber->size_context = size_context; - fiber->context = nullptr; - fiber->flags = flags; - - /* - A low stack area is problematic, as we can easily - cause a stack overflow with our HLE. - */ - if (size_context && size_context <= 4096) { - LOG_WARNING(Lib_Fiber, "Fiber initialized with small stack area."); - } - - fiber->magic_start = kFiberSignature0; - fiber->magic_end = kFiberSignature1; - - if (addr_context != nullptr) { - fiber->context_start = addr_context; - fiber->context_end = - reinterpret_cast(reinterpret_cast(addr_context) + size_context); - - /* Apply signature to start of stack */ - *(u64*)addr_context = kFiberStackSignature; - - if (flags & FiberFlags::ContextSizeCheck) { - u64* stack_start = reinterpret_cast(fiber->context_start); - u64* stack_end = reinterpret_cast(fiber->context_end); - - u64* stack_ptr = stack_start + 1; - while (stack_ptr < stack_end) { - *stack_ptr++ = kFiberStackSizeCheck; - } - } - } - - fiber->state = FiberState::Idle; - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param) { - if (!opt_param) { - return ORBIS_FIBER_ERROR_NULL; - } - if ((u64)opt_param & 7) { - return ORBIS_FIBER_ERROR_ALIGNMENT; - } - - opt_param->magic = kFiberOptSignature; - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber) { - if (!fiber) { - return ORBIS_FIBER_ERROR_NULL; - } - if ((u64)fiber & 7) { - return ORBIS_FIBER_ERROR_ALIGNMENT; - } - if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { - return ORBIS_FIBER_ERROR_INVALID; - } - - FiberState expected = FiberState::Idle; - if (!fiber->state.compare_exchange_strong(expected, FiberState::Terminated)) { - return ORBIS_FIBER_ERROR_STATE; - } - - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { - if (!fiber) { - return ORBIS_FIBER_ERROR_NULL; - } - if ((u64)fiber & 7) { - return ORBIS_FIBER_ERROR_ALIGNMENT; - } - if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { - return ORBIS_FIBER_ERROR_INVALID; - } - - Core::Tcb* tcb = Core::GetTcbBase(); - if (tcb->tcb_fiber) { - return ORBIS_FIBER_ERROR_PERMISSION; - } - - FiberState expected = FiberState::Idle; - if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { - return ORBIS_FIBER_ERROR_STATE; - } - - OrbisFiberContext ctx{}; - ctx.current_fiber = fiber; - ctx.prev_fiber = nullptr; - ctx.return_val = 0; - - tcb->tcb_fiber = &ctx; - - s32 jmp = _sceFiberSetJmp(&ctx); - if (!jmp) { - if (fiber->addr_context) { - _sceFiberSwitchToFiber(fiber, arg_on_run_to, &ctx); - __builtin_trap(); - } - - OrbisFiberData data{}; - data.entry = fiber->entry; - data.arg_on_initialize = fiber->arg_on_initialize; - data.arg_on_run_to = arg_on_run_to; - data.stack_addr = reinterpret_cast(ctx.rsp & ~15); - data.state = nullptr; - if (fiber->flags & FiberFlags::SetFpuRegs) { - data.fpucw = 0x037f; - data.mxcsr = 0x9fc0; - _sceFiberSwitchEntry(&data, true); - } else { - _sceFiberSwitchEntry(&data, false); - } - } - - OrbisFiber* cur_fiber = ctx.current_fiber; - ctx.current_fiber = nullptr; - cur_fiber->state = FiberState::Idle; - - if (ctx.return_val != 0) { - /* Fiber entry returned! This should never happen. */ - UNREACHABLE_MSG("Fiber entry function returned."); - } - - if (arg_on_return) { - *arg_on_return = ctx.arg_on_return; - } - - tcb->tcb_fiber = nullptr; - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) { - if (!fiber) { - return ORBIS_FIBER_ERROR_NULL; - } - if ((u64)fiber & 7) { - return ORBIS_FIBER_ERROR_ALIGNMENT; - } - if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { - return ORBIS_FIBER_ERROR_INVALID; - } - - OrbisFiberContext* g_ctx = GetFiberContext(); - if (!g_ctx) { - return ORBIS_FIBER_ERROR_PERMISSION; - } - - FiberState expected = FiberState::Idle; - if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { - return ORBIS_FIBER_ERROR_STATE; - } - - OrbisFiber* cur_fiber = g_ctx->current_fiber; - if (cur_fiber->addr_context == nullptr) { - _sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx); - __builtin_trap(); - } - - OrbisFiberContext ctx{}; - s32 jmp = _sceFiberSetJmp(&ctx); - if (!jmp) { - cur_fiber->context = &ctx; - _sceFiberCheckStackOverflow(g_ctx); - _sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx); - __builtin_trap(); - } - - g_ctx = GetFiberContext(); - if (g_ctx->prev_fiber) { - g_ctx->prev_fiber->state = FiberState::Idle; - g_ctx->prev_fiber = nullptr; - } - - if (arg_on_run) { - *arg_on_run = g_ctx->arg_on_run_to; - } - - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber) { - if (!fiber) { - return ORBIS_FIBER_ERROR_NULL; - } - - OrbisFiberContext* g_ctx = GetFiberContext(); - if (!g_ctx) { - return ORBIS_FIBER_ERROR_PERMISSION; - } - - *fiber = g_ctx->current_fiber; - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run) { - OrbisFiberContext* g_ctx = GetFiberContext(); - if (!g_ctx) { - return ORBIS_FIBER_ERROR_PERMISSION; - } - - OrbisFiber* cur_fiber = g_ctx->current_fiber; - if (cur_fiber->addr_context) { - OrbisFiberContext ctx{}; - s32 jmp = _sceFiberSetJmp(&ctx); - if (jmp) { - g_ctx = GetFiberContext(); - if (g_ctx->prev_fiber) { - g_ctx->prev_fiber->state = FiberState::Idle; - g_ctx->prev_fiber = nullptr; - } - if (arg_on_run) { - *arg_on_run = g_ctx->arg_on_run_to; - } - return ORBIS_OK; - } - - cur_fiber->context = &ctx; - _sceFiberCheckStackOverflow(g_ctx); - } - - _sceFiberTerminate(cur_fiber, arg_on_return, g_ctx); - __builtin_trap(); -} - -s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info) { - if (!fiber || !fiber_info) { - return ORBIS_FIBER_ERROR_NULL; - } - if ((u64)fiber & 7 || (u64)fiber_info & 7) { - return ORBIS_FIBER_ERROR_ALIGNMENT; - } - if (fiber_info->size != sizeof(OrbisFiberInfo)) { - return ORBIS_FIBER_ERROR_INVALID; - } - if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { - return ORBIS_FIBER_ERROR_INVALID; - } - - fiber_info->entry = fiber->entry; - fiber_info->arg_on_initialize = fiber->arg_on_initialize; - fiber_info->addr_context = fiber->addr_context; - fiber_info->size_context = fiber->size_context; - strncpy(fiber_info->name, fiber->name, ORBIS_FIBER_MAX_NAME_LENGTH); - - fiber_info->size_context_margin = -1; - if (fiber->flags & FiberFlags::ContextSizeCheck && fiber->addr_context != nullptr) { - u64 stack_margin = 0; - u64* stack_start = reinterpret_cast(fiber->context_start); - u64* stack_end = reinterpret_cast(fiber->context_end); - - if (*stack_start == kFiberStackSignature) { - u64* stack_ptr = stack_start + 1; - while (stack_ptr < stack_end) { - if (*stack_ptr == kFiberStackSizeCheck) { - stack_ptr++; - } - } - - stack_margin = - reinterpret_cast(stack_ptr) - reinterpret_cast(stack_start + 1); - } - - fiber_info->size_context_margin = stack_margin; - } - - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags) { - if (flags != 0) { - return ORBIS_FIBER_ERROR_INVALID; - } - - u32 expected = 0; - if (!context_size_check.compare_exchange_strong(expected, 1u)) { - return ORBIS_FIBER_ERROR_STATE; - } - - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck() { - u32 expected = 1; - if (!context_size_check.compare_exchange_strong(expected, 0u)) { - return ORBIS_FIBER_ERROR_STATE; - } - - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name) { - if (!fiber || !name) { - return ORBIS_FIBER_ERROR_NULL; - } - if ((u64)fiber & 7) { - return ORBIS_FIBER_ERROR_ALIGNMENT; - } - if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { - return ORBIS_FIBER_ERROR_INVALID; - } - - strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH); - return ORBIS_OK; -} - -void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { - LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); - LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); - LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize); - LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize); - - LIB_FUNCTION("a0LLrZWac0M", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRun); - LIB_FUNCTION("PFT2S-tJ7Uk", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberSwitch); - LIB_FUNCTION("p+zLIOg27zU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetSelf); - LIB_FUNCTION("B0ZX2hx9DMw", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberReturnToThread); - - LIB_FUNCTION("uq2Y5BFz0PE", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetInfo); - LIB_FUNCTION("Lcqty+QNWFc", "libSceFiber", 1, "libSceFiber", 1, 1, - sceFiberStartContextSizeCheck); - LIB_FUNCTION("Kj4nXMpnM8Y", "libSceFiber", 1, "libSceFiber", 1, 1, - sceFiberStopContextSizeCheck); - LIB_FUNCTION("JzyT91ucGDc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRename); -} - -} // namespace Libraries::Fiber +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "fiber.h" + +#include "common/logging/log.h" +#include "core/libraries/fiber/fiber_error.h" +#include "core/libraries/libs.h" +#include "core/tls.h" + +namespace Libraries::Fiber { + +static constexpr u32 kFiberSignature0 = 0xdef1649c; +static constexpr u32 kFiberSignature1 = 0xb37592a0; +static constexpr u32 kFiberOptSignature = 0xbb40e64d; +static constexpr u64 kFiberStackSignature = 0x7149f2ca7149f2ca; +static constexpr u64 kFiberStackSizeCheck = 0xdeadbeefdeadbeef; + +static std::atomic context_size_check = false; + +OrbisFiberContext* GetFiberContext() { + return Core::GetTcbBase()->tcb_fiber; +} + +extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp"); +extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp"); +extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data, + bool set_fpu) asm("_sceFiberSwitchEntry"); +extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit"); + +extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) { + OrbisFiberContext* g_ctx = GetFiberContext(); + g_ctx->return_val = ret; + _sceFiberLongJmp(g_ctx); +} + +void PS4_SYSV_ABI _sceFiberCheckStackOverflow(OrbisFiberContext* ctx) { + u64* stack_base = reinterpret_cast(ctx->current_fiber->addr_context); + if (stack_base && *stack_base != kFiberStackSignature) { + UNREACHABLE_MSG("Stack overflow detected in fiber."); + } +} + +void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, + OrbisFiberContext* ctx) { + OrbisFiberContext* fiber_ctx = fiber->context; + if (fiber_ctx) { + ctx->arg_on_run_to = arg_on_run_to; + _sceFiberLongJmp(fiber_ctx); + __builtin_trap(); + } + + OrbisFiberData data{}; + if (ctx->prev_fiber) { + OrbisFiber* prev_fiber = ctx->prev_fiber; + ctx->prev_fiber = nullptr; + data.state = reinterpret_cast(&prev_fiber->state); + } else { + data.state = nullptr; + } + + data.entry = fiber->entry; + data.arg_on_initialize = fiber->arg_on_initialize; + data.arg_on_run_to = arg_on_run_to; + data.stack_addr = + reinterpret_cast(reinterpret_cast(fiber->addr_context) + fiber->size_context); + if (fiber->flags & FiberFlags::SetFpuRegs) { + data.fpucw = 0x037f; + data.mxcsr = 0x9fc0; + _sceFiberSwitchEntry(&data, true); + } else { + _sceFiberSwitchEntry(&data, false); + } + + __builtin_trap(); +} + +void PS4_SYSV_ABI _sceFiberSwitch(OrbisFiber* cur_fiber, OrbisFiber* fiber, u64 arg_on_run_to, + OrbisFiberContext* ctx) { + ctx->prev_fiber = cur_fiber; + ctx->current_fiber = fiber; + + if (fiber->addr_context == nullptr) { + ctx->prev_fiber = nullptr; + + OrbisFiberData data{}; + data.entry = fiber->entry; + data.arg_on_initialize = fiber->arg_on_initialize; + data.arg_on_run_to = arg_on_run_to; + data.stack_addr = reinterpret_cast(ctx->rsp & ~15); + data.state = reinterpret_cast(&cur_fiber->state); + + if (fiber->flags & FiberFlags::SetFpuRegs) { + data.fpucw = 0x037f; + data.mxcsr = 0x9fc0; + _sceFiberSwitchEntry(&data, true); + } else { + _sceFiberSwitchEntry(&data, false); + } + + __builtin_trap(); + } + + _sceFiberSwitchToFiber(fiber, arg_on_run_to, ctx); + __builtin_trap(); +} + +void PS4_SYSV_ABI _sceFiberTerminate(OrbisFiber* fiber, u64 arg_on_return, OrbisFiberContext* ctx) { + ctx->arg_on_return = arg_on_return; + _sceFiberLongJmp(ctx); + __builtin_trap(); +} + +s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, + u64 arg_on_initialize, void* addr_context, u64 size_context, + const OrbisFiberOptParam* opt_param, u32 build_ver) { + if (!fiber || !name || !entry) { + return ORBIS_FIBER_ERROR_NULL; + } + if ((u64)fiber & 7 || (u64)addr_context & 15) { + return ORBIS_FIBER_ERROR_ALIGNMENT; + } + if (opt_param && (u64)opt_param & 7) { + return ORBIS_FIBER_ERROR_ALIGNMENT; + } + if (size_context && size_context < ORBIS_FIBER_CONTEXT_MINIMUM_SIZE) { + return ORBIS_FIBER_ERROR_RANGE; + } + if (size_context & 15) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (!addr_context && size_context) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (addr_context && !size_context) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (opt_param && opt_param->magic != kFiberOptSignature) { + return ORBIS_FIBER_ERROR_INVALID; + } + + u32 flags = FiberFlags::None; + if (build_ver >= 0x3500000) { + flags |= FiberFlags::SetFpuRegs; + } + if (context_size_check) { + flags |= FiberFlags::ContextSizeCheck; + } + + strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH); + + fiber->entry = entry; + fiber->arg_on_initialize = arg_on_initialize; + fiber->addr_context = addr_context; + fiber->size_context = size_context; + fiber->context = nullptr; + fiber->flags = flags; + + /* + A low stack area is problematic, as we can easily + cause a stack overflow with our HLE. + */ + if (size_context && size_context <= 4096) { + LOG_WARNING(Lib_Fiber, "Fiber initialized with small stack area."); + } + + fiber->magic_start = kFiberSignature0; + fiber->magic_end = kFiberSignature1; + + if (addr_context != nullptr) { + fiber->context_start = addr_context; + fiber->context_end = + reinterpret_cast(reinterpret_cast(addr_context) + size_context); + + /* Apply signature to start of stack */ + *(u64*)addr_context = kFiberStackSignature; + + if (flags & FiberFlags::ContextSizeCheck) { + u64* stack_start = reinterpret_cast(fiber->context_start); + u64* stack_end = reinterpret_cast(fiber->context_end); + + u64* stack_ptr = stack_start + 1; + while (stack_ptr < stack_end) { + *stack_ptr++ = kFiberStackSizeCheck; + } + } + } + + fiber->state = FiberState::Idle; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param) { + if (!opt_param) { + return ORBIS_FIBER_ERROR_NULL; + } + if ((u64)opt_param & 7) { + return ORBIS_FIBER_ERROR_ALIGNMENT; + } + + opt_param->magic = kFiberOptSignature; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber) { + if (!fiber) { + return ORBIS_FIBER_ERROR_NULL; + } + if ((u64)fiber & 7) { + return ORBIS_FIBER_ERROR_ALIGNMENT; + } + if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { + return ORBIS_FIBER_ERROR_INVALID; + } + + FiberState expected = FiberState::Idle; + if (!fiber->state.compare_exchange_strong(expected, FiberState::Terminated)) { + return ORBIS_FIBER_ERROR_STATE; + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { + if (!fiber) { + return ORBIS_FIBER_ERROR_NULL; + } + if ((u64)fiber & 7) { + return ORBIS_FIBER_ERROR_ALIGNMENT; + } + if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { + return ORBIS_FIBER_ERROR_INVALID; + } + + Core::Tcb* tcb = Core::GetTcbBase(); + if (tcb->tcb_fiber) { + return ORBIS_FIBER_ERROR_PERMISSION; + } + + FiberState expected = FiberState::Idle; + if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { + return ORBIS_FIBER_ERROR_STATE; + } + + OrbisFiberContext ctx{}; + ctx.current_fiber = fiber; + ctx.prev_fiber = nullptr; + ctx.return_val = 0; + + tcb->tcb_fiber = &ctx; + + s32 jmp = _sceFiberSetJmp(&ctx); + if (!jmp) { + if (fiber->addr_context) { + _sceFiberSwitchToFiber(fiber, arg_on_run_to, &ctx); + __builtin_trap(); + } + + OrbisFiberData data{}; + data.entry = fiber->entry; + data.arg_on_initialize = fiber->arg_on_initialize; + data.arg_on_run_to = arg_on_run_to; + data.stack_addr = reinterpret_cast(ctx.rsp & ~15); + data.state = nullptr; + if (fiber->flags & FiberFlags::SetFpuRegs) { + data.fpucw = 0x037f; + data.mxcsr = 0x9fc0; + _sceFiberSwitchEntry(&data, true); + } else { + _sceFiberSwitchEntry(&data, false); + } + } + + OrbisFiber* cur_fiber = ctx.current_fiber; + ctx.current_fiber = nullptr; + cur_fiber->state = FiberState::Idle; + + if (ctx.return_val != 0) { + /* Fiber entry returned! This should never happen. */ + UNREACHABLE_MSG("Fiber entry function returned."); + } + + if (arg_on_return) { + *arg_on_return = ctx.arg_on_return; + } + + tcb->tcb_fiber = nullptr; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) { + if (!fiber) { + return ORBIS_FIBER_ERROR_NULL; + } + if ((u64)fiber & 7) { + return ORBIS_FIBER_ERROR_ALIGNMENT; + } + if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { + return ORBIS_FIBER_ERROR_INVALID; + } + + OrbisFiberContext* g_ctx = GetFiberContext(); + if (!g_ctx) { + return ORBIS_FIBER_ERROR_PERMISSION; + } + + FiberState expected = FiberState::Idle; + if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { + return ORBIS_FIBER_ERROR_STATE; + } + + OrbisFiber* cur_fiber = g_ctx->current_fiber; + if (cur_fiber->addr_context == nullptr) { + _sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx); + __builtin_trap(); + } + + OrbisFiberContext ctx{}; + s32 jmp = _sceFiberSetJmp(&ctx); + if (!jmp) { + cur_fiber->context = &ctx; + _sceFiberCheckStackOverflow(g_ctx); + _sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx); + __builtin_trap(); + } + + g_ctx = GetFiberContext(); + if (g_ctx->prev_fiber) { + g_ctx->prev_fiber->state = FiberState::Idle; + g_ctx->prev_fiber = nullptr; + } + + if (arg_on_run) { + *arg_on_run = g_ctx->arg_on_run_to; + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber) { + if (!fiber) { + return ORBIS_FIBER_ERROR_NULL; + } + + OrbisFiberContext* g_ctx = GetFiberContext(); + if (!g_ctx) { + return ORBIS_FIBER_ERROR_PERMISSION; + } + + *fiber = g_ctx->current_fiber; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run) { + OrbisFiberContext* g_ctx = GetFiberContext(); + if (!g_ctx) { + return ORBIS_FIBER_ERROR_PERMISSION; + } + + OrbisFiber* cur_fiber = g_ctx->current_fiber; + if (cur_fiber->addr_context) { + OrbisFiberContext ctx{}; + s32 jmp = _sceFiberSetJmp(&ctx); + if (jmp) { + g_ctx = GetFiberContext(); + if (g_ctx->prev_fiber) { + g_ctx->prev_fiber->state = FiberState::Idle; + g_ctx->prev_fiber = nullptr; + } + if (arg_on_run) { + *arg_on_run = g_ctx->arg_on_run_to; + } + return ORBIS_OK; + } + + cur_fiber->context = &ctx; + _sceFiberCheckStackOverflow(g_ctx); + } + + _sceFiberTerminate(cur_fiber, arg_on_return, g_ctx); + __builtin_trap(); +} + +s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info) { + if (!fiber || !fiber_info) { + return ORBIS_FIBER_ERROR_NULL; + } + if ((u64)fiber & 7 || (u64)fiber_info & 7) { + return ORBIS_FIBER_ERROR_ALIGNMENT; + } + if (fiber_info->size != sizeof(OrbisFiberInfo)) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { + return ORBIS_FIBER_ERROR_INVALID; + } + + fiber_info->entry = fiber->entry; + fiber_info->arg_on_initialize = fiber->arg_on_initialize; + fiber_info->addr_context = fiber->addr_context; + fiber_info->size_context = fiber->size_context; + strncpy(fiber_info->name, fiber->name, ORBIS_FIBER_MAX_NAME_LENGTH); + + fiber_info->size_context_margin = -1; + if (fiber->flags & FiberFlags::ContextSizeCheck && fiber->addr_context != nullptr) { + u64 stack_margin = 0; + u64* stack_start = reinterpret_cast(fiber->context_start); + u64* stack_end = reinterpret_cast(fiber->context_end); + + if (*stack_start == kFiberStackSignature) { + u64* stack_ptr = stack_start + 1; + while (stack_ptr < stack_end) { + if (*stack_ptr == kFiberStackSizeCheck) { + stack_ptr++; + } + } + + stack_margin = + reinterpret_cast(stack_ptr) - reinterpret_cast(stack_start + 1); + } + + fiber_info->size_context_margin = stack_margin; + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags) { + if (flags != 0) { + return ORBIS_FIBER_ERROR_INVALID; + } + + u32 expected = 0; + if (!context_size_check.compare_exchange_strong(expected, 1u)) { + return ORBIS_FIBER_ERROR_STATE; + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck() { + u32 expected = 1; + if (!context_size_check.compare_exchange_strong(expected, 0u)) { + return ORBIS_FIBER_ERROR_STATE; + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name) { + if (!fiber || !name) { + return ORBIS_FIBER_ERROR_NULL; + } + if ((u64)fiber & 7) { + return ORBIS_FIBER_ERROR_ALIGNMENT; + } + if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { + return ORBIS_FIBER_ERROR_INVALID; + } + + strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH); + return ORBIS_OK; +} + +void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); + LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); + LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize); + LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize); + + LIB_FUNCTION("a0LLrZWac0M", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRun); + LIB_FUNCTION("PFT2S-tJ7Uk", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberSwitch); + LIB_FUNCTION("p+zLIOg27zU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetSelf); + LIB_FUNCTION("B0ZX2hx9DMw", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberReturnToThread); + + LIB_FUNCTION("uq2Y5BFz0PE", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetInfo); + LIB_FUNCTION("Lcqty+QNWFc", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberStartContextSizeCheck); + LIB_FUNCTION("Kj4nXMpnM8Y", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberStopContextSizeCheck); + LIB_FUNCTION("JzyT91ucGDc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRename); +} + +} // namespace Libraries::Fiber diff --git a/src/core/libraries/fiber/fiber.h b/src/core/libraries/fiber/fiber.h index 325522fc..3c4e3b70 100644 --- a/src/core/libraries/fiber/fiber.h +++ b/src/core/libraries/fiber/fiber.h @@ -1,118 +1,118 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "common/assert.h" -#include "common/types.h" - -#include - -namespace Core::Loader { -class SymbolsResolver; -} -namespace Libraries::Fiber { - -#define ORBIS_FIBER_MAX_NAME_LENGTH (31) -#define ORBIS_FIBER_CONTEXT_MINIMUM_SIZE (512) - -typedef void PS4_SYSV_ABI (*OrbisFiberEntry)(u64 arg_on_initialize, u64 arg_on_run); - -enum FiberState : u32 { - Run = 1u, - Idle = 2u, - Terminated = 3u, -}; - -enum FiberFlags : u32 { - None = 0x0, - NoUlobjmgr = 0x1, - ContextSizeCheck = 0x10, - SetFpuRegs = 0x100, -}; - -struct OrbisFiber; - -struct OrbisFiberContext { - struct { - u64 rax, rcx, rdx, rbx, rsp, rbp, r8, r9, r10, r11, r12, r13, r14, r15; - u16 fpucw; - u32 mxcsr; - }; - OrbisFiber* current_fiber; - OrbisFiber* prev_fiber; - u64 arg_on_run_to; - u64 arg_on_return; - u64 return_val; -}; - -struct OrbisFiberData { - OrbisFiberEntry entry; - u64 arg_on_initialize; - u64 arg_on_run_to; - void* stack_addr; - u32* state; - u16 fpucw; - s8 pad[2]; - u32 mxcsr; -}; - -struct OrbisFiber { - u32 magic_start; - std::atomic state; - OrbisFiberEntry entry; - u64 arg_on_initialize; - void* addr_context; - u64 size_context; - char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1]; - OrbisFiberContext* context; - u32 flags; - void* context_start; - void* context_end; - u32 magic_end; -}; -static_assert(sizeof(OrbisFiber) <= 256); - -struct OrbisFiberInfo { - u64 size; - OrbisFiberEntry entry; - u64 arg_on_initialize; - void* addr_context; - u64 size_context; - char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1]; - u64 size_context_margin; - u8 pad[48]; -}; -static_assert(sizeof(OrbisFiberInfo) == 128); - -struct OrbisFiberOptParam { - u32 magic; -}; -static_assert(sizeof(OrbisFiberOptParam) <= 128); - -s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, - u64 arg_on_initialize, void* addr_context, u64 size_context, - const OrbisFiberOptParam* opt_param, u32 build_version); - -s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param); - -s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber); - -s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return); - -s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run); - -s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber); - -s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run); - -s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info); - -s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags); - -s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck(void); - -s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name); - -void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym); +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/assert.h" +#include "common/types.h" + +#include + +namespace Core::Loader { +class SymbolsResolver; +} +namespace Libraries::Fiber { + +#define ORBIS_FIBER_MAX_NAME_LENGTH (31) +#define ORBIS_FIBER_CONTEXT_MINIMUM_SIZE (512) + +typedef void PS4_SYSV_ABI (*OrbisFiberEntry)(u64 arg_on_initialize, u64 arg_on_run); + +enum FiberState : u32 { + Run = 1u, + Idle = 2u, + Terminated = 3u, +}; + +enum FiberFlags : u32 { + None = 0x0, + NoUlobjmgr = 0x1, + ContextSizeCheck = 0x10, + SetFpuRegs = 0x100, +}; + +struct OrbisFiber; + +struct OrbisFiberContext { + struct { + u64 rax, rcx, rdx, rbx, rsp, rbp, r8, r9, r10, r11, r12, r13, r14, r15; + u16 fpucw; + u32 mxcsr; + }; + OrbisFiber* current_fiber; + OrbisFiber* prev_fiber; + u64 arg_on_run_to; + u64 arg_on_return; + u64 return_val; +}; + +struct OrbisFiberData { + OrbisFiberEntry entry; + u64 arg_on_initialize; + u64 arg_on_run_to; + void* stack_addr; + u32* state; + u16 fpucw; + s8 pad[2]; + u32 mxcsr; +}; + +struct OrbisFiber { + u32 magic_start; + std::atomic state; + OrbisFiberEntry entry; + u64 arg_on_initialize; + void* addr_context; + u64 size_context; + char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1]; + OrbisFiberContext* context; + u32 flags; + void* context_start; + void* context_end; + u32 magic_end; +}; +static_assert(sizeof(OrbisFiber) <= 256); + +struct OrbisFiberInfo { + u64 size; + OrbisFiberEntry entry; + u64 arg_on_initialize; + void* addr_context; + u64 size_context; + char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1]; + u64 size_context_margin; + u8 pad[48]; +}; +static_assert(sizeof(OrbisFiberInfo) == 128); + +struct OrbisFiberOptParam { + u32 magic; +}; +static_assert(sizeof(OrbisFiberOptParam) <= 128); + +s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, + u64 arg_on_initialize, void* addr_context, u64 size_context, + const OrbisFiberOptParam* opt_param, u32 build_version); + +s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param); + +s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber); + +s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return); + +s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run); + +s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber); + +s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run); + +s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info); + +s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags); + +s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck(void); + +s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name); + +void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Fiber \ No newline at end of file diff --git a/src/core/libraries/fiber/fiber_context.s b/src/core/libraries/fiber/fiber_context.s index 44e51d38..04fe9358 100644 --- a/src/core/libraries/fiber/fiber_context.s +++ b/src/core/libraries/fiber/fiber_context.s @@ -1,121 +1,121 @@ -# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -# SPDX-License-Identifier: GPL-2.0-or-later - -.global _sceFiberSetJmp -_sceFiberSetJmp: - movq %rax, 0x0(%rdi) - - movq (%rsp), %rdx - movq %rdx, 0x10(%rdi) - - movq %rcx, 0x08(%rdi) - movq %rbx, 0x18(%rdi) - movq %rsp, 0x20(%rdi) - movq %rbp, 0x28(%rdi) - - movq %r8, 0x30(%rdi) - movq %r9, 0x38(%rdi) - movq %r10, 0x40(%rdi) - movq %r11, 0x48(%rdi) - movq %r12, 0x50(%rdi) - movq %r13, 0x58(%rdi) - movq %r14, 0x60(%rdi) - movq %r15, 0x68(%rdi) - - fnstcw 0x70(%rdi) - stmxcsr 0x72(%rdi) - - xor %eax, %eax - ret - -.global _sceFiberLongJmp -_sceFiberLongJmp: - # MXCSR = (MXCSR & 0x3f) ^ (ctx->mxcsr & ~0x3f) - stmxcsr -0x4(%rsp) - movl 0x72(%rdi), %eax - andl $0xffffffc0, %eax - movl -0x4(%rsp), %ecx - andl $0x3f, %ecx - xorl %eax, %ecx - movl %ecx, -0x4(%rsp) - ldmxcsr -0x4(%rsp) - - movq 0x00(%rdi), %rax - movq 0x08(%rdi), %rcx - movq 0x10(%rdi), %rdx - movq 0x18(%rdi), %rbx - movq 0x20(%rdi), %rsp - movq 0x28(%rdi), %rbp - - movq 0x30(%rdi), %r8 - movq 0x38(%rdi), %r9 - movq 0x40(%rdi), %r10 - movq 0x48(%rdi), %r11 - movq 0x50(%rdi), %r12 - movq 0x58(%rdi), %r13 - movq 0x60(%rdi), %r14 - movq 0x68(%rdi), %r15 - - fldcw 0x70(%rdi) - - # Make the jump and return 1 - movq %rdx, 0x00(%rsp) - movl $0x1, %eax - ret - -.global _sceFiberSwitchEntry -_sceFiberSwitchEntry: - mov %rdi, %r11 - - # Set stack address to provided stack - movq 0x18(%r11), %rsp - xorl %ebp, %ebp - - movq 0x20(%r11), %r10 # data->state - - # Set previous fiber state to Idle - test %r10, %r10 - jz .clear_regs - movl $2, (%r10) - -.clear_regs: - test %esi, %esi - jz .skip_fpu_regs - - ldmxcsr 0x2c(%r11) - fldcw 0x28(%r11) - -.skip_fpu_regs: - movq 0x08(%r11), %rdi # data->arg_on_initialize - movq 0x10(%r11), %rsi # data->arg_on_run_to - movq 0x00(%r11), %r11 # data->entry - - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx - xorl %edx, %edx - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r10 - xorq %r12, %r12 - xorq %r13, %r13 - xorq %r14, %r14 - xorq %r15, %r15 - pxor %mm0, %mm0 - pxor %mm1, %mm1 - pxor %mm2, %mm2 - pxor %mm3, %mm3 - pxor %mm4, %mm4 - pxor %mm5, %mm5 - pxor %mm6, %mm6 - pxor %mm7, %mm7 - emms - vzeroall - - # Call the fiber's entry function: entry(arg_on_initialize, arg_on_run_to) - call *%r11 - - # Fiber returned, not good - movl $1, %edi - call _sceFiberForceQuit +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +.global _sceFiberSetJmp +_sceFiberSetJmp: + movq %rax, 0x0(%rdi) + + movq (%rsp), %rdx + movq %rdx, 0x10(%rdi) + + movq %rcx, 0x08(%rdi) + movq %rbx, 0x18(%rdi) + movq %rsp, 0x20(%rdi) + movq %rbp, 0x28(%rdi) + + movq %r8, 0x30(%rdi) + movq %r9, 0x38(%rdi) + movq %r10, 0x40(%rdi) + movq %r11, 0x48(%rdi) + movq %r12, 0x50(%rdi) + movq %r13, 0x58(%rdi) + movq %r14, 0x60(%rdi) + movq %r15, 0x68(%rdi) + + fnstcw 0x70(%rdi) + stmxcsr 0x72(%rdi) + + xor %eax, %eax + ret + +.global _sceFiberLongJmp +_sceFiberLongJmp: + # MXCSR = (MXCSR & 0x3f) ^ (ctx->mxcsr & ~0x3f) + stmxcsr -0x4(%rsp) + movl 0x72(%rdi), %eax + andl $0xffffffc0, %eax + movl -0x4(%rsp), %ecx + andl $0x3f, %ecx + xorl %eax, %ecx + movl %ecx, -0x4(%rsp) + ldmxcsr -0x4(%rsp) + + movq 0x00(%rdi), %rax + movq 0x08(%rdi), %rcx + movq 0x10(%rdi), %rdx + movq 0x18(%rdi), %rbx + movq 0x20(%rdi), %rsp + movq 0x28(%rdi), %rbp + + movq 0x30(%rdi), %r8 + movq 0x38(%rdi), %r9 + movq 0x40(%rdi), %r10 + movq 0x48(%rdi), %r11 + movq 0x50(%rdi), %r12 + movq 0x58(%rdi), %r13 + movq 0x60(%rdi), %r14 + movq 0x68(%rdi), %r15 + + fldcw 0x70(%rdi) + + # Make the jump and return 1 + movq %rdx, 0x00(%rsp) + movl $0x1, %eax + ret + +.global _sceFiberSwitchEntry +_sceFiberSwitchEntry: + mov %rdi, %r11 + + # Set stack address to provided stack + movq 0x18(%r11), %rsp + xorl %ebp, %ebp + + movq 0x20(%r11), %r10 # data->state + + # Set previous fiber state to Idle + test %r10, %r10 + jz .clear_regs + movl $2, (%r10) + +.clear_regs: + test %esi, %esi + jz .skip_fpu_regs + + ldmxcsr 0x2c(%r11) + fldcw 0x28(%r11) + +.skip_fpu_regs: + movq 0x08(%r11), %rdi # data->arg_on_initialize + movq 0x10(%r11), %rsi # data->arg_on_run_to + movq 0x00(%r11), %r11 # data->entry + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorq %r8, %r8 + xorq %r9, %r9 + xorq %r10, %r10 + xorq %r12, %r12 + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 + pxor %mm0, %mm0 + pxor %mm1, %mm1 + pxor %mm2, %mm2 + pxor %mm3, %mm3 + pxor %mm4, %mm4 + pxor %mm5, %mm5 + pxor %mm6, %mm6 + pxor %mm7, %mm7 + emms + vzeroall + + # Call the fiber's entry function: entry(arg_on_initialize, arg_on_run_to) + call *%r11 + + # Fiber returned, not good + movl $1, %edi + call _sceFiberForceQuit ret \ No newline at end of file diff --git a/src/core/libraries/ime/ime_common.h b/src/core/libraries/ime/ime_common.h index 6d4afd81..96f073dc 100644 --- a/src/core/libraries/ime/ime_common.h +++ b/src/core/libraries/ime/ime_common.h @@ -1,183 +1,183 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "common/types.h" -#include "core/libraries/rtc/rtc.h" - -enum class OrbisImeType : u32 { - Default = 0, - BasicLatin = 1, - Url = 2, - Mail = 3, - Number = 4, -}; - -enum class OrbisImeHorizontalAlignment : u32 { - Left = 0, - Center = 1, - Right = 2, -}; - -enum class OrbisImeVerticalAlignment : u32 { - Top = 0, - Center = 1, - Bottom = 2, -}; - -enum class OrbisImeEnterLabel : u32 { - Default = 0, - Send = 1, - Search = 2, - Go = 3, -}; - -enum class OrbisImeInputMethod : u32 { - Default = 0, -}; - -enum class OrbisImeEventId : u32 { - Open = 0, - UpdateText = 1, - UpdateCaret = 2, - PressClose = 4, - PressEnter = 5, - Abort = 6, - CandidateListStart = 7, - CandidateListEnd = 8, - CandidateWord = 9, - CandidateIndex = 10, - CandidateDone = 11, - CandidateCancel = 12, - ChangeDevice = 14, - ChangeInputMethodState = 18, - - KeyboardOpen = 256, - KeyboardKeycodeDoen = 257, - KeyboardKeycodeUp = 258, - KeyboardKeycodeRepeat = 259, - KeyboardConnection = 260, - KeyboardDisconnection = 261, - KeyboardAbort = 262, -}; - -enum class OrbisImeKeyboardType : u32 { - NONE = 0, - DANISH = 1, - GERMAN = 2, - GERMAN_SW = 3, - ENGLISH_US = 4, - ENGLISH_GB = 5, - SPANISH = 6, - SPANISH_LA = 7, - FINNISH = 8, - FRENCH = 9, - FRENCH_BR = 10, - FRENCH_CA = 11, - FRENCH_SW = 12, - ITALIAN = 13, - DUTCH = 14, - NORWEGIAN = 15, - POLISH = 16, - PORTUGUESE_BR = 17, - PORTUGUESE_PT = 18, - RUSSIAN = 19, - SWEDISH = 20, - TURKISH = 21, - JAPANESE_ROMAN = 22, - JAPANESE_KANA = 23, - KOREAN = 24, - SM_CHINESE = 25, - TR_CHINESE_ZY = 26, - TR_CHINESE_PY_HK = 27, - TR_CHINESE_PY_TW = 28, - TR_CHINESE_CG = 29, - ARABIC_AR = 30, - THAI = 31, - CZECH = 32, - GREEK = 33, - INDONESIAN = 34, - VIETNAMESE = 35, - ROMANIAN = 36, - HUNGARIAN = 37, -}; - -enum class OrbisImeDeviceType : u32 { - None = 0, - Controller = 1, - ExtKeyboard = 2, - RemoteOsk = 3, -}; - -struct OrbisImeRect { - f32 x; - f32 y; - u32 width; - u32 height; -}; - -struct OrbisImeTextAreaProperty { - u32 mode; // OrbisImeTextAreaMode - u32 index; - s32 length; -}; - -struct OrbisImeEditText { - char16_t* str; - u32 caret_index; - u32 area_num; - OrbisImeTextAreaProperty text_area[4]; -}; - -struct OrbisImeKeycode { - u16 keycode; - char16_t character; - u32 status; - OrbisImeKeyboardType type; - s32 user_id; - u32 resource_id; - Libraries::Rtc::OrbisRtcTick timestamp; -}; - -struct OrbisImeKeyboardResourceIdArray { - s32 userId; - u32 resourceId[5]; -}; - -enum class OrbisImeCaretMovementDirection : u32 { - Still = 0, - Left = 1, - Right = 2, - Up = 3, - Down = 4, - Home = 5, - End = 6, - PageUp = 7, - PageDown = 8, - Top = 9, - Bottom = 10, -}; - -union OrbisImeEventParam { - OrbisImeRect rect; - OrbisImeEditText text; - OrbisImeCaretMovementDirection caret_move; - OrbisImeKeycode keycode; - OrbisImeKeyboardResourceIdArray resource_id_array; - char16_t* candidate_word; - s32 candidate_index; - OrbisImeDeviceType device_type; - u32 input_method_state; - s8 reserved[64]; -}; - -struct OrbisImeEvent { - OrbisImeEventId id; - OrbisImeEventParam param; -}; - -using OrbisImeTextFilter = PS4_SYSV_ABI int (*)(char16_t* outText, u32* outTextLength, - const char16_t* srcText, u32 srcTextLength); - -using OrbisImeEventHandler = PS4_SYSV_ABI void (*)(void* arg, const OrbisImeEvent* e); +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" +#include "core/libraries/rtc/rtc.h" + +enum class OrbisImeType : u32 { + Default = 0, + BasicLatin = 1, + Url = 2, + Mail = 3, + Number = 4, +}; + +enum class OrbisImeHorizontalAlignment : u32 { + Left = 0, + Center = 1, + Right = 2, +}; + +enum class OrbisImeVerticalAlignment : u32 { + Top = 0, + Center = 1, + Bottom = 2, +}; + +enum class OrbisImeEnterLabel : u32 { + Default = 0, + Send = 1, + Search = 2, + Go = 3, +}; + +enum class OrbisImeInputMethod : u32 { + Default = 0, +}; + +enum class OrbisImeEventId : u32 { + Open = 0, + UpdateText = 1, + UpdateCaret = 2, + PressClose = 4, + PressEnter = 5, + Abort = 6, + CandidateListStart = 7, + CandidateListEnd = 8, + CandidateWord = 9, + CandidateIndex = 10, + CandidateDone = 11, + CandidateCancel = 12, + ChangeDevice = 14, + ChangeInputMethodState = 18, + + KeyboardOpen = 256, + KeyboardKeycodeDoen = 257, + KeyboardKeycodeUp = 258, + KeyboardKeycodeRepeat = 259, + KeyboardConnection = 260, + KeyboardDisconnection = 261, + KeyboardAbort = 262, +}; + +enum class OrbisImeKeyboardType : u32 { + NONE = 0, + DANISH = 1, + GERMAN = 2, + GERMAN_SW = 3, + ENGLISH_US = 4, + ENGLISH_GB = 5, + SPANISH = 6, + SPANISH_LA = 7, + FINNISH = 8, + FRENCH = 9, + FRENCH_BR = 10, + FRENCH_CA = 11, + FRENCH_SW = 12, + ITALIAN = 13, + DUTCH = 14, + NORWEGIAN = 15, + POLISH = 16, + PORTUGUESE_BR = 17, + PORTUGUESE_PT = 18, + RUSSIAN = 19, + SWEDISH = 20, + TURKISH = 21, + JAPANESE_ROMAN = 22, + JAPANESE_KANA = 23, + KOREAN = 24, + SM_CHINESE = 25, + TR_CHINESE_ZY = 26, + TR_CHINESE_PY_HK = 27, + TR_CHINESE_PY_TW = 28, + TR_CHINESE_CG = 29, + ARABIC_AR = 30, + THAI = 31, + CZECH = 32, + GREEK = 33, + INDONESIAN = 34, + VIETNAMESE = 35, + ROMANIAN = 36, + HUNGARIAN = 37, +}; + +enum class OrbisImeDeviceType : u32 { + None = 0, + Controller = 1, + ExtKeyboard = 2, + RemoteOsk = 3, +}; + +struct OrbisImeRect { + f32 x; + f32 y; + u32 width; + u32 height; +}; + +struct OrbisImeTextAreaProperty { + u32 mode; // OrbisImeTextAreaMode + u32 index; + s32 length; +}; + +struct OrbisImeEditText { + char16_t* str; + u32 caret_index; + u32 area_num; + OrbisImeTextAreaProperty text_area[4]; +}; + +struct OrbisImeKeycode { + u16 keycode; + char16_t character; + u32 status; + OrbisImeKeyboardType type; + s32 user_id; + u32 resource_id; + Libraries::Rtc::OrbisRtcTick timestamp; +}; + +struct OrbisImeKeyboardResourceIdArray { + s32 userId; + u32 resourceId[5]; +}; + +enum class OrbisImeCaretMovementDirection : u32 { + Still = 0, + Left = 1, + Right = 2, + Up = 3, + Down = 4, + Home = 5, + End = 6, + PageUp = 7, + PageDown = 8, + Top = 9, + Bottom = 10, +}; + +union OrbisImeEventParam { + OrbisImeRect rect; + OrbisImeEditText text; + OrbisImeCaretMovementDirection caret_move; + OrbisImeKeycode keycode; + OrbisImeKeyboardResourceIdArray resource_id_array; + char16_t* candidate_word; + s32 candidate_index; + OrbisImeDeviceType device_type; + u32 input_method_state; + s8 reserved[64]; +}; + +struct OrbisImeEvent { + OrbisImeEventId id; + OrbisImeEventParam param; +}; + +using OrbisImeTextFilter = PS4_SYSV_ABI int (*)(char16_t* outText, u32* outTextLength, + const char16_t* srcText, u32 srcTextLength); + +using OrbisImeEventHandler = PS4_SYSV_ABI void (*)(void* arg, const OrbisImeEvent* e); diff --git a/src/core/libraries/ime/ime_ui.cpp b/src/core/libraries/ime/ime_ui.cpp index 8eaa4817..37f25e20 100644 --- a/src/core/libraries/ime/ime_ui.cpp +++ b/src/core/libraries/ime/ime_ui.cpp @@ -1,253 +1,253 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "ime_ui.h" -#include "imgui/imgui_std.h" - -namespace Libraries::Ime { - -using namespace ImGui; - -static constexpr ImVec2 BUTTON_SIZE{100.0f, 30.0f}; - -ImeState::ImeState(const OrbisImeParam* param) { - if (!param) { - return; - } - - work_buffer = param->work; - text_buffer = param->inputTextBuffer; - - std::size_t text_len = std::char_traits::length(text_buffer); - if (!ConvertOrbisToUTF8(text_buffer, text_len, current_text.begin(), - ORBIS_IME_MAX_TEXT_LENGTH * 4)) { - LOG_ERROR(Lib_ImeDialog, "Failed to convert text to utf8 encoding"); - } -} - -ImeState::ImeState(ImeState&& other) noexcept - : work_buffer(other.work_buffer), text_buffer(other.text_buffer), - current_text(std::move(other.current_text)), event_queue(std::move(other.event_queue)) { - other.text_buffer = nullptr; -} - -ImeState& ImeState::operator=(ImeState&& other) noexcept { - if (this != &other) { - work_buffer = other.work_buffer; - text_buffer = other.text_buffer; - current_text = std::move(other.current_text); - event_queue = std::move(other.event_queue); - - other.text_buffer = nullptr; - } - return *this; -} - -void ImeState::SendEvent(OrbisImeEvent* event) { - std::unique_lock lock{queue_mutex}; - event_queue.push(*event); -} - -void ImeState::SendEnterEvent() { - OrbisImeEvent enterEvent{}; - enterEvent.id = OrbisImeEventId::PressEnter; - SendEvent(&enterEvent); -} - -void ImeState::SendCloseEvent() { - OrbisImeEvent closeEvent{}; - closeEvent.id = OrbisImeEventId::PressClose; - closeEvent.param.text.str = reinterpret_cast(work_buffer); - SendEvent(&closeEvent); -} - -void ImeState::SetText(const char16_t* text, u32 length) {} - -void ImeState::SetCaret(u32 position) {} - -bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, - char* utf8_text, std::size_t utf8_text_len) { - std::fill(utf8_text, utf8_text + utf8_text_len, '\0'); - const ImWchar* orbis_text_ptr = reinterpret_cast(orbis_text); - ImTextStrToUtf8(utf8_text, utf8_text_len, orbis_text_ptr, orbis_text_ptr + orbis_text_len); - - return true; -} - -bool ImeState::ConvertUTF8ToOrbis(const char* utf8_text, std::size_t utf8_text_len, - char16_t* orbis_text, std::size_t orbis_text_len) { - std::fill(orbis_text, orbis_text + orbis_text_len, u'\0'); - ImTextStrFromUtf8(reinterpret_cast(orbis_text), orbis_text_len, utf8_text, nullptr); - - return true; -} - -ImeUi::ImeUi(ImeState* state, const OrbisImeParam* param) : state(state), ime_param(param) { - if (param) { - AddLayer(this); - } -} - -ImeUi::~ImeUi() { - std::scoped_lock lock(draw_mutex); - Free(); -} - -ImeUi& ImeUi::operator=(ImeUi&& other) { - std::scoped_lock lock(draw_mutex, other.draw_mutex); - Free(); - - state = other.state; - ime_param = other.ime_param; - first_render = other.first_render; - other.state = nullptr; - other.ime_param = nullptr; - - AddLayer(this); - return *this; -} - -void ImeUi::Draw() { - std::unique_lock lock{draw_mutex}; - - if (!state) { - return; - } - - const auto& ctx = *GetCurrentContext(); - const auto& io = ctx.IO; - - // TODO: Figure out how to properly translate the positions - - // for example, if a game wants to center the IME panel, - // we have to translate the panel position in a way that it - // still becomes centered, as the game normally calculates - // the position assuming a it's running on a 1920x1080 screen, - // whereas we are running on a 1280x720 window size (by default). - // - // e.g. Panel position calculation from a game: - // param.posx = (1920 / 2) - (panelWidth / 2); - // param.posy = (1080 / 2) - (panelHeight / 2); - const auto size = GetIO().DisplaySize; - f32 pos_x = (ime_param->posx / 1920.0f * (float)size.x); - f32 pos_y = (ime_param->posy / 1080.0f * (float)size.y); - - ImVec2 window_pos = {pos_x, pos_y}; - ImVec2 window_size = {500.0f, 100.0f}; - - // SetNextWindowPos(window_pos); - SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, io.DisplaySize.y * 0.5f), - ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); - SetNextWindowSize(window_size); - SetNextWindowCollapsed(false); - - if (first_render || !io.NavActive) { - SetNextWindowFocus(); - } - - if (Begin("IME##Ime", nullptr, - ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | - ImGuiWindowFlags_NoSavedSettings)) { - DrawPrettyBackground(); - - DrawInputText(); - SetCursorPosY(GetCursorPosY() + 10.0f); - - const char* button_text; - button_text = "Done##ImeDone"; - - float button_spacing = 10.0f; - float total_button_width = BUTTON_SIZE.x * 2 + button_spacing; - float button_start_pos = (window_size.x - total_button_width) / 2.0f; - - SetCursorPosX(button_start_pos); - - if (Button(button_text, BUTTON_SIZE) || (IsKeyPressed(ImGuiKey_Enter))) { - state->SendEnterEvent(); - } - - SameLine(0.0f, button_spacing); - - if (Button("Close##ImeClose", BUTTON_SIZE)) { - state->SendCloseEvent(); - } - } - End(); - - first_render = false; -} - -void ImeUi::DrawInputText() { - ImVec2 input_size = {GetWindowWidth() - 40.0f, 0.0f}; - SetCursorPosX(20.0f); - if (first_render) { - SetKeyboardFocusHere(); - } - if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->maxTextLength, - input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) { - } -} - -int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) { - ImeUi* ui = static_cast(data->UserData); - ASSERT(ui); - - static std::string lastText; - std::string currentText(data->Buf, data->BufTextLen); - if (currentText != lastText) { - OrbisImeEditText eventParam{}; - eventParam.str = reinterpret_cast(ui->ime_param->work); - eventParam.caret_index = data->CursorPos; - eventParam.area_num = 1; - - eventParam.text_area[0].mode = 1; // Edit mode - eventParam.text_area[0].index = data->CursorPos; - eventParam.text_area[0].length = data->BufTextLen; - - if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str, - ui->ime_param->maxTextLength)) { - LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8"); - return 0; - } - - if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, - ui->ime_param->inputTextBuffer, - ui->ime_param->maxTextLength)) { - LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8"); - return 0; - } - - OrbisImeEvent event{}; - event.id = OrbisImeEventId::UpdateText; - event.param.text = eventParam; - - lastText = currentText; - ui->state->SendEvent(&event); - } - - static int lastCaretPos = -1; - if (lastCaretPos == -1) { - lastCaretPos = data->CursorPos; - } else if (data->CursorPos != lastCaretPos) { - OrbisImeCaretMovementDirection caretDirection = OrbisImeCaretMovementDirection::Still; - if (data->CursorPos < lastCaretPos) { - caretDirection = OrbisImeCaretMovementDirection::Left; - } else if (data->CursorPos > lastCaretPos) { - caretDirection = OrbisImeCaretMovementDirection::Right; - } - - OrbisImeEvent event{}; - event.id = OrbisImeEventId::UpdateCaret; - event.param.caret_move = caretDirection; - - lastCaretPos = data->CursorPos; - ui->state->SendEvent(&event); - } - - return 0; -} - -void ImeUi::Free() { - RemoveLayer(this); -} - -}; // namespace Libraries::Ime +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "ime_ui.h" +#include "imgui/imgui_std.h" + +namespace Libraries::Ime { + +using namespace ImGui; + +static constexpr ImVec2 BUTTON_SIZE{100.0f, 30.0f}; + +ImeState::ImeState(const OrbisImeParam* param) { + if (!param) { + return; + } + + work_buffer = param->work; + text_buffer = param->inputTextBuffer; + + std::size_t text_len = std::char_traits::length(text_buffer); + if (!ConvertOrbisToUTF8(text_buffer, text_len, current_text.begin(), + ORBIS_IME_MAX_TEXT_LENGTH * 4)) { + LOG_ERROR(Lib_ImeDialog, "Failed to convert text to utf8 encoding"); + } +} + +ImeState::ImeState(ImeState&& other) noexcept + : work_buffer(other.work_buffer), text_buffer(other.text_buffer), + current_text(std::move(other.current_text)), event_queue(std::move(other.event_queue)) { + other.text_buffer = nullptr; +} + +ImeState& ImeState::operator=(ImeState&& other) noexcept { + if (this != &other) { + work_buffer = other.work_buffer; + text_buffer = other.text_buffer; + current_text = std::move(other.current_text); + event_queue = std::move(other.event_queue); + + other.text_buffer = nullptr; + } + return *this; +} + +void ImeState::SendEvent(OrbisImeEvent* event) { + std::unique_lock lock{queue_mutex}; + event_queue.push(*event); +} + +void ImeState::SendEnterEvent() { + OrbisImeEvent enterEvent{}; + enterEvent.id = OrbisImeEventId::PressEnter; + SendEvent(&enterEvent); +} + +void ImeState::SendCloseEvent() { + OrbisImeEvent closeEvent{}; + closeEvent.id = OrbisImeEventId::PressClose; + closeEvent.param.text.str = reinterpret_cast(work_buffer); + SendEvent(&closeEvent); +} + +void ImeState::SetText(const char16_t* text, u32 length) {} + +void ImeState::SetCaret(u32 position) {} + +bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, + char* utf8_text, std::size_t utf8_text_len) { + std::fill(utf8_text, utf8_text + utf8_text_len, '\0'); + const ImWchar* orbis_text_ptr = reinterpret_cast(orbis_text); + ImTextStrToUtf8(utf8_text, utf8_text_len, orbis_text_ptr, orbis_text_ptr + orbis_text_len); + + return true; +} + +bool ImeState::ConvertUTF8ToOrbis(const char* utf8_text, std::size_t utf8_text_len, + char16_t* orbis_text, std::size_t orbis_text_len) { + std::fill(orbis_text, orbis_text + orbis_text_len, u'\0'); + ImTextStrFromUtf8(reinterpret_cast(orbis_text), orbis_text_len, utf8_text, nullptr); + + return true; +} + +ImeUi::ImeUi(ImeState* state, const OrbisImeParam* param) : state(state), ime_param(param) { + if (param) { + AddLayer(this); + } +} + +ImeUi::~ImeUi() { + std::scoped_lock lock(draw_mutex); + Free(); +} + +ImeUi& ImeUi::operator=(ImeUi&& other) { + std::scoped_lock lock(draw_mutex, other.draw_mutex); + Free(); + + state = other.state; + ime_param = other.ime_param; + first_render = other.first_render; + other.state = nullptr; + other.ime_param = nullptr; + + AddLayer(this); + return *this; +} + +void ImeUi::Draw() { + std::unique_lock lock{draw_mutex}; + + if (!state) { + return; + } + + const auto& ctx = *GetCurrentContext(); + const auto& io = ctx.IO; + + // TODO: Figure out how to properly translate the positions - + // for example, if a game wants to center the IME panel, + // we have to translate the panel position in a way that it + // still becomes centered, as the game normally calculates + // the position assuming a it's running on a 1920x1080 screen, + // whereas we are running on a 1280x720 window size (by default). + // + // e.g. Panel position calculation from a game: + // param.posx = (1920 / 2) - (panelWidth / 2); + // param.posy = (1080 / 2) - (panelHeight / 2); + const auto size = GetIO().DisplaySize; + f32 pos_x = (ime_param->posx / 1920.0f * (float)size.x); + f32 pos_y = (ime_param->posy / 1080.0f * (float)size.y); + + ImVec2 window_pos = {pos_x, pos_y}; + ImVec2 window_size = {500.0f, 100.0f}; + + // SetNextWindowPos(window_pos); + SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, io.DisplaySize.y * 0.5f), + ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); + SetNextWindowSize(window_size); + SetNextWindowCollapsed(false); + + if (first_render || !io.NavActive) { + SetNextWindowFocus(); + } + + if (Begin("IME##Ime", nullptr, + ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | + ImGuiWindowFlags_NoSavedSettings)) { + DrawPrettyBackground(); + + DrawInputText(); + SetCursorPosY(GetCursorPosY() + 10.0f); + + const char* button_text; + button_text = "Done##ImeDone"; + + float button_spacing = 10.0f; + float total_button_width = BUTTON_SIZE.x * 2 + button_spacing; + float button_start_pos = (window_size.x - total_button_width) / 2.0f; + + SetCursorPosX(button_start_pos); + + if (Button(button_text, BUTTON_SIZE) || (IsKeyPressed(ImGuiKey_Enter))) { + state->SendEnterEvent(); + } + + SameLine(0.0f, button_spacing); + + if (Button("Close##ImeClose", BUTTON_SIZE)) { + state->SendCloseEvent(); + } + } + End(); + + first_render = false; +} + +void ImeUi::DrawInputText() { + ImVec2 input_size = {GetWindowWidth() - 40.0f, 0.0f}; + SetCursorPosX(20.0f); + if (first_render) { + SetKeyboardFocusHere(); + } + if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->maxTextLength, + input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) { + } +} + +int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) { + ImeUi* ui = static_cast(data->UserData); + ASSERT(ui); + + static std::string lastText; + std::string currentText(data->Buf, data->BufTextLen); + if (currentText != lastText) { + OrbisImeEditText eventParam{}; + eventParam.str = reinterpret_cast(ui->ime_param->work); + eventParam.caret_index = data->CursorPos; + eventParam.area_num = 1; + + eventParam.text_area[0].mode = 1; // Edit mode + eventParam.text_area[0].index = data->CursorPos; + eventParam.text_area[0].length = data->BufTextLen; + + if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str, + ui->ime_param->maxTextLength)) { + LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8"); + return 0; + } + + if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, + ui->ime_param->inputTextBuffer, + ui->ime_param->maxTextLength)) { + LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8"); + return 0; + } + + OrbisImeEvent event{}; + event.id = OrbisImeEventId::UpdateText; + event.param.text = eventParam; + + lastText = currentText; + ui->state->SendEvent(&event); + } + + static int lastCaretPos = -1; + if (lastCaretPos == -1) { + lastCaretPos = data->CursorPos; + } else if (data->CursorPos != lastCaretPos) { + OrbisImeCaretMovementDirection caretDirection = OrbisImeCaretMovementDirection::Still; + if (data->CursorPos < lastCaretPos) { + caretDirection = OrbisImeCaretMovementDirection::Left; + } else if (data->CursorPos > lastCaretPos) { + caretDirection = OrbisImeCaretMovementDirection::Right; + } + + OrbisImeEvent event{}; + event.id = OrbisImeEventId::UpdateCaret; + event.param.caret_move = caretDirection; + + lastCaretPos = data->CursorPos; + ui->state->SendEvent(&event); + } + + return 0; +} + +void ImeUi::Free() { + RemoveLayer(this); +} + +}; // namespace Libraries::Ime diff --git a/src/core/libraries/ime/ime_ui.h b/src/core/libraries/ime/ime_ui.h index a2a806bb..3eea22b8 100644 --- a/src/core/libraries/ime/ime_ui.h +++ b/src/core/libraries/ime/ime_ui.h @@ -1,76 +1,76 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include -#include "imgui/imgui_layer.h" - -#include "common/cstring.h" -#include "common/types.h" - -#include "ime.h" - -namespace Libraries::Ime { - -class ImeHandler; -class ImeUi; - -class ImeState { - friend class ImeHandler; - friend class ImeUi; - - void* work_buffer{}; - char16_t* text_buffer{}; - - // A character can hold up to 4 bytes in UTF-8 - Common::CString current_text; - - std::queue event_queue; - std::mutex queue_mutex; - -public: - ImeState(const OrbisImeParam* param = nullptr); - ImeState(ImeState&& other) noexcept; - ImeState& operator=(ImeState&& other) noexcept; - - void SendEvent(OrbisImeEvent* event); - void SendEnterEvent(); - void SendCloseEvent(); - - void SetText(const char16_t* text, u32 length); - void SetCaret(u32 position); - -private: - bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text, - std::size_t native_text_len); - bool ConvertUTF8ToOrbis(const char* native_text, std::size_t utf8_text_len, - char16_t* orbis_text, std::size_t orbis_text_len); -}; - -class ImeUi : public ImGui::Layer { - ImeState* state{}; - const OrbisImeParam* ime_param{}; - - bool first_render = true; - std::mutex draw_mutex; - -public: - explicit ImeUi(ImeState* state = nullptr, const OrbisImeParam* param = nullptr); - ~ImeUi() override; - ImeUi(const ImeUi& other) = delete; - ImeUi& operator=(ImeUi&& other); - - void Draw() override; - -private: - void Free(); - - void DrawInputText(); - - static int InputTextCallback(ImGuiInputTextCallbackData* data); -}; - +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "imgui/imgui_layer.h" + +#include "common/cstring.h" +#include "common/types.h" + +#include "ime.h" + +namespace Libraries::Ime { + +class ImeHandler; +class ImeUi; + +class ImeState { + friend class ImeHandler; + friend class ImeUi; + + void* work_buffer{}; + char16_t* text_buffer{}; + + // A character can hold up to 4 bytes in UTF-8 + Common::CString current_text; + + std::queue event_queue; + std::mutex queue_mutex; + +public: + ImeState(const OrbisImeParam* param = nullptr); + ImeState(ImeState&& other) noexcept; + ImeState& operator=(ImeState&& other) noexcept; + + void SendEvent(OrbisImeEvent* event); + void SendEnterEvent(); + void SendCloseEvent(); + + void SetText(const char16_t* text, u32 length); + void SetCaret(u32 position); + +private: + bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text, + std::size_t native_text_len); + bool ConvertUTF8ToOrbis(const char* native_text, std::size_t utf8_text_len, + char16_t* orbis_text, std::size_t orbis_text_len); +}; + +class ImeUi : public ImGui::Layer { + ImeState* state{}; + const OrbisImeParam* ime_param{}; + + bool first_render = true; + std::mutex draw_mutex; + +public: + explicit ImeUi(ImeState* state = nullptr, const OrbisImeParam* param = nullptr); + ~ImeUi() override; + ImeUi(const ImeUi& other) = delete; + ImeUi& operator=(ImeUi&& other); + + void Draw() override; + +private: + void Free(); + + void DrawInputText(); + + static int InputTextCallback(ImGuiInputTextCallbackData* data); +}; + }; // namespace Libraries::Ime \ No newline at end of file diff --git a/src/core/libraries/kernel/sync/mutex.cpp b/src/core/libraries/kernel/sync/mutex.cpp index c5e3eba1..b9bb3303 100644 --- a/src/core/libraries/kernel/sync/mutex.cpp +++ b/src/core/libraries/kernel/sync/mutex.cpp @@ -1,52 +1,52 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "mutex.h" - -#include "common/assert.h" - -namespace Libraries::Kernel { - -TimedMutex::TimedMutex() { -#ifdef _WIN64 - mtx = CreateMutex(nullptr, false, nullptr); - ASSERT(mtx); -#endif -} - -TimedMutex::~TimedMutex() { -#ifdef _WIN64 - CloseHandle(mtx); -#endif -} - -void TimedMutex::lock() { -#ifdef _WIN64 - for (;;) { - u64 res = WaitForSingleObjectEx(mtx, INFINITE, true); - if (res == WAIT_OBJECT_0) { - return; - } - } -#else - mtx.lock(); -#endif -} - -bool TimedMutex::try_lock() { -#ifdef _WIN64 - return WaitForSingleObjectEx(mtx, 0, true) == WAIT_OBJECT_0; -#else - return mtx.try_lock(); -#endif -} - -void TimedMutex::unlock() { -#ifdef _WIN64 - ReleaseMutex(mtx); -#else - mtx.unlock(); -#endif -} - +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "mutex.h" + +#include "common/assert.h" + +namespace Libraries::Kernel { + +TimedMutex::TimedMutex() { +#ifdef _WIN64 + mtx = CreateMutex(nullptr, false, nullptr); + ASSERT(mtx); +#endif +} + +TimedMutex::~TimedMutex() { +#ifdef _WIN64 + CloseHandle(mtx); +#endif +} + +void TimedMutex::lock() { +#ifdef _WIN64 + for (;;) { + u64 res = WaitForSingleObjectEx(mtx, INFINITE, true); + if (res == WAIT_OBJECT_0) { + return; + } + } +#else + mtx.lock(); +#endif +} + +bool TimedMutex::try_lock() { +#ifdef _WIN64 + return WaitForSingleObjectEx(mtx, 0, true) == WAIT_OBJECT_0; +#else + return mtx.try_lock(); +#endif +} + +void TimedMutex::unlock() { +#ifdef _WIN64 + ReleaseMutex(mtx); +#else + mtx.unlock(); +#endif +} + } // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/kernel/sync/mutex.h b/src/core/libraries/kernel/sync/mutex.h index f14a920b..d26c984e 100644 --- a/src/core/libraries/kernel/sync/mutex.h +++ b/src/core/libraries/kernel/sync/mutex.h @@ -1,80 +1,80 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include - -#include "common/types.h" - -#ifdef _WIN64 -#include -#else -#include -#endif - -namespace Libraries::Kernel { - -class TimedMutex { -public: - TimedMutex(); - ~TimedMutex(); - - void lock(); - bool try_lock(); - - void unlock(); - - template - bool try_lock_for(const std::chrono::duration& rel_time) { -#ifdef _WIN64 - constexpr auto zero = std::chrono::duration::zero(); - const auto now = std::chrono::steady_clock::now(); - - std::chrono::steady_clock::time_point abs_time = now; - if (rel_time > zero) { - constexpr auto max = (std::chrono::steady_clock::time_point::max)(); - if (abs_time < max - rel_time) { - abs_time += rel_time; - } else { - abs_time = max; - } - } - - return try_lock_until(abs_time); -#else - return mtx.try_lock_for(rel_time); -#endif - } - - template - bool try_lock_until(const std::chrono::time_point& abs_time) { -#ifdef _WIN64 - for (;;) { - const auto now = Clock::now(); - if (abs_time <= now) { - return false; - } - - const auto rel_ms = std::chrono::ceil(abs_time - now); - u64 res = WaitForSingleObjectEx(mtx, static_cast(rel_ms.count()), true); - if (res == WAIT_OBJECT_0) { - return true; - } else if (res == WAIT_TIMEOUT) { - return false; - } - } -#else - return mtx.try_lock_until(abs_time); -#endif - } - -private: -#ifdef _WIN64 - HANDLE mtx; -#else - std::timed_mutex mtx; -#endif -}; - +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/types.h" + +#ifdef _WIN64 +#include +#else +#include +#endif + +namespace Libraries::Kernel { + +class TimedMutex { +public: + TimedMutex(); + ~TimedMutex(); + + void lock(); + bool try_lock(); + + void unlock(); + + template + bool try_lock_for(const std::chrono::duration& rel_time) { +#ifdef _WIN64 + constexpr auto zero = std::chrono::duration::zero(); + const auto now = std::chrono::steady_clock::now(); + + std::chrono::steady_clock::time_point abs_time = now; + if (rel_time > zero) { + constexpr auto max = (std::chrono::steady_clock::time_point::max)(); + if (abs_time < max - rel_time) { + abs_time += rel_time; + } else { + abs_time = max; + } + } + + return try_lock_until(abs_time); +#else + return mtx.try_lock_for(rel_time); +#endif + } + + template + bool try_lock_until(const std::chrono::time_point& abs_time) { +#ifdef _WIN64 + for (;;) { + const auto now = Clock::now(); + if (abs_time <= now) { + return false; + } + + const auto rel_ms = std::chrono::ceil(abs_time - now); + u64 res = WaitForSingleObjectEx(mtx, static_cast(rel_ms.count()), true); + if (res == WAIT_OBJECT_0) { + return true; + } else if (res == WAIT_TIMEOUT) { + return false; + } + } +#else + return mtx.try_lock_until(abs_time); +#endif + } + +private: +#ifdef _WIN64 + HANDLE mtx; +#else + std::timed_mutex mtx; +#endif +}; + } // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/kernel/sync/semaphore.h b/src/core/libraries/kernel/sync/semaphore.h index 48a5dc0d..314e2cfa 100644 --- a/src/core/libraries/kernel/sync/semaphore.h +++ b/src/core/libraries/kernel/sync/semaphore.h @@ -1,167 +1,167 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include - -#include "common/assert.h" -#include "common/types.h" - -#ifdef _WIN64 -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - -namespace Libraries::Kernel { - -template -class Semaphore { -public: - Semaphore(s32 initialCount) -#if !defined(_WIN64) && !defined(__APPLE__) - : sem{initialCount} -#endif - { -#ifdef _WIN64 - sem = CreateSemaphore(nullptr, initialCount, max, nullptr); - ASSERT(sem); -#elif defined(__APPLE__) - sem = dispatch_semaphore_create(initialCount); - ASSERT(sem); -#endif - } - - ~Semaphore() { -#ifdef _WIN64 - CloseHandle(sem); -#elif defined(__APPLE__) - dispatch_release(sem); -#endif - } - - void release() { -#ifdef _WIN64 - ReleaseSemaphore(sem, 1, nullptr); -#elif defined(__APPLE__) - dispatch_semaphore_signal(sem); -#else - sem.release(); -#endif - } - - void acquire() { -#ifdef _WIN64 - for (;;) { - u64 res = WaitForSingleObjectEx(sem, INFINITE, true); - if (res == WAIT_OBJECT_0) { - return; - } - } -#elif defined(__APPLE__) - for (;;) { - const auto res = dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); - if (res == 0) { - return; - } - } -#else - sem.acquire(); -#endif - } - - bool try_acquire() { -#ifdef _WIN64 - return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0; -#elif defined(__APPLE__) - return dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW) == 0; -#else - return sem.try_acquire(); -#endif - } - - template - bool try_acquire_for(const std::chrono::duration& rel_time) { -#ifdef _WIN64 - const auto start_time = std::chrono::high_resolution_clock::now(); - auto rel_time_ms = std::chrono::ceil(rel_time); - - while (rel_time_ms.count() > 0) { - u64 timeout_ms = static_cast(rel_time_ms.count()); - u64 res = WaitForSingleObjectEx(sem, timeout_ms, true); - if (res == WAIT_OBJECT_0) { - return true; - } else if (res == WAIT_IO_COMPLETION) { - auto elapsed_time = std::chrono::high_resolution_clock::now() - start_time; - rel_time_ms -= std::chrono::duration_cast(elapsed_time); - } else { - return false; - } - } - - return false; -#elif defined(__APPLE__) - const auto rel_time_ns = std::chrono::ceil(rel_time).count(); - const auto timeout = dispatch_time(DISPATCH_TIME_NOW, rel_time_ns); - return dispatch_semaphore_wait(sem, timeout) == 0; -#else - return sem.try_acquire_for(rel_time); -#endif - } - - template - bool try_acquire_until(const std::chrono::time_point& abs_time) { -#ifdef _WIN64 - const auto start_time = Clock::now(); - if (start_time >= abs_time) { - return false; - } - - auto rel_time = std::chrono::ceil(abs_time - start_time); - while (rel_time.count() > 0) { - u64 timeout_ms = static_cast(rel_time.count()); - u64 res = WaitForSingleObjectEx(sem, timeout_ms, true); - if (res == WAIT_OBJECT_0) { - return true; - } else if (res == WAIT_IO_COMPLETION) { - auto elapsed_time = Clock::now() - start_time; - rel_time -= std::chrono::duration_cast(elapsed_time); - } else { - return false; - } - } - - return false; -#elif defined(__APPLE__) - auto abs_s = std::chrono::time_point_cast(abs_time); - auto abs_ns = std::chrono::time_point_cast(abs_time) - - std::chrono::time_point_cast(abs_s); - const timespec abs_timespec = { - .tv_sec = abs_s.time_since_epoch().count(), - .tv_nsec = abs_ns.count(), - }; - const auto timeout = dispatch_walltime(&abs_timespec, 0); - return dispatch_semaphore_wait(sem, timeout) == 0; -#else - return sem.try_acquire_until(abs_time); -#endif - } - -private: -#ifdef _WIN64 - HANDLE sem; -#elif defined(__APPLE__) - dispatch_semaphore_t sem; -#else - std::counting_semaphore sem; -#endif -}; - -using BinarySemaphore = Semaphore<1>; -using CountingSemaphore = Semaphore<0x7FFFFFFF /*ORBIS_KERNEL_SEM_VALUE_MAX*/>; - +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "common/assert.h" +#include "common/types.h" + +#ifdef _WIN64 +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +namespace Libraries::Kernel { + +template +class Semaphore { +public: + Semaphore(s32 initialCount) +#if !defined(_WIN64) && !defined(__APPLE__) + : sem{initialCount} +#endif + { +#ifdef _WIN64 + sem = CreateSemaphore(nullptr, initialCount, max, nullptr); + ASSERT(sem); +#elif defined(__APPLE__) + sem = dispatch_semaphore_create(initialCount); + ASSERT(sem); +#endif + } + + ~Semaphore() { +#ifdef _WIN64 + CloseHandle(sem); +#elif defined(__APPLE__) + dispatch_release(sem); +#endif + } + + void release() { +#ifdef _WIN64 + ReleaseSemaphore(sem, 1, nullptr); +#elif defined(__APPLE__) + dispatch_semaphore_signal(sem); +#else + sem.release(); +#endif + } + + void acquire() { +#ifdef _WIN64 + for (;;) { + u64 res = WaitForSingleObjectEx(sem, INFINITE, true); + if (res == WAIT_OBJECT_0) { + return; + } + } +#elif defined(__APPLE__) + for (;;) { + const auto res = dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); + if (res == 0) { + return; + } + } +#else + sem.acquire(); +#endif + } + + bool try_acquire() { +#ifdef _WIN64 + return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0; +#elif defined(__APPLE__) + return dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW) == 0; +#else + return sem.try_acquire(); +#endif + } + + template + bool try_acquire_for(const std::chrono::duration& rel_time) { +#ifdef _WIN64 + const auto start_time = std::chrono::high_resolution_clock::now(); + auto rel_time_ms = std::chrono::ceil(rel_time); + + while (rel_time_ms.count() > 0) { + u64 timeout_ms = static_cast(rel_time_ms.count()); + u64 res = WaitForSingleObjectEx(sem, timeout_ms, true); + if (res == WAIT_OBJECT_0) { + return true; + } else if (res == WAIT_IO_COMPLETION) { + auto elapsed_time = std::chrono::high_resolution_clock::now() - start_time; + rel_time_ms -= std::chrono::duration_cast(elapsed_time); + } else { + return false; + } + } + + return false; +#elif defined(__APPLE__) + const auto rel_time_ns = std::chrono::ceil(rel_time).count(); + const auto timeout = dispatch_time(DISPATCH_TIME_NOW, rel_time_ns); + return dispatch_semaphore_wait(sem, timeout) == 0; +#else + return sem.try_acquire_for(rel_time); +#endif + } + + template + bool try_acquire_until(const std::chrono::time_point& abs_time) { +#ifdef _WIN64 + const auto start_time = Clock::now(); + if (start_time >= abs_time) { + return false; + } + + auto rel_time = std::chrono::ceil(abs_time - start_time); + while (rel_time.count() > 0) { + u64 timeout_ms = static_cast(rel_time.count()); + u64 res = WaitForSingleObjectEx(sem, timeout_ms, true); + if (res == WAIT_OBJECT_0) { + return true; + } else if (res == WAIT_IO_COMPLETION) { + auto elapsed_time = Clock::now() - start_time; + rel_time -= std::chrono::duration_cast(elapsed_time); + } else { + return false; + } + } + + return false; +#elif defined(__APPLE__) + auto abs_s = std::chrono::time_point_cast(abs_time); + auto abs_ns = std::chrono::time_point_cast(abs_time) - + std::chrono::time_point_cast(abs_s); + const timespec abs_timespec = { + .tv_sec = abs_s.time_since_epoch().count(), + .tv_nsec = abs_ns.count(), + }; + const auto timeout = dispatch_walltime(&abs_timespec, 0); + return dispatch_semaphore_wait(sem, timeout) == 0; +#else + return sem.try_acquire_until(abs_time); +#endif + } + +private: +#ifdef _WIN64 + HANDLE sem; +#elif defined(__APPLE__) + dispatch_semaphore_t sem; +#else + std::counting_semaphore sem; +#endif +}; + +using BinarySemaphore = Semaphore<1>; +using CountingSemaphore = Semaphore<0x7FFFFFFF /*ORBIS_KERNEL_SEM_VALUE_MAX*/>; + } // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/videodec/videodec2.cpp b/src/core/libraries/videodec/videodec2.cpp index 0bd68afb..07f7a274 100644 --- a/src/core/libraries/videodec/videodec2.cpp +++ b/src/core/libraries/videodec/videodec2.cpp @@ -1,199 +1,199 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/assert.h" -#include "common/logging/log.h" -#include "core/libraries/libs.h" -#include "core/libraries/videodec/videodec2.h" -#include "core/libraries/videodec/videodec2_impl.h" -#include "core/libraries/videodec/videodec_error.h" - -namespace Libraries::Vdec2 { - -static constexpr u64 kMinimumMemorySize = 32_MB; ///> Fake minimum memory size for querying - -s32 PS4_SYSV_ABI -sceVideodec2QueryComputeMemoryInfo(OrbisVideodec2ComputeMemoryInfo* computeMemInfo) { - LOG_INFO(Lib_Vdec2, "called"); - - if (!computeMemInfo) { - return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; - } - if (computeMemInfo->thisSize != sizeof(OrbisVideodec2ComputeMemoryInfo)) { - return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; - } - - computeMemInfo->cpuGpuMemory = nullptr; - computeMemInfo->cpuGpuMemorySize = kMinimumMemorySize; - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI -sceVideodec2AllocateComputeQueue(const OrbisVideodec2ComputeConfigInfo* computeCfgInfo, - const OrbisVideodec2ComputeMemoryInfo* computeMemInfo, - OrbisVideodec2ComputeQueue* computeQueue) { - LOG_INFO(Lib_Vdec2, "called"); - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue) { - LOG_INFO(Lib_Vdec2, "called"); - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI -sceVideodec2QueryDecoderMemoryInfo(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo, - OrbisVideodec2DecoderMemoryInfo* decoderMemInfo) { - LOG_INFO(Lib_Vdec2, "called"); - - if (!decoderCfgInfo || !decoderMemInfo) { - return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; - } - if (decoderCfgInfo->thisSize != sizeof(OrbisVideodec2DecoderConfigInfo) || - decoderMemInfo->thisSize != sizeof(OrbisVideodec2DecoderMemoryInfo)) { - return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; - } - - decoderMemInfo->cpuMemory = nullptr; - decoderMemInfo->gpuMemory = nullptr; - decoderMemInfo->cpuGpuMemory = nullptr; - - decoderMemInfo->cpuGpuMemorySize = kMinimumMemorySize; - decoderMemInfo->cpuMemorySize = kMinimumMemorySize; - decoderMemInfo->gpuMemorySize = kMinimumMemorySize; - - decoderMemInfo->maxFrameBufferSize = kMinimumMemorySize; - decoderMemInfo->frameBufferAlignment = 0x100; - - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo, - const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo, - OrbisVideodec2Decoder* decoder) { - LOG_INFO(Lib_Vdec2, "called"); - - if (!decoderCfgInfo || !decoderMemInfo || !decoder) { - return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; - } - if (decoderCfgInfo->thisSize != sizeof(OrbisVideodec2DecoderConfigInfo) || - decoderMemInfo->thisSize != sizeof(OrbisVideodec2DecoderMemoryInfo)) { - return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; - } - - *decoder = new VdecDecoder(*decoderCfgInfo, *decoderMemInfo); - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder) { - LOG_INFO(Lib_Vdec2, "called"); - - if (!decoder) { - return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE; - } - - delete decoder; - return ORBIS_OK; -} - -s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder, - const OrbisVideodec2InputData* inputData, - OrbisVideodec2FrameBuffer* frameBuffer, - OrbisVideodec2OutputInfo* outputInfo) { - LOG_TRACE(Lib_Vdec2, "called"); - - if (!decoder) { - return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE; - } - if (!inputData || !frameBuffer || !outputInfo) { - return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; - } - if (inputData->thisSize != sizeof(OrbisVideodec2InputData) || - frameBuffer->thisSize != sizeof(OrbisVideodec2FrameBuffer)) { - return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; - } - - return decoder->Decode(*inputData, *frameBuffer, *outputInfo); -} - -s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder, - OrbisVideodec2FrameBuffer* frameBuffer, - OrbisVideodec2OutputInfo* outputInfo) { - LOG_INFO(Lib_Vdec2, "called"); - - if (!decoder) { - return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE; - } - if (!frameBuffer || !outputInfo) { - return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; - } - if (frameBuffer->thisSize != sizeof(OrbisVideodec2FrameBuffer) || - outputInfo->thisSize != sizeof(OrbisVideodec2OutputInfo)) { - return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; - } - - return decoder->Flush(*frameBuffer, *outputInfo); -} - -s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder) { - LOG_INFO(Lib_Vdec2, "called"); - - if (!decoder) { - return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE; - } - - return decoder->Reset(); -} - -s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo, - void* p1stPictureInfoOut, void* p2ndPictureInfoOut) { - LOG_TRACE(Lib_Vdec2, "called"); - - if (!outputInfo) { - return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; - } - if (outputInfo->thisSize != sizeof(OrbisVideodec2OutputInfo)) { - return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; - } - if (outputInfo->pictureCount == 0 || gPictureInfos.empty()) { - return ORBIS_OK; - } - - if (p1stPictureInfoOut) { - OrbisVideodec2AvcPictureInfo* picInfo = - static_cast(p1stPictureInfoOut); - if (picInfo->thisSize != sizeof(OrbisVideodec2AvcPictureInfo)) { - return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; - } - *picInfo = gPictureInfos.back(); - } - - if (outputInfo->pictureCount > 1) { - UNREACHABLE(); - } - - return ORBIS_OK; -} - -void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym) { - LIB_FUNCTION("RnDibcGCPKw", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, - sceVideodec2QueryComputeMemoryInfo); - LIB_FUNCTION("eD+X2SmxUt4", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, - sceVideodec2AllocateComputeQueue); - LIB_FUNCTION("UvtA3FAiF4Y", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, - sceVideodec2ReleaseComputeQueue); - - LIB_FUNCTION("qqMCwlULR+E", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, - sceVideodec2QueryDecoderMemoryInfo); - LIB_FUNCTION("CNNRoRYd8XI", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, - sceVideodec2CreateDecoder); - LIB_FUNCTION("jwImxXRGSKA", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, - sceVideodec2DeleteDecoder); - LIB_FUNCTION("852F5+q6+iM", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Decode); - LIB_FUNCTION("l1hXwscLuCY", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Flush); - LIB_FUNCTION("wJXikG6QFN8", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Reset); - LIB_FUNCTION("NtXRa3dRzU0", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, - sceVideodec2GetPictureInfo); -} - -} // namespace Libraries::Vdec2 +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/libraries/libs.h" +#include "core/libraries/videodec/videodec2.h" +#include "core/libraries/videodec/videodec2_impl.h" +#include "core/libraries/videodec/videodec_error.h" + +namespace Libraries::Vdec2 { + +static constexpr u64 kMinimumMemorySize = 32_MB; ///> Fake minimum memory size for querying + +s32 PS4_SYSV_ABI +sceVideodec2QueryComputeMemoryInfo(OrbisVideodec2ComputeMemoryInfo* computeMemInfo) { + LOG_INFO(Lib_Vdec2, "called"); + + if (!computeMemInfo) { + return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; + } + if (computeMemInfo->thisSize != sizeof(OrbisVideodec2ComputeMemoryInfo)) { + return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; + } + + computeMemInfo->cpuGpuMemory = nullptr; + computeMemInfo->cpuGpuMemorySize = kMinimumMemorySize; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI +sceVideodec2AllocateComputeQueue(const OrbisVideodec2ComputeConfigInfo* computeCfgInfo, + const OrbisVideodec2ComputeMemoryInfo* computeMemInfo, + OrbisVideodec2ComputeQueue* computeQueue) { + LOG_INFO(Lib_Vdec2, "called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue) { + LOG_INFO(Lib_Vdec2, "called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI +sceVideodec2QueryDecoderMemoryInfo(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo, + OrbisVideodec2DecoderMemoryInfo* decoderMemInfo) { + LOG_INFO(Lib_Vdec2, "called"); + + if (!decoderCfgInfo || !decoderMemInfo) { + return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; + } + if (decoderCfgInfo->thisSize != sizeof(OrbisVideodec2DecoderConfigInfo) || + decoderMemInfo->thisSize != sizeof(OrbisVideodec2DecoderMemoryInfo)) { + return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; + } + + decoderMemInfo->cpuMemory = nullptr; + decoderMemInfo->gpuMemory = nullptr; + decoderMemInfo->cpuGpuMemory = nullptr; + + decoderMemInfo->cpuGpuMemorySize = kMinimumMemorySize; + decoderMemInfo->cpuMemorySize = kMinimumMemorySize; + decoderMemInfo->gpuMemorySize = kMinimumMemorySize; + + decoderMemInfo->maxFrameBufferSize = kMinimumMemorySize; + decoderMemInfo->frameBufferAlignment = 0x100; + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo, + const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo, + OrbisVideodec2Decoder* decoder) { + LOG_INFO(Lib_Vdec2, "called"); + + if (!decoderCfgInfo || !decoderMemInfo || !decoder) { + return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; + } + if (decoderCfgInfo->thisSize != sizeof(OrbisVideodec2DecoderConfigInfo) || + decoderMemInfo->thisSize != sizeof(OrbisVideodec2DecoderMemoryInfo)) { + return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; + } + + *decoder = new VdecDecoder(*decoderCfgInfo, *decoderMemInfo); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder) { + LOG_INFO(Lib_Vdec2, "called"); + + if (!decoder) { + return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE; + } + + delete decoder; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder, + const OrbisVideodec2InputData* inputData, + OrbisVideodec2FrameBuffer* frameBuffer, + OrbisVideodec2OutputInfo* outputInfo) { + LOG_TRACE(Lib_Vdec2, "called"); + + if (!decoder) { + return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE; + } + if (!inputData || !frameBuffer || !outputInfo) { + return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; + } + if (inputData->thisSize != sizeof(OrbisVideodec2InputData) || + frameBuffer->thisSize != sizeof(OrbisVideodec2FrameBuffer)) { + return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; + } + + return decoder->Decode(*inputData, *frameBuffer, *outputInfo); +} + +s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder, + OrbisVideodec2FrameBuffer* frameBuffer, + OrbisVideodec2OutputInfo* outputInfo) { + LOG_INFO(Lib_Vdec2, "called"); + + if (!decoder) { + return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE; + } + if (!frameBuffer || !outputInfo) { + return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; + } + if (frameBuffer->thisSize != sizeof(OrbisVideodec2FrameBuffer) || + outputInfo->thisSize != sizeof(OrbisVideodec2OutputInfo)) { + return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; + } + + return decoder->Flush(*frameBuffer, *outputInfo); +} + +s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder) { + LOG_INFO(Lib_Vdec2, "called"); + + if (!decoder) { + return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE; + } + + return decoder->Reset(); +} + +s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo, + void* p1stPictureInfoOut, void* p2ndPictureInfoOut) { + LOG_TRACE(Lib_Vdec2, "called"); + + if (!outputInfo) { + return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER; + } + if (outputInfo->thisSize != sizeof(OrbisVideodec2OutputInfo)) { + return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; + } + if (outputInfo->pictureCount == 0 || gPictureInfos.empty()) { + return ORBIS_OK; + } + + if (p1stPictureInfoOut) { + OrbisVideodec2AvcPictureInfo* picInfo = + static_cast(p1stPictureInfoOut); + if (picInfo->thisSize != sizeof(OrbisVideodec2AvcPictureInfo)) { + return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE; + } + *picInfo = gPictureInfos.back(); + } + + if (outputInfo->pictureCount > 1) { + UNREACHABLE(); + } + + return ORBIS_OK; +} + +void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("RnDibcGCPKw", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, + sceVideodec2QueryComputeMemoryInfo); + LIB_FUNCTION("eD+X2SmxUt4", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, + sceVideodec2AllocateComputeQueue); + LIB_FUNCTION("UvtA3FAiF4Y", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, + sceVideodec2ReleaseComputeQueue); + + LIB_FUNCTION("qqMCwlULR+E", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, + sceVideodec2QueryDecoderMemoryInfo); + LIB_FUNCTION("CNNRoRYd8XI", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, + sceVideodec2CreateDecoder); + LIB_FUNCTION("jwImxXRGSKA", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, + sceVideodec2DeleteDecoder); + LIB_FUNCTION("852F5+q6+iM", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Decode); + LIB_FUNCTION("l1hXwscLuCY", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Flush); + LIB_FUNCTION("wJXikG6QFN8", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Reset); + LIB_FUNCTION("NtXRa3dRzU0", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, + sceVideodec2GetPictureInfo); +} + +} // namespace Libraries::Vdec2 diff --git a/src/core/libraries/videodec/videodec2.h b/src/core/libraries/videodec/videodec2.h index 4617e1c2..abc8f8ab 100644 --- a/src/core/libraries/videodec/videodec2.h +++ b/src/core/libraries/videodec/videodec2.h @@ -1,139 +1,139 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "common/types.h" - -#include "videodec2_avc.h" - -namespace Core::Loader { -class SymbolsResolver; -} -namespace Libraries::Vdec2 { - -class VdecDecoder; - -using OrbisVideodec2Decoder = VdecDecoder*; -using OrbisVideodec2ComputeQueue = void*; - -struct OrbisVideodec2DecoderConfigInfo { - u64 thisSize; - u32 resourceType; - u32 codecType; - u32 profile; - u32 maxLevel; - s32 maxFrameWidth; - s32 maxFrameHeight; - s32 maxDpbFrameCount; - u32 decodePipelineDepth; - OrbisVideodec2ComputeQueue computeQueue; - u64 cpuAffinityMask; - s32 cpuThreadPriority; - bool optimizeProgressiveVideo; - bool checkMemoryType; - u8 reserved0; - u8 reserved1; - void* extraConfigInfo; -}; -static_assert(sizeof(OrbisVideodec2DecoderConfigInfo) == 0x48); - -struct OrbisVideodec2DecoderMemoryInfo { - u64 thisSize; - u64 cpuMemorySize; - void* cpuMemory; - u64 gpuMemorySize; - void* gpuMemory; - u64 cpuGpuMemorySize; - void* cpuGpuMemory; - u64 maxFrameBufferSize; - u32 frameBufferAlignment; - u32 reserved0; -}; -static_assert(sizeof(OrbisVideodec2DecoderMemoryInfo) == 0x48); - -struct OrbisVideodec2InputData { - u64 thisSize; - void* auData; - u64 auSize; - u64 ptsData; - u64 dtsData; - u64 attachedData; -}; -static_assert(sizeof(OrbisVideodec2InputData) == 0x30); - -struct OrbisVideodec2OutputInfo { - u64 thisSize; - bool isValid; - bool isErrorFrame; - u8 pictureCount; - u32 codecType; - u32 frameWidth; - u32 framePitch; - u32 frameHeight; - void* frameBuffer; - u64 frameBufferSize; -}; -static_assert(sizeof(OrbisVideodec2OutputInfo) == 0x30); - -struct OrbisVideodec2FrameBuffer { - u64 thisSize; - void* frameBuffer; - u64 frameBufferSize; - bool isAccepted; -}; -static_assert(sizeof(OrbisVideodec2FrameBuffer) == 0x20); - -struct OrbisVideodec2ComputeMemoryInfo { - u64 thisSize; - u64 cpuGpuMemorySize; - void* cpuGpuMemory; -}; -static_assert(sizeof(OrbisVideodec2ComputeMemoryInfo) == 0x18); - -struct OrbisVideodec2ComputeConfigInfo { - u64 thisSize; - u16 computePipeId; - u16 computeQueueId; - bool checkMemoryType; - u8 reserved0; - u16 reserved1; -}; -static_assert(sizeof(OrbisVideodec2ComputeConfigInfo) == 0x10); - -s32 PS4_SYSV_ABI -sceVideodec2QueryComputeMemoryInfo(OrbisVideodec2ComputeMemoryInfo* computeMemInfo); - -s32 PS4_SYSV_ABI -sceVideodec2AllocateComputeQueue(const OrbisVideodec2ComputeConfigInfo* computeCfgInfo, - const OrbisVideodec2ComputeMemoryInfo* computeMemInfo, - OrbisVideodec2ComputeQueue* computeQueue); - -s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue); - -s32 PS4_SYSV_ABI -sceVideodec2QueryDecoderMemoryInfo(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo, - OrbisVideodec2DecoderMemoryInfo* decoderMemInfo); - -s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo, - const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo, - OrbisVideodec2Decoder* decoder); - -s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder); - -s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder, - const OrbisVideodec2InputData* inputData, - OrbisVideodec2FrameBuffer* frameBuffer, - OrbisVideodec2OutputInfo* outputInfo); - -s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder, - OrbisVideodec2FrameBuffer* frameBuffer, - OrbisVideodec2OutputInfo* outputInfo); - -s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder); - -s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo, - void* p1stPictureInfo, void* p2ndPictureInfo); - -void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym); +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +#include "videodec2_avc.h" + +namespace Core::Loader { +class SymbolsResolver; +} +namespace Libraries::Vdec2 { + +class VdecDecoder; + +using OrbisVideodec2Decoder = VdecDecoder*; +using OrbisVideodec2ComputeQueue = void*; + +struct OrbisVideodec2DecoderConfigInfo { + u64 thisSize; + u32 resourceType; + u32 codecType; + u32 profile; + u32 maxLevel; + s32 maxFrameWidth; + s32 maxFrameHeight; + s32 maxDpbFrameCount; + u32 decodePipelineDepth; + OrbisVideodec2ComputeQueue computeQueue; + u64 cpuAffinityMask; + s32 cpuThreadPriority; + bool optimizeProgressiveVideo; + bool checkMemoryType; + u8 reserved0; + u8 reserved1; + void* extraConfigInfo; +}; +static_assert(sizeof(OrbisVideodec2DecoderConfigInfo) == 0x48); + +struct OrbisVideodec2DecoderMemoryInfo { + u64 thisSize; + u64 cpuMemorySize; + void* cpuMemory; + u64 gpuMemorySize; + void* gpuMemory; + u64 cpuGpuMemorySize; + void* cpuGpuMemory; + u64 maxFrameBufferSize; + u32 frameBufferAlignment; + u32 reserved0; +}; +static_assert(sizeof(OrbisVideodec2DecoderMemoryInfo) == 0x48); + +struct OrbisVideodec2InputData { + u64 thisSize; + void* auData; + u64 auSize; + u64 ptsData; + u64 dtsData; + u64 attachedData; +}; +static_assert(sizeof(OrbisVideodec2InputData) == 0x30); + +struct OrbisVideodec2OutputInfo { + u64 thisSize; + bool isValid; + bool isErrorFrame; + u8 pictureCount; + u32 codecType; + u32 frameWidth; + u32 framePitch; + u32 frameHeight; + void* frameBuffer; + u64 frameBufferSize; +}; +static_assert(sizeof(OrbisVideodec2OutputInfo) == 0x30); + +struct OrbisVideodec2FrameBuffer { + u64 thisSize; + void* frameBuffer; + u64 frameBufferSize; + bool isAccepted; +}; +static_assert(sizeof(OrbisVideodec2FrameBuffer) == 0x20); + +struct OrbisVideodec2ComputeMemoryInfo { + u64 thisSize; + u64 cpuGpuMemorySize; + void* cpuGpuMemory; +}; +static_assert(sizeof(OrbisVideodec2ComputeMemoryInfo) == 0x18); + +struct OrbisVideodec2ComputeConfigInfo { + u64 thisSize; + u16 computePipeId; + u16 computeQueueId; + bool checkMemoryType; + u8 reserved0; + u16 reserved1; +}; +static_assert(sizeof(OrbisVideodec2ComputeConfigInfo) == 0x10); + +s32 PS4_SYSV_ABI +sceVideodec2QueryComputeMemoryInfo(OrbisVideodec2ComputeMemoryInfo* computeMemInfo); + +s32 PS4_SYSV_ABI +sceVideodec2AllocateComputeQueue(const OrbisVideodec2ComputeConfigInfo* computeCfgInfo, + const OrbisVideodec2ComputeMemoryInfo* computeMemInfo, + OrbisVideodec2ComputeQueue* computeQueue); + +s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue); + +s32 PS4_SYSV_ABI +sceVideodec2QueryDecoderMemoryInfo(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo, + OrbisVideodec2DecoderMemoryInfo* decoderMemInfo); + +s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo, + const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo, + OrbisVideodec2Decoder* decoder); + +s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder); + +s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder, + const OrbisVideodec2InputData* inputData, + OrbisVideodec2FrameBuffer* frameBuffer, + OrbisVideodec2OutputInfo* outputInfo); + +s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder, + OrbisVideodec2FrameBuffer* frameBuffer, + OrbisVideodec2OutputInfo* outputInfo); + +s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder); + +s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo, + void* p1stPictureInfo, void* p2ndPictureInfo); + +void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Vdec2 \ No newline at end of file diff --git a/src/core/libraries/videodec/videodec2_avc.h b/src/core/libraries/videodec/videodec2_avc.h index 4109b5fd..22293ee9 100644 --- a/src/core/libraries/videodec/videodec2_avc.h +++ b/src/core/libraries/videodec/videodec2_avc.h @@ -1,60 +1,60 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "common/types.h" - -namespace Libraries::Vdec2 { - -struct OrbisVideodec2AvcPictureInfo { - u64 thisSize; - - bool isValid; - - u64 ptsData; - u64 dtsData; - u64 attachedData; - - u8 idrPictureflag; - - u8 profile_idc; - u8 level_idc; - u32 pic_width_in_mbs_minus1; - u32 pic_height_in_map_units_minus1; - u8 frame_mbs_only_flag; - - u8 frame_cropping_flag; - u32 frameCropLeftOffset; - u32 frameCropRightOffset; - u32 frameCropTopOffset; - u32 frameCropBottomOffset; - - u8 aspect_ratio_info_present_flag; - u8 aspect_ratio_idc; - u16 sar_width; - u16 sar_height; - - u8 video_signal_type_present_flag; - u8 video_format; - u8 video_full_range_flag; - u8 colour_description_present_flag; - u8 colour_primaries; - u8 transfer_characteristics; - u8 matrix_coefficients; - - u8 timing_info_present_flag; - u32 num_units_in_tick; - u32 time_scale; - u8 fixed_frame_rate_flag; - - u8 bitstream_restriction_flag; - u8 max_dec_frame_buffering; - - u8 pic_struct_present_flag; - u8 pic_struct; - u8 field_pic_flag; - u8 bottom_field_flag; -}; - +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Libraries::Vdec2 { + +struct OrbisVideodec2AvcPictureInfo { + u64 thisSize; + + bool isValid; + + u64 ptsData; + u64 dtsData; + u64 attachedData; + + u8 idrPictureflag; + + u8 profile_idc; + u8 level_idc; + u32 pic_width_in_mbs_minus1; + u32 pic_height_in_map_units_minus1; + u8 frame_mbs_only_flag; + + u8 frame_cropping_flag; + u32 frameCropLeftOffset; + u32 frameCropRightOffset; + u32 frameCropTopOffset; + u32 frameCropBottomOffset; + + u8 aspect_ratio_info_present_flag; + u8 aspect_ratio_idc; + u16 sar_width; + u16 sar_height; + + u8 video_signal_type_present_flag; + u8 video_format; + u8 video_full_range_flag; + u8 colour_description_present_flag; + u8 colour_primaries; + u8 transfer_characteristics; + u8 matrix_coefficients; + + u8 timing_info_present_flag; + u32 num_units_in_tick; + u32 time_scale; + u8 fixed_frame_rate_flag; + + u8 bitstream_restriction_flag; + u8 max_dec_frame_buffering; + + u8 pic_struct_present_flag; + u8 pic_struct; + u8 field_pic_flag; + u8 bottom_field_flag; +}; + } // namespace Libraries::Vdec2 \ No newline at end of file diff --git a/src/core/libraries/videodec/videodec2_impl.cpp b/src/core/libraries/videodec/videodec2_impl.cpp index 138d78af..22b17c86 100644 --- a/src/core/libraries/videodec/videodec2_impl.cpp +++ b/src/core/libraries/videodec/videodec2_impl.cpp @@ -1,229 +1,229 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "videodec2_impl.h" - -#include "common/assert.h" -#include "common/logging/log.h" -#include "core/libraries/videodec/videodec_error.h" - -#include "common/support/avdec.h" - -namespace Libraries::Vdec2 { - -std::vector gPictureInfos; - -static inline void CopyNV12Data(u8* dst, const AVFrame& src) { - std::memcpy(dst, src.data[0], src.width * src.height); - std::memcpy(dst + (src.width * src.height), src.data[1], (src.width * src.height) / 2); -} - -VdecDecoder::VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo, - const OrbisVideodec2DecoderMemoryInfo& memoryInfo) { - ASSERT(configInfo.codecType == 1); /* AVC */ - - const AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H264); - ASSERT(codec); - - mCodecContext = avcodec_alloc_context3(codec); - ASSERT(mCodecContext); - mCodecContext->width = configInfo.maxFrameWidth; - mCodecContext->height = configInfo.maxFrameHeight; - - avcodec_open2(mCodecContext, codec, nullptr); -} - -VdecDecoder::~VdecDecoder() { - avcodec_free_context(&mCodecContext); - sws_freeContext(mSwsContext); - - gPictureInfos.clear(); -} - -s32 VdecDecoder::Decode(const OrbisVideodec2InputData& inputData, - OrbisVideodec2FrameBuffer& frameBuffer, - OrbisVideodec2OutputInfo& outputInfo) { - frameBuffer.isAccepted = false; - outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo); - outputInfo.isValid = false; - outputInfo.isErrorFrame = true; - outputInfo.pictureCount = 0; - - if (!inputData.auData) { - return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_POINTER; - } - if (inputData.auSize == 0) { - return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_SIZE; - } - - AVPacket* packet = av_packet_alloc(); - if (!packet) { - LOG_ERROR(Lib_Vdec2, "Failed to allocate packet"); - return ORBIS_VIDEODEC2_ERROR_API_FAIL; - } - - packet->data = (u8*)inputData.auData; - packet->size = inputData.auSize; - packet->pts = inputData.ptsData; - packet->dts = inputData.dtsData; - - int ret = avcodec_send_packet(mCodecContext, packet); - if (ret < 0) { - LOG_ERROR(Lib_Vdec2, "Error sending packet to decoder: {}", ret); - av_packet_free(&packet); - return ORBIS_VIDEODEC2_ERROR_API_FAIL; - } - - AVFrame* frame = av_frame_alloc(); - if (frame == nullptr) { - LOG_ERROR(Lib_Vdec2, "Failed to allocate frame"); - av_packet_free(&packet); - return ORBIS_VIDEODEC2_ERROR_API_FAIL; - } - - while (true) { - ret = avcodec_receive_frame(mCodecContext, frame); - if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { - break; - } else if (ret < 0) { - LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", ret); - av_packet_free(&packet); - av_frame_free(&frame); - return ORBIS_VIDEODEC2_ERROR_API_FAIL; - } - - if (frame->format != AV_PIX_FMT_NV12) { - AVFrame* nv12_frame = ConvertNV12Frame(*frame); - ASSERT(nv12_frame); - av_frame_free(&frame); - frame = nv12_frame; - } - - CopyNV12Data((u8*)frameBuffer.frameBuffer, *frame); - frameBuffer.isAccepted = true; - - outputInfo.codecType = 1; // FIXME: Hardcoded to AVC - outputInfo.frameWidth = frame->width; - outputInfo.frameHeight = frame->height; - outputInfo.framePitch = frame->linesize[0]; - outputInfo.frameBufferSize = frameBuffer.frameBufferSize; - outputInfo.frameBuffer = frameBuffer.frameBuffer; - - outputInfo.isValid = true; - outputInfo.isErrorFrame = false; - outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video - - if (outputInfo.isValid) { - OrbisVideodec2AvcPictureInfo pictureInfo = {}; - - pictureInfo.thisSize = sizeof(OrbisVideodec2AvcPictureInfo); - pictureInfo.isValid = true; - - pictureInfo.ptsData = inputData.ptsData; - pictureInfo.dtsData = inputData.dtsData; - pictureInfo.attachedData = inputData.attachedData; - - pictureInfo.frameCropLeftOffset = frame->crop_left; - pictureInfo.frameCropRightOffset = frame->crop_right; - pictureInfo.frameCropTopOffset = frame->crop_top; - pictureInfo.frameCropBottomOffset = frame->crop_bottom; - - gPictureInfos.push_back(pictureInfo); - } - } - - av_packet_free(&packet); - av_frame_free(&frame); - return ORBIS_OK; -} - -s32 VdecDecoder::Flush(OrbisVideodec2FrameBuffer& frameBuffer, - OrbisVideodec2OutputInfo& outputInfo) { - frameBuffer.isAccepted = false; - outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo); - outputInfo.isValid = false; - outputInfo.isErrorFrame = true; - outputInfo.pictureCount = 0; - - AVFrame* frame = av_frame_alloc(); - if (!frame) { - LOG_ERROR(Lib_Vdec2, "Failed to allocate frame"); - return ORBIS_VIDEODEC2_ERROR_API_FAIL; - } - - while (true) { - int ret = avcodec_receive_frame(mCodecContext, frame); - if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { - break; - } else if (ret < 0) { - LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", ret); - av_frame_free(&frame); - return ORBIS_VIDEODEC2_ERROR_API_FAIL; - } - - if (frame->format != AV_PIX_FMT_NV12) { - AVFrame* nv12_frame = ConvertNV12Frame(*frame); - ASSERT(nv12_frame); - av_frame_free(&frame); - frame = nv12_frame; - } - - CopyNV12Data((u8*)frameBuffer.frameBuffer, *frame); - frameBuffer.isAccepted = true; - - outputInfo.codecType = 1; // FIXME: Hardcoded to AVC - outputInfo.frameWidth = frame->width; - outputInfo.frameHeight = frame->height; - outputInfo.framePitch = frame->linesize[0]; - outputInfo.frameBufferSize = frameBuffer.frameBufferSize; - outputInfo.frameBuffer = frameBuffer.frameBuffer; - - outputInfo.isValid = true; - outputInfo.isErrorFrame = false; - outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video - - // FIXME: Should we add picture info here too? - } - - av_frame_free(&frame); - return ORBIS_OK; -} - -s32 VdecDecoder::Reset() { - avcodec_flush_buffers(mCodecContext); - gPictureInfos.clear(); - return ORBIS_OK; -} - -AVFrame* VdecDecoder::ConvertNV12Frame(AVFrame& frame) { - AVFrame* nv12_frame = av_frame_alloc(); - nv12_frame->pts = frame.pts; - nv12_frame->pkt_dts = frame.pkt_dts < 0 ? 0 : frame.pkt_dts; - nv12_frame->format = AV_PIX_FMT_NV12; - nv12_frame->width = frame.width; - nv12_frame->height = frame.height; - nv12_frame->sample_aspect_ratio = frame.sample_aspect_ratio; - nv12_frame->crop_top = frame.crop_top; - nv12_frame->crop_bottom = frame.crop_bottom; - nv12_frame->crop_left = frame.crop_left; - nv12_frame->crop_right = frame.crop_right; - - av_frame_get_buffer(nv12_frame, 0); - - if (mSwsContext == nullptr) { - mSwsContext = sws_getContext(frame.width, frame.height, AVPixelFormat(frame.format), - nv12_frame->width, nv12_frame->height, AV_PIX_FMT_NV12, - SWS_FAST_BILINEAR, nullptr, nullptr, nullptr); - } - - const auto res = sws_scale(mSwsContext, frame.data, frame.linesize, 0, frame.height, - nv12_frame->data, nv12_frame->linesize); - if (res < 0) { - LOG_ERROR(Lib_Vdec2, "Could not convert to NV12: {}", av_err2str(res)); - return nullptr; - } - - return nv12_frame; -} - -} // namespace Libraries::Vdec2 +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "videodec2_impl.h" + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/libraries/videodec/videodec_error.h" + +#include "common/support/avdec.h" + +namespace Libraries::Vdec2 { + +std::vector gPictureInfos; + +static inline void CopyNV12Data(u8* dst, const AVFrame& src) { + std::memcpy(dst, src.data[0], src.width * src.height); + std::memcpy(dst + (src.width * src.height), src.data[1], (src.width * src.height) / 2); +} + +VdecDecoder::VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo, + const OrbisVideodec2DecoderMemoryInfo& memoryInfo) { + ASSERT(configInfo.codecType == 1); /* AVC */ + + const AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H264); + ASSERT(codec); + + mCodecContext = avcodec_alloc_context3(codec); + ASSERT(mCodecContext); + mCodecContext->width = configInfo.maxFrameWidth; + mCodecContext->height = configInfo.maxFrameHeight; + + avcodec_open2(mCodecContext, codec, nullptr); +} + +VdecDecoder::~VdecDecoder() { + avcodec_free_context(&mCodecContext); + sws_freeContext(mSwsContext); + + gPictureInfos.clear(); +} + +s32 VdecDecoder::Decode(const OrbisVideodec2InputData& inputData, + OrbisVideodec2FrameBuffer& frameBuffer, + OrbisVideodec2OutputInfo& outputInfo) { + frameBuffer.isAccepted = false; + outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo); + outputInfo.isValid = false; + outputInfo.isErrorFrame = true; + outputInfo.pictureCount = 0; + + if (!inputData.auData) { + return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_POINTER; + } + if (inputData.auSize == 0) { + return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_SIZE; + } + + AVPacket* packet = av_packet_alloc(); + if (!packet) { + LOG_ERROR(Lib_Vdec2, "Failed to allocate packet"); + return ORBIS_VIDEODEC2_ERROR_API_FAIL; + } + + packet->data = (u8*)inputData.auData; + packet->size = inputData.auSize; + packet->pts = inputData.ptsData; + packet->dts = inputData.dtsData; + + int ret = avcodec_send_packet(mCodecContext, packet); + if (ret < 0) { + LOG_ERROR(Lib_Vdec2, "Error sending packet to decoder: {}", ret); + av_packet_free(&packet); + return ORBIS_VIDEODEC2_ERROR_API_FAIL; + } + + AVFrame* frame = av_frame_alloc(); + if (frame == nullptr) { + LOG_ERROR(Lib_Vdec2, "Failed to allocate frame"); + av_packet_free(&packet); + return ORBIS_VIDEODEC2_ERROR_API_FAIL; + } + + while (true) { + ret = avcodec_receive_frame(mCodecContext, frame); + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { + break; + } else if (ret < 0) { + LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", ret); + av_packet_free(&packet); + av_frame_free(&frame); + return ORBIS_VIDEODEC2_ERROR_API_FAIL; + } + + if (frame->format != AV_PIX_FMT_NV12) { + AVFrame* nv12_frame = ConvertNV12Frame(*frame); + ASSERT(nv12_frame); + av_frame_free(&frame); + frame = nv12_frame; + } + + CopyNV12Data((u8*)frameBuffer.frameBuffer, *frame); + frameBuffer.isAccepted = true; + + outputInfo.codecType = 1; // FIXME: Hardcoded to AVC + outputInfo.frameWidth = frame->width; + outputInfo.frameHeight = frame->height; + outputInfo.framePitch = frame->linesize[0]; + outputInfo.frameBufferSize = frameBuffer.frameBufferSize; + outputInfo.frameBuffer = frameBuffer.frameBuffer; + + outputInfo.isValid = true; + outputInfo.isErrorFrame = false; + outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video + + if (outputInfo.isValid) { + OrbisVideodec2AvcPictureInfo pictureInfo = {}; + + pictureInfo.thisSize = sizeof(OrbisVideodec2AvcPictureInfo); + pictureInfo.isValid = true; + + pictureInfo.ptsData = inputData.ptsData; + pictureInfo.dtsData = inputData.dtsData; + pictureInfo.attachedData = inputData.attachedData; + + pictureInfo.frameCropLeftOffset = frame->crop_left; + pictureInfo.frameCropRightOffset = frame->crop_right; + pictureInfo.frameCropTopOffset = frame->crop_top; + pictureInfo.frameCropBottomOffset = frame->crop_bottom; + + gPictureInfos.push_back(pictureInfo); + } + } + + av_packet_free(&packet); + av_frame_free(&frame); + return ORBIS_OK; +} + +s32 VdecDecoder::Flush(OrbisVideodec2FrameBuffer& frameBuffer, + OrbisVideodec2OutputInfo& outputInfo) { + frameBuffer.isAccepted = false; + outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo); + outputInfo.isValid = false; + outputInfo.isErrorFrame = true; + outputInfo.pictureCount = 0; + + AVFrame* frame = av_frame_alloc(); + if (!frame) { + LOG_ERROR(Lib_Vdec2, "Failed to allocate frame"); + return ORBIS_VIDEODEC2_ERROR_API_FAIL; + } + + while (true) { + int ret = avcodec_receive_frame(mCodecContext, frame); + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { + break; + } else if (ret < 0) { + LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", ret); + av_frame_free(&frame); + return ORBIS_VIDEODEC2_ERROR_API_FAIL; + } + + if (frame->format != AV_PIX_FMT_NV12) { + AVFrame* nv12_frame = ConvertNV12Frame(*frame); + ASSERT(nv12_frame); + av_frame_free(&frame); + frame = nv12_frame; + } + + CopyNV12Data((u8*)frameBuffer.frameBuffer, *frame); + frameBuffer.isAccepted = true; + + outputInfo.codecType = 1; // FIXME: Hardcoded to AVC + outputInfo.frameWidth = frame->width; + outputInfo.frameHeight = frame->height; + outputInfo.framePitch = frame->linesize[0]; + outputInfo.frameBufferSize = frameBuffer.frameBufferSize; + outputInfo.frameBuffer = frameBuffer.frameBuffer; + + outputInfo.isValid = true; + outputInfo.isErrorFrame = false; + outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video + + // FIXME: Should we add picture info here too? + } + + av_frame_free(&frame); + return ORBIS_OK; +} + +s32 VdecDecoder::Reset() { + avcodec_flush_buffers(mCodecContext); + gPictureInfos.clear(); + return ORBIS_OK; +} + +AVFrame* VdecDecoder::ConvertNV12Frame(AVFrame& frame) { + AVFrame* nv12_frame = av_frame_alloc(); + nv12_frame->pts = frame.pts; + nv12_frame->pkt_dts = frame.pkt_dts < 0 ? 0 : frame.pkt_dts; + nv12_frame->format = AV_PIX_FMT_NV12; + nv12_frame->width = frame.width; + nv12_frame->height = frame.height; + nv12_frame->sample_aspect_ratio = frame.sample_aspect_ratio; + nv12_frame->crop_top = frame.crop_top; + nv12_frame->crop_bottom = frame.crop_bottom; + nv12_frame->crop_left = frame.crop_left; + nv12_frame->crop_right = frame.crop_right; + + av_frame_get_buffer(nv12_frame, 0); + + if (mSwsContext == nullptr) { + mSwsContext = sws_getContext(frame.width, frame.height, AVPixelFormat(frame.format), + nv12_frame->width, nv12_frame->height, AV_PIX_FMT_NV12, + SWS_FAST_BILINEAR, nullptr, nullptr, nullptr); + } + + const auto res = sws_scale(mSwsContext, frame.data, frame.linesize, 0, frame.height, + nv12_frame->data, nv12_frame->linesize); + if (res < 0) { + LOG_ERROR(Lib_Vdec2, "Could not convert to NV12: {}", av_err2str(res)); + return nullptr; + } + + return nv12_frame; +} + +} // namespace Libraries::Vdec2 diff --git a/src/core/libraries/videodec/videodec2_impl.h b/src/core/libraries/videodec/videodec2_impl.h index 1bcece6e..c8e8ea25 100644 --- a/src/core/libraries/videodec/videodec2_impl.h +++ b/src/core/libraries/videodec/videodec2_impl.h @@ -1,39 +1,39 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include - -#include "videodec2.h" - -extern "C" { -#include -#include -#include -} - -namespace Libraries::Vdec2 { - -extern std::vector gPictureInfos; - -class VdecDecoder { -public: - VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo, - const OrbisVideodec2DecoderMemoryInfo& memoryInfo); - ~VdecDecoder(); - - s32 Decode(const OrbisVideodec2InputData& inputData, OrbisVideodec2FrameBuffer& frameBuffer, - OrbisVideodec2OutputInfo& outputInfo); - s32 Flush(OrbisVideodec2FrameBuffer& frameBuffer, OrbisVideodec2OutputInfo& outputInfo); - s32 Reset(); - -private: - AVFrame* ConvertNV12Frame(AVFrame& frame); - -private: - AVCodecContext* mCodecContext = nullptr; - SwsContext* mSwsContext = nullptr; -}; - +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "videodec2.h" + +extern "C" { +#include +#include +#include +} + +namespace Libraries::Vdec2 { + +extern std::vector gPictureInfos; + +class VdecDecoder { +public: + VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo, + const OrbisVideodec2DecoderMemoryInfo& memoryInfo); + ~VdecDecoder(); + + s32 Decode(const OrbisVideodec2InputData& inputData, OrbisVideodec2FrameBuffer& frameBuffer, + OrbisVideodec2OutputInfo& outputInfo); + s32 Flush(OrbisVideodec2FrameBuffer& frameBuffer, OrbisVideodec2OutputInfo& outputInfo); + s32 Reset(); + +private: + AVFrame* ConvertNV12Frame(AVFrame& frame); + +private: + AVCodecContext* mCodecContext = nullptr; + SwsContext* mSwsContext = nullptr; +}; + } // namespace Libraries::Vdec2 \ No newline at end of file diff --git a/src/core/thread.cpp b/src/core/thread.cpp index 07681e6b..8a0a4cfd 100644 --- a/src/core/thread.cpp +++ b/src/core/thread.cpp @@ -1,151 +1,151 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/alignment.h" -#include "core/libraries/kernel/threads/pthread.h" -#include "thread.h" - -#ifdef _WIN64 -#include -#include "common/ntapi.h" -#else -#include -#include -#endif - -namespace Core { - -#ifdef _WIN64 -#define KGDT64_R3_DATA (0x28) -#define KGDT64_R3_CODE (0x30) -#define KGDT64_R3_CMTEB (0x50) -#define RPL_MASK (0x03) - -#define INITIAL_FPUCW (0x037f) -#define INITIAL_MXCSR_MASK (0xffbf) -#define EFLAGS_INTERRUPT_MASK (0x200) - -void InitializeTeb(INITIAL_TEB* teb, const ::Libraries::Kernel::PthreadAttr* attr) { - teb->StackBase = (void*)((u64)attr->stackaddr_attr + attr->stacksize_attr); - teb->StackLimit = nullptr; - teb->StackAllocationBase = attr->stackaddr_attr; -} - -void InitializeContext(CONTEXT* ctx, ThreadFunc func, void* arg, - const ::Libraries::Kernel::PthreadAttr* attr) { - /* Note: The stack has to be reversed */ - ctx->Rsp = (u64)attr->stackaddr_attr + attr->stacksize_attr; - ctx->Rbp = (u64)attr->stackaddr_attr + attr->stacksize_attr; - ctx->Rcx = (u64)arg; - ctx->Rip = (u64)func; - - ctx->SegGs = KGDT64_R3_DATA | RPL_MASK; - ctx->SegEs = KGDT64_R3_DATA | RPL_MASK; - ctx->SegDs = KGDT64_R3_DATA | RPL_MASK; - ctx->SegCs = KGDT64_R3_CODE | RPL_MASK; - ctx->SegSs = KGDT64_R3_DATA | RPL_MASK; - ctx->SegFs = KGDT64_R3_CMTEB | RPL_MASK; - - ctx->EFlags = 0x3000 | EFLAGS_INTERRUPT_MASK; - ctx->MxCsr = INITIAL_MXCSR; - - ctx->FltSave.ControlWord = INITIAL_FPUCW; - ctx->FltSave.MxCsr = INITIAL_MXCSR; - ctx->FltSave.MxCsr_Mask = INITIAL_MXCSR_MASK; - - ctx->ContextFlags = - CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT; -} -#endif - -NativeThread::NativeThread() : native_handle{0} {} - -NativeThread::~NativeThread() {} - -int NativeThread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) { -#ifndef _WIN64 - pthread_t* pthr = reinterpret_cast(&native_handle); - pthread_attr_t pattr; - pthread_attr_init(&pattr); - pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr); - return pthread_create(pthr, &pattr, (PthreadFunc)func, arg); -#else - CLIENT_ID clientId{}; - INITIAL_TEB teb{}; - CONTEXT ctx{}; - - clientId.UniqueProcess = GetCurrentProcess(); - clientId.UniqueThread = GetCurrentThread(); - - InitializeTeb(&teb, attr); - InitializeContext(&ctx, func, arg, attr); - - return NtCreateThread(&native_handle, THREAD_ALL_ACCESS, nullptr, GetCurrentProcess(), - &clientId, &ctx, &teb, false); -#endif -} - -void NativeThread::Exit() { - if (!native_handle) { - return; - } - - tid = 0; - -#ifdef _WIN64 - NtClose(native_handle); - native_handle = nullptr; - - /* The Windows kernel will free the stack - given at thread creation via INITIAL_TEB - (StackAllocationBase) upon thread termination. - - In earlier Windows versions (NT4 to Windows Server 2003), - you could get around this via disabling FreeStackOnTermination - on the TEB. This has been removed since then. - - To avoid this, we must forcefully set the TEB - deallocation stack pointer to NULL so ZwFreeVirtualMemory fails - in the kernel and our stack is not freed. - */ - auto* teb = reinterpret_cast(NtCurrentTeb()); - teb->DeallocationStack = nullptr; - - NtTerminateThread(nullptr, 0); -#else - // Disable and free the signal stack. - constexpr stack_t sig_stack = { - .ss_flags = SS_DISABLE, - }; - sigaltstack(&sig_stack, nullptr); - - if (sig_stack_ptr) { - free(sig_stack_ptr); - sig_stack_ptr = nullptr; - } - - pthread_exit(nullptr); -#endif -} - -void NativeThread::Initialize() { -#if _WIN64 - tid = GetCurrentThreadId(); -#else - tid = (u64)pthread_self(); - - // Set up an alternate signal handler stack to avoid overflowing small thread stacks. - const size_t page_size = getpagesize(); - const size_t sig_stack_size = Common::AlignUp(std::max(64_KB, MINSIGSTKSZ), page_size); - ASSERT_MSG(posix_memalign(&sig_stack_ptr, page_size, sig_stack_size) == 0, - "Failed to allocate signal stack: {}", errno); - - stack_t sig_stack; - sig_stack.ss_sp = sig_stack_ptr; - sig_stack.ss_size = sig_stack_size; - sig_stack.ss_flags = 0; - ASSERT_MSG(sigaltstack(&sig_stack, nullptr) == 0, "Failed to set signal stack: {}", errno); -#endif -} - -} // namespace Core +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/alignment.h" +#include "core/libraries/kernel/threads/pthread.h" +#include "thread.h" + +#ifdef _WIN64 +#include +#include "common/ntapi.h" +#else +#include +#include +#endif + +namespace Core { + +#ifdef _WIN64 +#define KGDT64_R3_DATA (0x28) +#define KGDT64_R3_CODE (0x30) +#define KGDT64_R3_CMTEB (0x50) +#define RPL_MASK (0x03) + +#define INITIAL_FPUCW (0x037f) +#define INITIAL_MXCSR_MASK (0xffbf) +#define EFLAGS_INTERRUPT_MASK (0x200) + +void InitializeTeb(INITIAL_TEB* teb, const ::Libraries::Kernel::PthreadAttr* attr) { + teb->StackBase = (void*)((u64)attr->stackaddr_attr + attr->stacksize_attr); + teb->StackLimit = nullptr; + teb->StackAllocationBase = attr->stackaddr_attr; +} + +void InitializeContext(CONTEXT* ctx, ThreadFunc func, void* arg, + const ::Libraries::Kernel::PthreadAttr* attr) { + /* Note: The stack has to be reversed */ + ctx->Rsp = (u64)attr->stackaddr_attr + attr->stacksize_attr; + ctx->Rbp = (u64)attr->stackaddr_attr + attr->stacksize_attr; + ctx->Rcx = (u64)arg; + ctx->Rip = (u64)func; + + ctx->SegGs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegEs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegDs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegCs = KGDT64_R3_CODE | RPL_MASK; + ctx->SegSs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegFs = KGDT64_R3_CMTEB | RPL_MASK; + + ctx->EFlags = 0x3000 | EFLAGS_INTERRUPT_MASK; + ctx->MxCsr = INITIAL_MXCSR; + + ctx->FltSave.ControlWord = INITIAL_FPUCW; + ctx->FltSave.MxCsr = INITIAL_MXCSR; + ctx->FltSave.MxCsr_Mask = INITIAL_MXCSR_MASK; + + ctx->ContextFlags = + CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT; +} +#endif + +NativeThread::NativeThread() : native_handle{0} {} + +NativeThread::~NativeThread() {} + +int NativeThread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) { +#ifndef _WIN64 + pthread_t* pthr = reinterpret_cast(&native_handle); + pthread_attr_t pattr; + pthread_attr_init(&pattr); + pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr); + return pthread_create(pthr, &pattr, (PthreadFunc)func, arg); +#else + CLIENT_ID clientId{}; + INITIAL_TEB teb{}; + CONTEXT ctx{}; + + clientId.UniqueProcess = GetCurrentProcess(); + clientId.UniqueThread = GetCurrentThread(); + + InitializeTeb(&teb, attr); + InitializeContext(&ctx, func, arg, attr); + + return NtCreateThread(&native_handle, THREAD_ALL_ACCESS, nullptr, GetCurrentProcess(), + &clientId, &ctx, &teb, false); +#endif +} + +void NativeThread::Exit() { + if (!native_handle) { + return; + } + + tid = 0; + +#ifdef _WIN64 + NtClose(native_handle); + native_handle = nullptr; + + /* The Windows kernel will free the stack + given at thread creation via INITIAL_TEB + (StackAllocationBase) upon thread termination. + + In earlier Windows versions (NT4 to Windows Server 2003), + you could get around this via disabling FreeStackOnTermination + on the TEB. This has been removed since then. + + To avoid this, we must forcefully set the TEB + deallocation stack pointer to NULL so ZwFreeVirtualMemory fails + in the kernel and our stack is not freed. + */ + auto* teb = reinterpret_cast(NtCurrentTeb()); + teb->DeallocationStack = nullptr; + + NtTerminateThread(nullptr, 0); +#else + // Disable and free the signal stack. + constexpr stack_t sig_stack = { + .ss_flags = SS_DISABLE, + }; + sigaltstack(&sig_stack, nullptr); + + if (sig_stack_ptr) { + free(sig_stack_ptr); + sig_stack_ptr = nullptr; + } + + pthread_exit(nullptr); +#endif +} + +void NativeThread::Initialize() { +#if _WIN64 + tid = GetCurrentThreadId(); +#else + tid = (u64)pthread_self(); + + // Set up an alternate signal handler stack to avoid overflowing small thread stacks. + const size_t page_size = getpagesize(); + const size_t sig_stack_size = Common::AlignUp(std::max(64_KB, MINSIGSTKSZ), page_size); + ASSERT_MSG(posix_memalign(&sig_stack_ptr, page_size, sig_stack_size) == 0, + "Failed to allocate signal stack: {}", errno); + + stack_t sig_stack; + sig_stack.ss_sp = sig_stack_ptr; + sig_stack.ss_size = sig_stack_size; + sig_stack.ss_flags = 0; + ASSERT_MSG(sigaltstack(&sig_stack, nullptr) == 0, "Failed to set signal stack: {}", errno); +#endif +} + +} // namespace Core diff --git a/src/core/thread.h b/src/core/thread.h index bd777a2e..6ea7dfad 100644 --- a/src/core/thread.h +++ b/src/core/thread.h @@ -1,45 +1,45 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "common/types.h" - -namespace Libraries::Kernel { -struct PthreadAttr; -} // namespace Libraries::Kernel - -namespace Core { - -using ThreadFunc = void (*)(void*); -using PthreadFunc = void* (*)(void*); - -class NativeThread { -public: - NativeThread(); - ~NativeThread(); - - int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr); - void Exit(); - - void Initialize(); - - uintptr_t GetHandle() { - return reinterpret_cast(native_handle); - } - - u64 GetTid() { - return tid; - } - -private: -#ifdef _WIN64 - void* native_handle; -#else - uintptr_t native_handle; - void* sig_stack_ptr; -#endif - u64 tid; -}; - +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Libraries::Kernel { +struct PthreadAttr; +} // namespace Libraries::Kernel + +namespace Core { + +using ThreadFunc = void (*)(void*); +using PthreadFunc = void* (*)(void*); + +class NativeThread { +public: + NativeThread(); + ~NativeThread(); + + int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr); + void Exit(); + + void Initialize(); + + uintptr_t GetHandle() { + return reinterpret_cast(native_handle); + } + + u64 GetTid() { + return tid; + } + +private: +#ifdef _WIN64 + void* native_handle; +#else + uintptr_t native_handle; + void* sig_stack_ptr; +#endif + u64 tid; +}; + } // namespace Core \ No newline at end of file diff --git a/src/shader_recompiler/ir/breadth_first_search.h b/src/shader_recompiler/ir/breadth_first_search.h index b3ed8720..390dffb5 100644 --- a/src/shader_recompiler/ir/breadth_first_search.h +++ b/src/shader_recompiler/ir/breadth_first_search.h @@ -1,74 +1,74 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include -#include -#include "shader_recompiler/ir/value.h" - -namespace Shader::IR { - -// Use typename Instruction so the function can be used to return either const or mutable -// Insts depending on the context. -template -auto BreadthFirstSearch(Instruction* inst, - Pred&& pred) -> std::invoke_result_t { - // Most often case the instruction is the desired already. - if (std::optional result = pred(inst)) { - return result; - } - - // Breadth-first search visiting the right most arguments first - boost::container::small_vector visited; - std::queue queue; - queue.push(inst); - - while (!queue.empty()) { - // Pop one instruction from the queue - Instruction* inst{queue.front()}; - queue.pop(); - if (std::optional result = pred(inst)) { - // This is the instruction we were looking for - return result; - } - // Visit the right most arguments first - for (size_t arg = inst->NumArgs(); arg--;) { - Value arg_value{inst->Arg(arg)}; - if (arg_value.IsImmediate()) { - continue; - } - // Queue instruction if it hasn't been visited - Instruction* arg_inst{arg_value.InstRecursive()}; - if (std::ranges::find(visited, arg_inst) == visited.end()) { - visited.push_back(arg_inst); - queue.push(arg_inst); - } - } - } - // SSA tree has been traversed and the result hasn't been found - return std::nullopt; -} - -template -auto BreadthFirstSearch(const Value& value, - Pred&& pred) -> std::invoke_result_t { - if (value.IsImmediate()) { - // Nothing to do with immediates - return std::nullopt; - } - return BreadthFirstSearch(value.InstRecursive(), pred); -} - -template -auto BreadthFirstSearch(Value value, Pred&& pred) -> std::invoke_result_t { - if (value.IsImmediate()) { - // Nothing to do with immediates - return std::nullopt; - } - return BreadthFirstSearch(value.InstRecursive(), pred); -} - -} // namespace Shader::IR +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include "shader_recompiler/ir/value.h" + +namespace Shader::IR { + +// Use typename Instruction so the function can be used to return either const or mutable +// Insts depending on the context. +template +auto BreadthFirstSearch(Instruction* inst, + Pred&& pred) -> std::invoke_result_t { + // Most often case the instruction is the desired already. + if (std::optional result = pred(inst)) { + return result; + } + + // Breadth-first search visiting the right most arguments first + boost::container::small_vector visited; + std::queue queue; + queue.push(inst); + + while (!queue.empty()) { + // Pop one instruction from the queue + Instruction* inst{queue.front()}; + queue.pop(); + if (std::optional result = pred(inst)) { + // This is the instruction we were looking for + return result; + } + // Visit the right most arguments first + for (size_t arg = inst->NumArgs(); arg--;) { + Value arg_value{inst->Arg(arg)}; + if (arg_value.IsImmediate()) { + continue; + } + // Queue instruction if it hasn't been visited + Instruction* arg_inst{arg_value.InstRecursive()}; + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } + } + } + // SSA tree has been traversed and the result hasn't been found + return std::nullopt; +} + +template +auto BreadthFirstSearch(const Value& value, + Pred&& pred) -> std::invoke_result_t { + if (value.IsImmediate()) { + // Nothing to do with immediates + return std::nullopt; + } + return BreadthFirstSearch(value.InstRecursive(), pred); +} + +template +auto BreadthFirstSearch(Value value, Pred&& pred) -> std::invoke_result_t { + if (value.IsImmediate()) { + // Nothing to do with immediates + return std::nullopt; + } + return BreadthFirstSearch(value.InstRecursive(), pred); +} + +} // namespace Shader::IR diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index f67278f6..feeafd9b 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -1,203 +1,203 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include -#include -#include "common/types.h" -#include "video_core/amdgpu/resource.h" -#include "video_core/renderer_vulkan/vk_common.h" - -namespace Vulkan { -class Instance; -class Scheduler; -} // namespace Vulkan - -VK_DEFINE_HANDLE(VmaAllocation) -VK_DEFINE_HANDLE(VmaAllocator) - -struct VmaAllocationInfo; - -namespace VideoCore { - -/// Hints and requirements for the backing memory type of a commit -enum class MemoryUsage { - DeviceLocal, ///< Requests device local buffer. - Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads - Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks - Stream, ///< Requests device local host visible buffer, falling back host memory. -}; - -constexpr vk::BufferUsageFlags ReadFlags = - vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eUniformTexelBuffer | - vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eIndexBuffer | - vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer; - -constexpr vk::BufferUsageFlags AllFlags = ReadFlags | vk::BufferUsageFlagBits::eTransferDst | - vk::BufferUsageFlagBits::eStorageTexelBuffer | - vk::BufferUsageFlagBits::eStorageBuffer; - -struct UniqueBuffer { - explicit UniqueBuffer(vk::Device device, VmaAllocator allocator); - ~UniqueBuffer(); - - UniqueBuffer(const UniqueBuffer&) = delete; - UniqueBuffer& operator=(const UniqueBuffer&) = delete; - - UniqueBuffer(UniqueBuffer&& other) - : allocator{std::exchange(other.allocator, VK_NULL_HANDLE)}, - allocation{std::exchange(other.allocation, VK_NULL_HANDLE)}, - buffer{std::exchange(other.buffer, VK_NULL_HANDLE)} {} - UniqueBuffer& operator=(UniqueBuffer&& other) { - buffer = std::exchange(other.buffer, VK_NULL_HANDLE); - allocator = std::exchange(other.allocator, VK_NULL_HANDLE); - allocation = std::exchange(other.allocation, VK_NULL_HANDLE); - return *this; - } - - void Create(const vk::BufferCreateInfo& image_ci, MemoryUsage usage, - VmaAllocationInfo* out_alloc_info); - - operator vk::Buffer() const { - return buffer; - } - - vk::Device device; - VmaAllocator allocator; - VmaAllocation allocation; - vk::Buffer buffer{}; -}; - -class Buffer { -public: - explicit Buffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - MemoryUsage usage, VAddr cpu_addr_, vk::BufferUsageFlags flags, - u64 size_bytes_); - - Buffer& operator=(const Buffer&) = delete; - Buffer(const Buffer&) = delete; - - Buffer& operator=(Buffer&&) = default; - Buffer(Buffer&&) = default; - - vk::BufferView View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt, - AmdGpu::NumberFormat nfmt); - - /// Increases the likeliness of this being a stream buffer - void IncreaseStreamScore(int score) noexcept { - stream_score += score; - } - - /// Returns the likeliness of this being a stream buffer - [[nodiscard]] int StreamScore() const noexcept { - return stream_score; - } - - /// Returns true when vaddr -> vaddr+size is fully contained in the buffer - [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { - return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); - } - - /// Returns the base CPU address of the buffer - [[nodiscard]] VAddr CpuAddr() const noexcept { - return cpu_addr; - } - - /// Returns the offset relative to the given CPU address - [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept { - return static_cast(other_cpu_addr - cpu_addr); - } - - size_t SizeBytes() const { - return size_bytes; - } - - vk::Buffer Handle() const noexcept { - return buffer; - } - - std::optional GetBarrier(vk::AccessFlagBits2 dst_acess_mask, - vk::PipelineStageFlagBits2 dst_stage) { - if (dst_acess_mask == access_mask && stage == dst_stage) { - return {}; - } - - auto barrier = vk::BufferMemoryBarrier2{ - .srcStageMask = stage, - .srcAccessMask = access_mask, - .dstStageMask = dst_stage, - .dstAccessMask = dst_acess_mask, - .buffer = buffer.buffer, - .size = size_bytes, - }; - access_mask = dst_acess_mask; - stage = dst_stage; - return barrier; - } - -public: - VAddr cpu_addr = 0; - bool is_picked{}; - bool is_coherent{}; - bool is_deleted{}; - int stream_score = 0; - size_t size_bytes = 0; - std::span mapped_data; - const Vulkan::Instance* instance; - Vulkan::Scheduler* scheduler; - MemoryUsage usage; - UniqueBuffer buffer; - vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone}; - vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone}; -}; - -class StreamBuffer : public Buffer { -public: - explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - MemoryUsage usage, u64 size_bytes_); - - /// Reserves a region of memory from the stream buffer. - std::pair Map(u64 size, u64 alignment = 0); - - /// Ensures that reserved bytes of memory are available to the GPU. - void Commit(); - - /// Maps and commits a memory region with user provided data - u64 Copy(VAddr src, size_t size, size_t alignment = 0) { - const auto [data, offset] = Map(size, alignment); - std::memcpy(data, reinterpret_cast(src), size); - Commit(); - return offset; - } - - u64 GetFreeSize() const { - return size_bytes - offset - mapped_size; - } - -private: - struct Watch { - u64 tick{}; - u64 upper_bound{}; - }; - - /// Increases the amount of watches available. - void ReserveWatches(std::vector& watches, std::size_t grow_size); - - /// Waits pending watches until requested upper bound. - void WaitPendingOperations(u64 requested_upper_bound); - -private: - u64 offset{}; - u64 mapped_size{}; - std::vector current_watches; - std::size_t current_watch_cursor{}; - std::optional invalidation_mark; - std::vector previous_watches; - std::size_t wait_cursor{}; - u64 wait_bound{}; -}; - -} // namespace VideoCore +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include "common/types.h" +#include "video_core/amdgpu/resource.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { +class Instance; +class Scheduler; +} // namespace Vulkan + +VK_DEFINE_HANDLE(VmaAllocation) +VK_DEFINE_HANDLE(VmaAllocator) + +struct VmaAllocationInfo; + +namespace VideoCore { + +/// Hints and requirements for the backing memory type of a commit +enum class MemoryUsage { + DeviceLocal, ///< Requests device local buffer. + Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads + Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks + Stream, ///< Requests device local host visible buffer, falling back host memory. +}; + +constexpr vk::BufferUsageFlags ReadFlags = + vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eUniformTexelBuffer | + vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer; + +constexpr vk::BufferUsageFlags AllFlags = ReadFlags | vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eStorageTexelBuffer | + vk::BufferUsageFlagBits::eStorageBuffer; + +struct UniqueBuffer { + explicit UniqueBuffer(vk::Device device, VmaAllocator allocator); + ~UniqueBuffer(); + + UniqueBuffer(const UniqueBuffer&) = delete; + UniqueBuffer& operator=(const UniqueBuffer&) = delete; + + UniqueBuffer(UniqueBuffer&& other) + : allocator{std::exchange(other.allocator, VK_NULL_HANDLE)}, + allocation{std::exchange(other.allocation, VK_NULL_HANDLE)}, + buffer{std::exchange(other.buffer, VK_NULL_HANDLE)} {} + UniqueBuffer& operator=(UniqueBuffer&& other) { + buffer = std::exchange(other.buffer, VK_NULL_HANDLE); + allocator = std::exchange(other.allocator, VK_NULL_HANDLE); + allocation = std::exchange(other.allocation, VK_NULL_HANDLE); + return *this; + } + + void Create(const vk::BufferCreateInfo& image_ci, MemoryUsage usage, + VmaAllocationInfo* out_alloc_info); + + operator vk::Buffer() const { + return buffer; + } + + vk::Device device; + VmaAllocator allocator; + VmaAllocation allocation; + vk::Buffer buffer{}; +}; + +class Buffer { +public: + explicit Buffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, + MemoryUsage usage, VAddr cpu_addr_, vk::BufferUsageFlags flags, + u64 size_bytes_); + + Buffer& operator=(const Buffer&) = delete; + Buffer(const Buffer&) = delete; + + Buffer& operator=(Buffer&&) = default; + Buffer(Buffer&&) = default; + + vk::BufferView View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt, + AmdGpu::NumberFormat nfmt); + + /// Increases the likeliness of this being a stream buffer + void IncreaseStreamScore(int score) noexcept { + stream_score += score; + } + + /// Returns the likeliness of this being a stream buffer + [[nodiscard]] int StreamScore() const noexcept { + return stream_score; + } + + /// Returns true when vaddr -> vaddr+size is fully contained in the buffer + [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { + return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); + } + + /// Returns the base CPU address of the buffer + [[nodiscard]] VAddr CpuAddr() const noexcept { + return cpu_addr; + } + + /// Returns the offset relative to the given CPU address + [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept { + return static_cast(other_cpu_addr - cpu_addr); + } + + size_t SizeBytes() const { + return size_bytes; + } + + vk::Buffer Handle() const noexcept { + return buffer; + } + + std::optional GetBarrier(vk::AccessFlagBits2 dst_acess_mask, + vk::PipelineStageFlagBits2 dst_stage) { + if (dst_acess_mask == access_mask && stage == dst_stage) { + return {}; + } + + auto barrier = vk::BufferMemoryBarrier2{ + .srcStageMask = stage, + .srcAccessMask = access_mask, + .dstStageMask = dst_stage, + .dstAccessMask = dst_acess_mask, + .buffer = buffer.buffer, + .size = size_bytes, + }; + access_mask = dst_acess_mask; + stage = dst_stage; + return barrier; + } + +public: + VAddr cpu_addr = 0; + bool is_picked{}; + bool is_coherent{}; + bool is_deleted{}; + int stream_score = 0; + size_t size_bytes = 0; + std::span mapped_data; + const Vulkan::Instance* instance; + Vulkan::Scheduler* scheduler; + MemoryUsage usage; + UniqueBuffer buffer; + vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone}; + vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone}; +}; + +class StreamBuffer : public Buffer { +public: + explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, + MemoryUsage usage, u64 size_bytes_); + + /// Reserves a region of memory from the stream buffer. + std::pair Map(u64 size, u64 alignment = 0); + + /// Ensures that reserved bytes of memory are available to the GPU. + void Commit(); + + /// Maps and commits a memory region with user provided data + u64 Copy(VAddr src, size_t size, size_t alignment = 0) { + const auto [data, offset] = Map(size, alignment); + std::memcpy(data, reinterpret_cast(src), size); + Commit(); + return offset; + } + + u64 GetFreeSize() const { + return size_bytes - offset - mapped_size; + } + +private: + struct Watch { + u64 tick{}; + u64 upper_bound{}; + }; + + /// Increases the amount of watches available. + void ReserveWatches(std::vector& watches, std::size_t grow_size); + + /// Waits pending watches until requested upper bound. + void WaitPendingOperations(u64 requested_upper_bound); + +private: + u64 offset{}; + u64 mapped_size{}; + std::vector current_watches; + std::size_t current_watch_cursor{}; + std::optional invalidation_mark; + std::vector previous_watches; + std::size_t wait_cursor{}; + u64 wait_bound{}; +}; + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 75f06365..59c1e0bc 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -1,696 +1,696 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include "common/alignment.h" -#include "common/scope_exit.h" -#include "common/types.h" -#include "shader_recompiler/frontend/fetch_shader.h" -#include "shader_recompiler/info.h" -#include "video_core/amdgpu/liverpool.h" -#include "video_core/buffer_cache/buffer_cache.h" -#include "video_core/renderer_vulkan/liverpool_to_vk.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/texture_cache/texture_cache.h" - -namespace VideoCore { - -static constexpr size_t NumVertexBuffers = 32; -static constexpr size_t GdsBufferSize = 64_KB; -static constexpr size_t StagingBufferSize = 1_GB; -static constexpr size_t UboStreamBufferSize = 64_MB; - -BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, - AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, - PageManager& tracker_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, - texture_cache{texture_cache_}, tracker{tracker_}, - staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, - stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, - gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, GdsBufferSize}, - memory_tracker{&tracker} { - Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); - - // Ensure the first slot is used for the null buffer - const auto null_id = - slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1); - ASSERT(null_id.index == 0); - const vk::Buffer& null_buffer = slot_buffers[null_id].buffer; - Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer"); - - const vk::BufferViewCreateInfo null_view_ci = { - .buffer = null_buffer, - .format = vk::Format::eR8Unorm, - .offset = 0, - .range = VK_WHOLE_SIZE, - }; - const auto [null_view_result, null_view] = instance.GetDevice().createBufferView(null_view_ci); - ASSERT_MSG(null_view_result == vk::Result::eSuccess, "Failed to create null buffer view."); - null_buffer_view = null_view; - Vulkan::SetObjectName(instance.GetDevice(), null_buffer_view, "Null Buffer View"); -} - -BufferCache::~BufferCache() = default; - -void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { - std::scoped_lock lk{mutex}; - const bool is_tracked = IsRegionRegistered(device_addr, size); - if (!is_tracked) { - return; - } - // Mark the page as CPU modified to stop tracking writes. - SCOPE_EXIT { - memory_tracker.MarkRegionAsCpuModified(device_addr, size); - }; - if (!memory_tracker.IsRegionGpuModified(device_addr, size)) { - // Page has not been modified by the GPU, nothing to do. - return; - } -} - -void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) { - boost::container::small_vector copies; - u64 total_size_bytes = 0; - memory_tracker.ForEachDownloadRange( - device_addr, size, [&](u64 device_addr_out, u64 range_size) { - const VAddr buffer_addr = buffer.CpuAddr(); - const auto add_download = [&](VAddr start, VAddr end) { - const u64 new_offset = start - buffer_addr; - const u64 new_size = end - start; - copies.push_back(vk::BufferCopy{ - .srcOffset = new_offset, - .dstOffset = total_size_bytes, - .size = new_size, - }); - total_size_bytes += new_size; - }; - gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download); - gpu_modified_ranges.Subtract(device_addr_out, range_size); - }); - if (total_size_bytes == 0) { - return; - } - const auto [staging, offset] = staging_buffer.Map(total_size_bytes); - for (auto& copy : copies) { - // Modify copies to have the staging offset in mind - copy.dstOffset += offset; - } - staging_buffer.Commit(); - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyBuffer(buffer.buffer, staging_buffer.Handle(), copies); - scheduler.Finish(); - for (const auto& copy : copies) { - const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; - const u64 dst_offset = copy.dstOffset - offset; - std::memcpy(std::bit_cast(copy_device_addr), staging + dst_offset, copy.size); - } -} - -bool BufferCache::BindVertexBuffers( - const Shader::Info& vs_info, const std::optional& fetch_shader) { - boost::container::small_vector attributes; - boost::container::small_vector bindings; - SCOPE_EXIT { - if (instance.IsVertexInputDynamicState()) { - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.setVertexInputEXT(bindings, attributes); - } else if (bindings.empty()) { - // Required to call bindVertexBuffers2EXT at least once in the current command buffer - // with non-null strides without a non-dynamic stride pipeline in between. Thus even - // when nothing is bound we still need to make a dummy call. Non-null strides in turn - // requires a count greater than 0. - const auto cmdbuf = scheduler.CommandBuffer(); - const std::array null_buffers = {GetBuffer(NULL_BUFFER_ID).buffer.buffer}; - constexpr std::array null_offsets = {static_cast(0)}; - cmdbuf.bindVertexBuffers2EXT(0, null_buffers, null_offsets, null_offsets, null_offsets); - } - }; - - if (!fetch_shader || fetch_shader->attributes.empty()) { - return false; - } - - std::array host_buffers; - std::array host_offsets; - std::array host_sizes; - std::array host_strides; - boost::container::static_vector guest_buffers; - - struct BufferRange { - VAddr base_address; - VAddr end_address; - vk::Buffer vk_buffer; - u64 offset; - - size_t GetSize() const { - return end_address - base_address; - } - }; - - // Calculate buffers memory overlaps - bool has_step_rate = false; - boost::container::static_vector ranges{}; - for (const auto& attrib : fetch_shader->attributes) { - if (attrib.UsesStepRates()) { - has_step_rate = true; - continue; - } - - const auto& buffer = attrib.GetSharp(vs_info); - if (buffer.GetSize() == 0) { - continue; - } - guest_buffers.emplace_back(buffer); - ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); - attributes.push_back({ - .location = attrib.semantic, - .binding = attrib.semantic, - .format = - Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), - .offset = 0, - }); - bindings.push_back({ - .binding = attrib.semantic, - .stride = buffer.GetStride(), - .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None - ? vk::VertexInputRate::eVertex - : vk::VertexInputRate::eInstance, - .divisor = 1, - }); - } - if (ranges.empty()) { - return false; - } - - std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) { - return lhv.base_address < rhv.base_address; - }); - - boost::container::static_vector ranges_merged{ranges[0]}; - for (auto range : ranges) { - auto& prev_range = ranges_merged.back(); - if (prev_range.end_address < range.base_address) { - ranges_merged.emplace_back(range); - } else { - prev_range.end_address = std::max(prev_range.end_address, range.end_address); - } - } - - // Map buffers - for (auto& range : ranges_merged) { - const auto [buffer, offset] = ObtainBuffer(range.base_address, range.GetSize(), false); - range.vk_buffer = buffer->buffer; - range.offset = offset; - } - - // Bind vertex buffers - const size_t num_buffers = guest_buffers.size(); - for (u32 i = 0; i < num_buffers; ++i) { - const auto& buffer = guest_buffers[i]; - const auto host_buffer = std::ranges::find_if(ranges_merged, [&](const BufferRange& range) { - return (buffer.base_address >= range.base_address && - buffer.base_address < range.end_address); - }); - ASSERT(host_buffer != ranges_merged.cend()); - - host_buffers[i] = host_buffer->vk_buffer; - host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address; - host_sizes[i] = buffer.GetSize(); - host_strides[i] = buffer.GetStride(); - } - - if (num_buffers > 0) { - const auto cmdbuf = scheduler.CommandBuffer(); - if (instance.IsVertexInputDynamicState()) { - cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data()); - } else { - cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(), - host_sizes.data(), host_strides.data()); - } - } - - return has_step_rate; -} - -u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { - // Emulate QuadList and Polygon primitive types with CPU made index buffer. - const auto& regs = liverpool->regs; - if (!is_indexed) { - if (regs.primitive_type != AmdGpu::PrimitiveType::Polygon) { - return regs.num_indices; - } - - // Emit indices. - const u32 index_size = 3 * regs.num_indices; - const auto [data, offset] = stream_buffer.Map(index_size); - Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices); - stream_buffer.Commit(); - - // Bind index buffer. - is_indexed = true; - - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, vk::IndexType::eUint16); - return index_size / sizeof(u16); - } - - // Figure out index type and size. - const bool is_index16 = - regs.index_buffer_type.index_type == AmdGpu::Liverpool::IndexType::Index16; - const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; - const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32); - VAddr index_address = regs.index_base_address.Address(); - index_address += index_offset * index_size; - - if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { - UNREACHABLE(); - } - - // Bind index buffer. - const u32 index_buffer_size = regs.num_indices * index_size; - const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type); - return regs.num_indices; -} - -void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { - ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned"); - if (!is_gds && !IsRegionRegistered(address, num_bytes)) { - memcpy(std::bit_cast(address), value, num_bytes); - return; - } - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - const Buffer* buffer = [&] { - if (is_gds) { - return &gds_buffer; - } - const BufferId buffer_id = FindBuffer(address, num_bytes); - return &slot_buffers[buffer_id]; - }(); - const vk::BufferMemoryBarrier2 buf_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - .buffer = buffer->Handle(), - .offset = buffer->Offset(address), - .size = num_bytes, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier, - }); - cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); -} - -std::pair BufferCache::ObtainHostUBO(std::span data) { - static constexpr u64 StreamThreshold = CACHING_PAGESIZE; - ASSERT(data.size_bytes() <= StreamThreshold); - const u64 offset = stream_buffer.Copy(reinterpret_cast(data.data()), data.size_bytes(), - instance.UniformMinAlignment()); - return {&stream_buffer, offset}; -} - -std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, - bool is_texel_buffer, BufferId buffer_id) { - // For small uniform buffers that have not been modified by gpu - // use device local stream buffer to reduce renderpass breaks. - static constexpr u64 StreamThreshold = CACHING_PAGESIZE; - const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size); - if (!is_written && size <= StreamThreshold && !is_gpu_dirty) { - const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment()); - return {&stream_buffer, offset}; - } - - if (!buffer_id || slot_buffers[buffer_id].is_deleted) { - buffer_id = FindBuffer(device_addr, size); - } - Buffer& buffer = slot_buffers[buffer_id]; - SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer); - if (is_written) { - memory_tracker.MarkRegionAsGpuModified(device_addr, size); - gpu_modified_ranges.Add(device_addr, size); - } - return {&buffer, buffer.Offset(device_addr)}; -} - -std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) { - // Check if any buffer contains the full requested range. - const u64 page = gpu_addr >> CACHING_PAGEBITS; - const BufferId buffer_id = page_table[page]; - if (buffer_id) { - Buffer& buffer = slot_buffers[buffer_id]; - if (buffer.IsInBounds(gpu_addr, size)) { - SynchronizeBuffer(buffer, gpu_addr, size, false); - return {&buffer, buffer.Offset(gpu_addr)}; - } - } - // If no buffer contains the full requested range but some buffer within was GPU-modified, - // fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications. - // This is only done if the request prefers to use GPU memory, otherwise we can skip it. - if (prefer_gpu && memory_tracker.IsRegionGpuModified(gpu_addr, size)) { - return ObtainBuffer(gpu_addr, size, false, false); - } - // In all other cases, just do a CPU copy to the staging buffer. - const u32 offset = staging_buffer.Copy(gpu_addr, size, 16); - return {&staging_buffer, offset}; -} - -bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { - const VAddr end_addr = addr + size; - const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); - for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { - const BufferId buffer_id = page_table[page]; - if (!buffer_id) { - ++page; - continue; - } - Buffer& buffer = slot_buffers[buffer_id]; - const VAddr buf_start_addr = buffer.CpuAddr(); - const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); - if (buf_start_addr < end_addr && addr < buf_end_addr) { - return true; - } - page = Common::DivCeil(buf_end_addr, CACHING_PAGESIZE); - } - return false; -} - -bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) { - return memory_tracker.IsRegionCpuModified(addr, size); -} - -bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) { - return memory_tracker.IsRegionGpuModified(addr, size); -} - -BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) { - if (device_addr == 0) { - return NULL_BUFFER_ID; - } - const u64 page = device_addr >> CACHING_PAGEBITS; - const BufferId buffer_id = page_table[page]; - if (!buffer_id) { - return CreateBuffer(device_addr, size); - } - const Buffer& buffer = slot_buffers[buffer_id]; - if (buffer.IsInBounds(device_addr, size)) { - return buffer_id; - } - return CreateBuffer(device_addr, size); -} - -BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) { - static constexpr int STREAM_LEAP_THRESHOLD = 16; - boost::container::small_vector overlap_ids; - VAddr begin = device_addr; - VAddr end = device_addr + wanted_size; - int stream_score = 0; - bool has_stream_leap = false; - const auto expand_begin = [&](VAddr add_value) { - static constexpr VAddr min_page = CACHING_PAGESIZE + DEVICE_PAGESIZE; - if (add_value > begin - min_page) { - begin = min_page; - device_addr = DEVICE_PAGESIZE; - return; - } - begin -= add_value; - device_addr = begin - CACHING_PAGESIZE; - }; - const auto expand_end = [&](VAddr add_value) { - static constexpr VAddr max_page = 1ULL << MemoryTracker::MAX_CPU_PAGE_BITS; - if (add_value > max_page - end) { - end = max_page; - return; - } - end += add_value; - }; - if (begin == 0) { - return OverlapResult{ - .ids = std::move(overlap_ids), - .begin = begin, - .end = end, - .has_stream_leap = has_stream_leap, - }; - } - for (; device_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); - device_addr += CACHING_PAGESIZE) { - const BufferId overlap_id = page_table[device_addr >> CACHING_PAGEBITS]; - if (!overlap_id) { - continue; - } - Buffer& overlap = slot_buffers[overlap_id]; - if (overlap.is_picked) { - continue; - } - overlap_ids.push_back(overlap_id); - overlap.is_picked = true; - const VAddr overlap_device_addr = overlap.CpuAddr(); - const bool expands_left = overlap_device_addr < begin; - if (expands_left) { - begin = overlap_device_addr; - } - const VAddr overlap_end = overlap_device_addr + overlap.SizeBytes(); - const bool expands_right = overlap_end > end; - if (overlap_end > end) { - end = overlap_end; - } - stream_score += overlap.StreamScore(); - if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) { - // When this memory region has been joined a bunch of times, we assume it's being used - // as a stream buffer. Increase the size to skip constantly recreating buffers. - has_stream_leap = true; - if (expands_right) { - expand_begin(CACHING_PAGESIZE * 128); - } - if (expands_left) { - expand_end(CACHING_PAGESIZE * 128); - } - } - } - return OverlapResult{ - .ids = std::move(overlap_ids), - .begin = begin, - .end = end, - .has_stream_leap = has_stream_leap, - }; -} - -void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, - bool accumulate_stream_score) { - Buffer& new_buffer = slot_buffers[new_buffer_id]; - Buffer& overlap = slot_buffers[overlap_id]; - if (accumulate_stream_score) { - new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); - } - const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); - const vk::BufferCopy copy = { - .srcOffset = 0, - .dstOffset = dst_base_offset, - .size = overlap.SizeBytes(), - }; - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, - }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); - cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); - DeleteBuffer(overlap_id); -} - -BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { - const VAddr device_addr_end = Common::AlignUp(device_addr + wanted_size, CACHING_PAGESIZE); - device_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE); - wanted_size = static_cast(device_addr_end - device_addr); - const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); - const u32 size = static_cast(overlap.end - overlap.begin); - const BufferId new_buffer_id = slot_buffers.insert( - instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size); - auto& new_buffer = slot_buffers[new_buffer_id]; - const size_t size_bytes = new_buffer.SizeBytes(); - const auto cmdbuf = scheduler.CommandBuffer(); - scheduler.EndRendering(); - cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0); - for (const BufferId overlap_id : overlap.ids) { - JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); - } - Register(new_buffer_id); - return new_buffer_id; -} - -void BufferCache::Register(BufferId buffer_id) { - ChangeRegister(buffer_id); -} - -void BufferCache::Unregister(BufferId buffer_id) { - ChangeRegister(buffer_id); -} - -template -void BufferCache::ChangeRegister(BufferId buffer_id) { - Buffer& buffer = slot_buffers[buffer_id]; - const auto size = buffer.SizeBytes(); - const VAddr device_addr_begin = buffer.CpuAddr(); - const VAddr device_addr_end = device_addr_begin + size; - const u64 page_begin = device_addr_begin / CACHING_PAGESIZE; - const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE); - for (u64 page = page_begin; page != page_end; ++page) { - if constexpr (insert) { - page_table[page] = buffer_id; - } else { - page_table[page] = BufferId{}; - } - } -} - -void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, - bool is_texel_buffer) { - std::scoped_lock lk{mutex}; - boost::container::small_vector copies; - u64 total_size_bytes = 0; - u64 largest_copy = 0; - VAddr buffer_start = buffer.CpuAddr(); - memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { - copies.push_back(vk::BufferCopy{ - .srcOffset = total_size_bytes, - .dstOffset = device_addr_out - buffer_start, - .size = range_size, - }); - total_size_bytes += range_size; - largest_copy = std::max(largest_copy, range_size); - }); - SCOPE_EXIT { - if (is_texel_buffer) { - SynchronizeBufferFromImage(buffer, device_addr, size); - } - }; - if (total_size_bytes == 0) { - return; - } - vk::Buffer src_buffer = staging_buffer.Handle(); - if (total_size_bytes < StagingBufferSize) { - const auto [staging, offset] = staging_buffer.Map(total_size_bytes); - for (auto& copy : copies) { - u8* const src_pointer = staging + copy.srcOffset; - const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; - std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); - // Apply the staging offset - copy.srcOffset += offset; - } - staging_buffer.Commit(); - } else { - // For large one time transfers use a temporary host buffer. - // RenderDoc can lag quite a bit if the stream buffer is too large. - Buffer temp_buffer{instance, - scheduler, - MemoryUsage::Upload, - 0, - vk::BufferUsageFlagBits::eTransferSrc, - total_size_bytes}; - src_buffer = temp_buffer.Handle(); - u8* const staging = temp_buffer.mapped_data.data(); - for (auto& copy : copies) { - u8* const src_pointer = staging + copy.srcOffset; - const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; - std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); - } - scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable {}); - } - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, - }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); - cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); -} - -bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { - static constexpr FindFlags find_flags = - FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; - TextureCache::BaseDesc desc{}; - desc.info.guest_address = device_addr; - desc.info.guest_size_bytes = size; - const ImageId image_id = texture_cache.FindImage(desc, find_flags); - if (!image_id) { - return false; - } - Image& image = texture_cache.GetImage(image_id); - if (False(image.flags & ImageFlagBits::GpuModified)) { - return false; - } - ASSERT_MSG(device_addr == image.info.guest_address, - "Texel buffer aliases image subresources {:x} : {:x}", device_addr, - image.info.guest_address); - boost::container::small_vector copies; - u32 offset = buffer.Offset(image.info.guest_address); - const u32 num_layers = image.info.resources.layers; - const u32 max_offset = offset + size; - for (u32 m = 0; m < image.info.resources.levels; m++) { - const u32 width = std::max(image.info.size.width >> m, 1u); - const u32 height = std::max(image.info.size.height >> m, 1u); - const u32 depth = - image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; - const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - offset += mip_ofs * num_layers; - if (offset + (mip_size * num_layers) > max_offset) { - break; - } - copies.push_back({ - .bufferOffset = offset, - .bufferRowLength = static_cast(mip_pitch), - .bufferImageHeight = static_cast(mip_height), - .imageSubresource{ - .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, - .mipLevel = m, - .baseArrayLayer = 0, - .layerCount = num_layers, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {width, height, depth}, - }); - } - if (!copies.empty()) { - scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, - copies); - } - return true; -} - -void BufferCache::DeleteBuffer(BufferId buffer_id) { - Buffer& buffer = slot_buffers[buffer_id]; - Unregister(buffer_id); - scheduler.DeferOperation([this, buffer_id] { slot_buffers.erase(buffer_id); }); - buffer.is_deleted = true; -} - -} // namespace VideoCore +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "common/alignment.h" +#include "common/scope_exit.h" +#include "common/types.h" +#include "shader_recompiler/frontend/fetch_shader.h" +#include "shader_recompiler/info.h" +#include "video_core/amdgpu/liverpool.h" +#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace VideoCore { + +static constexpr size_t NumVertexBuffers = 32; +static constexpr size_t GdsBufferSize = 64_KB; +static constexpr size_t StagingBufferSize = 1_GB; +static constexpr size_t UboStreamBufferSize = 64_MB; + +BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, + AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, + PageManager& tracker_) + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, + texture_cache{texture_cache_}, tracker{tracker_}, + staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, + stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, + gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, GdsBufferSize}, + memory_tracker{&tracker} { + Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); + + // Ensure the first slot is used for the null buffer + const auto null_id = + slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1); + ASSERT(null_id.index == 0); + const vk::Buffer& null_buffer = slot_buffers[null_id].buffer; + Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer"); + + const vk::BufferViewCreateInfo null_view_ci = { + .buffer = null_buffer, + .format = vk::Format::eR8Unorm, + .offset = 0, + .range = VK_WHOLE_SIZE, + }; + const auto [null_view_result, null_view] = instance.GetDevice().createBufferView(null_view_ci); + ASSERT_MSG(null_view_result == vk::Result::eSuccess, "Failed to create null buffer view."); + null_buffer_view = null_view; + Vulkan::SetObjectName(instance.GetDevice(), null_buffer_view, "Null Buffer View"); +} + +BufferCache::~BufferCache() = default; + +void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { + std::scoped_lock lk{mutex}; + const bool is_tracked = IsRegionRegistered(device_addr, size); + if (!is_tracked) { + return; + } + // Mark the page as CPU modified to stop tracking writes. + SCOPE_EXIT { + memory_tracker.MarkRegionAsCpuModified(device_addr, size); + }; + if (!memory_tracker.IsRegionGpuModified(device_addr, size)) { + // Page has not been modified by the GPU, nothing to do. + return; + } +} + +void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) { + boost::container::small_vector copies; + u64 total_size_bytes = 0; + memory_tracker.ForEachDownloadRange( + device_addr, size, [&](u64 device_addr_out, u64 range_size) { + const VAddr buffer_addr = buffer.CpuAddr(); + const auto add_download = [&](VAddr start, VAddr end) { + const u64 new_offset = start - buffer_addr; + const u64 new_size = end - start; + copies.push_back(vk::BufferCopy{ + .srcOffset = new_offset, + .dstOffset = total_size_bytes, + .size = new_size, + }); + total_size_bytes += new_size; + }; + gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download); + gpu_modified_ranges.Subtract(device_addr_out, range_size); + }); + if (total_size_bytes == 0) { + return; + } + const auto [staging, offset] = staging_buffer.Map(total_size_bytes); + for (auto& copy : copies) { + // Modify copies to have the staging offset in mind + copy.dstOffset += offset; + } + staging_buffer.Commit(); + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyBuffer(buffer.buffer, staging_buffer.Handle(), copies); + scheduler.Finish(); + for (const auto& copy : copies) { + const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; + const u64 dst_offset = copy.dstOffset - offset; + std::memcpy(std::bit_cast(copy_device_addr), staging + dst_offset, copy.size); + } +} + +bool BufferCache::BindVertexBuffers( + const Shader::Info& vs_info, const std::optional& fetch_shader) { + boost::container::small_vector attributes; + boost::container::small_vector bindings; + SCOPE_EXIT { + if (instance.IsVertexInputDynamicState()) { + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.setVertexInputEXT(bindings, attributes); + } else if (bindings.empty()) { + // Required to call bindVertexBuffers2EXT at least once in the current command buffer + // with non-null strides without a non-dynamic stride pipeline in between. Thus even + // when nothing is bound we still need to make a dummy call. Non-null strides in turn + // requires a count greater than 0. + const auto cmdbuf = scheduler.CommandBuffer(); + const std::array null_buffers = {GetBuffer(NULL_BUFFER_ID).buffer.buffer}; + constexpr std::array null_offsets = {static_cast(0)}; + cmdbuf.bindVertexBuffers2EXT(0, null_buffers, null_offsets, null_offsets, null_offsets); + } + }; + + if (!fetch_shader || fetch_shader->attributes.empty()) { + return false; + } + + std::array host_buffers; + std::array host_offsets; + std::array host_sizes; + std::array host_strides; + boost::container::static_vector guest_buffers; + + struct BufferRange { + VAddr base_address; + VAddr end_address; + vk::Buffer vk_buffer; + u64 offset; + + size_t GetSize() const { + return end_address - base_address; + } + }; + + // Calculate buffers memory overlaps + bool has_step_rate = false; + boost::container::static_vector ranges{}; + for (const auto& attrib : fetch_shader->attributes) { + if (attrib.UsesStepRates()) { + has_step_rate = true; + continue; + } + + const auto& buffer = attrib.GetSharp(vs_info); + if (buffer.GetSize() == 0) { + continue; + } + guest_buffers.emplace_back(buffer); + ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); + attributes.push_back({ + .location = attrib.semantic, + .binding = attrib.semantic, + .format = + Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), + .offset = 0, + }); + bindings.push_back({ + .binding = attrib.semantic, + .stride = buffer.GetStride(), + .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None + ? vk::VertexInputRate::eVertex + : vk::VertexInputRate::eInstance, + .divisor = 1, + }); + } + if (ranges.empty()) { + return false; + } + + std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) { + return lhv.base_address < rhv.base_address; + }); + + boost::container::static_vector ranges_merged{ranges[0]}; + for (auto range : ranges) { + auto& prev_range = ranges_merged.back(); + if (prev_range.end_address < range.base_address) { + ranges_merged.emplace_back(range); + } else { + prev_range.end_address = std::max(prev_range.end_address, range.end_address); + } + } + + // Map buffers + for (auto& range : ranges_merged) { + const auto [buffer, offset] = ObtainBuffer(range.base_address, range.GetSize(), false); + range.vk_buffer = buffer->buffer; + range.offset = offset; + } + + // Bind vertex buffers + const size_t num_buffers = guest_buffers.size(); + for (u32 i = 0; i < num_buffers; ++i) { + const auto& buffer = guest_buffers[i]; + const auto host_buffer = std::ranges::find_if(ranges_merged, [&](const BufferRange& range) { + return (buffer.base_address >= range.base_address && + buffer.base_address < range.end_address); + }); + ASSERT(host_buffer != ranges_merged.cend()); + + host_buffers[i] = host_buffer->vk_buffer; + host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address; + host_sizes[i] = buffer.GetSize(); + host_strides[i] = buffer.GetStride(); + } + + if (num_buffers > 0) { + const auto cmdbuf = scheduler.CommandBuffer(); + if (instance.IsVertexInputDynamicState()) { + cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data()); + } else { + cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(), + host_sizes.data(), host_strides.data()); + } + } + + return has_step_rate; +} + +u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { + // Emulate QuadList and Polygon primitive types with CPU made index buffer. + const auto& regs = liverpool->regs; + if (!is_indexed) { + if (regs.primitive_type != AmdGpu::PrimitiveType::Polygon) { + return regs.num_indices; + } + + // Emit indices. + const u32 index_size = 3 * regs.num_indices; + const auto [data, offset] = stream_buffer.Map(index_size); + Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices); + stream_buffer.Commit(); + + // Bind index buffer. + is_indexed = true; + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, vk::IndexType::eUint16); + return index_size / sizeof(u16); + } + + // Figure out index type and size. + const bool is_index16 = + regs.index_buffer_type.index_type == AmdGpu::Liverpool::IndexType::Index16; + const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; + const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32); + VAddr index_address = regs.index_base_address.Address(); + index_address += index_offset * index_size; + + if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { + UNREACHABLE(); + } + + // Bind index buffer. + const u32 index_buffer_size = regs.num_indices * index_size; + const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type); + return regs.num_indices; +} + +void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { + ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned"); + if (!is_gds && !IsRegionRegistered(address, num_bytes)) { + memcpy(std::bit_cast(address), value, num_bytes); + return; + } + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + const Buffer* buffer = [&] { + if (is_gds) { + return &gds_buffer; + } + const BufferId buffer_id = FindBuffer(address, num_bytes); + return &slot_buffers[buffer_id]; + }(); + const vk::BufferMemoryBarrier2 buf_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = buffer->Handle(), + .offset = buffer->Offset(address), + .size = num_bytes, + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &buf_barrier, + }); + cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); +} + +std::pair BufferCache::ObtainHostUBO(std::span data) { + static constexpr u64 StreamThreshold = CACHING_PAGESIZE; + ASSERT(data.size_bytes() <= StreamThreshold); + const u64 offset = stream_buffer.Copy(reinterpret_cast(data.data()), data.size_bytes(), + instance.UniformMinAlignment()); + return {&stream_buffer, offset}; +} + +std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, + bool is_texel_buffer, BufferId buffer_id) { + // For small uniform buffers that have not been modified by gpu + // use device local stream buffer to reduce renderpass breaks. + static constexpr u64 StreamThreshold = CACHING_PAGESIZE; + const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size); + if (!is_written && size <= StreamThreshold && !is_gpu_dirty) { + const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment()); + return {&stream_buffer, offset}; + } + + if (!buffer_id || slot_buffers[buffer_id].is_deleted) { + buffer_id = FindBuffer(device_addr, size); + } + Buffer& buffer = slot_buffers[buffer_id]; + SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer); + if (is_written) { + memory_tracker.MarkRegionAsGpuModified(device_addr, size); + gpu_modified_ranges.Add(device_addr, size); + } + return {&buffer, buffer.Offset(device_addr)}; +} + +std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) { + // Check if any buffer contains the full requested range. + const u64 page = gpu_addr >> CACHING_PAGEBITS; + const BufferId buffer_id = page_table[page]; + if (buffer_id) { + Buffer& buffer = slot_buffers[buffer_id]; + if (buffer.IsInBounds(gpu_addr, size)) { + SynchronizeBuffer(buffer, gpu_addr, size, false); + return {&buffer, buffer.Offset(gpu_addr)}; + } + } + // If no buffer contains the full requested range but some buffer within was GPU-modified, + // fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications. + // This is only done if the request prefers to use GPU memory, otherwise we can skip it. + if (prefer_gpu && memory_tracker.IsRegionGpuModified(gpu_addr, size)) { + return ObtainBuffer(gpu_addr, size, false, false); + } + // In all other cases, just do a CPU copy to the staging buffer. + const u32 offset = staging_buffer.Copy(gpu_addr, size, 16); + return {&staging_buffer, offset}; +} + +bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { + const VAddr end_addr = addr + size; + const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); + for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { + const BufferId buffer_id = page_table[page]; + if (!buffer_id) { + ++page; + continue; + } + Buffer& buffer = slot_buffers[buffer_id]; + const VAddr buf_start_addr = buffer.CpuAddr(); + const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); + if (buf_start_addr < end_addr && addr < buf_end_addr) { + return true; + } + page = Common::DivCeil(buf_end_addr, CACHING_PAGESIZE); + } + return false; +} + +bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) { + return memory_tracker.IsRegionCpuModified(addr, size); +} + +bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) { + return memory_tracker.IsRegionGpuModified(addr, size); +} + +BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) { + if (device_addr == 0) { + return NULL_BUFFER_ID; + } + const u64 page = device_addr >> CACHING_PAGEBITS; + const BufferId buffer_id = page_table[page]; + if (!buffer_id) { + return CreateBuffer(device_addr, size); + } + const Buffer& buffer = slot_buffers[buffer_id]; + if (buffer.IsInBounds(device_addr, size)) { + return buffer_id; + } + return CreateBuffer(device_addr, size); +} + +BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) { + static constexpr int STREAM_LEAP_THRESHOLD = 16; + boost::container::small_vector overlap_ids; + VAddr begin = device_addr; + VAddr end = device_addr + wanted_size; + int stream_score = 0; + bool has_stream_leap = false; + const auto expand_begin = [&](VAddr add_value) { + static constexpr VAddr min_page = CACHING_PAGESIZE + DEVICE_PAGESIZE; + if (add_value > begin - min_page) { + begin = min_page; + device_addr = DEVICE_PAGESIZE; + return; + } + begin -= add_value; + device_addr = begin - CACHING_PAGESIZE; + }; + const auto expand_end = [&](VAddr add_value) { + static constexpr VAddr max_page = 1ULL << MemoryTracker::MAX_CPU_PAGE_BITS; + if (add_value > max_page - end) { + end = max_page; + return; + } + end += add_value; + }; + if (begin == 0) { + return OverlapResult{ + .ids = std::move(overlap_ids), + .begin = begin, + .end = end, + .has_stream_leap = has_stream_leap, + }; + } + for (; device_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); + device_addr += CACHING_PAGESIZE) { + const BufferId overlap_id = page_table[device_addr >> CACHING_PAGEBITS]; + if (!overlap_id) { + continue; + } + Buffer& overlap = slot_buffers[overlap_id]; + if (overlap.is_picked) { + continue; + } + overlap_ids.push_back(overlap_id); + overlap.is_picked = true; + const VAddr overlap_device_addr = overlap.CpuAddr(); + const bool expands_left = overlap_device_addr < begin; + if (expands_left) { + begin = overlap_device_addr; + } + const VAddr overlap_end = overlap_device_addr + overlap.SizeBytes(); + const bool expands_right = overlap_end > end; + if (overlap_end > end) { + end = overlap_end; + } + stream_score += overlap.StreamScore(); + if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) { + // When this memory region has been joined a bunch of times, we assume it's being used + // as a stream buffer. Increase the size to skip constantly recreating buffers. + has_stream_leap = true; + if (expands_right) { + expand_begin(CACHING_PAGESIZE * 128); + } + if (expands_left) { + expand_end(CACHING_PAGESIZE * 128); + } + } + } + return OverlapResult{ + .ids = std::move(overlap_ids), + .begin = begin, + .end = end, + .has_stream_leap = has_stream_leap, + }; +} + +void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, + bool accumulate_stream_score) { + Buffer& new_buffer = slot_buffers[new_buffer_id]; + Buffer& overlap = slot_buffers[overlap_id]; + if (accumulate_stream_score) { + new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); + } + const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); + const vk::BufferCopy copy = { + .srcOffset = 0, + .dstOffset = dst_base_offset, + .size = overlap.SizeBytes(), + }; + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + static constexpr vk::MemoryBarrier READ_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + }; + static constexpr vk::MemoryBarrier WRITE_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, + READ_BARRIER, {}, {}); + cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + DeleteBuffer(overlap_id); +} + +BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { + const VAddr device_addr_end = Common::AlignUp(device_addr + wanted_size, CACHING_PAGESIZE); + device_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE); + wanted_size = static_cast(device_addr_end - device_addr); + const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); + const u32 size = static_cast(overlap.end - overlap.begin); + const BufferId new_buffer_id = slot_buffers.insert( + instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size); + auto& new_buffer = slot_buffers[new_buffer_id]; + const size_t size_bytes = new_buffer.SizeBytes(); + const auto cmdbuf = scheduler.CommandBuffer(); + scheduler.EndRendering(); + cmdbuf.fillBuffer(new_buffer.buffer, 0, size_bytes, 0); + for (const BufferId overlap_id : overlap.ids) { + JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); + } + Register(new_buffer_id); + return new_buffer_id; +} + +void BufferCache::Register(BufferId buffer_id) { + ChangeRegister(buffer_id); +} + +void BufferCache::Unregister(BufferId buffer_id) { + ChangeRegister(buffer_id); +} + +template +void BufferCache::ChangeRegister(BufferId buffer_id) { + Buffer& buffer = slot_buffers[buffer_id]; + const auto size = buffer.SizeBytes(); + const VAddr device_addr_begin = buffer.CpuAddr(); + const VAddr device_addr_end = device_addr_begin + size; + const u64 page_begin = device_addr_begin / CACHING_PAGESIZE; + const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE); + for (u64 page = page_begin; page != page_end; ++page) { + if constexpr (insert) { + page_table[page] = buffer_id; + } else { + page_table[page] = BufferId{}; + } + } +} + +void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, + bool is_texel_buffer) { + std::scoped_lock lk{mutex}; + boost::container::small_vector copies; + u64 total_size_bytes = 0; + u64 largest_copy = 0; + VAddr buffer_start = buffer.CpuAddr(); + memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { + copies.push_back(vk::BufferCopy{ + .srcOffset = total_size_bytes, + .dstOffset = device_addr_out - buffer_start, + .size = range_size, + }); + total_size_bytes += range_size; + largest_copy = std::max(largest_copy, range_size); + }); + SCOPE_EXIT { + if (is_texel_buffer) { + SynchronizeBufferFromImage(buffer, device_addr, size); + } + }; + if (total_size_bytes == 0) { + return; + } + vk::Buffer src_buffer = staging_buffer.Handle(); + if (total_size_bytes < StagingBufferSize) { + const auto [staging, offset] = staging_buffer.Map(total_size_bytes); + for (auto& copy : copies) { + u8* const src_pointer = staging + copy.srcOffset; + const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; + std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); + // Apply the staging offset + copy.srcOffset += offset; + } + staging_buffer.Commit(); + } else { + // For large one time transfers use a temporary host buffer. + // RenderDoc can lag quite a bit if the stream buffer is too large. + Buffer temp_buffer{instance, + scheduler, + MemoryUsage::Upload, + 0, + vk::BufferUsageFlagBits::eTransferSrc, + total_size_bytes}; + src_buffer = temp_buffer.Handle(); + u8* const staging = temp_buffer.mapped_data.data(); + for (auto& copy : copies) { + u8* const src_pointer = staging + copy.srcOffset; + const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; + std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); + } + scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable {}); + } + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + static constexpr vk::MemoryBarrier READ_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + }; + static constexpr vk::MemoryBarrier WRITE_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, + READ_BARRIER, {}, {}); + cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); +} + +bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { + static constexpr FindFlags find_flags = + FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; + TextureCache::BaseDesc desc{}; + desc.info.guest_address = device_addr; + desc.info.guest_size_bytes = size; + const ImageId image_id = texture_cache.FindImage(desc, find_flags); + if (!image_id) { + return false; + } + Image& image = texture_cache.GetImage(image_id); + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; + } + ASSERT_MSG(device_addr == image.info.guest_address, + "Texel buffer aliases image subresources {:x} : {:x}", device_addr, + image.info.guest_address); + boost::container::small_vector copies; + u32 offset = buffer.Offset(image.info.guest_address); + const u32 num_layers = image.info.resources.layers; + const u32 max_offset = offset + size; + for (u32 m = 0; m < image.info.resources.levels; m++) { + const u32 width = std::max(image.info.size.width >> m, 1u); + const u32 height = std::max(image.info.size.height >> m, 1u); + const u32 depth = + image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + offset += mip_ofs * num_layers; + if (offset + (mip_size * num_layers) > max_offset) { + break; + } + copies.push_back({ + .bufferOffset = offset, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), + .imageSubresource{ + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = num_layers, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {width, height, depth}, + }); + } + if (!copies.empty()) { + scheduler.EndRendering(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + copies); + } + return true; +} + +void BufferCache::DeleteBuffer(BufferId buffer_id) { + Buffer& buffer = slot_buffers[buffer_id]; + Unregister(buffer_id); + scheduler.DeferOperation([this, buffer_id] { slot_buffers.erase(buffer_id); }); + buffer.is_deleted = true; +} + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e6291341..bcbaa45d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1,168 +1,168 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include -#include -#include "common/div_ceil.h" -#include "common/slot_vector.h" -#include "common/types.h" -#include "video_core/buffer_cache/buffer.h" -#include "video_core/buffer_cache/memory_tracker_base.h" -#include "video_core/buffer_cache/range_set.h" -#include "video_core/multi_level_page_table.h" - -namespace AmdGpu { -struct Liverpool; -} - -namespace Shader { -namespace Gcn { -struct FetchShaderData; -} -struct Info; -} // namespace Shader - -namespace VideoCore { - -using BufferId = Common::SlotId; - -static constexpr BufferId NULL_BUFFER_ID{0}; - -class TextureCache; - -class BufferCache { -public: - static constexpr u32 CACHING_PAGEBITS = 12; - static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; - static constexpr u64 DEVICE_PAGESIZE = 4_KB; - - struct Traits { - using Entry = BufferId; - static constexpr size_t AddressSpaceBits = 40; - static constexpr size_t FirstLevelBits = 14; - static constexpr size_t PageBits = CACHING_PAGEBITS; - }; - using PageTable = MultiLevelPageTable; - - struct OverlapResult { - boost::container::small_vector ids; - VAddr begin; - VAddr end; - bool has_stream_leap = false; - }; - -public: - explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, - PageManager& tracker); - ~BufferCache(); - - /// Returns a pointer to GDS device local buffer. - [[nodiscard]] const Buffer* GetGdsBuffer() const noexcept { - return &gds_buffer; - } - - /// Retrieves the buffer with the specified id. - [[nodiscard]] Buffer& GetBuffer(BufferId id) { - return slot_buffers[id]; - } - - [[nodiscard]] vk::BufferView& NullBufferView() { - return null_buffer_view; - } - - /// Invalidates any buffer in the logical page range. - void InvalidateMemory(VAddr device_addr, u64 size); - - /// Binds host vertex buffers for the current draw. - bool BindVertexBuffers(const Shader::Info& vs_info, - const std::optional& fetch_shader); - - /// Bind host index buffer for the current draw. - u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); - - /// Writes a value to GPU buffer. - void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); - - [[nodiscard]] std::pair ObtainHostUBO(std::span data); - - /// Obtains a buffer for the specified region. - [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, - bool is_texel_buffer = false, - BufferId buffer_id = {}); - - /// Attempts to obtain a buffer without modifying the cache contents. - [[nodiscard]] std::pair ObtainViewBuffer(VAddr gpu_addr, u32 size, - bool prefer_gpu); - - /// Return true when a region is registered on the cache - [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); - - /// Return true when a CPU region is modified from the CPU - [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); - - /// Return true when a CPU region is modified from the GPU - [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); - - [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size); - -private: - template - void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { - const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); - for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) { - const BufferId buffer_id = page_table[page]; - if (!buffer_id) { - ++page; - continue; - } - Buffer& buffer = slot_buffers[buffer_id]; - func(buffer_id, buffer); - - const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); - page = Common::DivCeil(end_addr, CACHING_PAGESIZE); - } - } - - void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size); - - [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); - - void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); - - [[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size); - - void Register(BufferId buffer_id); - - void Unregister(BufferId buffer_id); - - template - void ChangeRegister(BufferId buffer_id); - - void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer); - - bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size); - - void DeleteBuffer(BufferId buffer_id); - - const Vulkan::Instance& instance; - Vulkan::Scheduler& scheduler; - AmdGpu::Liverpool* liverpool; - TextureCache& texture_cache; - PageManager& tracker; - StreamBuffer staging_buffer; - StreamBuffer stream_buffer; - Buffer gds_buffer; - std::mutex mutex; - Common::SlotVector slot_buffers; - RangeSet gpu_modified_ranges; - vk::BufferView null_buffer_view; - MemoryTracker memory_tracker; - PageTable page_table; -}; - -} // namespace VideoCore +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include "common/div_ceil.h" +#include "common/slot_vector.h" +#include "common/types.h" +#include "video_core/buffer_cache/buffer.h" +#include "video_core/buffer_cache/memory_tracker_base.h" +#include "video_core/buffer_cache/range_set.h" +#include "video_core/multi_level_page_table.h" + +namespace AmdGpu { +struct Liverpool; +} + +namespace Shader { +namespace Gcn { +struct FetchShaderData; +} +struct Info; +} // namespace Shader + +namespace VideoCore { + +using BufferId = Common::SlotId; + +static constexpr BufferId NULL_BUFFER_ID{0}; + +class TextureCache; + +class BufferCache { +public: + static constexpr u32 CACHING_PAGEBITS = 12; + static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; + static constexpr u64 DEVICE_PAGESIZE = 4_KB; + + struct Traits { + using Entry = BufferId; + static constexpr size_t AddressSpaceBits = 40; + static constexpr size_t FirstLevelBits = 14; + static constexpr size_t PageBits = CACHING_PAGEBITS; + }; + using PageTable = MultiLevelPageTable; + + struct OverlapResult { + boost::container::small_vector ids; + VAddr begin; + VAddr end; + bool has_stream_leap = false; + }; + +public: + explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, + AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, + PageManager& tracker); + ~BufferCache(); + + /// Returns a pointer to GDS device local buffer. + [[nodiscard]] const Buffer* GetGdsBuffer() const noexcept { + return &gds_buffer; + } + + /// Retrieves the buffer with the specified id. + [[nodiscard]] Buffer& GetBuffer(BufferId id) { + return slot_buffers[id]; + } + + [[nodiscard]] vk::BufferView& NullBufferView() { + return null_buffer_view; + } + + /// Invalidates any buffer in the logical page range. + void InvalidateMemory(VAddr device_addr, u64 size); + + /// Binds host vertex buffers for the current draw. + bool BindVertexBuffers(const Shader::Info& vs_info, + const std::optional& fetch_shader); + + /// Bind host index buffer for the current draw. + u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); + + /// Writes a value to GPU buffer. + void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); + + [[nodiscard]] std::pair ObtainHostUBO(std::span data); + + /// Obtains a buffer for the specified region. + [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, + bool is_texel_buffer = false, + BufferId buffer_id = {}); + + /// Attempts to obtain a buffer without modifying the cache contents. + [[nodiscard]] std::pair ObtainViewBuffer(VAddr gpu_addr, u32 size, + bool prefer_gpu); + + /// Return true when a region is registered on the cache + [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); + + /// Return true when a CPU region is modified from the CPU + [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + + [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size); + +private: + template + void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { + const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); + for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) { + const BufferId buffer_id = page_table[page]; + if (!buffer_id) { + ++page; + continue; + } + Buffer& buffer = slot_buffers[buffer_id]; + func(buffer_id, buffer); + + const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); + page = Common::DivCeil(end_addr, CACHING_PAGESIZE); + } + } + + void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size); + + [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); + + void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); + + [[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size); + + void Register(BufferId buffer_id); + + void Unregister(BufferId buffer_id); + + template + void ChangeRegister(BufferId buffer_id); + + void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer); + + bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size); + + void DeleteBuffer(BufferId buffer_id); + + const Vulkan::Instance& instance; + Vulkan::Scheduler& scheduler; + AmdGpu::Liverpool* liverpool; + TextureCache& texture_cache; + PageManager& tracker; + StreamBuffer staging_buffer; + StreamBuffer stream_buffer; + Buffer gds_buffer; + std::mutex mutex; + Common::SlotVector slot_buffers; + RangeSet gpu_modified_ranges; + vk::BufferView null_buffer_view; + MemoryTracker memory_tracker; + PageTable page_table; +}; + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index a59bcfff..ae61b55f 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h @@ -1,175 +1,175 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include -#include -#include "common/types.h" -#include "video_core/buffer_cache/word_manager.h" - -namespace VideoCore { - -class MemoryTracker { -public: - static constexpr size_t MAX_CPU_PAGE_BITS = 40; - static constexpr size_t HIGHER_PAGE_BITS = 22; - static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; - static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; - static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); - static constexpr size_t MANAGER_POOL_SIZE = 32; - static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; - using Manager = WordManager; - -public: - explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {} - ~MemoryTracker() = default; - - /// Returns true if a region has been modified from the CPU - [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { - return IteratePages( - query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { - return manager->template IsRegionModified(offset, size); - }); - } - - /// Returns true if a region has been modified from the GPU - [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { - return IteratePages( - query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { - return manager->template IsRegionModified(offset, size); - }); - } - - /// Mark region as CPU modified, notifying the device_tracker about this change - void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); - } - - /// Unmark region as CPU modified, notifying the device_tracker about this change - void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); - } - - /// Mark region as modified from the host GPU - void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); - } - - /// Unmark region as modified from the host GPU - void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); - } - - /// Call 'func' for each CPU modified range and unmark those pages as CPU modified - template - void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { - IteratePages(query_cpu_range, query_size, - [&func](Manager* manager, u64 offset, size_t size) { - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - }); - } - - /// Call 'func' for each GPU modified range and unmark those pages as GPU modified - template - void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { - IteratePages(query_cpu_range, query_size, - [&func](Manager* manager, u64 offset, size_t size) { - if constexpr (clear) { - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - } else { - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - } - }); - } - -private: - /** - * @brief IteratePages Iterates L2 word manager page table. - * @param cpu_address Start byte cpu address - * @param size Size in bytes of the region of iterate. - * @param func Callback for each word manager. - * @return - */ - template - bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - std::size_t remaining_size{size}; - std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; - u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; - while (remaining_size > 0) { - const std::size_t copy_amount{ - std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; - auto* manager{top_tier[page_index]}; - if (manager) { - if constexpr (BOOL_BREAK) { - if (func(manager, page_offset, copy_amount)) { - return true; - } - } else { - func(manager, page_offset, copy_amount); - } - } else if constexpr (create_region_on_fail) { - CreateRegion(page_index); - manager = top_tier[page_index]; - if constexpr (BOOL_BREAK) { - if (func(manager, page_offset, copy_amount)) { - return true; - } - } else { - func(manager, page_offset, copy_amount); - } - } - page_index++; - page_offset = 0; - remaining_size -= copy_amount; - } - return false; - } - - void CreateRegion(std::size_t page_index) { - const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; - if (free_managers.empty()) { - manager_pool.emplace_back(); - auto& last_pool = manager_pool.back(); - for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { - std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE); - free_managers.push_back(&last_pool[i]); - } - } - // Each manager tracks a 4_MB virtual address space. - auto* new_manager = free_managers.back(); - new_manager->SetCpuAddress(base_cpu_addr); - free_managers.pop_back(); - top_tier[page_index] = new_manager; - } - - PageManager* tracker; - std::deque> manager_pool; - std::vector free_managers; - std::array top_tier{}; -}; - -} // namespace VideoCore +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include "common/types.h" +#include "video_core/buffer_cache/word_manager.h" + +namespace VideoCore { + +class MemoryTracker { +public: + static constexpr size_t MAX_CPU_PAGE_BITS = 40; + static constexpr size_t HIGHER_PAGE_BITS = 22; + static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; + static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; + static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); + static constexpr size_t MANAGER_POOL_SIZE = 32; + static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; + using Manager = WordManager; + +public: + explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {} + ~MemoryTracker() = default; + + /// Returns true if a region has been modified from the CPU + [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + return IteratePages( + query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + return manager->template IsRegionModified(offset, size); + }); + } + + /// Returns true if a region has been modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + return IteratePages( + query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + return manager->template IsRegionModified(offset, size); + }); + } + + /// Mark region as CPU modified, notifying the device_tracker about this change + void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { + IteratePages(dirty_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + + /// Unmark region as CPU modified, notifying the device_tracker about this change + void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { + IteratePages(dirty_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + + /// Mark region as modified from the host GPU + void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { + IteratePages(dirty_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + + /// Unmark region as modified from the host GPU + void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { + IteratePages(dirty_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + + /// Call 'func' for each CPU modified range and unmark those pages as CPU modified + template + void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { + IteratePages(query_cpu_range, query_size, + [&func](Manager* manager, u64 offset, size_t size) { + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); + }); + } + + /// Call 'func' for each GPU modified range and unmark those pages as GPU modified + template + void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { + IteratePages(query_cpu_range, query_size, + [&func](Manager* manager, u64 offset, size_t size) { + if constexpr (clear) { + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); + } else { + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); + } + }); + } + +private: + /** + * @brief IteratePages Iterates L2 word manager page table. + * @param cpu_address Start byte cpu address + * @param size Size in bytes of the region of iterate. + * @param func Callback for each word manager. + * @return + */ + template + bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + std::size_t remaining_size{size}; + std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; + u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; + auto* manager{top_tier[page_index]}; + if (manager) { + if constexpr (BOOL_BREAK) { + if (func(manager, page_offset, copy_amount)) { + return true; + } + } else { + func(manager, page_offset, copy_amount); + } + } else if constexpr (create_region_on_fail) { + CreateRegion(page_index); + manager = top_tier[page_index]; + if constexpr (BOOL_BREAK) { + if (func(manager, page_offset, copy_amount)) { + return true; + } + } else { + func(manager, page_offset, copy_amount); + } + } + page_index++; + page_offset = 0; + remaining_size -= copy_amount; + } + return false; + } + + void CreateRegion(std::size_t page_index) { + const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; + if (free_managers.empty()) { + manager_pool.emplace_back(); + auto& last_pool = manager_pool.back(); + for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { + std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE); + free_managers.push_back(&last_pool[i]); + } + } + // Each manager tracks a 4_MB virtual address space. + auto* new_manager = free_managers.back(); + new_manager->SetCpuAddress(base_cpu_addr); + free_managers.pop_back(); + top_tier[page_index] = new_manager; + } + + PageManager* tracker; + std::deque> manager_pool; + std::vector free_managers; + std::array top_tier{}; +}; + +} // namespace VideoCore diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index 549d2a9e..ae85d1eb 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h @@ -1,398 +1,398 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include -#include "common/div_ceil.h" -#include "common/types.h" -#include "video_core/page_manager.h" - -namespace VideoCore { - -constexpr u64 PAGES_PER_WORD = 64; -constexpr u64 BYTES_PER_PAGE = 4_KB; -constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; - -enum class Type { - CPU, - GPU, - Untracked, -}; - -/// Vector tracking modified pages tightly packed with small vector optimization -template -struct WordsArray { - /// Returns the pointer to the words state - [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { - return is_short ? stack.data() : heap; - } - - /// Returns the pointer to the words state - [[nodiscard]] u64* Pointer(bool is_short) noexcept { - return is_short ? stack.data() : heap; - } - - std::array stack{}; ///< Small buffers storage - u64* heap; ///< Not-small buffers pointer to the storage -}; - -template -struct Words { - explicit Words() = default; - explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { - num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD); - if (IsShort()) { - cpu.stack.fill(~u64{0}); - gpu.stack.fill(0); - untracked.stack.fill(~u64{0}); - } else { - // Share allocation between CPU and GPU pages and set their default values - u64* const alloc = new u64[num_words * 3]; - cpu.heap = alloc; - gpu.heap = alloc + num_words; - untracked.heap = alloc + num_words * 2; - std::fill_n(cpu.heap, num_words, ~u64{0}); - std::fill_n(gpu.heap, num_words, 0); - std::fill_n(untracked.heap, num_words, ~u64{0}); - } - // Clean up tailing bits - const u64 last_word_size = size_bytes % BYTES_PER_WORD; - const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); - const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; - const u64 last_word = (~u64{0} << shift) >> shift; - cpu.Pointer(IsShort())[NumWords() - 1] = last_word; - untracked.Pointer(IsShort())[NumWords() - 1] = last_word; - } - - ~Words() { - Release(); - } - - Words& operator=(Words&& rhs) noexcept { - Release(); - size_bytes = rhs.size_bytes; - num_words = rhs.num_words; - cpu = rhs.cpu; - gpu = rhs.gpu; - untracked = rhs.untracked; - rhs.cpu.heap = nullptr; - return *this; - } - - Words(Words&& rhs) noexcept - : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu}, - untracked{rhs.untracked} { - rhs.cpu.heap = nullptr; - } - - Words& operator=(const Words&) = delete; - Words(const Words&) = delete; - - /// Returns true when the buffer fits in the small vector optimization - [[nodiscard]] bool IsShort() const noexcept { - return num_words <= stack_words; - } - - /// Returns the number of words of the buffer - [[nodiscard]] size_t NumWords() const noexcept { - return num_words; - } - - /// Release buffer resources - void Release() { - if (!IsShort()) { - // CPU written words is the base for the heap allocation - delete[] cpu.heap; - } - } - - template - std::span Span() noexcept { - if constexpr (type == Type::CPU) { - return std::span(cpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::GPU) { - return std::span(gpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::Untracked) { - return std::span(untracked.Pointer(IsShort()), num_words); - } - } - - template - std::span Span() const noexcept { - if constexpr (type == Type::CPU) { - return std::span(cpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::GPU) { - return std::span(gpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::Untracked) { - return std::span(untracked.Pointer(IsShort()), num_words); - } - } - - u64 size_bytes = 0; - size_t num_words = 0; - WordsArray cpu; - WordsArray gpu; - WordsArray untracked; -}; - -template -class WordManager { -public: - explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes) - : tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {} - - explicit WordManager() = default; - - void SetCpuAddress(VAddr new_cpu_addr) { - cpu_addr = new_cpu_addr; - } - - VAddr GetCpuAddr() const { - return cpu_addr; - } - - static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { - constexpr size_t number_bits = sizeof(u64) * 8; - const size_t limit_page_end = number_bits - std::min(page_end, number_bits); - u64 bits = (word >> page_start) << page_start; - bits = (bits << limit_page_end) >> limit_page_end; - return bits; - } - - static std::pair GetWordPage(VAddr address) { - const size_t converted_address = static_cast(address); - const size_t word_number = converted_address / BYTES_PER_WORD; - const size_t amount_pages = converted_address % BYTES_PER_WORD; - return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE); - } - - template - void IterateWords(size_t offset, size_t size, Func&& func) const { - using FuncReturn = std::invoke_result_t; - static constexpr bool BOOL_BREAK = std::is_same_v; - const size_t start = static_cast(std::max(static_cast(offset), 0LL)); - const size_t end = static_cast(std::max(static_cast(offset + size), 0LL)); - if (start >= SizeBytes() || end <= start) { - return; - } - auto [start_word, start_page] = GetWordPage(start); - auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); - const size_t num_words = NumWords(); - start_word = std::min(start_word, num_words); - end_word = std::min(end_word, num_words); - const size_t diff = end_word - start_word; - end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD; - end_word = std::min(end_word, num_words); - end_page += diff * PAGES_PER_WORD; - constexpr u64 base_mask{~0ULL}; - for (size_t word_index = start_word; word_index < end_word; word_index++) { - const u64 mask = ExtractBits(base_mask, start_page, end_page); - start_page = 0; - end_page -= PAGES_PER_WORD; - if constexpr (BOOL_BREAK) { - if (func(word_index, mask)) { - return; - } - } else { - func(word_index, mask); - } - } - } - - template - void IteratePages(u64 mask, Func&& func) const { - size_t offset = 0; - while (mask != 0) { - const size_t empty_bits = std::countr_zero(mask); - offset += empty_bits; - mask = mask >> empty_bits; - - const size_t continuous_bits = std::countr_one(mask); - func(offset, continuous_bits); - mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0; - offset += continuous_bits; - } - } - - /** - * Change the state of a range of pages - * - * @param dirty_addr Base address to mark or unmark as modified - * @param size Size in bytes to mark or unmark as modified - */ - template - void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { - std::span state_words = words.template Span(); - [[maybe_unused]] std::span untracked_words = words.template Span(); - IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { - if constexpr (type == Type::CPU) { - NotifyPageTracker(index, untracked_words[index], mask); - } - if constexpr (enable) { - state_words[index] |= mask; - if constexpr (type == Type::CPU) { - untracked_words[index] |= mask; - } - } else { - state_words[index] &= ~mask; - if constexpr (type == Type::CPU) { - untracked_words[index] &= ~mask; - } - } - }); - } - - /** - * Loop over each page in the given range, turn off those bits and notify the tracker if - * needed. Call the given function on each turned off range. - * - * @param query_cpu_range Base CPU address to loop over - * @param size Size in bytes of the CPU range to loop over - * @param func Function to call for each turned off region - */ - template - void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { - static_assert(type != Type::Untracked); - - std::span state_words = words.template Span(); - [[maybe_unused]] std::span untracked_words = words.template Span(); - const size_t offset = query_cpu_range - cpu_addr; - bool pending = false; - size_t pending_offset{}; - size_t pending_pointer{}; - const auto release = [&]() { - func(cpu_addr + pending_offset * BYTES_PER_PAGE, - (pending_pointer - pending_offset) * BYTES_PER_PAGE); - }; - IterateWords(offset, size, [&](size_t index, u64 mask) { - if constexpr (type == Type::GPU) { - mask &= ~untracked_words[index]; - } - const u64 word = state_words[index] & mask; - if constexpr (clear) { - if constexpr (type == Type::CPU) { - NotifyPageTracker(index, untracked_words[index], mask); - } - state_words[index] &= ~mask; - if constexpr (type == Type::CPU) { - untracked_words[index] &= ~mask; - } - } - const size_t base_offset = index * PAGES_PER_WORD; - IteratePages(word, [&](size_t pages_offset, size_t pages_size) { - const auto reset = [&]() { - pending_offset = base_offset + pages_offset; - pending_pointer = base_offset + pages_offset + pages_size; - }; - if (!pending) { - reset(); - pending = true; - return; - } - if (pending_pointer == base_offset + pages_offset) { - pending_pointer += pages_size; - return; - } - release(); - reset(); - }); - }); - if (pending) { - release(); - } - } - - /** - * Returns true when a region has been modified - * - * @param offset Offset in bytes from the start of the buffer - * @param size Size in bytes of the region to query for modifications - */ - template - [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { - static_assert(type != Type::Untracked); - - const std::span state_words = words.template Span(); - [[maybe_unused]] const std::span untracked_words = - words.template Span(); - bool result = false; - IterateWords(offset, size, [&](size_t index, u64 mask) { - if constexpr (type == Type::GPU) { - mask &= ~untracked_words[index]; - } - const u64 word = state_words[index] & mask; - if (word != 0) { - result = true; - return true; - } - return false; - }); - return result; - } - - /// Returns the number of words of the manager - [[nodiscard]] size_t NumWords() const noexcept { - return words.NumWords(); - } - - /// Returns the size in bytes of the manager - [[nodiscard]] u64 SizeBytes() const noexcept { - return words.size_bytes; - } - - /// Returns true when the buffer fits in the small vector optimization - [[nodiscard]] bool IsShort() const noexcept { - return words.IsShort(); - } - -private: - template - u64* Array() noexcept { - if constexpr (type == Type::CPU) { - return words.cpu.Pointer(IsShort()); - } else if constexpr (type == Type::GPU) { - return words.gpu.Pointer(IsShort()); - } else if constexpr (type == Type::Untracked) { - return words.untracked.Pointer(IsShort()); - } - } - - template - const u64* Array() const noexcept { - if constexpr (type == Type::CPU) { - return words.cpu.Pointer(IsShort()); - } else if constexpr (type == Type::GPU) { - return words.gpu.Pointer(IsShort()); - } else if constexpr (type == Type::Untracked) { - return words.untracked.Pointer(IsShort()); - } - } - - /** - * Notify tracker about changes in the CPU tracking state of a word in the buffer - * - * @param word_index Index to the word to notify to the tracker - * @param current_bits Current state of the word - * @param new_bits New state of the word - * - * @tparam add_to_tracker True when the tracker should start tracking the new pages - */ - template - void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const { - u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; - VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; - IteratePages(changed_bits, [&](size_t offset, size_t size) { - tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE, - add_to_tracker ? 1 : -1); - }); - } - - PageManager* tracker; - VAddr cpu_addr = 0; - Words words; -}; - -} // namespace VideoCore +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/div_ceil.h" +#include "common/types.h" +#include "video_core/page_manager.h" + +namespace VideoCore { + +constexpr u64 PAGES_PER_WORD = 64; +constexpr u64 BYTES_PER_PAGE = 4_KB; +constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; + +enum class Type { + CPU, + GPU, + Untracked, +}; + +/// Vector tracking modified pages tightly packed with small vector optimization +template +struct WordsArray { + /// Returns the pointer to the words state + [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { + return is_short ? stack.data() : heap; + } + + /// Returns the pointer to the words state + [[nodiscard]] u64* Pointer(bool is_short) noexcept { + return is_short ? stack.data() : heap; + } + + std::array stack{}; ///< Small buffers storage + u64* heap; ///< Not-small buffers pointer to the storage +}; + +template +struct Words { + explicit Words() = default; + explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { + num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD); + if (IsShort()) { + cpu.stack.fill(~u64{0}); + gpu.stack.fill(0); + untracked.stack.fill(~u64{0}); + } else { + // Share allocation between CPU and GPU pages and set their default values + u64* const alloc = new u64[num_words * 3]; + cpu.heap = alloc; + gpu.heap = alloc + num_words; + untracked.heap = alloc + num_words * 2; + std::fill_n(cpu.heap, num_words, ~u64{0}); + std::fill_n(gpu.heap, num_words, 0); + std::fill_n(untracked.heap, num_words, ~u64{0}); + } + // Clean up tailing bits + const u64 last_word_size = size_bytes % BYTES_PER_WORD; + const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); + const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; + const u64 last_word = (~u64{0} << shift) >> shift; + cpu.Pointer(IsShort())[NumWords() - 1] = last_word; + untracked.Pointer(IsShort())[NumWords() - 1] = last_word; + } + + ~Words() { + Release(); + } + + Words& operator=(Words&& rhs) noexcept { + Release(); + size_bytes = rhs.size_bytes; + num_words = rhs.num_words; + cpu = rhs.cpu; + gpu = rhs.gpu; + untracked = rhs.untracked; + rhs.cpu.heap = nullptr; + return *this; + } + + Words(Words&& rhs) noexcept + : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu}, + untracked{rhs.untracked} { + rhs.cpu.heap = nullptr; + } + + Words& operator=(const Words&) = delete; + Words(const Words&) = delete; + + /// Returns true when the buffer fits in the small vector optimization + [[nodiscard]] bool IsShort() const noexcept { + return num_words <= stack_words; + } + + /// Returns the number of words of the buffer + [[nodiscard]] size_t NumWords() const noexcept { + return num_words; + } + + /// Release buffer resources + void Release() { + if (!IsShort()) { + // CPU written words is the base for the heap allocation + delete[] cpu.heap; + } + } + + template + std::span Span() noexcept { + if constexpr (type == Type::CPU) { + return std::span(cpu.Pointer(IsShort()), num_words); + } else if constexpr (type == Type::GPU) { + return std::span(gpu.Pointer(IsShort()), num_words); + } else if constexpr (type == Type::Untracked) { + return std::span(untracked.Pointer(IsShort()), num_words); + } + } + + template + std::span Span() const noexcept { + if constexpr (type == Type::CPU) { + return std::span(cpu.Pointer(IsShort()), num_words); + } else if constexpr (type == Type::GPU) { + return std::span(gpu.Pointer(IsShort()), num_words); + } else if constexpr (type == Type::Untracked) { + return std::span(untracked.Pointer(IsShort()), num_words); + } + } + + u64 size_bytes = 0; + size_t num_words = 0; + WordsArray cpu; + WordsArray gpu; + WordsArray untracked; +}; + +template +class WordManager { +public: + explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes) + : tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {} + + explicit WordManager() = default; + + void SetCpuAddress(VAddr new_cpu_addr) { + cpu_addr = new_cpu_addr; + } + + VAddr GetCpuAddr() const { + return cpu_addr; + } + + static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { + constexpr size_t number_bits = sizeof(u64) * 8; + const size_t limit_page_end = number_bits - std::min(page_end, number_bits); + u64 bits = (word >> page_start) << page_start; + bits = (bits << limit_page_end) >> limit_page_end; + return bits; + } + + static std::pair GetWordPage(VAddr address) { + const size_t converted_address = static_cast(address); + const size_t word_number = converted_address / BYTES_PER_WORD; + const size_t amount_pages = converted_address % BYTES_PER_WORD; + return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE); + } + + template + void IterateWords(size_t offset, size_t size, Func&& func) const { + using FuncReturn = std::invoke_result_t; + static constexpr bool BOOL_BREAK = std::is_same_v; + const size_t start = static_cast(std::max(static_cast(offset), 0LL)); + const size_t end = static_cast(std::max(static_cast(offset + size), 0LL)); + if (start >= SizeBytes() || end <= start) { + return; + } + auto [start_word, start_page] = GetWordPage(start); + auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); + const size_t num_words = NumWords(); + start_word = std::min(start_word, num_words); + end_word = std::min(end_word, num_words); + const size_t diff = end_word - start_word; + end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD; + end_word = std::min(end_word, num_words); + end_page += diff * PAGES_PER_WORD; + constexpr u64 base_mask{~0ULL}; + for (size_t word_index = start_word; word_index < end_word; word_index++) { + const u64 mask = ExtractBits(base_mask, start_page, end_page); + start_page = 0; + end_page -= PAGES_PER_WORD; + if constexpr (BOOL_BREAK) { + if (func(word_index, mask)) { + return; + } + } else { + func(word_index, mask); + } + } + } + + template + void IteratePages(u64 mask, Func&& func) const { + size_t offset = 0; + while (mask != 0) { + const size_t empty_bits = std::countr_zero(mask); + offset += empty_bits; + mask = mask >> empty_bits; + + const size_t continuous_bits = std::countr_one(mask); + func(offset, continuous_bits); + mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0; + offset += continuous_bits; + } + } + + /** + * Change the state of a range of pages + * + * @param dirty_addr Base address to mark or unmark as modified + * @param size Size in bytes to mark or unmark as modified + */ + template + void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { + std::span state_words = words.template Span(); + [[maybe_unused]] std::span untracked_words = words.template Span(); + IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { + if constexpr (type == Type::CPU) { + NotifyPageTracker(index, untracked_words[index], mask); + } + if constexpr (enable) { + state_words[index] |= mask; + if constexpr (type == Type::CPU) { + untracked_words[index] |= mask; + } + } else { + state_words[index] &= ~mask; + if constexpr (type == Type::CPU) { + untracked_words[index] &= ~mask; + } + } + }); + } + + /** + * Loop over each page in the given range, turn off those bits and notify the tracker if + * needed. Call the given function on each turned off range. + * + * @param query_cpu_range Base CPU address to loop over + * @param size Size in bytes of the CPU range to loop over + * @param func Function to call for each turned off region + */ + template + void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + static_assert(type != Type::Untracked); + + std::span state_words = words.template Span(); + [[maybe_unused]] std::span untracked_words = words.template Span(); + const size_t offset = query_cpu_range - cpu_addr; + bool pending = false; + size_t pending_offset{}; + size_t pending_pointer{}; + const auto release = [&]() { + func(cpu_addr + pending_offset * BYTES_PER_PAGE, + (pending_pointer - pending_offset) * BYTES_PER_PAGE); + }; + IterateWords(offset, size, [&](size_t index, u64 mask) { + if constexpr (type == Type::GPU) { + mask &= ~untracked_words[index]; + } + const u64 word = state_words[index] & mask; + if constexpr (clear) { + if constexpr (type == Type::CPU) { + NotifyPageTracker(index, untracked_words[index], mask); + } + state_words[index] &= ~mask; + if constexpr (type == Type::CPU) { + untracked_words[index] &= ~mask; + } + } + const size_t base_offset = index * PAGES_PER_WORD; + IteratePages(word, [&](size_t pages_offset, size_t pages_size) { + const auto reset = [&]() { + pending_offset = base_offset + pages_offset; + pending_pointer = base_offset + pages_offset + pages_size; + }; + if (!pending) { + reset(); + pending = true; + return; + } + if (pending_pointer == base_offset + pages_offset) { + pending_pointer += pages_size; + return; + } + release(); + reset(); + }); + }); + if (pending) { + release(); + } + } + + /** + * Returns true when a region has been modified + * + * @param offset Offset in bytes from the start of the buffer + * @param size Size in bytes of the region to query for modifications + */ + template + [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { + static_assert(type != Type::Untracked); + + const std::span state_words = words.template Span(); + [[maybe_unused]] const std::span untracked_words = + words.template Span(); + bool result = false; + IterateWords(offset, size, [&](size_t index, u64 mask) { + if constexpr (type == Type::GPU) { + mask &= ~untracked_words[index]; + } + const u64 word = state_words[index] & mask; + if (word != 0) { + result = true; + return true; + } + return false; + }); + return result; + } + + /// Returns the number of words of the manager + [[nodiscard]] size_t NumWords() const noexcept { + return words.NumWords(); + } + + /// Returns the size in bytes of the manager + [[nodiscard]] u64 SizeBytes() const noexcept { + return words.size_bytes; + } + + /// Returns true when the buffer fits in the small vector optimization + [[nodiscard]] bool IsShort() const noexcept { + return words.IsShort(); + } + +private: + template + u64* Array() noexcept { + if constexpr (type == Type::CPU) { + return words.cpu.Pointer(IsShort()); + } else if constexpr (type == Type::GPU) { + return words.gpu.Pointer(IsShort()); + } else if constexpr (type == Type::Untracked) { + return words.untracked.Pointer(IsShort()); + } + } + + template + const u64* Array() const noexcept { + if constexpr (type == Type::CPU) { + return words.cpu.Pointer(IsShort()); + } else if constexpr (type == Type::GPU) { + return words.gpu.Pointer(IsShort()); + } else if constexpr (type == Type::Untracked) { + return words.untracked.Pointer(IsShort()); + } + } + + /** + * Notify tracker about changes in the CPU tracking state of a word in the buffer + * + * @param word_index Index to the word to notify to the tracker + * @param current_bits Current state of the word + * @param new_bits New state of the word + * + * @tparam add_to_tracker True when the tracker should start tracking the new pages + */ + template + void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const { + u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; + VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; + IteratePages(changed_bits, [&](size_t offset, size_t size) { + tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE, + add_to_tracker ? 1 : -1); + }); + } + + PageManager* tracker; + VAddr cpu_addr = 0; + Words words; +}; + +} // namespace VideoCore