Merge pull request #217 from shadps4-emu/stabilization7

kernel/shader_recompiler: Fixes and cleanups to improve stability
This commit is contained in:
georgemoralis 2024-06-26 20:39:05 +03:00 committed by GitHub
commit 218c57ae2d
12 changed files with 228 additions and 178 deletions

View file

@ -181,7 +181,7 @@ public:
}
template <typename T>
size_t WriteRaw(void* data, size_t size) const {
size_t WriteRaw(const void* data, size_t size) const {
return std::fwrite(data, sizeof(T), size, file);
}

View file

@ -3,6 +3,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/singleton.h"
#include "core/file_sys/fs.h"
#include "core/libraries/error_codes.h"
@ -12,26 +13,16 @@
namespace Libraries::Kernel {
std::vector<Core::FileSys::DirEntry> GetDirectoryEntries(const std::string& path) {
std::string curpath = path;
if (!curpath.ends_with("/")) {
curpath = std::string(curpath + "/");
}
std::vector<Core::FileSys::DirEntry> files;
for (const auto& entry : std::filesystem::directory_iterator(curpath)) {
Core::FileSys::DirEntry e = {};
if (std::filesystem::is_directory(entry.path().string())) {
e.name = entry.path().filename().string();
e.isFile = false;
} else {
e.name = entry.path().filename().string();
e.isFile = true;
}
files.push_back(e);
for (const auto& entry : std::filesystem::directory_iterator(path)) {
auto& dir_entry = files.emplace_back();
dir_entry.name = entry.path().filename().string();
dir_entry.isFile = !std::filesystem::is_directory(entry.path().string());
}
return files;
}
int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode);
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
@ -135,10 +126,7 @@ int PS4_SYSV_ABI posix_close(int d) {
return ORBIS_OK;
}
size_t PS4_SYSV_ABI sceKernelWrite(int d, void* buf, size_t nbytes) {
if (buf == nullptr) {
return SCE_KERNEL_ERROR_EFAULT;
}
size_t PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes) {
if (d <= 2) { // stdin,stdout,stderr
char* str = strdup((const char*)buf);
if (str[nbytes - 1] == '\n')
@ -152,20 +140,19 @@ size_t PS4_SYSV_ABI sceKernelWrite(int d, void* buf, size_t nbytes) {
if (file == nullptr) {
return SCE_KERNEL_ERROR_EBADF;
}
file->m_mutex.lock();
u32 bytes_write = file->f.WriteRaw<u8>(buf, static_cast<u32>(nbytes));
file->m_mutex.unlock();
return bytes_write;
std::scoped_lock lk{file->m_mutex};
return file->f.WriteRaw<u8>(buf, nbytes);
}
size_t PS4_SYSV_ABI _readv(int d, const SceKernelIovec* iov, int iovcnt) {
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* file = h->GetFile(d);
size_t total_read = 0;
file->m_mutex.lock();
std::scoped_lock lk{file->m_mutex};
for (int i = 0; i < iovcnt; i++) {
total_read += file->f.ReadRaw<u8>(iov[i].iov_base, iov[i].iov_len);
}
file->m_mutex.unlock();
return total_read;
}
@ -173,24 +160,18 @@ s64 PS4_SYSV_ABI sceKernelLseek(int d, s64 offset, int whence) {
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* file = h->GetFile(d);
file->m_mutex.lock();
Common::FS::SeekOrigin origin;
Common::FS::SeekOrigin origin{};
if (whence == 0) {
origin = Common::FS::SeekOrigin::SetOrigin;
}
if (whence == 1) {
} else if (whence == 1) {
origin = Common::FS::SeekOrigin::CurrentPosition;
}
if (whence == 2) {
} else if (whence == 2) {
origin = Common::FS::SeekOrigin::End;
}
std::scoped_lock lk{file->m_mutex};
file->f.Seek(offset, origin);
auto pos = static_cast<int64_t>(file->f.Tell());
file->m_mutex.unlock();
return pos;
return file->f.Tell();
}
s64 PS4_SYSV_ABI posix_lseek(int d, s64 offset, int whence) {
@ -198,19 +179,14 @@ s64 PS4_SYSV_ABI posix_lseek(int d, s64 offset, int whence) {
}
s64 PS4_SYSV_ABI sceKernelRead(int d, void* buf, size_t nbytes) {
if (buf == nullptr) {
return SCE_KERNEL_ERROR_EFAULT;
}
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* file = h->GetFile(d);
if (file == nullptr) {
return SCE_KERNEL_ERROR_EBADF;
}
file->m_mutex.lock();
u32 bytes_read = file->f.ReadRaw<u8>(buf, static_cast<u32>(nbytes));
file->m_mutex.unlock();
return bytes_read;
std::scoped_lock lk{file->m_mutex};
return file->f.ReadRaw<u8>(buf, nbytes);
}
int PS4_SYSV_ABI posix_read(int d, void* buf, size_t nbytes) {
@ -245,10 +221,10 @@ int PS4_SYSV_ABI posix_mkdir(const char* path, u16 mode) {
int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) {
LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path);
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
std::string path_name = mnt->GetHostFile(path);
memset(sb, 0, sizeof(OrbisKernelStat));
bool is_dir = std::filesystem::is_directory(path_name);
bool is_file = std::filesystem::is_regular_file(path_name);
const auto& path_name = mnt->GetHostFile(path);
std::memset(sb, 0, sizeof(OrbisKernelStat));
const bool is_dir = std::filesystem::is_directory(path_name);
const bool is_file = std::filesystem::is_regular_file(path_name);
if (!is_dir && !is_file) {
return ORBIS_KERNEL_ERROR_ENOENT;
}
@ -290,35 +266,30 @@ s64 PS4_SYSV_ABI sceKernelPread(int d, void* buf, size_t nbytes, s64 offset) {
if (d < 3) {
return ORBIS_KERNEL_ERROR_EPERM;
}
if (buf == nullptr) {
return ORBIS_KERNEL_ERROR_EFAULT;
}
if (offset < 0) {
return ORBIS_KERNEL_ERROR_EINVAL;
}
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* file = h->GetFile(d);
if (file == nullptr) {
return ORBIS_KERNEL_ERROR_EBADF;
}
file->m_mutex.lock();
if (file->f.Tell() != offset) {
file->f.Seek(offset);
}
u32 bytes_read = file->f.ReadRaw<u8>(buf, static_cast<u32>(nbytes));
file->m_mutex.unlock();
return bytes_read;
std::scoped_lock lk{file->m_mutex};
const s64 pos = file->f.Tell();
SCOPE_EXIT {
file->f.Seek(pos);
};
file->f.Seek(offset);
return file->f.ReadRaw<u8>(buf, nbytes);
}
int PS4_SYSV_ABI sceKernelFStat(int fd, OrbisKernelStat* sb) {
LOG_INFO(Kernel_Fs, "(PARTIAL) fd = {}", fd);
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* file = h->GetFile(fd);
memset(sb, 0, sizeof(OrbisKernelStat));
std::memset(sb, 0, sizeof(OrbisKernelStat));
if (file->is_directory) {
sb->st_mode = 0000777u | 0040000u;
@ -347,13 +318,14 @@ s32 PS4_SYSV_ABI sceKernelFsync(int fd) {
return ORBIS_OK;
}
int GetDents(int fd, char* buf, int nbytes, s64* basep) {
static int GetDents(int fd, char* buf, int nbytes, s64* basep) {
// TODO error codes
ASSERT(buf != nullptr);
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* file = h->GetFile(fd);
if (file->dirents_index == file->dirents.size()) {
return 0;
return ORBIS_OK;
}
const auto& entry = file->dirents.at(file->dirents_index++);
@ -388,31 +360,23 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {
if (d < 3) {
return ORBIS_KERNEL_ERROR_EPERM;
}
if (buf == nullptr) {
return ORBIS_KERNEL_ERROR_EFAULT;
}
if (offset < 0) {
return ORBIS_KERNEL_ERROR_EINVAL;
}
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* file = h->GetFile(d);
if (file == nullptr) {
return ORBIS_KERNEL_ERROR_EBADF;
}
file->m_mutex.lock();
auto pos = file->f.Tell();
std::scoped_lock lk{file->m_mutex};
const s64 pos = file->f.Tell();
SCOPE_EXIT {
file->f.Seek(pos);
};
file->f.Seek(offset);
u32 bytes_write = file->f.WriteRaw<u8>(buf, static_cast<u32>(nbytes));
file->f.Seek(pos);
file->m_mutex.unlock();
return bytes_write;
return file->f.WriteRaw<u8>(buf, nbytes);
}
void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) {

View file

@ -3,12 +3,13 @@
#include <condition_variable>
#include <mutex>
#include <utility>
#include <boost/intrusive/list.hpp>
#include <pthread.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/kernel/thread_management.h"
#include "core/libraries/libs.h"
namespace Libraries::Kernel {
@ -18,26 +19,30 @@ using ListBaseHook =
class Semaphore {
public:
Semaphore(s32 init_count, s32 max_count, const char* name, bool is_fifo)
: name{name}, token_count{init_count}, max_count{max_count}, is_fifo{is_fifo} {}
Semaphore(s32 init_count, s32 max_count, std::string_view name, bool is_fifo)
: name{name}, token_count{init_count}, max_count{max_count}, init_count{init_count},
is_fifo{is_fifo} {}
~Semaphore() {
ASSERT(wait_list.empty());
}
bool Wait(bool can_block, s32 need_count, u64* timeout) {
if (HasAvailableTokens(need_count)) {
return true;
int Wait(bool can_block, s32 need_count, u32* timeout) {
std::unique_lock lk{mutex};
if (token_count >= need_count) {
token_count -= need_count;
return ORBIS_OK;
}
if (!can_block) {
return false;
return ORBIS_KERNEL_ERROR_EBUSY;
}
// Create waiting thread object and add it into the list of waiters.
WaitingThread waiter{need_count, is_fifo};
AddWaiter(waiter);
SCOPE_EXIT {
PopWaiter(waiter);
};
// Perform the wait.
return waiter.Wait(timeout);
std::exchange(lk, std::unique_lock{waiter.mutex});
return waiter.Wait(lk, timeout);
}
bool Signal(s32 signal_count) {
@ -48,25 +53,47 @@ public:
token_count += signal_count;
// Wake up threads in order of priority.
for (auto& waiter : wait_list) {
for (auto it = wait_list.begin(); it != wait_list.end();) {
auto& waiter = *it;
if (waiter.need_count > token_count) {
it++;
continue;
}
std::scoped_lock lk2{waiter.mutex};
token_count -= waiter.need_count;
waiter.cv.notify_one();
it = wait_list.erase(it);
}
return true;
}
private:
/// Cancels every pending wait on this semaphore and resets its token count.
///
/// @param set_count   New token count; a negative value restores the initial count.
/// @param num_waiters Optional out-parameter receiving how many threads were waiting
///                    at the time of the call. May be null.
/// @return ORBIS_OK.
int Cancel(s32 set_count, s32* num_waiters) {
    std::scoped_lock lk{mutex};
    if (num_waiters) {
        *num_waiters = static_cast<s32>(wait_list.size());
    }
    // Wake and unlink each waiter while holding its own mutex, the same way Signal() does.
    // Wait() acquires waiter.mutex before it lets go of the semaphore mutex and only drops
    // it inside cv.wait(), so locking it here ensures the notification cannot be issued
    // before the waiter is actually blocked (which would lose the wakeup).
    // Unlinking inside the loop also means no node stays on the list after its owner has
    // been released; the waiter object lives on the waiting thread's stack.
    for (auto it = wait_list.begin(); it != wait_list.end();) {
        auto& waiter = *it;
        std::scoped_lock lk2{waiter.mutex};
        waiter.was_cancled = true;
        waiter.cv.notify_one();
        it = wait_list.erase(it);
    }
    token_count = set_count < 0 ? init_count : set_count;
    return ORBIS_OK;
}
public:
struct WaitingThread : public ListBaseHook {
std::mutex mutex;
std::string name;
std::condition_variable cv;
u32 priority;
s32 need_count;
bool was_deleted{};
bool was_cancled{};
explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} {
name = scePthreadSelf()->name;
if (is_fifo) {
return;
}
@ -77,12 +104,24 @@ private:
priority = param.sched_priority;
}
bool Wait(u64* timeout) {
std::unique_lock lk{mutex};
/// Translates the outcome of a finished wait into the status code handed back to the caller.
///
/// Precedence (highest first): timeout, semaphore deleted, wait cancelled; otherwise success.
/// @param timed_out True when the condition variable wait ended because the timeout elapsed.
int GetResult(bool timed_out) {
    // Assign from lowest to highest precedence so that the last matching condition wins.
    int result = SCE_OK;
    if (was_cancled) {
        result = SCE_KERNEL_ERROR_ECANCELED;
    }
    if (was_deleted) {
        result = SCE_KERNEL_ERROR_EACCES;
    }
    if (timed_out) {
        result = SCE_KERNEL_ERROR_ETIMEDOUT;
    }
    return result;
}
int Wait(std::unique_lock<std::mutex>& lk, u32* timeout) {
if (!timeout) {
// Wait indefinitely until we are woken up.
cv.wait(lk);
return true;
return GetResult(false);
}
// Wait until timeout runs out, recording how much remaining time there was.
const auto start = std::chrono::high_resolution_clock::now();
@ -91,16 +130,11 @@ private:
const auto time =
std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
*timeout -= time;
return status != std::cv_status::timeout;
}
bool operator<(const WaitingThread& other) const {
return priority < other.priority;
return GetResult(status == std::cv_status::timeout);
}
};
void AddWaiter(WaitingThread& waiter) {
std::scoped_lock lk{mutex};
// Insert at the end of the list for FIFO order.
if (is_fifo) {
wait_list.push_back(waiter);
@ -114,20 +148,6 @@ private:
wait_list.insert(it, waiter);
}
void PopWaiter(WaitingThread& waiter) {
std::scoped_lock lk{mutex};
wait_list.erase(WaitingThreads::s_iterator_to(waiter));
}
bool HasAvailableTokens(s32 need_count) {
std::scoped_lock lk{mutex};
if (token_count >= need_count) {
token_count -= need_count;
return true;
}
return false;
}
using WaitingThreads =
boost::intrusive::list<WaitingThread, boost::intrusive::base_hook<ListBaseHook>,
boost::intrusive::constant_time_size<false>>;
@ -136,6 +156,7 @@ private:
std::atomic<s32> token_count;
std::mutex mutex;
s32 max_count;
s32 init_count;
bool is_fifo;
};
@ -151,9 +172,8 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u64* pTimeout) {
ASSERT(sem->Wait(true, needCount, pTimeout));
return ORBIS_OK;
s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) {
return sem->Wait(true, needCount, pTimeout);
}
s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
@ -164,9 +184,18 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
}
s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) {
if (!sem->Wait(false, needCount, nullptr)) {
return ORBIS_KERNEL_ERROR_EBUSY;
return sem->Wait(false, needCount, nullptr);
}
/// Cancels all pending waits on a semaphore and resets its token count.
///
/// @param sem             Semaphore handle.
/// @param setCount        Token count to set; forwarded unchanged to Semaphore::Cancel.
/// @param pNumWaitThreads Optional out-parameter for the number of cancelled waiters.
/// @return SCE_KERNEL_ERROR_ESRCH for a null handle, otherwise the result of Cancel.
int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) {
    // Reject a null handle the same way sceKernelDeleteSema does instead of dereferencing it.
    if (!sem) {
        return SCE_KERNEL_ERROR_ESRCH;
    }
    return sem->Cancel(setCount, pNumWaitThreads);
}
/// Destroys a semaphore handle.
///
/// @param sem Semaphore handle to delete.
/// @return SCE_KERNEL_ERROR_ESRCH when the handle is null, ORBIS_OK after deletion.
int PS4_SYSV_ABI sceKernelDeleteSema(OrbisKernelSema sem) {
    if (sem) {
        // NOTE(review): assuming OrbisKernelSema points at Semaphore, its destructor asserts
        // that no thread is still waiting, so deleting a semaphore with waiters aborts here.
        delete sem;
        return ORBIS_OK;
    }
    return SCE_KERNEL_ERROR_ESRCH;
}
@ -175,6 +204,8 @@ void SemaphoreSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("Zxa0VhQVTsk", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitSema);
LIB_FUNCTION("4czppHBiriw", "libkernel", 1, "libkernel", 1, 1, sceKernelSignalSema);
LIB_FUNCTION("12wOHk8ywb0", "libkernel", 1, "libkernel", 1, 1, sceKernelPollSema);
LIB_FUNCTION("4DM06U2BNEY", "libkernel", 1, "libkernel", 1, 1, sceKernelCancelSema);
LIB_FUNCTION("R1Jvn8bSCW8", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteSema);
}
} // namespace Libraries::Kernel

View file

@ -68,8 +68,10 @@ void Linker::Execute() {
}
// Configure used flexible memory size.
if (u64* flexible_size = GetProcParam()->mem_param->flexible_memory_size) {
memory->SetTotalFlexibleSize(*flexible_size);
if (auto* mem_param = GetProcParam()->mem_param) {
if (u64* flexible_size = mem_param->flexible_memory_size) {
memory->SetTotalFlexibleSize(*flexible_size);
}
}
// Init primary thread.

View file

@ -206,9 +206,15 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr
const auto& vma = it->second;
ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped");
*start = reinterpret_cast<void*>(vma.base);
*end = reinterpret_cast<void*>(vma.base + vma.size);
*prot = static_cast<u32>(vma.prot);
if (start != nullptr) {
*start = reinterpret_cast<void*>(vma.base);
}
if (end != nullptr) {
*end = reinterpret_cast<void*>(vma.base + vma.size);
}
if (prot != nullptr) {
*prot = static_cast<u32>(vma.prot);
}
return ORBIS_OK;
}

View file

@ -11,7 +11,6 @@
#include "core/memory.h"
#include "core/module.h"
#include "core/tls.h"
#include "core/virtual_memory.h"
namespace Core {

View file

@ -46,40 +46,6 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
ir.SetScc(result);
}
void Translator::S_ANDN2_B64(const GcnInst& inst) {
// TODO: What if this is used for something other than EXEC masking?
const auto get_src = [&](const InstOperand& operand) {
switch (operand.field) {
case OperandField::VccLo:
return ir.GetVcc();
case OperandField::ExecLo:
return ir.GetExec();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
default:
UNREACHABLE();
}
};
const IR::U1 src0{get_src(inst.src[0])};
const IR::U1 src1{get_src(inst.src[1])};
const IR::U1 result{ir.LogicalAnd(src0, ir.LogicalNot(src1))};
ir.SetScc(result);
switch (inst.dst[0].field) {
case OperandField::VccLo:
ir.SetVcc(result);
break;
case OperandField::ExecLo:
ir.SetExec(result);
break;
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
break;
default:
UNREACHABLE();
}
}
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
@ -138,7 +104,7 @@ void Translator::S_MOV_B64(const GcnInst& inst) {
}
}
void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
void Translator::S_OR_B64(NegateMode negate, const GcnInst& inst) {
const auto get_src = [&](const InstOperand& operand) {
switch (operand.field) {
case OperandField::VccLo:
@ -151,9 +117,12 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
};
const IR::U1 src0{get_src(inst.src[0])};
const IR::U1 src1{get_src(inst.src[1])};
IR::U1 src1{get_src(inst.src[1])};
if (negate == NegateMode::Src1) {
src1 = ir.LogicalNot(src1);
}
IR::U1 result = ir.LogicalOr(src0, src1);
if (negate) {
if (negate == NegateMode::Result) {
result = ir.LogicalNot(result);
}
ir.SetScc(result);
@ -169,7 +138,7 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
}
}
void Translator::S_AND_B64(bool negate, const GcnInst& inst) {
void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) {
const auto get_src = [&](const InstOperand& operand) {
switch (operand.field) {
case OperandField::VccLo:
@ -183,9 +152,12 @@ void Translator::S_AND_B64(bool negate, const GcnInst& inst) {
}
};
const IR::U1 src0{get_src(inst.src[0])};
const IR::U1 src1{get_src(inst.src[1])};
IR::U1 src1{get_src(inst.src[1])};
if (negate == NegateMode::Src1) {
src1 = ir.LogicalNot(src1);
}
IR::U1 result = ir.LogicalAnd(src0, src1);
if (negate) {
if (negate == NegateMode::Result) {
result = ir.LogicalNot(result);
}
ir.SetScc(result);
@ -196,6 +168,9 @@ void Translator::S_AND_B64(bool negate, const GcnInst& inst) {
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
break;
case OperandField::ExecLo:
ir.SetExec(result);
break;
default:
UNREACHABLE();
}
@ -325,4 +300,20 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
}
void Translator::S_ADD_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IAdd(src0, src1));
// TODO: Carry out
ir.SetScc(ir.Imm1(false));
}
void Translator::S_SUB_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.ISub(src0, src1));
// TODO: Carry out
ir.SetScc(ir.Imm1(false));
}
} // namespace Shader::Gcn

View file

@ -105,7 +105,11 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
}
break;
case OperandField::ConstFloatPos_1_0:
value = ir.Imm32(1.f);
if (force_flt) {
value = ir.Imm32(1.f);
} else {
value = ir.Imm32(std::bit_cast<u32>(1.f));
}
break;
case OperandField::ConstFloatPos_0_5:
value = ir.Imm32(0.5f);
@ -274,6 +278,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_LOAD_DWORDX8:
translator.S_LOAD_DWORD(8, inst);
break;
case Opcode::S_LOAD_DWORDX16:
translator.S_LOAD_DWORD(16, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORD:
translator.S_BUFFER_LOAD_DWORD(1, inst);
break;
@ -324,6 +331,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_SAMPLE_C_LZ:
case Opcode::IMAGE_SAMPLE_LZ:
case Opcode::IMAGE_SAMPLE:
case Opcode::IMAGE_SAMPLE_L:
translator.IMAGE_SAMPLE(inst);
break;
case Opcode::IMAGE_STORE:
@ -437,9 +445,18 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::BUFFER_LOAD_FORMAT_X:
translator.BUFFER_LOAD_FORMAT(1, false, inst);
break;
case Opcode::BUFFER_LOAD_FORMAT_XYZ:
translator.BUFFER_LOAD_FORMAT(3, false, inst);
break;
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
translator.BUFFER_LOAD_FORMAT(4, false, inst);
break;
case Opcode::BUFFER_STORE_FORMAT_X:
translator.BUFFER_STORE_FORMAT(1, false, inst);
break;
case Opcode::BUFFER_STORE_FORMAT_XYZW:
translator.BUFFER_STORE_FORMAT(4, false, inst);
break;
case Opcode::V_MAX_F32:
translator.V_MAX_F32(inst);
break;
@ -453,7 +470,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
translator.V_RSQ_F32(inst);
break;
case Opcode::S_ANDN2_B64:
translator.S_ANDN2_B64(inst);
translator.S_AND_B64(NegateMode::Src1, inst);
break;
case Opcode::S_ORN2_B64:
translator.S_OR_B64(NegateMode::Src1, inst);
break;
case Opcode::V_SIN_F32:
translator.V_SIN_F32(inst);
@ -592,19 +612,19 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
translator.V_CMP_U32(ConditionOp::TRU, false, true, inst);
break;
case Opcode::S_OR_B64:
translator.S_OR_B64(false, inst);
translator.S_OR_B64(NegateMode::None, inst);
break;
case Opcode::S_NOR_B64:
translator.S_OR_B64(true, inst);
translator.S_OR_B64(NegateMode::Result, inst);
break;
case Opcode::S_AND_B64:
translator.S_AND_B64(false, inst);
translator.S_AND_B64(NegateMode::None, inst);
break;
case Opcode::S_NOT_B64:
translator.S_NOT_B64(inst);
break;
case Opcode::S_NAND_B64:
translator.S_AND_B64(true, inst);
translator.S_AND_B64(NegateMode::Result, inst);
break;
case Opcode::V_LSHRREV_B32:
translator.V_LSHRREV_B32(inst);
@ -696,6 +716,29 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_BREV_B32:
translator.S_BREV_B32(inst);
break;
case Opcode::S_ADD_U32:
translator.S_ADD_U32(inst);
break;
case Opcode::S_SUB_U32:
translator.S_SUB_U32(inst);
break;
// TODO: Separate implementation for legacy variants.
case Opcode::V_MUL_LEGACY_F32:
translator.V_MUL_F32(inst);
break;
case Opcode::V_MAC_LEGACY_F32:
translator.V_MAC_F32(inst);
break;
case Opcode::V_MAD_LEGACY_F32:
translator.V_MAD_F32(inst);
break;
case Opcode::V_RSQ_LEGACY_F32:
case Opcode::V_RSQ_CLAMP_F32:
translator.V_RSQ_F32(inst);
break;
case Opcode::V_RCP_IFLAG_F32:
translator.V_RCP_F32(inst);
break;
case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
break;

View file

@ -26,6 +26,12 @@ enum class ConditionOp : u32 {
TRU,
};
/// Selects where the scalar 64-bit AND/OR translators apply a logical NOT.
enum class NegateMode : u32 {
    None = 0,   ///< Plain operation, nothing is negated (S_AND_B64 / S_OR_B64).
    Src1 = 1,   ///< Second source is negated before the operation (S_ANDN2_B64 / S_ORN2_B64).
    Result = 2, ///< Result of the operation is negated (S_NAND_B64 / S_NOR_B64).
};
class Translator {
public:
explicit Translator(IR::Block* block_, Info& info);
@ -38,11 +44,10 @@ public:
void S_MOV(const GcnInst& inst);
void S_MUL_I32(const GcnInst& inst);
void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
void S_ANDN2_B64(const GcnInst& inst);
void S_AND_SAVEEXEC_B64(const GcnInst& inst);
void S_MOV_B64(const GcnInst& inst);
void S_OR_B64(bool negate, const GcnInst& inst);
void S_AND_B64(bool negate, const GcnInst& inst);
void S_OR_B64(NegateMode negate, const GcnInst& inst);
void S_AND_B64(NegateMode negate, const GcnInst& inst);
void S_ADD_I32(const GcnInst& inst);
void S_AND_B32(const GcnInst& inst);
void S_OR_B32(const GcnInst& inst);
@ -54,6 +59,8 @@ public:
void S_BFM_B32(const GcnInst& inst);
void S_NOT_B64(const GcnInst& inst);
void S_BREV_B32(const GcnInst& inst);
void S_ADD_U32(const GcnInst& inst);
void S_SUB_U32(const GcnInst& inst);
// Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);

View file

@ -315,8 +315,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
inst.SetArg(arg_pos, arg);
}
if (inst_info.explicit_lod && inst.GetOpcode() == IR::Opcode::ImageFetch) {
inst.SetArg(3, arg);
if (inst_info.explicit_lod) {
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod);
const u32 pos = inst.GetOpcode() == IR::Opcode::ImageFetch ? 3 : 2;
inst.SetArg(pos, arg);
}
}

View file

@ -485,7 +485,7 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
}
void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
ASSERT_MSG(vqid > 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
auto& queue = mapped_queues[vqid];
const auto& task = ProcessCompute(acb);

View file

@ -354,6 +354,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc2UnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16 &&
num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eR16G16Snorm;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}