mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2024-12-28 02:26:07 +00:00
shader_recompiler: Implement most integer image atomics, workgroup barriers and shared memory load/store (#231)
* shader_recompiler: Add LDEXP * shader_recompiler: Add most image integer atomic ops * shader_recompiler: Implement shared memory load/store * shader_recompiler: More image atomics * externals: Update sirit * clang format * cmake: Add missing files * shader_recompiler: Fix some atomic bugs * shader_recompiler: Vs outputs * shader_recompiler: Shared mem has side-effects, fix format component order * shader_recompiler: Inline constant buffer impl * video_core: Fix regressions * Work * Fixup a few things
This commit is contained in:
parent
af3bbc33e9
commit
6ceab6dfac
|
@ -349,6 +349,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||
src/shader_recompiler/runtime_info.h
|
||||
src/shader_recompiler/backend/spirv/emit_spirv.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv.h
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
|
||||
|
@ -359,6 +361,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||
src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
|
||||
src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
|
||||
|
|
2
externals/sirit
vendored
2
externals/sirit
vendored
|
@ -1 +1 @@
|
|||
Subproject commit 505cc66a2be70b268c1700fef4d5327a5fe46494
|
||||
Subproject commit 8db09231c448b913ae905d5237ce2eca46e3fe87
|
25
src/common/div_ceil.h
Executable file
25
src/common/div_ceil.h
Executable file
|
@ -0,0 +1,25 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Common {
|
||||
|
||||
/// Ceiled integer division.
|
||||
template <typename N, typename D>
|
||||
requires std::is_integral_v<N> && std::is_unsigned_v<D>
|
||||
[[nodiscard]] constexpr N DivCeil(N number, D divisor) {
|
||||
return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor);
|
||||
}
|
||||
|
||||
/// Ceiled integer division with logarithmic divisor in base 2
|
||||
template <typename N, typename D>
|
||||
requires std::is_integral_v<N> && std::is_unsigned_v<D>
|
||||
[[nodiscard]] constexpr N DivCeilLog2(N value, D alignment_log2) {
|
||||
return static_cast<N>((static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2);
|
||||
}
|
||||
|
||||
} // namespace Common
|
|
@ -14,6 +14,12 @@
|
|||
|
||||
namespace Common {
|
||||
|
||||
std::string ToLower(std::string str) {
|
||||
std::transform(str.begin(), str.end(), str.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
return str;
|
||||
}
|
||||
|
||||
std::vector<std::string> SplitString(const std::string& str, char delimiter) {
|
||||
std::istringstream iss(str);
|
||||
std::vector<std::string> output(1);
|
||||
|
|
|
@ -9,6 +9,9 @@
|
|||
|
||||
namespace Common {
|
||||
|
||||
/// Make a string lowercase
|
||||
[[nodiscard]] std::string ToLower(std::string str);
|
||||
|
||||
std::vector<std::string> SplitString(const std::string& str, char delimiter);
|
||||
|
||||
#ifdef _WIN32
|
||||
|
|
|
@ -28,7 +28,7 @@ constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL;
|
|||
// User area size is normally larger than this. However games are unlikely to map to high
|
||||
// regions of that area, so by default we allocate a smaller virtual address space (about 1/4th).
|
||||
// to save space on page tables.
|
||||
static constexpr size_t UserSize = 1ULL << 38;
|
||||
static constexpr size_t UserSize = 1ULL << 39;
|
||||
static constexpr size_t SystemSize = USER_MIN - SYSTEM_MANAGED_MIN;
|
||||
|
||||
/**
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include "common/string_util.h"
|
||||
#include "core/file_sys/fs.h"
|
||||
|
||||
namespace Core::FileSys {
|
||||
|
@ -13,6 +14,7 @@ void MntPoints::Mount(const std::filesystem::path& host_folder, const std::strin
|
|||
|
||||
MntPair pair;
|
||||
pair.host_path = host_folder.string();
|
||||
std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
|
||||
pair.guest_path = guest_folder;
|
||||
|
||||
m_mnt_pairs.push_back(pair);
|
||||
|
@ -46,11 +48,24 @@ std::string MntPoints::GetHostFile(const std::string& guest_file) {
|
|||
for (auto& pair : m_mnt_pairs) {
|
||||
// horrible code but it works :D
|
||||
int find = guest_file.find(pair.guest_path);
|
||||
if (find == 0) {
|
||||
std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1);
|
||||
std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
|
||||
return pair.host_path + npath;
|
||||
if (find != 0) {
|
||||
continue;
|
||||
}
|
||||
std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1);
|
||||
const auto host_path = pair.host_path + npath;
|
||||
#ifndef _WIN64
|
||||
const std::filesystem::path path{host_path};
|
||||
if (!std::filesystem::exists(path)) {
|
||||
const auto filename = Common::ToLower(path.filename());
|
||||
for (const auto& file : std::filesystem::directory_iterator(path.parent_path())) {
|
||||
const auto exist_filename = Common::ToLower(file.path().filename());
|
||||
if (filename == exist_filename) {
|
||||
return file.path();
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return host_path;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
|
|
@ -803,9 +803,9 @@ int PS4_SYSV_ABI sceGnmDrawOpaqueAuto() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress() {
|
||||
LOG_TRACE(Lib_GnmDriver, "called");
|
||||
return false;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface() {
|
||||
|
@ -1962,7 +1962,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
|
|||
if (Config::dumpPM4()) {
|
||||
static auto last_frame_num = -1LL;
|
||||
static u32 seq_num{};
|
||||
if (last_frame_num == frames_submitted) {
|
||||
if (last_frame_num == frames_submitted && cbpair == 0) {
|
||||
++seq_num;
|
||||
} else {
|
||||
last_frame_num = frames_submitted;
|
||||
|
|
|
@ -63,7 +63,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size);
|
|||
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size);
|
||||
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size);
|
||||
int PS4_SYSV_ABI sceGnmDrawOpaqueAuto();
|
||||
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
|
||||
bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
|
||||
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface();
|
||||
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuDebugger();
|
||||
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuException();
|
||||
|
|
|
@ -161,7 +161,6 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
|
||||
LOG_WARNING(Kernel_Vmm, "called");
|
||||
auto* memory = Core::Memory::Instance();
|
||||
return memory->QueryProtection(std::bit_cast<VAddr>(addr), start, end, prot);
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <semaphore.h>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/error.h"
|
||||
#include "common/logging/log.h"
|
||||
|
@ -16,6 +17,8 @@
|
|||
#include "core/linker.h"
|
||||
#ifdef _WIN64
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
namespace Libraries::Kernel {
|
||||
|
@ -46,7 +49,8 @@ void init_pthreads() {
|
|||
}
|
||||
|
||||
void pthreadInitSelfMainThread() {
|
||||
g_pthread_self = new PthreadInternal{};
|
||||
auto* pthread_pool = g_pthread_cxt->GetPthreadPool();
|
||||
g_pthread_self = pthread_pool->Create();
|
||||
scePthreadAttrInit(&g_pthread_self->attr);
|
||||
g_pthread_self->pth = pthread_self();
|
||||
g_pthread_self->name = "Main_Thread";
|
||||
|
@ -926,31 +930,25 @@ int PS4_SYSV_ABI scePthreadCreate(ScePthread* thread, const ScePthreadAttr* attr
|
|||
if ((*thread)->attr != nullptr) {
|
||||
scePthreadAttrDestroy(&(*thread)->attr);
|
||||
}
|
||||
|
||||
scePthreadAttrInit(&(*thread)->attr);
|
||||
|
||||
int result = pthread_copy_attributes(&(*thread)->attr, attr);
|
||||
ASSERT(result == 0);
|
||||
|
||||
if (result == 0) {
|
||||
if (name != NULL) {
|
||||
(*thread)->name = name;
|
||||
} else {
|
||||
(*thread)->name = "no-name";
|
||||
}
|
||||
(*thread)->entry = start_routine;
|
||||
(*thread)->arg = arg;
|
||||
(*thread)->is_almost_done = false;
|
||||
(*thread)->is_detached = (*attr)->detached;
|
||||
(*thread)->is_started = false;
|
||||
|
||||
result = pthread_create(&(*thread)->pth, &(*attr)->pth_attr, run_thread, *thread);
|
||||
if (name != NULL) {
|
||||
(*thread)->name = name;
|
||||
} else {
|
||||
(*thread)->name = "no-name";
|
||||
}
|
||||
(*thread)->entry = start_routine;
|
||||
(*thread)->arg = arg;
|
||||
(*thread)->is_almost_done = false;
|
||||
(*thread)->is_detached = (*attr)->detached;
|
||||
(*thread)->is_started = false;
|
||||
|
||||
pthread_attr_setstacksize(&(*attr)->pth_attr, 2_MB);
|
||||
result = pthread_create(&(*thread)->pth, &(*attr)->pth_attr, run_thread, *thread);
|
||||
|
||||
if (result == 0) {
|
||||
while (!(*thread)->is_started) {
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(1000));
|
||||
}
|
||||
}
|
||||
LOG_INFO(Kernel_Pthread, "thread create name = {}", (*thread)->name);
|
||||
|
||||
switch (result) {
|
||||
|
@ -979,7 +977,15 @@ ScePthread PThreadPool::Create() {
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef _WIN64
|
||||
auto* ret = new PthreadInternal{};
|
||||
#else
|
||||
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
|
||||
auto* ret = reinterpret_cast<PthreadInternal*>(
|
||||
mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
|
||||
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
|
||||
#endif
|
||||
|
||||
ret->is_free = false;
|
||||
ret->is_detached = false;
|
||||
|
|
|
@ -129,7 +129,11 @@ public:
|
|||
const auto end = std::chrono::high_resolution_clock::now();
|
||||
const auto time =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
||||
*timeout -= time;
|
||||
if (status == std::cv_status::timeout) {
|
||||
*timeout = 0;
|
||||
} else {
|
||||
*timeout -= time;
|
||||
}
|
||||
return GetResult(status == std::cv_status::timeout);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -341,6 +341,7 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
|
|||
switch (mount_mode) {
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF: {
|
||||
if (!std::filesystem::exists(mount_dir)) {
|
||||
return ORBIS_SAVE_DATA_ERROR_NOT_FOUND;
|
||||
|
@ -349,11 +350,14 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
|
|||
mnt->Mount(mount_dir, g_mount_point);
|
||||
|
||||
mount_result->mount_status = 0;
|
||||
strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16);
|
||||
} break;
|
||||
std::strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16);
|
||||
break;
|
||||
}
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE:
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
|
||||
ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
|
||||
ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON:
|
||||
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF |
|
||||
|
|
|
@ -85,7 +85,7 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) {
|
|||
#include "shader_recompiler/ir/opcodes.inc"
|
||||
#undef OPCODE
|
||||
}
|
||||
throw LogicError("Invalid opcode {}", inst->GetOpcode());
|
||||
UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode());
|
||||
}
|
||||
|
||||
Id TypeId(const EmitContext& ctx, IR::Type type) {
|
||||
|
@ -176,7 +176,12 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
|
|||
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
|
||||
spv::ExecutionModel execution_model{};
|
||||
ctx.AddCapability(spv::Capability::Image1D);
|
||||
ctx.AddCapability(spv::Capability::Sampled1D);
|
||||
ctx.AddCapability(spv::Capability::Float16);
|
||||
ctx.AddCapability(spv::Capability::Int16);
|
||||
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
|
||||
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
|
||||
switch (program.info.stage) {
|
||||
case Stage::Compute: {
|
||||
const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
|
||||
|
@ -272,47 +277,55 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
|
|||
void EmitReference(EmitContext&) {}
|
||||
|
||||
void EmitPhiMove(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetScc(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetExec(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetVcc(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetSccLo(EmitContext& ctx) {
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetVccLo(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetVccHi(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetScc(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetExec(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetVcc(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetSccLo(EmitContext& ctx) {
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetVccLo(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetVccHi(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
70
src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
Normal file
70
src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
Normal file
|
@ -0,0 +1,70 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
|
||||
const Id scope{ctx.ConstU32(static_cast<u32>(spv::Scope::Device))};
|
||||
const Id semantics{ctx.u32_zero_value};
|
||||
return {scope, semantics};
|
||||
}
|
||||
|
||||
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
|
||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, texture.id, coords, ctx.ConstU32(0U))};
|
||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicSMin);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMin);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicSMax);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
|
||||
// TODO: This is not yet implemented
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) {
|
||||
// TODO: This is not yet implemented
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicAnd);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicOr);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicXor);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicExchange);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
37
src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
Normal file
37
src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
Normal file
|
@ -0,0 +1,37 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
|
||||
const auto semantics{
|
||||
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
|
||||
spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
|
||||
spv::MemorySemanticsMask::ImageMemory};
|
||||
ctx.OpMemoryBarrier(ctx.ConstU32(static_cast<u32>(scope)),
|
||||
ctx.ConstU32(static_cast<u32>(semantics)));
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void EmitBarrier(EmitContext& ctx) {
|
||||
const auto execution{spv::Scope::Workgroup};
|
||||
const auto memory{spv::Scope::Workgroup};
|
||||
const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
|
||||
spv::MemorySemanticsMask::WorkgroupMemory};
|
||||
ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
|
||||
ctx.ConstU32(static_cast<u32>(memory)),
|
||||
ctx.ConstU32(static_cast<u32>(memory_semantics)));
|
||||
}
|
||||
|
||||
void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
|
||||
MemoryBarrier(ctx, spv::Scope::Workgroup);
|
||||
}
|
||||
|
||||
void EmitDeviceMemoryBarrier(EmitContext& ctx) {
|
||||
MemoryBarrier(ctx, spv::Scope::Device);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
|
@ -18,8 +18,8 @@ void EmitBitCastU64F64(EmitContext&) {
|
|||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
void EmitBitCastF16U16(EmitContext&) {
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
Id EmitBitCastF16U16(EmitContext& ctx, Id value) {
|
||||
return ctx.OpBitcast(ctx.F16[1], value);
|
||||
}
|
||||
|
||||
Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
|
||||
|
|
|
@ -7,6 +7,37 @@
|
|||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
|
||||
Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
|
||||
switch (output) {
|
||||
case VsOutput::ClipDist0:
|
||||
case VsOutput::ClipDist1:
|
||||
case VsOutput::ClipDist2:
|
||||
case VsOutput::ClipDist3:
|
||||
case VsOutput::ClipDist4:
|
||||
case VsOutput::ClipDist5:
|
||||
case VsOutput::ClipDist6:
|
||||
case VsOutput::ClipDist7: {
|
||||
const u32 index = u32(output) - u32(VsOutput::ClipDist0);
|
||||
const Id clip_num{ctx.ConstU32(index)};
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
|
||||
}
|
||||
case VsOutput::CullDist0:
|
||||
case VsOutput::CullDist1:
|
||||
case VsOutput::CullDist2:
|
||||
case VsOutput::CullDist3:
|
||||
case VsOutput::CullDist4:
|
||||
case VsOutput::CullDist5:
|
||||
case VsOutput::CullDist6:
|
||||
case VsOutput::CullDist7: {
|
||||
const u32 index = u32(output) - u32(VsOutput::CullDist0);
|
||||
const Id cull_num{ctx.ConstU32(index)};
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
||||
if (IR::IsParam(attr)) {
|
||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
|
@ -20,10 +51,20 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||
switch (attr) {
|
||||
case IR::Attribute::Position0: {
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
|
||||
case IR::Attribute::Position1:
|
||||
case IR::Attribute::Position2:
|
||||
case IR::Attribute::Position3: {
|
||||
const u32 index = u32(attr) - u32(IR::Attribute::Position1);
|
||||
return VsOutputAttrPointer(ctx, ctx.info.vs_outputs[index][element]);
|
||||
}
|
||||
case IR::Attribute::RenderTarget0:
|
||||
case IR::Attribute::RenderTarget1:
|
||||
case IR::Attribute::RenderTarget2:
|
||||
case IR::Attribute::RenderTarget3: {
|
||||
case IR::Attribute::RenderTarget3:
|
||||
case IR::Attribute::RenderTarget4:
|
||||
case IR::Attribute::RenderTarget5:
|
||||
case IR::Attribute::RenderTarget6:
|
||||
case IR::Attribute::RenderTarget7: {
|
||||
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
|
||||
if (ctx.frag_num_comp[index] > 1) {
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element));
|
||||
|
@ -45,39 +86,39 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
|
|||
}
|
||||
|
||||
void EmitGetThreadBitScalarReg(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetThreadBitScalarReg(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetScalarRegister(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetScalarRegister(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetVectorRegister(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetVectorRegister(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetGotoVariable(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetGotoVariable(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitReadConst(EmitContext& ctx) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
||||
|
@ -159,7 +200,15 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
|||
}
|
||||
|
||||
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
UNREACHABLE();
|
||||
const auto info = inst->Flags<IR::BufferInstInfo>();
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
boost::container::static_vector<Id, 2> ids;
|
||||
for (u32 i = 0; i < 2; i++) {
|
||||
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
|
||||
}
|
||||
return ctx.OpCompositeConstruct(buffer.data_types->Get(2), ids);
|
||||
}
|
||||
|
||||
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
|
|
|
@ -68,11 +68,7 @@ Id EmitConvertS32F16(EmitContext& ctx, Id value) {
|
|||
}
|
||||
|
||||
Id EmitConvertS32F32(EmitContext& ctx, Id value) {
|
||||
if (ctx.profile.has_broken_signed_operations) {
|
||||
return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value));
|
||||
} else {
|
||||
return ctx.OpConvertFToS(ctx.U32[1], value);
|
||||
}
|
||||
return ctx.OpConvertFToS(ctx.U32[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertS32F64(EmitContext& ctx, Id value) {
|
||||
|
@ -259,4 +255,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value) {
|
|||
return ctx.OpConvertUToF(ctx.F64[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertU16U32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpUConvert(ctx.U16, value);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -6,6 +6,11 @@
|
|||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
|
||||
ctx.Decorate(op, spv::Decoration::NoContraction);
|
||||
return op;
|
||||
}
|
||||
|
||||
Id EmitFPAbs16(EmitContext& ctx, Id value) {
|
||||
return ctx.OpFAbs(ctx.F16[1], value);
|
||||
}
|
||||
|
@ -19,31 +24,31 @@ Id EmitFPAbs64(EmitContext& ctx, Id value) {
|
|||
}
|
||||
|
||||
Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||
return ctx.OpFAdd(ctx.F16[1], a, b);
|
||||
return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
|
||||
}
|
||||
|
||||
Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||
return ctx.OpFAdd(ctx.F32[1], a, b);
|
||||
return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
|
||||
}
|
||||
|
||||
Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||
return ctx.OpFAdd(ctx.F64[1], a, b);
|
||||
return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
|
||||
}
|
||||
|
||||
Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||
return ctx.OpFSub(ctx.F32[1], a, b);
|
||||
return Decorate(ctx, inst, ctx.OpFSub(ctx.F32[1], a, b));
|
||||
}
|
||||
|
||||
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
|
||||
return ctx.OpFma(ctx.F16[1], a, b, c);
|
||||
return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
|
||||
}
|
||||
|
||||
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
|
||||
return ctx.OpFma(ctx.F32[1], a, b, c);
|
||||
return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
|
||||
}
|
||||
|
||||
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
|
||||
return ctx.OpFma(ctx.F64[1], a, b, c);
|
||||
return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
|
||||
}
|
||||
|
||||
Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
|
||||
|
@ -63,15 +68,15 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
|
|||
}
|
||||
|
||||
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||
return ctx.OpFMul(ctx.F16[1], a, b);
|
||||
return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
|
||||
}
|
||||
|
||||
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||
return ctx.OpFMul(ctx.F32[1], a, b);
|
||||
return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
|
||||
}
|
||||
|
||||
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
|
||||
return ctx.OpFMul(ctx.F64[1], a, b);
|
||||
return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
|
||||
}
|
||||
|
||||
Id EmitFPNeg16(EmitContext& ctx, Id value) {
|
||||
|
@ -98,6 +103,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) {
|
|||
return ctx.OpExp2(ctx.F32[1], value);
|
||||
}
|
||||
|
||||
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) {
|
||||
return ctx.OpLdexp(ctx.F32[1], value, exp);
|
||||
}
|
||||
|
||||
Id EmitFPLog2(EmitContext& ctx, Id value) {
|
||||
return ctx.OpLog2(ctx.F32[1], value);
|
||||
}
|
||||
|
@ -360,4 +369,12 @@ Id EmitFPIsNan64(EmitContext& ctx, Id value) {
|
|||
return ctx.OpIsNan(ctx.U1[1], value);
|
||||
}
|
||||
|
||||
Id EmitFPIsInf32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpIsInf(ctx.U1[1], value);
|
||||
}
|
||||
|
||||
Id EmitFPIsInf64(EmitContext& ctx, Id value) {
|
||||
return ctx.OpIsInf(ctx.U1[1], value);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -79,10 +79,12 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id of
|
|||
Id ms) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id result_type = texture.data_types->Get(4);
|
||||
if (Sirit::ValidId(lod)) {
|
||||
return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod);
|
||||
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords,
|
||||
spv::ImageOperandsMask::Lod, lod));
|
||||
} else {
|
||||
return ctx.OpImageFetch(ctx.F32[4], image, coords);
|
||||
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -134,7 +136,8 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
|
|||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), color);
|
||||
const Id color_type = texture.data_types->Get(4);
|
||||
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color));
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -33,16 +33,21 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx);
|
|||
void EmitGetScc(EmitContext& ctx);
|
||||
void EmitGetExec(EmitContext& ctx);
|
||||
void EmitGetVcc(EmitContext& ctx);
|
||||
void EmitGetSccLo(EmitContext& ctx);
|
||||
void EmitGetVccLo(EmitContext& ctx);
|
||||
void EmitGetVccHi(EmitContext& ctx);
|
||||
void EmitSetScc(EmitContext& ctx);
|
||||
void EmitSetExec(EmitContext& ctx);
|
||||
void EmitSetVcc(EmitContext& ctx);
|
||||
void EmitSetSccLo(EmitContext& ctx);
|
||||
void EmitSetVccLo(EmitContext& ctx);
|
||||
void EmitSetVccHi(EmitContext& ctx);
|
||||
void EmitPrologue(EmitContext& ctx);
|
||||
void EmitEpilogue(EmitContext& ctx);
|
||||
void EmitDiscard(EmitContext& ctx);
|
||||
void EmitBarrier(EmitContext& ctx);
|
||||
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
|
||||
void EmitDeviceMemoryBarrier(EmitContext& ctx);
|
||||
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
|
||||
void EmitGetThreadBitScalarReg(EmitContext& ctx);
|
||||
void EmitSetThreadBitScalarReg(EmitContext& ctx);
|
||||
|
@ -82,12 +87,13 @@ Id EmitUndefU8(EmitContext& ctx);
|
|||
Id EmitUndefU16(EmitContext& ctx);
|
||||
Id EmitUndefU32(EmitContext& ctx);
|
||||
Id EmitUndefU64(EmitContext& ctx);
|
||||
Id EmitReadSharedU8(EmitContext& ctx, Id offset);
|
||||
Id EmitReadSharedS8(EmitContext& ctx, Id offset);
|
||||
Id EmitReadSharedU16(EmitContext& ctx, Id offset);
|
||||
Id EmitReadSharedS16(EmitContext& ctx, Id offset);
|
||||
Id EmitReadSharedU32(EmitContext& ctx, Id offset);
|
||||
Id EmitReadSharedU64(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
|
||||
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
|
||||
|
@ -140,7 +146,7 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
|
|||
void EmitBitCastU16F16(EmitContext& ctx);
|
||||
Id EmitBitCastU32F32(EmitContext& ctx, Id value);
|
||||
void EmitBitCastU64F64(EmitContext& ctx);
|
||||
void EmitBitCastF16U16(EmitContext&);
|
||||
Id EmitBitCastF16U16(EmitContext& ctx, Id value);
|
||||
Id EmitBitCastF32U32(EmitContext& ctx, Id value);
|
||||
void EmitBitCastF64U64(EmitContext& ctx);
|
||||
Id EmitPackUint2x32(EmitContext& ctx, Id value);
|
||||
|
@ -172,6 +178,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value);
|
|||
Id EmitFPSin(EmitContext& ctx, Id value);
|
||||
Id EmitFPCos(EmitContext& ctx, Id value);
|
||||
Id EmitFPExp2(EmitContext& ctx, Id value);
|
||||
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
|
||||
Id EmitFPLog2(EmitContext& ctx, Id value);
|
||||
Id EmitFPRecip32(EmitContext& ctx, Id value);
|
||||
Id EmitFPRecip64(EmitContext& ctx, Id value);
|
||||
|
@ -236,8 +243,11 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
|
|||
Id EmitFPIsNan16(EmitContext& ctx, Id value);
|
||||
Id EmitFPIsNan32(EmitContext& ctx, Id value);
|
||||
Id EmitFPIsNan64(EmitContext& ctx, Id value);
|
||||
Id EmitFPIsInf32(EmitContext& ctx, Id value);
|
||||
Id EmitFPIsInf64(EmitContext& ctx, Id value);
|
||||
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitISub32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitISub64(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
|
||||
|
@ -333,6 +343,7 @@ Id EmitConvertF64U8(EmitContext& ctx, Id value);
|
|||
Id EmitConvertF64U16(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF64U32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF64U64(EmitContext& ctx, Id value);
|
||||
Id EmitConvertU16U32(EmitContext& ctx, Id value);
|
||||
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
Id offset);
|
||||
|
@ -355,6 +366,18 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
|
|||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
|
||||
|
||||
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
|
||||
Id EmitLaneId(EmitContext& ctx);
|
||||
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
|
||||
|
||||
|
|
|
@ -60,6 +60,10 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
|
|||
return ctx.OpIAdd(ctx.U64, a, b);
|
||||
}
|
||||
|
||||
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b) {
|
||||
return ctx.OpIAddCarry(ctx.full_result_u32x2, a, b);
|
||||
}
|
||||
|
||||
Id EmitISub32(EmitContext& ctx, Id a, Id b) {
|
||||
return ctx.OpISub(ctx.U32[1], a, b);
|
||||
}
|
||||
|
|
165
src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
Normal file
165
src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
Normal file
|
@ -0,0 +1,165 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
|
||||
const Id shift_id{ctx.ConstU32(shift)};
|
||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
|
||||
}
|
||||
|
||||
Id Word(EmitContext& ctx, Id offset) {
|
||||
const Id shift_id{ctx.ConstU32(2U)};
|
||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
|
||||
std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
|
||||
const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))};
|
||||
const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))};
|
||||
const Id count_id{ctx.ConstU32(count)};
|
||||
return {bit, count_id};
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
|
||||
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
|
||||
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
} else {
|
||||
return Word(ctx, offset);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
|
||||
return ctx.OpLoad(ctx.U32[2], pointer);
|
||||
} else {
|
||||
const Id shift_id{ctx.ConstU32(2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
|
||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
|
||||
return ctx.OpLoad(ctx.U32[4], pointer);
|
||||
}
|
||||
const Id shift_id{ctx.ConstU32(2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
std::array<Id, 4> values{};
|
||||
for (u32 i = 0; i < 4; ++i) {
|
||||
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], values);
|
||||
}
|
||||
|
||||
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
|
||||
}
|
||||
|
||||
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
|
||||
}
|
||||
|
||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
|
||||
Id pointer{};
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
|
||||
} else {
|
||||
const Id shift{ctx.ConstU32(2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
|
||||
}
|
||||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
|
||||
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
|
||||
ctx.OpStore(pointer, value);
|
||||
return;
|
||||
}
|
||||
const Id shift{ctx.ConstU32(2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
|
||||
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
||||
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
||||
}
|
||||
|
||||
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
|
||||
ctx.OpStore(pointer, value);
|
||||
return;
|
||||
}
|
||||
const Id shift{ctx.ConstU32(2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
for (u32 i = 0; i < 4; ++i) {
|
||||
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <fmt/format.h>
|
||||
#include "common/div_ceil.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
@ -41,8 +42,9 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
|
|||
AddCapability(spv::Capability::Shader);
|
||||
DefineArithmeticTypes();
|
||||
DefineInterfaces(program);
|
||||
DefineBuffers(program.info);
|
||||
DefineImagesAndSamplers(program.info);
|
||||
DefineBuffers(info);
|
||||
DefineImagesAndSamplers(info);
|
||||
DefineSharedMemory(info);
|
||||
}
|
||||
|
||||
EmitContext::~EmitContext() = default;
|
||||
|
@ -72,19 +74,19 @@ Id EmitContext::Def(const IR::Value& value) {
|
|||
void EmitContext::DefineArithmeticTypes() {
|
||||
void_id = Name(TypeVoid(), "void_id");
|
||||
U1[1] = Name(TypeBool(), "bool_id");
|
||||
// F16[1] = Name(TypeFloat(16), "f16_id");
|
||||
F16[1] = Name(TypeFloat(16), "f16_id");
|
||||
F32[1] = Name(TypeFloat(32), "f32_id");
|
||||
// F64[1] = Name(TypeFloat(64), "f64_id");
|
||||
S32[1] = Name(TypeSInt(32), "i32_id");
|
||||
U32[1] = Name(TypeUInt(32), "u32_id");
|
||||
// U8 = Name(TypeSInt(8), "u8");
|
||||
// S8 = Name(TypeUInt(8), "s8");
|
||||
// U16 = Name(TypeUInt(16), "u16_id");
|
||||
U16 = Name(TypeUInt(16), "u16_id");
|
||||
// S16 = Name(TypeSInt(16), "s16_id");
|
||||
// U64 = Name(TypeUInt(64), "u64_id");
|
||||
|
||||
for (u32 i = 2; i <= 4; i++) {
|
||||
// F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
|
||||
F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
|
||||
F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i));
|
||||
// F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i));
|
||||
S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i));
|
||||
|
@ -222,8 +224,17 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
|
||||
void EmitContext::DefineOutputs(const Info& info) {
|
||||
switch (stage) {
|
||||
case Stage::Vertex:
|
||||
case Stage::Vertex: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value,
|
||||
f32_zero_value, f32_zero_value, f32_zero_value,
|
||||
f32_zero_value, f32_zero_value};
|
||||
const Id type{TypeArray(F32[1], ConstU32(8U))};
|
||||
const Id initializer{ConstantComposite(type, zero)};
|
||||
clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output,
|
||||
initializer);
|
||||
cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output,
|
||||
initializer);
|
||||
for (u32 i = 0; i < IR::NumParams; i++) {
|
||||
const IR::Attribute param{IR::Attribute::Param0 + i};
|
||||
if (!info.stores.GetAny(param)) {
|
||||
|
@ -236,6 +247,7 @@ void EmitContext::DefineOutputs(const Info& info) {
|
|||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
|
||||
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
|
||||
|
@ -294,8 +306,49 @@ void EmitContext::DefineBuffers(const Info& info) {
|
|||
}
|
||||
}
|
||||
|
||||
spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
||||
return spv::ImageFormat::R32ui;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::R32f;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::Rg32f;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::R16f;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::Rg16f;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
|
||||
return spv::ImageFormat::Rg8;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::Rgba16f;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
|
||||
return spv::ImageFormat::R8;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
|
||||
return spv::ImageFormat::Rgba8;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
||||
const auto format = spv::ImageFormat::Unknown;
|
||||
const auto image = ctx.info.ReadUd<AmdGpu::Image>(desc.sgpr_base, desc.dword_offset);
|
||||
const auto format = desc.is_storage ? GetFormat(image) : spv::ImageFormat::Unknown;
|
||||
const u32 sampled = desc.is_storage ? 2 : 1;
|
||||
switch (desc.type) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
|
@ -320,7 +373,17 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
|
||||
void EmitContext::DefineImagesAndSamplers(const Info& info) {
|
||||
for (const auto& image_desc : info.images) {
|
||||
const Id sampled_type{image_desc.nfmt == AmdGpu::NumberFormat::Uint ? U32[1] : F32[1]};
|
||||
const VectorIds* data_types = [&] {
|
||||
switch (image_desc.nfmt) {
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return &U32;
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return &S32;
|
||||
default:
|
||||
return &F32;
|
||||
}
|
||||
}();
|
||||
const Id sampled_type = data_types->Get(1);
|
||||
const Id image_type{ImageType(*this, image_desc, sampled_type)};
|
||||
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
|
||||
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
|
||||
|
@ -330,6 +393,7 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
|
|||
image_desc.dword_offset));
|
||||
images.push_back({
|
||||
.id = id,
|
||||
.data_types = data_types,
|
||||
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
|
||||
.pointer_type = pointer_type,
|
||||
.image_type = image_type,
|
||||
|
@ -338,6 +402,8 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
|
|||
++binding;
|
||||
}
|
||||
|
||||
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
|
||||
|
||||
if (info.samplers.empty()) {
|
||||
return;
|
||||
}
|
||||
|
@ -356,4 +422,50 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineSharedMemory(const Info& info) {
|
||||
if (info.shared_memory_size == 0) {
|
||||
return;
|
||||
}
|
||||
const auto make{[&](Id element_type, u32 element_size) {
|
||||
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
|
||||
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
|
||||
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
|
||||
|
||||
const Id struct_type{TypeStruct(array_type)};
|
||||
MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
|
||||
const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
|
||||
const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
|
||||
const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
|
||||
Decorate(variable, spv::Decoration::Aliased);
|
||||
interfaces.push_back(variable);
|
||||
|
||||
return std::make_tuple(variable, element_pointer, pointer);
|
||||
}};
|
||||
if (profile.support_explicit_workgroup_layout) {
|
||||
AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
|
||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
|
||||
if (info.uses_shared_u8) {
|
||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
|
||||
std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
|
||||
}
|
||||
if (info.uses_shared_u16) {
|
||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
|
||||
std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
|
||||
}
|
||||
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
|
||||
std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
|
||||
std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
|
||||
return;
|
||||
}
|
||||
const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
|
||||
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
|
||||
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
|
||||
|
||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
||||
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
||||
interfaces.push_back(shared_memory_u32);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -66,15 +66,17 @@ public:
|
|||
}
|
||||
|
||||
template <bool global = true>
|
||||
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
|
||||
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class,
|
||||
std::optional<Id> initializer = std::nullopt) {
|
||||
const Id pointer_type_id{TypePointer(storage_class, type)};
|
||||
return global ? AddGlobalVariable(pointer_type_id, storage_class)
|
||||
: AddLocalVariable(pointer_type_id, storage_class);
|
||||
return global ? AddGlobalVariable(pointer_type_id, storage_class, initializer)
|
||||
: AddLocalVariable(pointer_type_id, storage_class, initializer);
|
||||
}
|
||||
|
||||
[[nodiscard]] Id DefineVariable(Id type, std::optional<spv::BuiltIn> builtin,
|
||||
spv::StorageClass storage_class) {
|
||||
const Id id{DefineVar(type, storage_class)};
|
||||
spv::StorageClass storage_class,
|
||||
std::optional<Id> initializer = std::nullopt) {
|
||||
const Id id{DefineVar(type, storage_class, initializer)};
|
||||
if (builtin) {
|
||||
Decorate(id, spv::Decoration::BuiltIn, *builtin);
|
||||
}
|
||||
|
@ -147,6 +149,12 @@ public:
|
|||
Id u32_zero_value{};
|
||||
Id f32_zero_value{};
|
||||
|
||||
Id shared_u8{};
|
||||
Id shared_u16{};
|
||||
Id shared_u32{};
|
||||
Id shared_u32x2{};
|
||||
Id shared_u32x4{};
|
||||
|
||||
Id input_u32{};
|
||||
Id input_f32{};
|
||||
Id input_s32{};
|
||||
|
@ -163,13 +171,25 @@ public:
|
|||
Id frag_depth{};
|
||||
std::array<Id, 8> frag_color{};
|
||||
std::array<u32, 8> frag_num_comp{};
|
||||
Id clip_distances{};
|
||||
Id cull_distances{};
|
||||
|
||||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
Id subgroup_local_invocation_id{};
|
||||
Id image_u32{};
|
||||
|
||||
Id shared_memory_u8{};
|
||||
Id shared_memory_u16{};
|
||||
Id shared_memory_u32{};
|
||||
Id shared_memory_u32x2{};
|
||||
Id shared_memory_u32x4{};
|
||||
|
||||
Id shared_memory_u32_type{};
|
||||
|
||||
struct TextureDefinition {
|
||||
Id id;
|
||||
const VectorIds* data_types;
|
||||
Id sampled_type;
|
||||
Id pointer_type;
|
||||
Id image_type;
|
||||
|
@ -205,6 +225,7 @@ private:
|
|||
void DefineOutputs(const Info& info);
|
||||
void DefineBuffers(const Info& info);
|
||||
void DefineImagesAndSamplers(const Info& info);
|
||||
void DefineSharedMemory(const Info& info);
|
||||
|
||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
||||
};
|
||||
|
|
|
@ -149,9 +149,15 @@ void CFG::LinkBlocks() {
|
|||
block.end_class = EndClass::Branch;
|
||||
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
|
||||
const auto& prev_inst = inst_list[block.end_index - 1];
|
||||
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0 &&
|
||||
prev_inst.control.exp.target != 9) {
|
||||
block.end_class = EndClass::Kill;
|
||||
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
|
||||
if (prev_inst.control.exp.target != 9) {
|
||||
block.end_class = EndClass::Kill;
|
||||
} else if (const auto& exec_mask = inst_list[block.end_index - 2];
|
||||
exec_mask.src[0].field == OperandField::ConstZero) {
|
||||
block.end_class = EndClass::Kill;
|
||||
} else {
|
||||
block.end_class = EndClass::Exit;
|
||||
}
|
||||
} else {
|
||||
block.end_class = EndClass::Exit;
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ namespace Shader::Gcn {
|
|||
* We take the reverse way, extract the original input semantics from these instructions.
|
||||
**/
|
||||
|
||||
std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
|
||||
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
|
||||
std::vector<VertexAttribute> attributes;
|
||||
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
|
||||
GcnDecodeContext decoder;
|
||||
|
@ -47,6 +47,8 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
|
|||
u32 semantic_index = 0;
|
||||
while (!code_slice.atEnd()) {
|
||||
const auto inst = decoder.decodeInstruction(code_slice);
|
||||
*out_size += inst.length;
|
||||
|
||||
if (inst.opcode == Opcode::S_SETPC_B64) {
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -17,6 +17,6 @@ struct VertexAttribute {
|
|||
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
|
||||
};
|
||||
|
||||
std::vector<VertexAttribute> ParseFetchShader(const u32* code);
|
||||
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -3429,48 +3429,48 @@ constexpr std::array<InstFormat, 112> InstructionFormatMIMG = {{
|
|||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
// 17 = IMAGE_ATOMIC_ADD
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 18 = IMAGE_ATOMIC_SUB
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
{},
|
||||
// 20 = IMAGE_ATOMIC_SMIN
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Sint32,
|
||||
ScalarType::Sint32},
|
||||
// 21 = IMAGE_ATOMIC_UMIN
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 22 = IMAGE_ATOMIC_SMAX
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Sint32,
|
||||
ScalarType::Sint32},
|
||||
// 23 = IMAGE_ATOMIC_UMAX
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 24 = IMAGE_ATOMIC_AND
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 25 = IMAGE_ATOMIC_OR
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 26 = IMAGE_ATOMIC_XOR
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 27 = IMAGE_ATOMIC_INC
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 28 = IMAGE_ATOMIC_DEC
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 29 = IMAGE_ATOMIC_FCMPSWAP
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
|
||||
ScalarType::Float32},
|
||||
// 30 = IMAGE_ATOMIC_FMIN
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
|
||||
ScalarType::Float32},
|
||||
// 31 = IMAGE_ATOMIC_FMAX
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
|
||||
ScalarType::Float32},
|
||||
// 32 = IMAGE_SAMPLE
|
||||
{InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
|
||||
ScalarType::Float32},
|
||||
|
|
|
@ -187,7 +187,7 @@ std::string DumpExpr(const Statement* stmt) {
|
|||
case StatementType::Not:
|
||||
case StatementType::Or:
|
||||
case StatementType::Variable:
|
||||
throw LogicError("Statement can't be printed");
|
||||
UNREACHABLE_MSG("Statement can't be printed");
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
@ -335,7 +335,7 @@ private:
|
|||
}
|
||||
// Expensive operation:
|
||||
if (!AreSiblings(goto_stmt, label_stmt)) {
|
||||
throw LogicError("Goto is not a sibling with the label");
|
||||
UNREACHABLE_MSG("Goto is not a sibling with the label");
|
||||
}
|
||||
// goto_stmt and label_stmt are guaranteed to be siblings, eliminate
|
||||
if (std::next(goto_stmt) == label_stmt) {
|
||||
|
@ -451,7 +451,7 @@ private:
|
|||
case StatementType::Loop:
|
||||
return MoveOutwardLoop(goto_stmt);
|
||||
default:
|
||||
throw LogicError("Invalid outward movement");
|
||||
UNREACHABLE_MSG("Invalid outward movement");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -486,7 +486,7 @@ private:
|
|||
case StatementType::Loop:
|
||||
break;
|
||||
default:
|
||||
throw LogicError("Invalid inward movement");
|
||||
UNREACHABLE_MSG("Invalid inward movement");
|
||||
}
|
||||
Tree& nested_tree{label_nested_stmt->children};
|
||||
Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
|
||||
|
@ -633,7 +633,8 @@ private:
|
|||
if (!stmt.block->is_dummy) {
|
||||
const u32 start = stmt.block->begin_index;
|
||||
const u32 size = stmt.block->end_index - start + 1;
|
||||
Translate(current_block, inst_list.subspan(start, size), info);
|
||||
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
|
||||
info);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -22,16 +22,18 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
|
|||
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
if (is_pair) {
|
||||
// Pair loads are either 32 or 64-bit. We assume 32-bit for now.
|
||||
ASSERT(bit_size == 32);
|
||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
||||
ir.SetVectorReg(dst_reg, ir.ReadShared(32, is_signed, addr0));
|
||||
ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)});
|
||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
|
||||
ir.SetVectorReg(dst_reg + 1, ir.ReadShared(32, is_signed, addr1));
|
||||
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)});
|
||||
} else if (bit_size == 64) {
|
||||
const IR::Value data = ir.UnpackUint2x32(ir.ReadShared(bit_size, is_signed, addr));
|
||||
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr);
|
||||
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
|
||||
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
|
||||
} else {
|
||||
const IR::U32 data = ir.ReadShared(bit_size, is_signed, addr);
|
||||
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr)};
|
||||
ir.SetVectorReg(dst_reg, data);
|
||||
}
|
||||
}
|
||||
|
@ -41,17 +43,26 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
|
|||
const IR::VectorReg data0{inst.src[1].code};
|
||||
const IR::VectorReg data1{inst.src[2].code};
|
||||
if (is_pair) {
|
||||
ASSERT(bit_size == 32);
|
||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
||||
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
|
||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
|
||||
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
|
||||
} else if (bit_size == 64) {
|
||||
const IR::U64 data = ir.PackUint2x32(
|
||||
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)));
|
||||
const IR::Value data =
|
||||
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
|
||||
ir.WriteShared(bit_size, data, addr);
|
||||
} else {
|
||||
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_BARRIER() {
|
||||
ir.Barrier();
|
||||
}
|
||||
|
||||
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -318,4 +318,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
|
|||
ir.SetScc(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
||||
// This only really exists to let resource tracking pass know
|
||||
// there is an inline cbuf.
|
||||
SetDst(inst.dst[0], ir.Imm32(pc));
|
||||
}
|
||||
|
||||
void Translator::S_ADDC_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -5,20 +5,29 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
|
||||
|
||||
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||
const auto& smrd = inst.control.smrd;
|
||||
ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported");
|
||||
const u32 dword_offset = [&] -> u32 {
|
||||
if (smrd.imm) {
|
||||
return smrd.offset;
|
||||
}
|
||||
if (smrd.offset == SQ_SRC_LITERAL) {
|
||||
return inst.src[1].code;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}();
|
||||
const IR::ScalarReg sbase{inst.src[0].code * 2};
|
||||
const IR::Value base =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
|
||||
IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||
for (u32 i = 0; i < num_dwords; i++) {
|
||||
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i)));
|
||||
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
|
||||
const auto& smrd = inst.control.smrd;
|
||||
const IR::ScalarReg sbase{inst.src[0].code * 2};
|
||||
const IR::U32 dword_offset = [&] -> IR::U32 {
|
||||
|
@ -30,7 +39,9 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
|||
}
|
||||
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
|
||||
}();
|
||||
const IR::Value vsharp = ir.GetScalarReg(sbase);
|
||||
const IR::Value vsharp =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
|
||||
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
|
||||
IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||
for (u32 i = 0; i < num_dwords; i++) {
|
||||
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/io_file.h"
|
||||
#include "common/path_util.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
|
@ -190,7 +193,20 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
|
||||
|
||||
// Parse the assembly to generate a list of attributes.
|
||||
const auto attribs = ParseFetchShader(code);
|
||||
u32 fetch_size{};
|
||||
const auto attribs = ParseFetchShader(code, &fetch_size);
|
||||
|
||||
if (Config::dumpShaders()) {
|
||||
using namespace Common::FS;
|
||||
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
|
||||
if (!std::filesystem::exists(dump_dir)) {
|
||||
std::filesystem::create_directories(dump_dir);
|
||||
}
|
||||
const auto filename = fmt::format("vs_fetch_{:#018x}.bin", info.pgm_hash);
|
||||
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
||||
file.WriteRaw<u8>(code, fetch_size);
|
||||
}
|
||||
|
||||
for (const auto& attrib : attribs) {
|
||||
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
|
||||
IR::VectorReg dst_reg{attrib.dest_vgpr};
|
||||
|
@ -224,9 +240,9 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
attrib.instance_data);
|
||||
}
|
||||
|
||||
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
|
||||
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
|
||||
info.vs_inputs.push_back({
|
||||
.fmt = buffer.num_format,
|
||||
.fmt = buffer.GetNumberFmt(),
|
||||
.binding = attrib.semantic,
|
||||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||
.sgpr_base = attrib.sgpr_base,
|
||||
|
@ -236,12 +252,13 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info) {
|
||||
if (inst_list.empty()) {
|
||||
return;
|
||||
}
|
||||
Translator translator{block, info};
|
||||
for (const auto& inst : inst_list) {
|
||||
block_base += inst.length;
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_MOVK_I32:
|
||||
translator.S_MOVK(inst);
|
||||
|
@ -345,6 +362,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_BFREV_B32:
|
||||
translator.V_BFREV_B32(inst);
|
||||
break;
|
||||
case Opcode::V_LDEXP_F32:
|
||||
translator.V_LDEXP_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FRACT_F32:
|
||||
translator.V_FRACT_F32(inst);
|
||||
break;
|
||||
|
@ -374,8 +394,40 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::IMAGE_SAMPLE_LZ:
|
||||
case Opcode::IMAGE_SAMPLE:
|
||||
case Opcode::IMAGE_SAMPLE_L:
|
||||
case Opcode::IMAGE_SAMPLE_C_O:
|
||||
case Opcode::IMAGE_SAMPLE_B:
|
||||
translator.IMAGE_SAMPLE(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_ADD:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Add, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_AND:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::And, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_OR:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Or, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_XOR:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Xor, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_UMAX:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Umax, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_SMAX:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Smax, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_UMIN:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Umin, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_SMIN:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Smin, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_INC:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Inc, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_DEC:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Dec, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_GET_LOD:
|
||||
translator.IMAGE_GET_LOD(inst);
|
||||
break;
|
||||
|
@ -457,9 +509,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CMP_NGT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NGE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LT_U32:
|
||||
translator.S_CMP(ConditionOp::LT, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LE_U32:
|
||||
translator.S_CMP(ConditionOp::LE, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LG_U32:
|
||||
translator.S_CMP(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
|
@ -487,6 +545,12 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CNDMASK_B32:
|
||||
translator.V_CNDMASK_B32(inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_X:
|
||||
translator.BUFFER_LOAD_FORMAT(1, true, inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XY:
|
||||
translator.BUFFER_LOAD_FORMAT(2, true, inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
|
||||
translator.BUFFER_LOAD_FORMAT(3, true, inst);
|
||||
break;
|
||||
|
@ -581,6 +645,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CVT_I32_F32:
|
||||
translator.V_CVT_I32_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_FLR_I32_F32:
|
||||
translator.V_CVT_FLR_I32_F32(inst);
|
||||
break;
|
||||
case Opcode::V_SUBREV_F32:
|
||||
translator.V_SUBREV_F32(inst);
|
||||
break;
|
||||
|
@ -715,6 +782,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
translator.V_MAD_I32_I24(inst);
|
||||
break;
|
||||
case Opcode::V_MUL_I32_I24:
|
||||
case Opcode::V_MUL_U32_U24:
|
||||
translator.V_MUL_I32_I24(inst);
|
||||
break;
|
||||
case Opcode::V_SUB_I32:
|
||||
|
@ -771,6 +839,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CMP_NE_U64:
|
||||
translator.V_CMP_NE_U64(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_CLASS_F32:
|
||||
translator.V_CMP_CLASS_F32(inst);
|
||||
break;
|
||||
case Opcode::V_TRUNC_F32:
|
||||
translator.V_TRUNC_F32(inst);
|
||||
break;
|
||||
|
@ -786,7 +857,11 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_ADD_U32:
|
||||
translator.S_ADD_U32(inst);
|
||||
break;
|
||||
case Opcode::S_ADDC_U32:
|
||||
translator.S_ADDC_U32(inst);
|
||||
break;
|
||||
case Opcode::S_SUB_U32:
|
||||
case Opcode::S_SUB_I32:
|
||||
translator.S_SUB_U32(inst);
|
||||
break;
|
||||
// TODO: Separate implementation for legacy variants.
|
||||
|
@ -809,9 +884,30 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::IMAGE_GET_RESINFO:
|
||||
translator.IMAGE_GET_RESINFO(inst);
|
||||
break;
|
||||
case Opcode::S_BARRIER:
|
||||
translator.S_BARRIER();
|
||||
break;
|
||||
case Opcode::S_TTRACEDATA:
|
||||
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||
break;
|
||||
case Opcode::DS_READ_B32:
|
||||
translator.DS_READ(32, false, false, inst);
|
||||
break;
|
||||
case Opcode::DS_READ2_B32:
|
||||
translator.DS_READ(32, false, true, inst);
|
||||
break;
|
||||
case Opcode::DS_WRITE_B32:
|
||||
translator.DS_WRITE(32, false, false, inst);
|
||||
break;
|
||||
case Opcode::DS_WRITE2_B32:
|
||||
translator.DS_WRITE(32, false, true, inst);
|
||||
break;
|
||||
case Opcode::V_READFIRSTLANE_B32:
|
||||
translator.V_READFIRSTLANE_B32(inst);
|
||||
break;
|
||||
case Opcode::S_GETPC_B64:
|
||||
translator.S_GETPC_B64(block_base, inst);
|
||||
break;
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
|
|
|
@ -26,6 +26,25 @@ enum class ConditionOp : u32 {
|
|||
TRU,
|
||||
};
|
||||
|
||||
enum class AtomicOp : u32 {
|
||||
Swap,
|
||||
CmpSwap,
|
||||
Add,
|
||||
Sub,
|
||||
Smin,
|
||||
Umin,
|
||||
Smax,
|
||||
Umax,
|
||||
And,
|
||||
Or,
|
||||
Xor,
|
||||
Inc,
|
||||
Dec,
|
||||
FCmpSwap,
|
||||
Fmin,
|
||||
Fmax,
|
||||
};
|
||||
|
||||
enum class NegateMode : u32 {
|
||||
None,
|
||||
Src1,
|
||||
|
@ -61,6 +80,8 @@ public:
|
|||
void S_BREV_B32(const GcnInst& inst);
|
||||
void S_ADD_U32(const GcnInst& inst);
|
||||
void S_SUB_U32(const GcnInst& inst);
|
||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||
void S_ADDC_U32(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
|
@ -133,6 +154,9 @@ public:
|
|||
void V_NOT_B32(const GcnInst& inst);
|
||||
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
|
||||
void V_BFREV_B32(const GcnInst& inst);
|
||||
void V_LDEXP_F32(const GcnInst& inst);
|
||||
void V_CVT_FLR_I32_F32(const GcnInst& inst);
|
||||
void V_CMP_CLASS_F32(const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
|
@ -145,6 +169,8 @@ public:
|
|||
void DS_SWIZZLE_B32(const GcnInst& inst);
|
||||
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
void V_READFIRSTLANE_B32(const GcnInst& inst);
|
||||
void S_BARRIER();
|
||||
|
||||
// MIMG
|
||||
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
||||
|
@ -153,6 +179,7 @@ public:
|
|||
void IMAGE_STORE(const GcnInst& inst);
|
||||
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
||||
void IMAGE_GET_LOD(const GcnInst& inst);
|
||||
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
|
||||
|
||||
// Export
|
||||
void EXP(const GcnInst& inst);
|
||||
|
@ -167,6 +194,6 @@ private:
|
|||
static std::array<bool, IR::NumScalarRegs> exec_contexts;
|
||||
};
|
||||
|
||||
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -28,7 +28,8 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
|
|||
|
||||
void Translator::V_CVT_F32_F16(const GcnInst& inst) {
|
||||
const IR::U32 src0 = GetSrc(inst.src[0]);
|
||||
SetDst(inst.dst[0], ir.ConvertUToF(32, 16, src0));
|
||||
const IR::U16 src0l = ir.UConvert(16, src0);
|
||||
SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
|
||||
}
|
||||
|
||||
void Translator::V_MUL_F32(const GcnInst& inst) {
|
||||
|
@ -50,11 +51,14 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
|
|||
};
|
||||
const bool has_flt_source =
|
||||
is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field);
|
||||
const IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
|
||||
IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
|
||||
IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source);
|
||||
if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) {
|
||||
src1 = ir.BitCast<IR::F32, IR::U32>(src1);
|
||||
}
|
||||
if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) {
|
||||
src0 = ir.BitCast<IR::F32, IR::U32>(src0);
|
||||
}
|
||||
const IR::Value result = ir.Select(flag, src1, src0);
|
||||
ir.SetVectorReg(dst_reg, IR::U32F32{result});
|
||||
}
|
||||
|
@ -502,4 +506,19 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.BitReverse(src0));
|
||||
}
|
||||
|
||||
void Translator::V_LDEXP_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0)));
|
||||
}
|
||||
|
||||
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -212,10 +212,15 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
|
|||
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
|
||||
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
||||
|
||||
ASSERT(mimg.dmask == 0xF);
|
||||
const IR::Value value = ir.CompositeConstruct(
|
||||
ir.GetVectorReg<IR::F32>(data_reg), ir.GetVectorReg<IR::F32>(data_reg + 1),
|
||||
ir.GetVectorReg<IR::F32>(data_reg + 2), ir.GetVectorReg<IR::F32>(data_reg + 3));
|
||||
boost::container::static_vector<IR::F32, 4> comps;
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
if (((mimg.dmask >> i) & 1) == 0) {
|
||||
comps.push_back(ir.Imm32(0.f));
|
||||
continue;
|
||||
}
|
||||
comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
|
||||
}
|
||||
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
|
||||
ir.ImageWrite(handle, body, value, {});
|
||||
}
|
||||
|
||||
|
@ -245,7 +250,10 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst
|
|||
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
|
||||
}
|
||||
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, ir.GetScalarReg(sharp), address, info);
|
||||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
|
||||
const IR::VectorReg dst_reg{inst.src[1].code};
|
||||
if (num_dwords == 1) {
|
||||
ir.SetVectorReg(dst_reg, IR::F32{value});
|
||||
|
@ -304,7 +312,10 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
|
|||
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
|
||||
break;
|
||||
}
|
||||
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
|
||||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
ir.StoreBuffer(num_dwords, handle, address, value, info);
|
||||
}
|
||||
|
||||
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
|
||||
|
@ -322,4 +333,48 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
|
|||
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
|
||||
}
|
||||
|
||||
void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
IR::VectorReg val_reg{inst.dst[0].code};
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||
|
||||
const IR::Value value = ir.GetVectorReg(val_reg);
|
||||
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
|
||||
const IR::Value body =
|
||||
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
|
||||
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
||||
const IR::Value prev = [&] {
|
||||
switch (op) {
|
||||
case AtomicOp::Swap:
|
||||
return ir.ImageAtomicExchange(handle, body, value, {});
|
||||
case AtomicOp::Add:
|
||||
return ir.ImageAtomicIAdd(handle, body, value, {});
|
||||
case AtomicOp::Smin:
|
||||
return ir.ImageAtomicIMin(handle, body, value, true, {});
|
||||
case AtomicOp::Umin:
|
||||
return ir.ImageAtomicUMin(handle, body, value, {});
|
||||
case AtomicOp::Smax:
|
||||
return ir.ImageAtomicIMax(handle, body, value, true, {});
|
||||
case AtomicOp::Umax:
|
||||
return ir.ImageAtomicUMax(handle, body, value, {});
|
||||
case AtomicOp::And:
|
||||
return ir.ImageAtomicAnd(handle, body, value, {});
|
||||
case AtomicOp::Or:
|
||||
return ir.ImageAtomicOr(handle, body, value, {});
|
||||
case AtomicOp::Xor:
|
||||
return ir.ImageAtomicXor(handle, body, value, {});
|
||||
case AtomicOp::Inc:
|
||||
return ir.ImageAtomicInc(handle, body, value, {});
|
||||
case AtomicOp::Dec:
|
||||
return ir.ImageAtomicDec(handle, body, value, {});
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
if (mimg.glc) {
|
||||
ir.SetVectorReg(val_reg, IR::U32{prev});
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
#pragma once
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
|
@ -88,10 +88,10 @@ constexpr size_t NumParams = 32;
|
|||
[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
|
||||
const int result{static_cast<int>(attr) + num};
|
||||
if (result > static_cast<int>(Attribute::Param31)) {
|
||||
throw LogicError("Overflow on register arithmetic");
|
||||
UNREACHABLE_MSG("Overflow on register arithmetic");
|
||||
}
|
||||
if (result < static_cast<int>(Attribute::RenderTarget0)) {
|
||||
throw LogicError("Underflow on register arithmetic");
|
||||
UNREACHABLE_MSG("Underflow on register arithmetic");
|
||||
}
|
||||
return static_cast<Attribute>(result);
|
||||
}
|
||||
|
|
|
@ -39,10 +39,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
|
|||
|
||||
void Block::AddBranch(Block* block) {
|
||||
if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
|
||||
throw LogicError("Successor already inserted");
|
||||
UNREACHABLE_MSG("Successor already inserted");
|
||||
}
|
||||
if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
|
||||
throw LogicError("Predecessor already inserted");
|
||||
UNREACHABLE_MSG("Predecessor already inserted");
|
||||
}
|
||||
imm_successors.push_back(block);
|
||||
block->imm_predecessors.push_back(this);
|
||||
|
|
|
@ -115,6 +115,18 @@ void IREmitter::Discard() {
|
|||
Inst(Opcode::Discard);
|
||||
}
|
||||
|
||||
void IREmitter::Barrier() {
|
||||
Inst(Opcode::Barrier);
|
||||
}
|
||||
|
||||
void IREmitter::WorkgroupMemoryBarrier() {
|
||||
Inst(Opcode::WorkgroupMemoryBarrier);
|
||||
}
|
||||
|
||||
void IREmitter::DeviceMemoryBarrier() {
|
||||
Inst(Opcode::DeviceMemoryBarrier);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetUserData(IR::ScalarReg reg) {
|
||||
return Inst<U32>(Opcode::GetUserData, reg);
|
||||
}
|
||||
|
@ -200,6 +212,10 @@ U1 IREmitter::GetVcc() {
|
|||
return Inst<U1>(Opcode::GetVcc);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetSccLo() {
|
||||
return Inst<U32>(Opcode::GetSccLo);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetVccLo() {
|
||||
return Inst<U32>(Opcode::GetVccLo);
|
||||
}
|
||||
|
@ -220,6 +236,10 @@ void IREmitter::SetVcc(const U1& value) {
|
|||
Inst(Opcode::SetVcc, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetSccLo(const U32& value) {
|
||||
Inst(Opcode::SetSccLo, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetVccLo(const U32& value) {
|
||||
Inst(Opcode::SetVccLo, value);
|
||||
}
|
||||
|
@ -240,22 +260,25 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
|
|||
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
|
||||
}
|
||||
|
||||
U32U64 IREmitter::ReadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
/*switch (bit_size) {
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
return Inst<U32>(is_signed ? Opcode::ReadSharedS8 : Opcode::ReadSharedU8, offset);
|
||||
return Inst<U32>(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
|
||||
case 16:
|
||||
return Inst<U32>(is_signed ? Opcode::ReadSharedS16 : Opcode::ReadSharedU16, offset);
|
||||
return Inst<U32>(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
|
||||
case 32:
|
||||
return Inst<U32>(Opcode::ReadSharedU32, offset);
|
||||
return Inst<U32>(Opcode::LoadSharedU32, offset);
|
||||
case 64:
|
||||
return Inst<U64>(Opcode::ReadSharedU64, offset);
|
||||
return Inst<U64>(Opcode::LoadSharedU64, offset);
|
||||
case 128:
|
||||
return Inst(Opcode::LoadSharedU128, offset);
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);*/
|
||||
}
|
||||
|
||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
|
||||
/*switch (bit_size) {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
Inst(Opcode::WriteSharedU8, offset, value);
|
||||
break;
|
||||
|
@ -268,9 +291,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
|
|||
case 64:
|
||||
Inst(Opcode::WriteSharedU64, offset, value);
|
||||
break;
|
||||
case 128:
|
||||
Inst(Opcode::WriteSharedU128, offset, value);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
||||
}*/
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
|
||||
|
@ -603,6 +629,10 @@ F32 IREmitter::FPExp2(const F32& value) {
|
|||
return Inst<F32>(Opcode::FPExp2, value);
|
||||
}
|
||||
|
||||
F32 IREmitter::FPLdexp(const F32& value, const U32& exp) {
|
||||
return Inst<F32>(Opcode::FPLdexp, value, exp);
|
||||
}
|
||||
|
||||
F32 IREmitter::FPLog2(const F32& value) {
|
||||
return Inst<F32>(Opcode::FPLog2, value);
|
||||
}
|
||||
|
@ -810,6 +840,17 @@ U1 IREmitter::FPIsNan(const F32F64& value) {
|
|||
}
|
||||
}
|
||||
|
||||
U1 IREmitter::FPIsInf(const F32F64& value) {
|
||||
switch (value.Type()) {
|
||||
case Type::F32:
|
||||
return Inst<U1>(Opcode::FPIsInf32, value);
|
||||
case Type::F64:
|
||||
return Inst<U1>(Opcode::FPIsInf64, value);
|
||||
default:
|
||||
ThrowInvalidType(value.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) {
|
||||
if (lhs.Type() != rhs.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
|
||||
|
@ -866,6 +907,18 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
|
|||
}
|
||||
}
|
||||
|
||||
Value IREmitter::IAddCary(const U32& a, const U32& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
}
|
||||
switch (a.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::IAddCary32, a, b);
|
||||
default:
|
||||
ThrowInvalidType(a.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
|
@ -1142,6 +1195,13 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s
|
|||
}
|
||||
|
||||
U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
|
||||
switch (result_bitsize) {
|
||||
case 16:
|
||||
switch (value.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U16>(Opcode::ConvertU16U32, value);
|
||||
}
|
||||
}
|
||||
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
|
||||
}
|
||||
|
||||
|
@ -1163,6 +1223,73 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
|
|||
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicIAdd32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicSMin32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicUMin32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
|
||||
bool is_signed, TextureInstInfo info) {
|
||||
return is_signed ? ImageAtomicSMin(handle, coords, value, info)
|
||||
: ImageAtomicUMin(handle, coords, value, info);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicSMax32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
|
||||
bool is_signed, TextureInstInfo info) {
|
||||
return is_signed ? ImageAtomicSMax(handle, coords, value, info)
|
||||
: ImageAtomicUMax(handle, coords, value, info);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicInc32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicDec32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicAnd32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicOr32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicXor32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
|
||||
const Value& offset, const F32& lod_clamp,
|
||||
TextureInstInfo info) {
|
||||
|
|
|
@ -43,6 +43,10 @@ public:
|
|||
void Epilogue();
|
||||
void Discard();
|
||||
|
||||
void Barrier();
|
||||
void WorkgroupMemoryBarrier();
|
||||
void DeviceMemoryBarrier();
|
||||
|
||||
[[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
|
||||
[[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
|
||||
void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);
|
||||
|
@ -60,11 +64,13 @@ public:
|
|||
[[nodiscard]] U1 GetScc();
|
||||
[[nodiscard]] U1 GetExec();
|
||||
[[nodiscard]] U1 GetVcc();
|
||||
[[nodiscard]] U32 GetSccLo();
|
||||
[[nodiscard]] U32 GetVccLo();
|
||||
[[nodiscard]] U32 GetVccHi();
|
||||
void SetScc(const U1& value);
|
||||
void SetExec(const U1& value);
|
||||
void SetVcc(const U1& value);
|
||||
void SetSccLo(const U32& value);
|
||||
void SetVccLo(const U32& value);
|
||||
void SetVccHi(const U32& value);
|
||||
|
||||
|
@ -74,7 +80,7 @@ public:
|
|||
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
||||
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
||||
|
||||
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
|
||||
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
|
||||
|
@ -120,6 +126,7 @@ public:
|
|||
[[nodiscard]] F32 FPSin(const F32& value);
|
||||
[[nodiscard]] F32 FPExp2(const F32& value);
|
||||
[[nodiscard]] F32 FPLog2(const F32& value);
|
||||
[[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
|
||||
[[nodiscard]] F32F64 FPRecip(const F32F64& value);
|
||||
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
|
||||
[[nodiscard]] F32 FPSqrt(const F32& value);
|
||||
|
@ -139,14 +146,16 @@ public:
|
|||
[[nodiscard]] U1 FPLessThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||
[[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||
[[nodiscard]] U1 FPIsNan(const F32F64& value);
|
||||
[[nodiscard]] U1 FPIsInf(const F32F64& value);
|
||||
[[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
|
||||
[[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
|
||||
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs);
|
||||
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);
|
||||
|
||||
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] U32U64 INeg(const U32U64& value);
|
||||
|
@ -199,6 +208,33 @@ public:
|
|||
[[nodiscard]] U16U32U64 UConvert(size_t result_bitsize, const U16U32U64& value);
|
||||
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
|
||||
|
||||
[[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
|
||||
const Value& value, bool is_signed, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
|
||||
const Value& value, bool is_signed, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
|
||||
const F32& bias, const Value& offset,
|
||||
const F32& lod_clamp, TextureInstInfo info);
|
||||
|
|
|
@ -40,6 +40,9 @@ Inst::~Inst() {
|
|||
|
||||
bool Inst::MayHaveSideEffects() const noexcept {
|
||||
switch (op) {
|
||||
case Opcode::Barrier:
|
||||
case Opcode::WorkgroupMemoryBarrier:
|
||||
case Opcode::DeviceMemoryBarrier:
|
||||
case Opcode::ConditionRef:
|
||||
case Opcode::Reference:
|
||||
case Opcode::PhiMove:
|
||||
|
@ -52,7 +55,23 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::StoreBufferF32x3:
|
||||
case Opcode::StoreBufferF32x4:
|
||||
case Opcode::StoreBufferU32:
|
||||
case Opcode::WriteSharedU128:
|
||||
case Opcode::WriteSharedU64:
|
||||
case Opcode::WriteSharedU32:
|
||||
case Opcode::WriteSharedU16:
|
||||
case Opcode::WriteSharedU8:
|
||||
case Opcode::ImageWrite:
|
||||
case Opcode::ImageAtomicIAdd32:
|
||||
case Opcode::ImageAtomicSMin32:
|
||||
case Opcode::ImageAtomicUMin32:
|
||||
case Opcode::ImageAtomicSMax32:
|
||||
case Opcode::ImageAtomicUMax32:
|
||||
case Opcode::ImageAtomicInc32:
|
||||
case Opcode::ImageAtomicDec32:
|
||||
case Opcode::ImageAtomicAnd32:
|
||||
case Opcode::ImageAtomicOr32:
|
||||
case Opcode::ImageAtomicXor32:
|
||||
case Opcode::ImageAtomicExchange32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -61,7 +80,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
|
||||
bool Inst::AreAllArgsImmediates() const {
|
||||
if (op == Opcode::Phi) {
|
||||
throw LogicError("Testing for all arguments are immediates on phi instruction");
|
||||
UNREACHABLE_MSG("Testing for all arguments are immediates on phi instruction");
|
||||
}
|
||||
return std::all_of(args.begin(), args.begin() + NumArgs(),
|
||||
[](const IR::Value& value) { return value.IsImmediate(); });
|
||||
|
@ -91,7 +110,7 @@ void Inst::SetArg(size_t index, Value value) {
|
|||
|
||||
Block* Inst::PhiBlock(size_t index) const {
|
||||
if (op != Opcode::Phi) {
|
||||
throw LogicError("{} is not a Phi instruction", op);
|
||||
UNREACHABLE_MSG("{} is not a Phi instruction", op);
|
||||
}
|
||||
if (index >= phi_args.size()) {
|
||||
throw InvalidArgument("Out of bounds argument index {} in phi instruction");
|
||||
|
@ -143,7 +162,7 @@ void Inst::ReplaceUsesWith(Value replacement) {
|
|||
|
||||
void Inst::ReplaceOpcode(IR::Opcode opcode) {
|
||||
if (opcode == IR::Opcode::Phi) {
|
||||
throw LogicError("Cannot transition into Phi");
|
||||
UNREACHABLE_MSG("Cannot transition into Phi");
|
||||
}
|
||||
if (op == Opcode::Phi) {
|
||||
// Transition out of phi arguments into non-phi
|
||||
|
|
|
@ -19,6 +19,25 @@ OPCODE(ReadConst, U32, U32x
|
|||
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
|
||||
OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
|
||||
|
||||
// Barriers
|
||||
OPCODE(Barrier, Void, )
|
||||
OPCODE(WorkgroupMemoryBarrier, Void, )
|
||||
OPCODE(DeviceMemoryBarrier, Void, )
|
||||
|
||||
// Shared memory operations
|
||||
OPCODE(LoadSharedU8, U32, U32, )
|
||||
OPCODE(LoadSharedS8, U32, U32, )
|
||||
OPCODE(LoadSharedU16, U32, U32, )
|
||||
OPCODE(LoadSharedS16, U32, U32, )
|
||||
OPCODE(LoadSharedU32, U32, U32, )
|
||||
OPCODE(LoadSharedU64, U32x2, U32, )
|
||||
OPCODE(LoadSharedU128, U32x4, U32, )
|
||||
OPCODE(WriteSharedU8, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU16, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU32, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU64, Void, U32, U32x2, )
|
||||
OPCODE(WriteSharedU128, Void, U32, U32x4, )
|
||||
|
||||
// Context getters/setters
|
||||
OPCODE(GetUserData, U32, ScalarReg, )
|
||||
OPCODE(GetThreadBitScalarReg, U1, ScalarReg, )
|
||||
|
@ -37,11 +56,13 @@ OPCODE(SetAttribute, Void, Attr
|
|||
OPCODE(GetScc, U1, Void, )
|
||||
OPCODE(GetExec, U1, Void, )
|
||||
OPCODE(GetVcc, U1, Void, )
|
||||
OPCODE(GetSccLo, U32, Void, )
|
||||
OPCODE(GetVccLo, U32, Void, )
|
||||
OPCODE(GetVccHi, U32, Void, )
|
||||
OPCODE(SetScc, Void, U1, )
|
||||
OPCODE(SetExec, Void, U1, )
|
||||
OPCODE(SetVcc, Void, U1, )
|
||||
OPCODE(SetSccLo, Void, U32, )
|
||||
OPCODE(SetVccLo, Void, U32, )
|
||||
OPCODE(SetVccHi, Void, U32, )
|
||||
|
||||
|
@ -148,6 +169,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
|
|||
OPCODE(FPSqrt, F32, F32, )
|
||||
OPCODE(FPSin, F32, F32, )
|
||||
OPCODE(FPExp2, F32, F32, )
|
||||
OPCODE(FPLdexp, F32, F32, U32, )
|
||||
OPCODE(FPCos, F32, F32, )
|
||||
OPCODE(FPLog2, F32, F32, )
|
||||
OPCODE(FPSaturate32, F32, F32, )
|
||||
|
@ -190,10 +212,13 @@ OPCODE(FPUnordGreaterThanEqual32, U1, F32,
|
|||
OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
|
||||
OPCODE(FPIsNan32, U1, F32, )
|
||||
OPCODE(FPIsNan64, U1, F64, )
|
||||
OPCODE(FPIsInf32, U1, F32, )
|
||||
OPCODE(FPIsInf64, U1, F64, )
|
||||
|
||||
// Integer operations
|
||||
OPCODE(IAdd32, U32, U32, U32, )
|
||||
OPCODE(IAdd64, U64, U64, U64, )
|
||||
OPCODE(IAddCary32, U32x2, U32, U32, )
|
||||
OPCODE(ISub32, U32, U32, U32, )
|
||||
OPCODE(ISub64, U64, U64, U64, )
|
||||
OPCODE(IMul32, U32, U32, U32, )
|
||||
|
@ -258,6 +283,7 @@ OPCODE(ConvertF32U32, F32, U32,
|
|||
OPCODE(ConvertF64S32, F64, U32, )
|
||||
OPCODE(ConvertF64U32, F64, U32, )
|
||||
OPCODE(ConvertF32U16, F32, U16, )
|
||||
OPCODE(ConvertU16U32, U16, U32, )
|
||||
|
||||
// Image operations
|
||||
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
|
||||
|
@ -273,6 +299,19 @@ OPCODE(ImageGradient, F32x4, Opaq
|
|||
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
|
||||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
||||
|
||||
// Image atomic operations
|
||||
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
|
||||
|
||||
// Warp operations
|
||||
OPCODE(LaneId, U32, )
|
||||
OPCODE(QuadShuffle, U32, U32, U32 )
|
||||
|
|
|
@ -324,8 +324,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
case IR::Opcode::BitFieldUExtract:
|
||||
FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
|
||||
if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
|
||||
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
|
||||
base, shift, count);
|
||||
UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
|
||||
base, shift, count);
|
||||
}
|
||||
return (base >> shift) & ((1U << count) - 1);
|
||||
});
|
||||
|
@ -336,8 +336,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
const size_t left_shift{32 - back_shift};
|
||||
const size_t right_shift{static_cast<size_t>(32 - count)};
|
||||
if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
|
||||
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
|
||||
base, shift, count);
|
||||
UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
|
||||
base, shift, count);
|
||||
}
|
||||
return static_cast<u32>((base << left_shift) >> right_shift);
|
||||
});
|
||||
|
@ -345,8 +345,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
case IR::Opcode::BitFieldInsert:
|
||||
FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
|
||||
if (bits >= 32 || offset >= 32) {
|
||||
throw LogicError("Undefined result in {}({}, {}, {}, {})",
|
||||
IR::Opcode::BitFieldInsert, base, insert, offset, bits);
|
||||
UNREACHABLE_MSG("Undefined result in {}({}, {}, {}, {})",
|
||||
IR::Opcode::BitFieldInsert, base, insert, offset, bits);
|
||||
}
|
||||
return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
|
||||
});
|
||||
|
|
|
@ -89,6 +89,17 @@ bool IsImageInstruction(const IR::Inst& inst) {
|
|||
case IR::Opcode::ImageGradient:
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageWrite:
|
||||
case IR::Opcode::ImageAtomicIAdd32:
|
||||
case IR::Opcode::ImageAtomicSMin32:
|
||||
case IR::Opcode::ImageAtomicUMin32:
|
||||
case IR::Opcode::ImageAtomicSMax32:
|
||||
case IR::Opcode::ImageAtomicUMax32:
|
||||
case IR::Opcode::ImageAtomicInc32:
|
||||
case IR::Opcode::ImageAtomicDec32:
|
||||
case IR::Opcode::ImageAtomicAnd32:
|
||||
case IR::Opcode::ImageAtomicOr32:
|
||||
case IR::Opcode::ImageAtomicXor32:
|
||||
case IR::Opcode::ImageAtomicExchange32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -99,6 +110,17 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
|
|||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageWrite:
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageAtomicIAdd32:
|
||||
case IR::Opcode::ImageAtomicSMin32:
|
||||
case IR::Opcode::ImageAtomicUMin32:
|
||||
case IR::Opcode::ImageAtomicSMax32:
|
||||
case IR::Opcode::ImageAtomicUMax32:
|
||||
case IR::Opcode::ImageAtomicInc32:
|
||||
case IR::Opcode::ImageAtomicDec32:
|
||||
case IR::Opcode::ImageAtomicAnd32:
|
||||
case IR::Opcode::ImageAtomicOr32:
|
||||
case IR::Opcode::ImageAtomicXor32:
|
||||
case IR::Opcode::ImageAtomicExchange32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -115,7 +137,8 @@ public:
|
|||
u32 Add(const BufferResource& desc) {
|
||||
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset;
|
||||
desc.dword_offset == existing.dword_offset &&
|
||||
desc.inline_cbuf == existing.inline_cbuf;
|
||||
})};
|
||||
auto& buffer = buffer_resources[index];
|
||||
ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
|
||||
|
@ -196,20 +219,70 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
|||
};
|
||||
}
|
||||
|
||||
static constexpr size_t MaxUboSize = 65536;
|
||||
|
||||
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
||||
AmdGpu::Buffer& cbuf) {
|
||||
|
||||
// Assuming V# is in UD s[32:35]
|
||||
// The next pattern:
|
||||
// s_getpc_b64 s[32:33]
|
||||
// s_add_u32 s32, <const>, s32
|
||||
// s_addc_u32 s33, 0, s33
|
||||
// s_mov_b32 s35, <const>
|
||||
// s_movk_i32 s34, <const>
|
||||
// buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
|
||||
// is used to define an inline constant buffer
|
||||
|
||||
IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
IR::Inst* p0 = handle->Arg(0).InstRecursive();
|
||||
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
|
||||
!p0->Arg(1).IsImmediate()) {
|
||||
return -1;
|
||||
}
|
||||
IR::Inst* p1 = handle->Arg(1).InstRecursive();
|
||||
if (p1->GetOpcode() != IR::Opcode::IAdd32) {
|
||||
return -1;
|
||||
}
|
||||
if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
|
||||
return -1;
|
||||
}
|
||||
// We have found this pattern. Build the sharp.
|
||||
std::array<u64, 2> buffer;
|
||||
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
|
||||
buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32;
|
||||
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
|
||||
// Assign a binding to this sharp.
|
||||
return descriptors.Add(BufferResource{
|
||||
.sgpr_base = std::numeric_limits<u32>::max(),
|
||||
.dword_offset = 0,
|
||||
.stride = cbuf.GetStride(),
|
||||
.num_records = u32(cbuf.num_records),
|
||||
.used_types = BufferDataType(inst),
|
||||
.inline_cbuf = cbuf,
|
||||
.is_storage = IsBufferStore(inst) || cbuf.GetSize() > MaxUboSize,
|
||||
});
|
||||
}
|
||||
|
||||
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
static constexpr size_t MaxUboSize = 65536;
|
||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer);
|
||||
const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.sgpr_base = sharp.sgpr_base,
|
||||
.dword_offset = sharp.dword_offset,
|
||||
.stride = buffer.GetStride(),
|
||||
.num_records = u32(buffer.num_records),
|
||||
.used_types = BufferDataType(inst),
|
||||
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
|
||||
});
|
||||
s32 binding{};
|
||||
AmdGpu::Buffer buffer;
|
||||
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
|
||||
IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer);
|
||||
buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
|
||||
binding = descriptors.Add(BufferResource{
|
||||
.sgpr_base = sharp.sgpr_base,
|
||||
.dword_offset = sharp.dword_offset,
|
||||
.stride = buffer.GetStride(),
|
||||
.num_records = u32(buffer.num_records),
|
||||
.used_types = BufferDataType(inst),
|
||||
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
|
||||
});
|
||||
}
|
||||
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
// Replace handle with binding index in buffer resource list.
|
||||
|
@ -217,7 +290,10 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
if (inst_info.is_typed) {
|
||||
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
|
||||
(inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32 ||
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32));
|
||||
}
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
|
||||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
|
||||
|
|
|
@ -16,6 +16,16 @@ void Visit(Info& info, IR::Inst& inst) {
|
|||
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::LoadSharedS8:
|
||||
case IR::Opcode::LoadSharedU8:
|
||||
case IR::Opcode::WriteSharedU8:
|
||||
info.uses_shared_u8 = true;
|
||||
break;
|
||||
case IR::Opcode::LoadSharedS16:
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
info.uses_shared_u16 = true;
|
||||
break;
|
||||
case IR::Opcode::QuadShuffle:
|
||||
info.uses_group_quad = true;
|
||||
break;
|
||||
|
|
|
@ -32,6 +32,7 @@ struct SccFlagTag : FlagTag {};
|
|||
struct ExecFlagTag : FlagTag {};
|
||||
struct VccFlagTag : FlagTag {};
|
||||
struct VccLoTag : FlagTag {};
|
||||
struct SccLoTag : FlagTag {};
|
||||
struct VccHiTag : FlagTag {};
|
||||
|
||||
struct GotoVariable : FlagTag {
|
||||
|
@ -44,7 +45,7 @@ struct GotoVariable : FlagTag {
|
|||
};
|
||||
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
|
||||
VccFlagTag, VccLoTag, VccHiTag>;
|
||||
VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
|
||||
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
||||
|
||||
struct DefTable {
|
||||
|
@ -83,6 +84,13 @@ struct DefTable {
|
|||
exec_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, SccLoTag) {
|
||||
return scc_lo_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
|
||||
scc_lo_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, VccLoTag) {
|
||||
return vcc_lo_flag[block];
|
||||
}
|
||||
|
@ -108,6 +116,7 @@ struct DefTable {
|
|||
ValueMap scc_flag;
|
||||
ValueMap exec_flag;
|
||||
ValueMap vcc_flag;
|
||||
ValueMap scc_lo_flag;
|
||||
ValueMap vcc_lo_flag;
|
||||
ValueMap vcc_hi_flag;
|
||||
};
|
||||
|
@ -124,6 +133,10 @@ IR::Opcode UndefOpcode(const VccLoTag&) noexcept {
|
|||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
||||
IR::Opcode UndefOpcode(const SccLoTag&) noexcept {
|
||||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
||||
IR::Opcode UndefOpcode(const VccHiTag&) noexcept {
|
||||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
@ -321,6 +334,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::SetVcc:
|
||||
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetSccLo:
|
||||
pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetVccLo:
|
||||
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
|
@ -350,6 +366,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::GetVcc:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetSccLo:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetVccLo:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
|
||||
break;
|
||||
|
|
|
@ -14,7 +14,7 @@ BlockList PostOrder(const AbstractSyntaxNode& root) {
|
|||
BlockList post_order_blocks;
|
||||
|
||||
if (root.type != AbstractSyntaxNode::Type::Block) {
|
||||
throw LogicError("First node in abstract syntax list root is not a block");
|
||||
UNREACHABLE_MSG("First node in abstract syntax list root is not a block");
|
||||
}
|
||||
Block* const first_block{root.data.block};
|
||||
visited.insert(first_block);
|
||||
|
|
|
@ -3,9 +3,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
@ -428,10 +428,10 @@ template <RegT Reg>
|
|||
[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
|
||||
const int result{static_cast<int>(reg) + num};
|
||||
if (result >= static_cast<int>(Reg::Max)) {
|
||||
throw LogicError("Overflow on register arithmetic");
|
||||
UNREACHABLE_MSG("Overflow on register arithmetic");
|
||||
}
|
||||
if (result < 0) {
|
||||
throw LogicError("Underflow on register arithmetic");
|
||||
UNREACHABLE_MSG("Underflow on register arithmetic");
|
||||
}
|
||||
return static_cast<Reg>(result);
|
||||
}
|
||||
|
|
|
@ -83,7 +83,7 @@ bool Value::operator==(const Value& other) const {
|
|||
case Type::F64x4:
|
||||
break;
|
||||
}
|
||||
throw LogicError("Invalid type {}", type);
|
||||
UNREACHABLE_MSG("Invalid type {}", type);
|
||||
}
|
||||
|
||||
bool Value::operator!=(const Value& other) const {
|
||||
|
|
|
@ -26,44 +26,9 @@ struct Profile {
|
|||
bool support_fp32_signed_zero_nan_preserve{};
|
||||
bool support_fp64_signed_zero_nan_preserve{};
|
||||
bool support_explicit_workgroup_layout{};
|
||||
bool support_vote{};
|
||||
bool support_viewport_mask{};
|
||||
bool support_typeless_image_loads{};
|
||||
bool support_derivative_control{};
|
||||
bool support_geometry_shader_passthrough{};
|
||||
bool support_native_ndc{};
|
||||
bool support_scaled_attributes{};
|
||||
bool support_multi_viewport{};
|
||||
bool support_geometry_streams{};
|
||||
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
|
||||
bool lower_left_origin_mode{};
|
||||
/// Fragment outputs have to be declared even if they are not written to avoid undefined values.
|
||||
/// See Ori and the Blind Forest's main menu for reference.
|
||||
bool need_declared_frag_colors{};
|
||||
/// Prevents fast math optimizations that may cause inaccuracies
|
||||
bool need_fastmath_off{};
|
||||
|
||||
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
|
||||
bool has_broken_spirv_clamp{};
|
||||
/// The Position builtin needs to be wrapped in a struct when used as an input
|
||||
bool has_broken_spirv_position_input{};
|
||||
/// Offset image operands with an unsigned type do not work
|
||||
bool has_broken_unsigned_image_offsets{};
|
||||
/// Signed instructions with unsigned data types are misinterpreted
|
||||
bool has_broken_signed_operations{};
|
||||
/// Float controls break when fp16 is enabled
|
||||
bool has_broken_fp16_float_controls{};
|
||||
/// Ignores SPIR-V ordered vs unordered using GLSL semantics
|
||||
bool ignore_nan_fp_comparisons{};
|
||||
|
||||
/// Maxwell and earlier nVidia architectures have broken robust support
|
||||
bool has_broken_robust{};
|
||||
|
||||
bool lower_left_origin_mode{};
|
||||
u64 min_ssbo_alignment{};
|
||||
|
||||
u32 max_user_clip_distances{};
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <vector>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
|
@ -42,15 +41,45 @@ enum class TextureType : u32 {
|
|||
};
|
||||
constexpr u32 NUM_TEXTURE_TYPES = 7;
|
||||
|
||||
enum class VsOutput : u32 {
|
||||
None,
|
||||
PointSprite,
|
||||
EdgeFlag,
|
||||
KillFlag,
|
||||
GsCutFlag,
|
||||
GsMrtIndex,
|
||||
GsVpIndex,
|
||||
CullDist0,
|
||||
CullDist1,
|
||||
CullDist2,
|
||||
CullDist3,
|
||||
CullDist4,
|
||||
CullDist5,
|
||||
CullDist6,
|
||||
CullDist7,
|
||||
ClipDist0,
|
||||
ClipDist1,
|
||||
ClipDist2,
|
||||
ClipDist3,
|
||||
ClipDist4,
|
||||
ClipDist5,
|
||||
ClipDist6,
|
||||
ClipDist7,
|
||||
};
|
||||
using VsOutputMap = std::array<VsOutput, 4>;
|
||||
|
||||
struct Info;
|
||||
|
||||
struct BufferResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
u32 stride;
|
||||
u32 num_records;
|
||||
IR::Type used_types;
|
||||
AmdGpu::Buffer inline_cbuf;
|
||||
bool is_storage;
|
||||
|
||||
auto operator<=>(const BufferResource&) const = default;
|
||||
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
||||
};
|
||||
using BufferResourceList = boost::container::static_vector<BufferResource, 16>;
|
||||
|
||||
|
@ -123,6 +152,7 @@ struct Info {
|
|||
};
|
||||
AttributeFlags loads{};
|
||||
AttributeFlags stores{};
|
||||
boost::container::static_vector<VsOutputMap, 3> vs_outputs;
|
||||
|
||||
BufferResourceList buffers;
|
||||
ImageResourceList images;
|
||||
|
@ -134,7 +164,12 @@ struct Info {
|
|||
std::span<const u32> user_data;
|
||||
Stage stage;
|
||||
|
||||
uintptr_t pgm_base{};
|
||||
u64 pgm_hash{};
|
||||
u32 shared_memory_size{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_shared_u8{};
|
||||
bool uses_shared_u16{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
|
||||
template <typename T>
|
||||
|
@ -149,6 +184,10 @@ struct Info {
|
|||
}
|
||||
};
|
||||
|
||||
constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexcept {
|
||||
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
template <>
|
||||
|
|
|
@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexBufferSize: {
|
||||
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
|
||||
regs.num_indices = index_size->num_indices;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWrite: {
|
||||
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
||||
break;
|
||||
|
|
|
@ -85,14 +85,14 @@ struct Liverpool {
|
|||
} settings;
|
||||
UserData user_data;
|
||||
|
||||
template <typename T = u8>
|
||||
const T* Address() const {
|
||||
template <typename T = u8*>
|
||||
const T Address() const {
|
||||
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
||||
return reinterpret_cast<const T*>(addr);
|
||||
return reinterpret_cast<const T>(addr);
|
||||
}
|
||||
|
||||
std::span<const u32> Code() const {
|
||||
const u32* code = Address<u32>();
|
||||
const u32* code = Address<u32*>();
|
||||
BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
||||
|
@ -121,20 +121,26 @@ struct Liverpool {
|
|||
BitField<0, 6, u64> num_vgprs;
|
||||
BitField<6, 4, u64> num_sgprs;
|
||||
BitField<33, 5, u64> num_user_regs;
|
||||
BitField<47, 9, u64> lds_dwords;
|
||||
} settings;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 resource_limits;
|
||||
INSERT_PADDING_WORDS(0x2A);
|
||||
UserData user_data;
|
||||
|
||||
template <typename T = u8>
|
||||
const T* Address() const {
|
||||
template <typename T = u8*>
|
||||
const T Address() const {
|
||||
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
||||
return reinterpret_cast<const T*>(addr);
|
||||
return reinterpret_cast<const T>(addr);
|
||||
}
|
||||
|
||||
u32 SharedMemSize() const noexcept {
|
||||
// lds_dwords is in units of 128 dwords. We return bytes.
|
||||
return settings.lds_dwords.Value() * 128 * 4;
|
||||
}
|
||||
|
||||
std::span<const u32> Code() const {
|
||||
const u32* code = Address<u32>();
|
||||
const u32* code = Address<u32*>();
|
||||
BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
||||
|
@ -144,7 +150,7 @@ struct Liverpool {
|
|||
|
||||
template <typename Shader>
|
||||
static constexpr auto* GetBinaryInfo(const Shader& sh) {
|
||||
const auto* code = sh.template Address<u32>();
|
||||
const auto* code = sh.template Address<u32*>();
|
||||
const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
|
||||
ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
|
||||
return bininfo;
|
||||
|
@ -208,6 +214,10 @@ struct Liverpool {
|
|||
BitField<18, 1, u32> use_vtx_render_target_idx;
|
||||
BitField<19, 1, u32> use_vtx_viewport_idx;
|
||||
BitField<20, 1, u32> use_vtx_kill_flag;
|
||||
BitField<21, 1, u32> vs_out_misc_enable;
|
||||
BitField<22, 1, u32> vs_out_ccdist0_enable;
|
||||
BitField<23, 1, u32> vs_out_ccdist1_enable;
|
||||
BitField<25, 1, u32> use_vtx_gs_cut_flag;
|
||||
|
||||
bool IsClipDistEnabled(u32 index) const {
|
||||
return (clip_distance_enable.Value() >> index) & 1;
|
||||
|
@ -469,7 +479,7 @@ struct Liverpool {
|
|||
|
||||
template <typename T = VAddr>
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>(base_addr_lo | u64(base_addr_hi) << 32);
|
||||
return reinterpret_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1021,6 +1031,7 @@ static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
|
|||
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
|
||||
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
|
||||
|
|
|
@ -549,8 +549,8 @@ struct PM4DumpConstRam {
|
|||
u32 addr_hi;
|
||||
|
||||
template <typename T>
|
||||
T* Address() const {
|
||||
return reinterpret_cast<T*>((u64(addr_hi) << 32u) | addr_lo);
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>((u64(addr_hi) << 32u) | addr_lo);
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 Offset() const {
|
||||
|
@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase {
|
|||
u32 addr_hi;
|
||||
};
|
||||
|
||||
struct PM4CmdDrawIndexBufferSize {
|
||||
PM4Type3Header header;
|
||||
u32 num_indices;
|
||||
};
|
||||
|
||||
struct PM4CmdIndirectBuffer {
|
||||
PM4Type3Header header;
|
||||
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned
|
||||
|
|
|
@ -21,32 +21,45 @@ enum class CompSwizzle : u32 {
|
|||
|
||||
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
|
||||
struct Buffer {
|
||||
union {
|
||||
BitField<0, 44, u64> base_address;
|
||||
BitField<48, 14, u64> stride;
|
||||
BitField<62, 1, u64> cache_swizzle;
|
||||
BitField<63, 1, u64> swizzle_enable;
|
||||
};
|
||||
u64 base_address : 44;
|
||||
u64 : 4;
|
||||
u64 stride : 14;
|
||||
u64 cache_swizzle : 1;
|
||||
u64 swizzle_enable : 1;
|
||||
u32 num_records;
|
||||
union {
|
||||
BitField<0, 3, u32> dst_sel_x;
|
||||
BitField<3, 3, u32> dst_sel_y;
|
||||
BitField<6, 3, u32> dst_sel_z;
|
||||
BitField<9, 3, u32> dst_sel_w;
|
||||
BitField<0, 12, u32> dst_sel;
|
||||
BitField<12, 3, NumberFormat> num_format;
|
||||
BitField<15, 4, DataFormat> data_format;
|
||||
BitField<19, 2, u32> element_size;
|
||||
BitField<21, 2, u32> index_stride;
|
||||
BitField<23, 1, u32> add_tid_enable;
|
||||
};
|
||||
u32 dst_sel_x : 3;
|
||||
u32 dst_sel_y : 3;
|
||||
u32 dst_sel_z : 3;
|
||||
u32 dst_sel_w : 3;
|
||||
u32 num_format : 3;
|
||||
u32 data_format : 4;
|
||||
u32 element_size : 2;
|
||||
u32 index_stride : 2;
|
||||
u32 add_tid_enable : 1;
|
||||
|
||||
operator bool() const noexcept {
|
||||
return base_address != 0;
|
||||
}
|
||||
|
||||
bool operator==(const Buffer& other) const noexcept {
|
||||
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
|
||||
}
|
||||
|
||||
CompSwizzle GetSwizzle(u32 comp) const noexcept {
|
||||
return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7);
|
||||
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
|
||||
return static_cast<CompSwizzle>(select[comp]);
|
||||
}
|
||||
|
||||
NumberFormat GetNumberFmt() const noexcept {
|
||||
return static_cast<NumberFormat>(num_format);
|
||||
}
|
||||
|
||||
DataFormat GetDataFmt() const noexcept {
|
||||
return static_cast<DataFormat>(data_format);
|
||||
}
|
||||
|
||||
u32 GetStride() const noexcept {
|
||||
return stride == 0 ? 1U : stride.Value();
|
||||
return stride == 0 ? 1U : stride;
|
||||
}
|
||||
|
||||
u32 GetStrideElements(u32 element_size) const noexcept {
|
||||
|
@ -61,6 +74,7 @@ struct Buffer {
|
|||
return GetStride() * num_records;
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Buffer) == 16); // 128bits
|
||||
|
||||
enum class ImageType : u64 {
|
||||
Buffer = 0,
|
||||
|
|
|
@ -392,6 +392,36 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
num_format == AmdGpu::NumberFormat::Float) {
|
||||
return vk::Format::eR16G16Sfloat;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Snorm) {
|
||||
return vk::Format::eR16G16B16A16Snorm;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format32_32 &&
|
||||
num_format == AmdGpu::NumberFormat::Uint) {
|
||||
return vk::Format::eR32G32Uint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format4_4_4_4 &&
|
||||
num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eR4G4B4A4UnormPack16;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Uint) {
|
||||
return vk::Format::eR16G16B16A16Uint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
|
||||
num_format == AmdGpu::NumberFormat::Uint) {
|
||||
return vk::Format::eR32G32B32A32Uint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Sint) {
|
||||
return vk::Format::eR8Sint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Srgb) {
|
||||
return vk::Format::eBc1RgbaSrgbBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Sint) {
|
||||
return vk::Format::eR16G16Sint;
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||
}
|
||||
|
||||
|
|
|
@ -14,8 +14,8 @@ namespace Vulkan {
|
|||
|
||||
ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
|
||||
vk::PipelineCache pipeline_cache, const Shader::Info* info_,
|
||||
vk::ShaderModule module)
|
||||
: instance{instance_}, scheduler{scheduler_}, info{*info_} {
|
||||
u64 compute_key_, vk::ShaderModule module)
|
||||
: instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{*info_} {
|
||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||
.module = module,
|
||||
|
@ -85,15 +85,15 @@ ComputePipeline::~ComputePipeline() = default;
|
|||
bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
||||
VideoCore::TextureCache& texture_cache) const {
|
||||
// Bind resource buffers and textures.
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 8> buffer_infos;
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
|
||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||
u32 binding{};
|
||||
|
||||
for (const auto& buffer : info.buffers) {
|
||||
const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
||||
const auto vsharp = buffer.GetVsharp(info);
|
||||
const u32 size = vsharp.GetSize();
|
||||
const VAddr address = vsharp.base_address.Value();
|
||||
const VAddr address = vsharp.base_address;
|
||||
texture_cache.OnCpuWrite(address);
|
||||
const u32 offset = staging.Copy(address, size,
|
||||
buffer.is_storage ? instance.StorageMinAlignment()
|
||||
|
|
|
@ -24,7 +24,7 @@ class ComputePipeline {
|
|||
public:
|
||||
explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
|
||||
vk::PipelineCache pipeline_cache, const Shader::Info* info,
|
||||
vk::ShaderModule module);
|
||||
u64 compute_key, vk::ShaderModule module);
|
||||
~ComputePipeline();
|
||||
|
||||
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
||||
|
@ -40,6 +40,7 @@ private:
|
|||
vk::UniquePipeline pipeline;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
vk::UniqueDescriptorSetLayout desc_layout;
|
||||
u64 compute_key;
|
||||
Shader::Info info{};
|
||||
};
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
attributes.push_back({
|
||||
.location = input.binding,
|
||||
.binding = input.binding,
|
||||
.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
|
||||
.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
|
||||
.offset = 0,
|
||||
});
|
||||
bindings.push_back({
|
||||
|
@ -326,8 +326,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||
|
||||
for (const auto& stage : stages) {
|
||||
for (const auto& buffer : stage.buffers) {
|
||||
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
||||
const VAddr address = vsharp.base_address.Value();
|
||||
const auto vsharp = buffer.GetVsharp(stage);
|
||||
const VAddr address = vsharp.base_address;
|
||||
const u32 size = vsharp.GetSize();
|
||||
const u32 offset = staging.Copy(address, size,
|
||||
buffer.is_storage ? instance.StorageMinAlignment()
|
||||
|
@ -419,8 +419,7 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
|
|||
continue;
|
||||
}
|
||||
guest_buffers.emplace_back(buffer);
|
||||
ranges.emplace_back(buffer.base_address.Value(),
|
||||
buffer.base_address.Value() + buffer.GetSize());
|
||||
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
|
||||
}
|
||||
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
|
||||
return lhv.base_address < rhv.base_address;
|
||||
|
|
|
@ -74,12 +74,12 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
|
|||
|
||||
available_extensions = GetSupportedExtensions(physical_device);
|
||||
properties = physical_device.getProperties();
|
||||
CollectDeviceParameters();
|
||||
ASSERT_MSG(properties.apiVersion >= TargetVulkanApiVersion,
|
||||
"Vulkan {}.{} is required, but only {}.{} is supported by device!",
|
||||
VK_VERSION_MAJOR(TargetVulkanApiVersion), VK_VERSION_MINOR(TargetVulkanApiVersion),
|
||||
VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion));
|
||||
|
||||
CollectDeviceParameters();
|
||||
CreateDevice();
|
||||
CollectToolingInfo();
|
||||
}
|
||||
|
@ -156,6 +156,7 @@ bool Instance::CreateDevice() {
|
|||
add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
|
||||
add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
|
||||
add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
|
||||
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
|
||||
// The next two extensions are required to be available together in order to support write masks
|
||||
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
|
||||
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
|
@ -208,12 +209,14 @@ bool Instance::CreateDevice() {
|
|||
.shaderImageGatherExtended = true,
|
||||
.shaderStorageImageMultisample = true,
|
||||
.shaderClipDistance = features.shaderClipDistance,
|
||||
.shaderInt16 = true,
|
||||
},
|
||||
},
|
||||
vk::PhysicalDeviceVulkan11Features{
|
||||
.shaderDrawParameters = true,
|
||||
},
|
||||
vk::PhysicalDeviceVulkan12Features{
|
||||
.shaderFloat16 = true,
|
||||
.scalarBlockLayout = true,
|
||||
.uniformBufferStandardLayout = true,
|
||||
.hostQueryReset = true,
|
||||
|
@ -237,7 +240,12 @@ bool Instance::CreateDevice() {
|
|||
vk::PhysicalDeviceDepthClipControlFeaturesEXT{
|
||||
.depthClipControl = true,
|
||||
},
|
||||
};
|
||||
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{
|
||||
.workgroupMemoryExplicitLayout = true,
|
||||
.workgroupMemoryExplicitLayoutScalarBlockLayout = true,
|
||||
.workgroupMemoryExplicitLayout8BitAccess = true,
|
||||
.workgroupMemoryExplicitLayout16BitAccess = true,
|
||||
}};
|
||||
|
||||
if (!color_write_en) {
|
||||
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
|
||||
|
|
|
@ -18,6 +18,52 @@ extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::VsOutput;
|
||||
|
||||
void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
||||
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
||||
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
|
||||
w != VsOutput::None) {
|
||||
info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
|
||||
}
|
||||
};
|
||||
// VS_OUT_MISC_VEC
|
||||
add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
|
||||
ctl.use_vtx_edge_flag
|
||||
? VsOutput::EdgeFlag
|
||||
: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
|
||||
ctl.use_vtx_kill_flag
|
||||
? VsOutput::KillFlag
|
||||
: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
|
||||
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
|
||||
// VS_OUT_CCDIST0
|
||||
add_output(ctl.IsClipDistEnabled(0)
|
||||
? VsOutput::ClipDist0
|
||||
: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(1)
|
||||
? VsOutput::ClipDist1
|
||||
: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(2)
|
||||
? VsOutput::ClipDist2
|
||||
: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(3)
|
||||
? VsOutput::ClipDist3
|
||||
: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
|
||||
// VS_OUT_CCDIST1
|
||||
add_output(ctl.IsClipDistEnabled(4)
|
||||
? VsOutput::ClipDist4
|
||||
: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(5)
|
||||
? VsOutput::ClipDist5
|
||||
: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(6)
|
||||
? VsOutput::ClipDist6
|
||||
: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(7)
|
||||
? VsOutput::ClipDist7
|
||||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
||||
}
|
||||
|
||||
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
|
||||
const AmdGpu::Liverpool::Regs& regs) {
|
||||
Shader::Info info{};
|
||||
|
@ -26,6 +72,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
|
|||
switch (stage) {
|
||||
case Shader::Stage::Vertex: {
|
||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
||||
BuildVsOutputs(info, regs.vs_output_control);
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Fragment: {
|
||||
|
@ -45,6 +92,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
|
|||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||
cs_pgm.num_thread_z.full};
|
||||
info.shared_memory_size = cs_pgm.SharedMemSize();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -60,6 +108,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
|
||||
profile = Shader::Profile{
|
||||
.supported_spirv = 0x00010600U,
|
||||
.support_explicit_workgroup_layout = true,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -153,7 +202,7 @@ void PipelineCache::RefreshGraphicsKey() {
|
|||
|
||||
for (u32 i = 0; i < MaxShaderStages; i++) {
|
||||
auto* pgm = regs.ProgramForStage(i);
|
||||
if (!pgm || !pgm->Address<u32>()) {
|
||||
if (!pgm || !pgm->Address<u32*>()) {
|
||||
key.stage_hashes[i] = 0;
|
||||
continue;
|
||||
}
|
||||
|
@ -209,7 +258,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
// Recompile shader to IR.
|
||||
try {
|
||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
|
||||
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
info.pgm_base = pgm->Address<uintptr_t>();
|
||||
info.pgm_hash = hash;
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
|
@ -247,8 +298,9 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
|||
// Recompile shader to IR.
|
||||
try {
|
||||
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
|
||||
const Shader::Info info =
|
||||
Shader::Info info =
|
||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||
info.pgm_base = cs_pgm.Address<uintptr_t>();
|
||||
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
|
@ -258,8 +310,11 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
|||
DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
|
||||
}
|
||||
const auto module = CompileSPV(spv_code, instance.GetDevice());
|
||||
// Set module name to hash in renderdoc
|
||||
const auto name = fmt::format("cs_{:#x}", compute_key);
|
||||
Vulkan::SetObjectName(instance.GetDevice(), module, name);
|
||||
return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
|
||||
&program.info, module);
|
||||
&program.info, compute_key, module);
|
||||
} catch (const Shader::Exception& e) {
|
||||
UNREACHABLE_MSG("{}", e.what());
|
||||
return nullptr;
|
||||
|
|
|
@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
|||
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
||||
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
||||
pipeline_cache{instance, scheduler, liverpool},
|
||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 512_MB, BufferType::Upload} {
|
||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 3_GB, BufferType::Upload} {
|
||||
if (!Config::nullGpu()) {
|
||||
liverpool->BindRasterizer(this);
|
||||
}
|
||||
|
@ -44,11 +44,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
return;
|
||||
}
|
||||
|
||||
UpdateDynamicState(*pipeline);
|
||||
|
||||
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
||||
try {
|
||||
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
||||
} catch (...) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
BeginRendering();
|
||||
UpdateDynamicState(*pipeline);
|
||||
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||
if (is_indexed) {
|
||||
|
@ -71,9 +74,14 @@ void Rasterizer::DispatchDirect() {
|
|||
return;
|
||||
}
|
||||
|
||||
const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
||||
if (!has_resources) {
|
||||
return;
|
||||
try {
|
||||
const auto has_resources =
|
||||
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
||||
if (!has_resources) {
|
||||
return;
|
||||
}
|
||||
} catch (...) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
scheduler.EndRendering();
|
||||
|
@ -163,7 +171,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
|
|||
|
||||
// Upload index data to stream buffer.
|
||||
const auto index_address = regs.index_base_address.Address<const void*>();
|
||||
const u32 index_buffer_size = regs.num_indices * index_size;
|
||||
const u32 index_buffer_size = (index_offset + regs.num_indices) * index_size;
|
||||
const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
|
||||
std::memcpy(data, index_address, index_buffer_size);
|
||||
vertex_index_buffer.Commit(index_buffer_size);
|
||||
|
|
|
@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
|
|||
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
|
||||
auto& watch = previous_watches[wait_cursor];
|
||||
wait_bound = watch.upper_bound;
|
||||
// scheduler.Wait(watch.tick);
|
||||
scheduler.Wait(watch.tick);
|
||||
++wait_cursor;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -321,12 +321,15 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
|
|||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||
|
||||
// Copy to the image.
|
||||
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil
|
||||
? vk::ImageAspectFlagBits::eDepth
|
||||
: aspect_mask;
|
||||
const vk::BufferImageCopy image_copy = {
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = info.pitch,
|
||||
.bufferImageHeight = info.size.height,
|
||||
.imageSubresource{
|
||||
.aspectMask = aspect_mask,
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
|
|
|
@ -77,7 +77,6 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
|||
if (usage_override) {
|
||||
usage_ci.usage = usage_override.value();
|
||||
}
|
||||
|
||||
// When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
|
||||
vk::Format format = info.format;
|
||||
vk::ImageAspectFlags aspect = image.aspect_mask;
|
||||
|
|
|
@ -134,13 +134,13 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
|
|||
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
|
||||
RegisterImage(image_id);
|
||||
} else {
|
||||
image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0];
|
||||
image_id = image_ids[0];
|
||||
}
|
||||
|
||||
RegisterMeta(info, image_id);
|
||||
|
||||
Image& image = slot_images[image_id];
|
||||
if (True(image.flags & ImageFlagBits::CpuModified)) {
|
||||
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
|
||||
RefreshImage(image);
|
||||
TrackImage(image, image_id);
|
||||
}
|
||||
|
@ -193,7 +193,7 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag
|
|||
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
const AmdGpu::Liverpool::CbDbExtent& hint) {
|
||||
const ImageInfo info{buffer, hint};
|
||||
const ImageId image_id = FindImage(info, buffer.Address(), false);
|
||||
const ImageId image_id = FindImage(info, buffer.Address());
|
||||
Image& image = slot_images[image_id];
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
|
||||
|
|
|
@ -179,19 +179,19 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
|||
case vk::Format::eR8Unorm:
|
||||
return vk::Format::eR8Uint;
|
||||
case vk::Format::eR8G8Unorm:
|
||||
case vk::Format::eR16Sfloat:
|
||||
return vk::Format::eR8G8Uint;
|
||||
case vk::Format::eR8G8B8A8Srgb:
|
||||
[[fallthrough]];
|
||||
case vk::Format::eB8G8R8A8Srgb:
|
||||
[[fallthrough]];
|
||||
case vk::Format::eB8G8R8A8Unorm:
|
||||
[[fallthrough]];
|
||||
case vk::Format::eR8G8B8A8Unorm:
|
||||
case vk::Format::eR32Sfloat:
|
||||
case vk::Format::eR32Uint:
|
||||
return vk::Format::eR32Uint;
|
||||
case vk::Format::eBc1RgbaUnormBlock:
|
||||
case vk::Format::eR32G32Sfloat:
|
||||
return vk::Format::eR32G32Uint;
|
||||
case vk::Format::eBc3SrgbBlock:
|
||||
[[fallthrough]];
|
||||
case vk::Format::eBc3UnormBlock:
|
||||
case vk::Format::eBc7SrgbBlock:
|
||||
case vk::Format::eBc7UnormBlock:
|
||||
|
|
Loading…
Reference in a new issue