shader_recompiler: Implement most integer image atomics, workgroup barriers and shared memory load/store (#231)

* shader_recompiler: Add LDEXP

* shader_recompiler: Add most image integer atomic ops

* shader_recompiler: Implement shared memory load/store

* shader_recompiler: More image atomics

* externals: Update sirit

* clang format

* cmake: Add missing files

* shader_recompiler: Fix some atomic bugs

* shader_recompiler: Vs outputs

* shader_recompiler: Shared mem has side-effects, fix format component order

* shader_recompiler: Inline constant buffer impl

* video_core: Fix regressions

* Work

* Fixup a few things
This commit is contained in:
TheTurtle 2024-07-05 00:15:44 +03:00 committed by GitHub
parent af3bbc33e9
commit 6ceab6dfac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
69 changed files with 1597 additions and 310 deletions

View file

@ -349,6 +349,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/runtime_info.h
src/shader_recompiler/backend/spirv/emit_spirv.cpp
src/shader_recompiler/backend/spirv/emit_spirv.h
src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@ -359,6 +361,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 505cc66a2be70b268c1700fef4d5327a5fe46494
Subproject commit 8db09231c448b913ae905d5237ce2eca46e3fe87

25
src/common/div_ceil.h Executable file
View file

@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <type_traits>
namespace Common {
/// Ceiled integer division.
template <typename N, typename D>
requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeil(N number, D divisor) {
return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor);
}
/// Ceiled integer division with logarithmic divisor in base 2
template <typename N, typename D>
requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeilLog2(N value, D alignment_log2) {
return static_cast<N>((static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2);
}
} // namespace Common

View file

@ -14,6 +14,12 @@
namespace Common {
std::string ToLower(std::string str) {
std::transform(str.begin(), str.end(), str.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
return str;
}
std::vector<std::string> SplitString(const std::string& str, char delimiter) {
std::istringstream iss(str);
std::vector<std::string> output(1);

View file

@ -9,6 +9,9 @@
namespace Common {
/// Make a string lowercase
[[nodiscard]] std::string ToLower(std::string str);
std::vector<std::string> SplitString(const std::string& str, char delimiter);
#ifdef _WIN32

View file

@ -28,7 +28,7 @@ constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL;
// User area size is normally larger than this. However games are unlikely to map to high
// regions of that area, so by default we allocate a smaller virtual address space (about 1/4th).
// to save space on page tables.
static constexpr size_t UserSize = 1ULL << 38;
static constexpr size_t UserSize = 1ULL << 39;
static constexpr size_t SystemSize = USER_MIN - SYSTEM_MANAGED_MIN;
/**

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include "common/string_util.h"
#include "core/file_sys/fs.h"
namespace Core::FileSys {
@ -13,6 +14,7 @@ void MntPoints::Mount(const std::filesystem::path& host_folder, const std::strin
MntPair pair;
pair.host_path = host_folder.string();
std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
pair.guest_path = guest_folder;
m_mnt_pairs.push_back(pair);
@ -46,11 +48,24 @@ std::string MntPoints::GetHostFile(const std::string& guest_file) {
for (auto& pair : m_mnt_pairs) {
// horrible code but it works :D
int find = guest_file.find(pair.guest_path);
if (find == 0) {
std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1);
std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
return pair.host_path + npath;
if (find != 0) {
continue;
}
std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1);
const auto host_path = pair.host_path + npath;
#ifndef _WIN64
const std::filesystem::path path{host_path};
if (!std::filesystem::exists(path)) {
const auto filename = Common::ToLower(path.filename());
for (const auto& file : std::filesystem::directory_iterator(path.parent_path())) {
const auto exist_filename = Common::ToLower(file.path().filename());
if (filename == exist_filename) {
return file.path();
}
}
}
#endif
return host_path;
}
return "";
}

View file

@ -803,9 +803,9 @@ int PS4_SYSV_ABI sceGnmDrawOpaqueAuto() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK;
bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress() {
LOG_TRACE(Lib_GnmDriver, "called");
return false;
}
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface() {
@ -1962,7 +1962,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
if (Config::dumpPM4()) {
static auto last_frame_num = -1LL;
static u32 seq_num{};
if (last_frame_num == frames_submitted) {
if (last_frame_num == frames_submitted && cbpair == 0) {
++seq_num;
} else {
last_frame_num = frames_submitted;

View file

@ -63,7 +63,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size);
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size);
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size);
int PS4_SYSV_ABI sceGnmDrawOpaqueAuto();
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface();
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuDebugger();
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuException();

View file

@ -161,7 +161,6 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
}
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
LOG_WARNING(Kernel_Vmm, "called");
auto* memory = Core::Memory::Instance();
return memory->QueryProtection(std::bit_cast<VAddr>(addr), start, end, prot);
}

View file

@ -4,6 +4,7 @@
#include <mutex>
#include <thread>
#include <semaphore.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/logging/log.h"
@ -16,6 +17,8 @@
#include "core/linker.h"
#ifdef _WIN64
#include <windows.h>
#else
#include <sys/mman.h>
#endif
namespace Libraries::Kernel {
@ -46,7 +49,8 @@ void init_pthreads() {
}
void pthreadInitSelfMainThread() {
g_pthread_self = new PthreadInternal{};
auto* pthread_pool = g_pthread_cxt->GetPthreadPool();
g_pthread_self = pthread_pool->Create();
scePthreadAttrInit(&g_pthread_self->attr);
g_pthread_self->pth = pthread_self();
g_pthread_self->name = "Main_Thread";
@ -926,31 +930,25 @@ int PS4_SYSV_ABI scePthreadCreate(ScePthread* thread, const ScePthreadAttr* attr
if ((*thread)->attr != nullptr) {
scePthreadAttrDestroy(&(*thread)->attr);
}
scePthreadAttrInit(&(*thread)->attr);
int result = pthread_copy_attributes(&(*thread)->attr, attr);
ASSERT(result == 0);
if (result == 0) {
if (name != NULL) {
(*thread)->name = name;
} else {
(*thread)->name = "no-name";
}
(*thread)->entry = start_routine;
(*thread)->arg = arg;
(*thread)->is_almost_done = false;
(*thread)->is_detached = (*attr)->detached;
(*thread)->is_started = false;
result = pthread_create(&(*thread)->pth, &(*attr)->pth_attr, run_thread, *thread);
if (name != NULL) {
(*thread)->name = name;
} else {
(*thread)->name = "no-name";
}
(*thread)->entry = start_routine;
(*thread)->arg = arg;
(*thread)->is_almost_done = false;
(*thread)->is_detached = (*attr)->detached;
(*thread)->is_started = false;
pthread_attr_setstacksize(&(*attr)->pth_attr, 2_MB);
result = pthread_create(&(*thread)->pth, &(*attr)->pth_attr, run_thread, *thread);
if (result == 0) {
while (!(*thread)->is_started) {
std::this_thread::sleep_for(std::chrono::microseconds(1000));
}
}
LOG_INFO(Kernel_Pthread, "thread create name = {}", (*thread)->name);
switch (result) {
@ -979,7 +977,15 @@ ScePthread PThreadPool::Create() {
}
}
#ifndef _WIN64
auto* ret = new PthreadInternal{};
#else
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
auto* ret = reinterpret_cast<PthreadInternal*>(
mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
#endif
ret->is_free = false;
ret->is_detached = false;

View file

@ -129,7 +129,11 @@ public:
const auto end = std::chrono::high_resolution_clock::now();
const auto time =
std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
*timeout -= time;
if (status == std::cv_status::timeout) {
*timeout = 0;
} else {
*timeout -= time;
}
return GetResult(status == std::cv_status::timeout);
}
};

View file

@ -341,6 +341,7 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
switch (mount_mode) {
case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF: {
if (!std::filesystem::exists(mount_dir)) {
return ORBIS_SAVE_DATA_ERROR_NOT_FOUND;
@ -349,11 +350,14 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
mnt->Mount(mount_dir, g_mount_point);
mount_result->mount_status = 0;
strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16);
} break;
std::strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16);
break;
}
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF |

View file

@ -85,7 +85,7 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) {
#include "shader_recompiler/ir/opcodes.inc"
#undef OPCODE
}
throw LogicError("Invalid opcode {}", inst->GetOpcode());
UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode());
}
Id TypeId(const EmitContext& ctx, IR::Type type) {
@ -176,7 +176,12 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D);
ctx.AddCapability(spv::Capability::Float16);
ctx.AddCapability(spv::Capability::Int16);
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
switch (program.info.stage) {
case Stage::Compute: {
const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
@ -272,47 +277,55 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
void EmitReference(EmitContext&) {}
void EmitPhiMove(EmitContext&) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetScc(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetExec(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetVcc(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetSccLo(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetVccLo(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetVccHi(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetScc(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetExec(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetVcc(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetSccLo(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetVccLo(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetVccHi(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
} // namespace Shader::Backend::SPIRV

View file

@ -0,0 +1,70 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
namespace {
std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
const Id scope{ctx.ConstU32(static_cast<u32>(spv::Scope::Device))};
const Id semantics{ctx.u32_zero_value};
return {scope, semantics};
}
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, texture.id, coords, ctx.ConstU32(0U))};
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
}
} // Anonymous namespace
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
}
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicSMin);
}
Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMin);
}
Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicSMax);
}
Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax);
}
Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicAnd);
}
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicOr);
}
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicXor);
}
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicExchange);
}
} // namespace Shader::Backend::SPIRV

View file

@ -0,0 +1,37 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
namespace {
void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
const auto semantics{
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
spv::MemorySemanticsMask::ImageMemory};
ctx.OpMemoryBarrier(ctx.ConstU32(static_cast<u32>(scope)),
ctx.ConstU32(static_cast<u32>(semantics)));
}
} // Anonymous namespace
void EmitBarrier(EmitContext& ctx) {
const auto execution{spv::Scope::Workgroup};
const auto memory{spv::Scope::Workgroup};
const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
spv::MemorySemanticsMask::WorkgroupMemory};
ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
ctx.ConstU32(static_cast<u32>(memory)),
ctx.ConstU32(static_cast<u32>(memory_semantics)));
}
void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
MemoryBarrier(ctx, spv::Scope::Workgroup);
}
void EmitDeviceMemoryBarrier(EmitContext& ctx) {
MemoryBarrier(ctx, spv::Scope::Device);
}
} // namespace Shader::Backend::SPIRV

View file

@ -18,8 +18,8 @@ void EmitBitCastU64F64(EmitContext&) {
UNREACHABLE_MSG("SPIR-V Instruction");
}
void EmitBitCastF16U16(EmitContext&) {
UNREACHABLE_MSG("SPIR-V Instruction");
Id EmitBitCastF16U16(EmitContext& ctx, Id value) {
return ctx.OpBitcast(ctx.F16[1], value);
}
Id EmitBitCastF32U32(EmitContext& ctx, Id value) {

View file

@ -7,6 +7,37 @@
namespace Shader::Backend::SPIRV {
namespace {
Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
switch (output) {
case VsOutput::ClipDist0:
case VsOutput::ClipDist1:
case VsOutput::ClipDist2:
case VsOutput::ClipDist3:
case VsOutput::ClipDist4:
case VsOutput::ClipDist5:
case VsOutput::ClipDist6:
case VsOutput::ClipDist7: {
const u32 index = u32(output) - u32(VsOutput::ClipDist0);
const Id clip_num{ctx.ConstU32(index)};
return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
}
case VsOutput::CullDist0:
case VsOutput::CullDist1:
case VsOutput::CullDist2:
case VsOutput::CullDist3:
case VsOutput::CullDist4:
case VsOutput::CullDist5:
case VsOutput::CullDist6:
case VsOutput::CullDist7: {
const u32 index = u32(output) - u32(VsOutput::CullDist0);
const Id cull_num{ctx.ConstU32(index)};
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
}
default:
UNREACHABLE();
}
}
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
@ -20,10 +51,20 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
switch (attr) {
case IR::Attribute::Position0: {
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
case IR::Attribute::Position1:
case IR::Attribute::Position2:
case IR::Attribute::Position3: {
const u32 index = u32(attr) - u32(IR::Attribute::Position1);
return VsOutputAttrPointer(ctx, ctx.info.vs_outputs[index][element]);
}
case IR::Attribute::RenderTarget0:
case IR::Attribute::RenderTarget1:
case IR::Attribute::RenderTarget2:
case IR::Attribute::RenderTarget3: {
case IR::Attribute::RenderTarget3:
case IR::Attribute::RenderTarget4:
case IR::Attribute::RenderTarget5:
case IR::Attribute::RenderTarget6:
case IR::Attribute::RenderTarget7: {
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
if (ctx.frag_num_comp[index] > 1) {
return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element));
@ -45,39 +86,39 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
}
void EmitGetThreadBitScalarReg(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetThreadBitScalarReg(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetScalarRegister(EmitContext&) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetScalarRegister(EmitContext&) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetVectorRegister(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetVectorRegister(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetGotoVariable(EmitContext&) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetGotoVariable(EmitContext&) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
Id EmitReadConst(EmitContext& ctx) {
throw LogicError("Unreachable instruction");
UNREACHABLE_MSG("Unreachable instruction");
}
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
@ -159,7 +200,15 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
}
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
UNREACHABLE();
const auto info = inst->Flags<IR::BufferInstInfo>();
const auto& buffer = ctx.buffers[handle];
boost::container::static_vector<Id, 2> ids;
for (u32 i = 0; i < 2; i++) {
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
}
return ctx.OpCompositeConstruct(buffer.data_types->Get(2), ids);
}
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {

View file

@ -68,11 +68,7 @@ Id EmitConvertS32F16(EmitContext& ctx, Id value) {
}
Id EmitConvertS32F32(EmitContext& ctx, Id value) {
if (ctx.profile.has_broken_signed_operations) {
return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value));
} else {
return ctx.OpConvertFToS(ctx.U32[1], value);
}
return ctx.OpConvertFToS(ctx.U32[1], value);
}
Id EmitConvertS32F64(EmitContext& ctx, Id value) {
@ -259,4 +255,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value) {
return ctx.OpConvertUToF(ctx.F64[1], value);
}
Id EmitConvertU16U32(EmitContext& ctx, Id value) {
return ctx.OpUConvert(ctx.U16, value);
}
} // namespace Shader::Backend::SPIRV

View file

@ -6,6 +6,11 @@
namespace Shader::Backend::SPIRV {
Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
ctx.Decorate(op, spv::Decoration::NoContraction);
return op;
}
Id EmitFPAbs16(EmitContext& ctx, Id value) {
return ctx.OpFAbs(ctx.F16[1], value);
}
@ -19,31 +24,31 @@ Id EmitFPAbs64(EmitContext& ctx, Id value) {
}
Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFAdd(ctx.F16[1], a, b);
return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
}
Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFAdd(ctx.F32[1], a, b);
return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
}
Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFAdd(ctx.F64[1], a, b);
return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
}
Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFSub(ctx.F32[1], a, b);
return Decorate(ctx, inst, ctx.OpFSub(ctx.F32[1], a, b));
}
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return ctx.OpFma(ctx.F16[1], a, b, c);
return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
}
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return ctx.OpFma(ctx.F32[1], a, b, c);
return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
}
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return ctx.OpFma(ctx.F64[1], a, b, c);
return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
}
Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
@ -63,15 +68,15 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
}
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFMul(ctx.F16[1], a, b);
return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
}
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFMul(ctx.F32[1], a, b);
return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
}
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFMul(ctx.F64[1], a, b);
return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
}
Id EmitFPNeg16(EmitContext& ctx, Id value) {
@ -98,6 +103,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) {
return ctx.OpExp2(ctx.F32[1], value);
}
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) {
return ctx.OpLdexp(ctx.F32[1], value, exp);
}
Id EmitFPLog2(EmitContext& ctx, Id value) {
return ctx.OpLog2(ctx.F32[1], value);
}
@ -360,4 +369,12 @@ Id EmitFPIsNan64(EmitContext& ctx, Id value) {
return ctx.OpIsNan(ctx.U1[1], value);
}
Id EmitFPIsInf32(EmitContext& ctx, Id value) {
return ctx.OpIsInf(ctx.U1[1], value);
}
Id EmitFPIsInf64(EmitContext& ctx, Id value) {
return ctx.OpIsInf(ctx.U1[1], value);
}
} // namespace Shader::Backend::SPIRV

View file

@ -79,10 +79,12 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id of
Id ms) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id result_type = texture.data_types->Get(4);
if (Sirit::ValidId(lod)) {
return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod);
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords,
spv::ImageOperandsMask::Lod, lod));
} else {
return ctx.OpImageFetch(ctx.F32[4], image, coords);
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords));
}
}
@ -134,7 +136,8 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), color);
const Id color_type = texture.data_types->Get(4);
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color));
}
} // namespace Shader::Backend::SPIRV

View file

@ -33,16 +33,21 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitGetScc(EmitContext& ctx);
void EmitGetExec(EmitContext& ctx);
void EmitGetVcc(EmitContext& ctx);
void EmitGetSccLo(EmitContext& ctx);
void EmitGetVccLo(EmitContext& ctx);
void EmitGetVccHi(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
void EmitSetExec(EmitContext& ctx);
void EmitSetVcc(EmitContext& ctx);
void EmitSetSccLo(EmitContext& ctx);
void EmitSetVccLo(EmitContext& ctx);
void EmitSetVccHi(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx);
void EmitEpilogue(EmitContext& ctx);
void EmitDiscard(EmitContext& ctx);
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx);
@ -82,12 +87,13 @@ Id EmitUndefU8(EmitContext& ctx);
Id EmitUndefU16(EmitContext& ctx);
Id EmitUndefU32(EmitContext& ctx);
Id EmitUndefU64(EmitContext& ctx);
Id EmitReadSharedU8(EmitContext& ctx, Id offset);
Id EmitReadSharedS8(EmitContext& ctx, Id offset);
Id EmitReadSharedU16(EmitContext& ctx, Id offset);
Id EmitReadSharedS16(EmitContext& ctx, Id offset);
Id EmitReadSharedU32(EmitContext& ctx, Id offset);
Id EmitReadSharedU64(EmitContext& ctx, Id offset);
Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
@ -140,7 +146,7 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
void EmitBitCastU16F16(EmitContext& ctx);
Id EmitBitCastU32F32(EmitContext& ctx, Id value);
void EmitBitCastU64F64(EmitContext& ctx);
void EmitBitCastF16U16(EmitContext&);
Id EmitBitCastF16U16(EmitContext& ctx, Id value);
Id EmitBitCastF32U32(EmitContext& ctx, Id value);
void EmitBitCastF64U64(EmitContext& ctx);
Id EmitPackUint2x32(EmitContext& ctx, Id value);
@ -172,6 +178,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value);
Id EmitFPSin(EmitContext& ctx, Id value);
Id EmitFPCos(EmitContext& ctx, Id value);
Id EmitFPExp2(EmitContext& ctx, Id value);
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
Id EmitFPLog2(EmitContext& ctx, Id value);
Id EmitFPRecip32(EmitContext& ctx, Id value);
Id EmitFPRecip64(EmitContext& ctx, Id value);
@ -236,8 +243,11 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPIsNan16(EmitContext& ctx, Id value);
Id EmitFPIsNan32(EmitContext& ctx, Id value);
Id EmitFPIsNan64(EmitContext& ctx, Id value);
Id EmitFPIsInf32(EmitContext& ctx, Id value);
Id EmitFPIsInf64(EmitContext& ctx, Id value);
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);
Id EmitISub32(EmitContext& ctx, Id a, Id b);
Id EmitISub64(EmitContext& ctx, Id a, Id b);
Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
@ -333,6 +343,7 @@ Id EmitConvertF64U8(EmitContext& ctx, Id value);
Id EmitConvertF64U16(EmitContext& ctx, Id value);
Id EmitConvertF64U32(EmitContext& ctx, Id value);
Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitConvertU16U32(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset);
@ -355,6 +366,18 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitLaneId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);

View file

@ -60,6 +60,10 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
return ctx.OpIAdd(ctx.U64, a, b);
}
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b) {
return ctx.OpIAddCarry(ctx.full_result_u32x2, a, b);
}
Id EmitISub32(EmitContext& ctx, Id a, Id b) {
return ctx.OpISub(ctx.U32[1], a, b);
}

View file

@ -0,0 +1,165 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
namespace {
Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
const Id shift_id{ctx.ConstU32(shift)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
}
Id Word(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
return ctx.OpLoad(ctx.U32[1], pointer);
}
std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))};
const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))};
const Id count_id{ctx.ConstU32(count)};
return {bit, count_id};
}
} // Anonymous namespace
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
return ctx.OpLoad(ctx.U32[1], pointer);
} else {
return Word(ctx, offset);
}
}
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
return ctx.OpLoad(ctx.U32[2], pointer);
} else {
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
}
}
Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
return ctx.OpLoad(ctx.U32[4], pointer);
}
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
std::array<Id, 4> values{};
for (u32 i = 0; i < 4; ++i) {
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
}
return ctx.OpCompositeConstruct(ctx.U32[4], values);
}
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
}
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
}
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
Id pointer{};
if (ctx.profile.support_explicit_workgroup_layout) {
pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
} else {
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
}
ctx.OpStore(pointer, value);
}
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
ctx.OpStore(pointer, value);
return;
}
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
}
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
ctx.OpStore(pointer, value);
return;
}
const Id shift{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
for (u32 i = 0; i < 4; ++i) {
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
}
}
} // namespace Shader::Backend::SPIRV

View file

@ -3,6 +3,7 @@
#include <boost/container/static_vector.hpp>
#include <fmt/format.h>
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
@ -41,8 +42,9 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces(program);
DefineBuffers(program.info);
DefineImagesAndSamplers(program.info);
DefineBuffers(info);
DefineImagesAndSamplers(info);
DefineSharedMemory(info);
}
EmitContext::~EmitContext() = default;
@ -72,19 +74,19 @@ Id EmitContext::Def(const IR::Value& value) {
void EmitContext::DefineArithmeticTypes() {
void_id = Name(TypeVoid(), "void_id");
U1[1] = Name(TypeBool(), "bool_id");
// F16[1] = Name(TypeFloat(16), "f16_id");
F16[1] = Name(TypeFloat(16), "f16_id");
F32[1] = Name(TypeFloat(32), "f32_id");
// F64[1] = Name(TypeFloat(64), "f64_id");
S32[1] = Name(TypeSInt(32), "i32_id");
U32[1] = Name(TypeUInt(32), "u32_id");
// U8 = Name(TypeSInt(8), "u8");
// S8 = Name(TypeUInt(8), "s8");
// U16 = Name(TypeUInt(16), "u16_id");
U16 = Name(TypeUInt(16), "u16_id");
// S16 = Name(TypeSInt(16), "s16_id");
// U64 = Name(TypeUInt(64), "u64_id");
for (u32 i = 2; i <= 4; i++) {
// F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i));
// F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i));
S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i));
@ -222,8 +224,17 @@ void EmitContext::DefineInputs(const Info& info) {
void EmitContext::DefineOutputs(const Info& info) {
switch (stage) {
case Stage::Vertex:
case Stage::Vertex: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value,
f32_zero_value, f32_zero_value, f32_zero_value,
f32_zero_value, f32_zero_value};
const Id type{TypeArray(F32[1], ConstU32(8U))};
const Id initializer{ConstantComposite(type, zero)};
clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output,
initializer);
cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output,
initializer);
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
@ -236,6 +247,7 @@ void EmitContext::DefineOutputs(const Info& info) {
interfaces.push_back(id);
}
break;
}
case Stage::Fragment:
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
@ -294,8 +306,49 @@ void EmitContext::DefineBuffers(const Info& info) {
}
}
spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::R32ui;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::R32f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rg32f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::R16f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rg16f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
return spv::ImageFormat::Rg8;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rgba16f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
return spv::ImageFormat::R8;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
return spv::ImageFormat::Rgba8;
}
UNREACHABLE();
}
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
const auto format = spv::ImageFormat::Unknown;
const auto image = ctx.info.ReadUd<AmdGpu::Image>(desc.sgpr_base, desc.dword_offset);
const auto format = desc.is_storage ? GetFormat(image) : spv::ImageFormat::Unknown;
const u32 sampled = desc.is_storage ? 2 : 1;
switch (desc.type) {
case AmdGpu::ImageType::Color1D:
@ -320,7 +373,17 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
void EmitContext::DefineImagesAndSamplers(const Info& info) {
for (const auto& image_desc : info.images) {
const Id sampled_type{image_desc.nfmt == AmdGpu::NumberFormat::Uint ? U32[1] : F32[1]};
const VectorIds* data_types = [&] {
switch (image_desc.nfmt) {
case AmdGpu::NumberFormat::Uint:
return &U32;
case AmdGpu::NumberFormat::Sint:
return &S32;
default:
return &F32;
}
}();
const Id sampled_type = data_types->Get(1);
const Id image_type{ImageType(*this, image_desc, sampled_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
@ -330,6 +393,7 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
image_desc.dword_offset));
images.push_back({
.id = id,
.data_types = data_types,
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
.pointer_type = pointer_type,
.image_type = image_type,
@ -338,6 +402,8 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
++binding;
}
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
if (info.samplers.empty()) {
return;
}
@ -356,4 +422,50 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
}
}
void EmitContext::DefineSharedMemory(const Info& info) {
if (info.shared_memory_size == 0) {
return;
}
const auto make{[&](Id element_type, u32 element_size) {
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
const Id struct_type{TypeStruct(array_type)};
MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
Decorate(struct_type, spv::Decoration::Block);
const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
Decorate(variable, spv::Decoration::Aliased);
interfaces.push_back(variable);
return std::make_tuple(variable, element_pointer, pointer);
}};
if (profile.support_explicit_workgroup_layout) {
AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
if (info.uses_shared_u8) {
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
}
if (info.uses_shared_u16) {
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
}
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
return;
}
const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
interfaces.push_back(shared_memory_u32);
}
} // namespace Shader::Backend::SPIRV

View file

@ -66,15 +66,17 @@ public:
}
template <bool global = true>
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class,
std::optional<Id> initializer = std::nullopt) {
const Id pointer_type_id{TypePointer(storage_class, type)};
return global ? AddGlobalVariable(pointer_type_id, storage_class)
: AddLocalVariable(pointer_type_id, storage_class);
return global ? AddGlobalVariable(pointer_type_id, storage_class, initializer)
: AddLocalVariable(pointer_type_id, storage_class, initializer);
}
[[nodiscard]] Id DefineVariable(Id type, std::optional<spv::BuiltIn> builtin,
spv::StorageClass storage_class) {
const Id id{DefineVar(type, storage_class)};
spv::StorageClass storage_class,
std::optional<Id> initializer = std::nullopt) {
const Id id{DefineVar(type, storage_class, initializer)};
if (builtin) {
Decorate(id, spv::Decoration::BuiltIn, *builtin);
}
@ -147,6 +149,12 @@ public:
Id u32_zero_value{};
Id f32_zero_value{};
Id shared_u8{};
Id shared_u16{};
Id shared_u32{};
Id shared_u32x2{};
Id shared_u32x4{};
Id input_u32{};
Id input_f32{};
Id input_s32{};
@ -163,13 +171,25 @@ public:
Id frag_depth{};
std::array<Id, 8> frag_color{};
std::array<u32, 8> frag_num_comp{};
Id clip_distances{};
Id cull_distances{};
Id workgroup_id{};
Id local_invocation_id{};
Id subgroup_local_invocation_id{};
Id image_u32{};
Id shared_memory_u8{};
Id shared_memory_u16{};
Id shared_memory_u32{};
Id shared_memory_u32x2{};
Id shared_memory_u32x4{};
Id shared_memory_u32_type{};
struct TextureDefinition {
Id id;
const VectorIds* data_types;
Id sampled_type;
Id pointer_type;
Id image_type;
@ -205,6 +225,7 @@ private:
void DefineOutputs(const Info& info);
void DefineBuffers(const Info& info);
void DefineImagesAndSamplers(const Info& info);
void DefineSharedMemory(const Info& info);
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
};

View file

@ -149,9 +149,15 @@ void CFG::LinkBlocks() {
block.end_class = EndClass::Branch;
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
const auto& prev_inst = inst_list[block.end_index - 1];
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0 &&
prev_inst.control.exp.target != 9) {
block.end_class = EndClass::Kill;
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
if (prev_inst.control.exp.target != 9) {
block.end_class = EndClass::Kill;
} else if (const auto& exec_mask = inst_list[block.end_index - 2];
exec_mask.src[0].field == OperandField::ConstZero) {
block.end_class = EndClass::Kill;
} else {
block.end_class = EndClass::Exit;
}
} else {
block.end_class = EndClass::Exit;
}

View file

@ -32,7 +32,7 @@ namespace Shader::Gcn {
* We take the reverse way, extract the original input semantics from these instructions.
**/
std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
std::vector<VertexAttribute> attributes;
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
@ -47,6 +47,8 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
u32 semantic_index = 0;
while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice);
*out_size += inst.length;
if (inst.opcode == Opcode::S_SETPC_B64) {
break;
}

View file

@ -17,6 +17,6 @@ struct VertexAttribute {
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
};
std::vector<VertexAttribute> ParseFetchShader(const u32* code);
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size);
} // namespace Shader::Gcn

View file

@ -3429,48 +3429,48 @@ constexpr std::array<InstFormat, 112> InstructionFormatMIMG = {{
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
// 17 = IMAGE_ATOMIC_ADD
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 18 = IMAGE_ATOMIC_SUB
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
{},
// 20 = IMAGE_ATOMIC_SMIN
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Sint32,
ScalarType::Sint32},
// 21 = IMAGE_ATOMIC_UMIN
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 22 = IMAGE_ATOMIC_SMAX
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Sint32,
ScalarType::Sint32},
// 23 = IMAGE_ATOMIC_UMAX
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 24 = IMAGE_ATOMIC_AND
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 25 = IMAGE_ATOMIC_OR
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 26 = IMAGE_ATOMIC_XOR
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 27 = IMAGE_ATOMIC_INC
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 28 = IMAGE_ATOMIC_DEC
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 29 = IMAGE_ATOMIC_FCMPSWAP
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
// 30 = IMAGE_ATOMIC_FMIN
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
// 31 = IMAGE_ATOMIC_FMAX
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
// 32 = IMAGE_SAMPLE
{InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},

View file

@ -187,7 +187,7 @@ std::string DumpExpr(const Statement* stmt) {
case StatementType::Not:
case StatementType::Or:
case StatementType::Variable:
throw LogicError("Statement can't be printed");
UNREACHABLE_MSG("Statement can't be printed");
}
}
return ret;
@ -335,7 +335,7 @@ private:
}
// Expensive operation:
if (!AreSiblings(goto_stmt, label_stmt)) {
throw LogicError("Goto is not a sibling with the label");
UNREACHABLE_MSG("Goto is not a sibling with the label");
}
// goto_stmt and label_stmt are guaranteed to be siblings, eliminate
if (std::next(goto_stmt) == label_stmt) {
@ -451,7 +451,7 @@ private:
case StatementType::Loop:
return MoveOutwardLoop(goto_stmt);
default:
throw LogicError("Invalid outward movement");
UNREACHABLE_MSG("Invalid outward movement");
}
}
@ -486,7 +486,7 @@ private:
case StatementType::Loop:
break;
default:
throw LogicError("Invalid inward movement");
UNREACHABLE_MSG("Invalid inward movement");
}
Tree& nested_tree{label_nested_stmt->children};
Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
@ -633,7 +633,8 @@ private:
if (!stmt.block->is_dummy) {
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, inst_list.subspan(start, size), info);
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
info);
}
break;
}

View file

@ -22,16 +22,18 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
const IR::VectorReg dst_reg{inst.dst[0].code};
if (is_pair) {
// Pair loads are either 32 or 64-bit. We assume 32-bit for now.
ASSERT(bit_size == 32);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.SetVectorReg(dst_reg, ir.ReadShared(32, is_signed, addr0));
ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)});
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.SetVectorReg(dst_reg + 1, ir.ReadShared(32, is_signed, addr1));
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)});
} else if (bit_size == 64) {
const IR::Value data = ir.UnpackUint2x32(ir.ReadShared(bit_size, is_signed, addr));
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr);
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
} else {
const IR::U32 data = ir.ReadShared(bit_size, is_signed, addr);
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr)};
ir.SetVectorReg(dst_reg, data);
}
}
@ -41,17 +43,26 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
const IR::VectorReg data0{inst.src[1].code};
const IR::VectorReg data1{inst.src[2].code};
if (is_pair) {
ASSERT(bit_size == 32);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
} else if (bit_size == 64) {
const IR::U64 data = ir.PackUint2x32(
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)));
const IR::Value data =
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
ir.WriteShared(bit_size, data, addr);
} else {
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr);
}
}
void Translator::S_BARRIER() {
ir.Barrier();
}
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
UNREACHABLE();
}
} // namespace Shader::Gcn

View file

@ -318,4 +318,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
ir.SetScc(ir.Imm1(false));
}
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
// This only really exists to let resource tracking pass know
// there is an inline cbuf.
SetDst(inst.dst[0], ir.Imm32(pc));
}
void Translator::S_ADDC_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
}
} // namespace Shader::Gcn

View file

@ -5,20 +5,29 @@
namespace Shader::Gcn {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported");
const u32 dword_offset = [&] -> u32 {
if (smrd.imm) {
return smrd.offset;
}
if (smrd.offset == SQ_SRC_LITERAL) {
return inst.src[1].code;
}
UNREACHABLE();
}();
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value base =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i)));
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
}
}
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 dword_offset = [&] -> IR::U32 {
@ -30,7 +39,9 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
}
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
const IR::Value vsharp = ir.GetScalarReg(sbase);
const IR::Value vsharp =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));

View file

@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
@ -190,7 +193,20 @@ void Translator::EmitFetch(const GcnInst& inst) {
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
// Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code);
u32 fetch_size{};
const auto attribs = ParseFetchShader(code, &fetch_size);
if (Config::dumpShaders()) {
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename = fmt::format("vs_fetch_{:#018x}.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteRaw<u8>(code, fetch_size);
}
for (const auto& attrib : attribs) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
@ -224,9 +240,9 @@ void Translator::EmitFetch(const GcnInst& inst) {
attrib.instance_data);
}
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
info.vs_inputs.push_back({
.fmt = buffer.num_format,
.fmt = buffer.GetNumberFmt(),
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
@ -236,12 +252,13 @@ void Translator::EmitFetch(const GcnInst& inst) {
}
}
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info) {
if (inst_list.empty()) {
return;
}
Translator translator{block, info};
for (const auto& inst : inst_list) {
block_base += inst.length;
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
translator.S_MOVK(inst);
@ -345,6 +362,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_BFREV_B32:
translator.V_BFREV_B32(inst);
break;
case Opcode::V_LDEXP_F32:
translator.V_LDEXP_F32(inst);
break;
case Opcode::V_FRACT_F32:
translator.V_FRACT_F32(inst);
break;
@ -374,8 +394,40 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_SAMPLE_LZ:
case Opcode::IMAGE_SAMPLE:
case Opcode::IMAGE_SAMPLE_L:
case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_B:
translator.IMAGE_SAMPLE(inst);
break;
case Opcode::IMAGE_ATOMIC_ADD:
translator.IMAGE_ATOMIC(AtomicOp::Add, inst);
break;
case Opcode::IMAGE_ATOMIC_AND:
translator.IMAGE_ATOMIC(AtomicOp::And, inst);
break;
case Opcode::IMAGE_ATOMIC_OR:
translator.IMAGE_ATOMIC(AtomicOp::Or, inst);
break;
case Opcode::IMAGE_ATOMIC_XOR:
translator.IMAGE_ATOMIC(AtomicOp::Xor, inst);
break;
case Opcode::IMAGE_ATOMIC_UMAX:
translator.IMAGE_ATOMIC(AtomicOp::Umax, inst);
break;
case Opcode::IMAGE_ATOMIC_SMAX:
translator.IMAGE_ATOMIC(AtomicOp::Smax, inst);
break;
case Opcode::IMAGE_ATOMIC_UMIN:
translator.IMAGE_ATOMIC(AtomicOp::Umin, inst);
break;
case Opcode::IMAGE_ATOMIC_SMIN:
translator.IMAGE_ATOMIC(AtomicOp::Smin, inst);
break;
case Opcode::IMAGE_ATOMIC_INC:
translator.IMAGE_ATOMIC(AtomicOp::Inc, inst);
break;
case Opcode::IMAGE_ATOMIC_DEC:
translator.IMAGE_ATOMIC(AtomicOp::Dec, inst);
break;
case Opcode::IMAGE_GET_LOD:
translator.IMAGE_GET_LOD(inst);
break;
@ -457,9 +509,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CMP_NGT_F32:
translator.V_CMP_F32(ConditionOp::LE, false, inst);
break;
case Opcode::V_CMP_NGE_F32:
translator.V_CMP_F32(ConditionOp::LT, false, inst);
break;
case Opcode::S_CMP_LT_U32:
translator.S_CMP(ConditionOp::LT, false, inst);
break;
case Opcode::S_CMP_LE_U32:
translator.S_CMP(ConditionOp::LE, false, inst);
break;
case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst);
break;
@ -487,6 +545,12 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_X:
translator.BUFFER_LOAD_FORMAT(1, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XY:
translator.BUFFER_LOAD_FORMAT(2, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
translator.BUFFER_LOAD_FORMAT(3, true, inst);
break;
@ -581,6 +645,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CVT_I32_F32:
translator.V_CVT_I32_F32(inst);
break;
case Opcode::V_CVT_FLR_I32_F32:
translator.V_CVT_FLR_I32_F32(inst);
break;
case Opcode::V_SUBREV_F32:
translator.V_SUBREV_F32(inst);
break;
@ -715,6 +782,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
translator.V_MAD_I32_I24(inst);
break;
case Opcode::V_MUL_I32_I24:
case Opcode::V_MUL_U32_U24:
translator.V_MUL_I32_I24(inst);
break;
case Opcode::V_SUB_I32:
@ -771,6 +839,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CMP_NE_U64:
translator.V_CMP_NE_U64(inst);
break;
case Opcode::V_CMP_CLASS_F32:
translator.V_CMP_CLASS_F32(inst);
break;
case Opcode::V_TRUNC_F32:
translator.V_TRUNC_F32(inst);
break;
@ -786,7 +857,11 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_ADD_U32:
translator.S_ADD_U32(inst);
break;
case Opcode::S_ADDC_U32:
translator.S_ADDC_U32(inst);
break;
case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32:
translator.S_SUB_U32(inst);
break;
// TODO: Separate implementation for legacy variants.
@ -809,9 +884,30 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_GET_RESINFO:
translator.IMAGE_GET_RESINFO(inst);
break;
case Opcode::S_BARRIER:
translator.S_BARRIER();
break;
case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
break;
case Opcode::DS_READ_B32:
translator.DS_READ(32, false, false, inst);
break;
case Opcode::DS_READ2_B32:
translator.DS_READ(32, false, true, inst);
break;
case Opcode::DS_WRITE_B32:
translator.DS_WRITE(32, false, false, inst);
break;
case Opcode::DS_WRITE2_B32:
translator.DS_WRITE(32, false, true, inst);
break;
case Opcode::V_READFIRSTLANE_B32:
translator.V_READFIRSTLANE_B32(inst);
break;
case Opcode::S_GETPC_B64:
translator.S_GETPC_B64(block_base, inst);
break;
case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:

View file

@ -26,6 +26,25 @@ enum class ConditionOp : u32 {
TRU,
};
enum class AtomicOp : u32 {
Swap,
CmpSwap,
Add,
Sub,
Smin,
Umin,
Smax,
Umax,
And,
Or,
Xor,
Inc,
Dec,
FCmpSwap,
Fmin,
Fmax,
};
enum class NegateMode : u32 {
None,
Src1,
@ -61,6 +80,8 @@ public:
void S_BREV_B32(const GcnInst& inst);
void S_ADD_U32(const GcnInst& inst);
void S_SUB_U32(const GcnInst& inst);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_ADDC_U32(const GcnInst& inst);
// Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@ -133,6 +154,9 @@ public:
void V_NOT_B32(const GcnInst& inst);
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
void V_BFREV_B32(const GcnInst& inst);
void V_LDEXP_F32(const GcnInst& inst);
void V_CVT_FLR_I32_F32(const GcnInst& inst);
void V_CMP_CLASS_F32(const GcnInst& inst);
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
@ -145,6 +169,8 @@ public:
void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void V_READFIRSTLANE_B32(const GcnInst& inst);
void S_BARRIER();
// MIMG
void IMAGE_GET_RESINFO(const GcnInst& inst);
@ -153,6 +179,7 @@ public:
void IMAGE_STORE(const GcnInst& inst);
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
void IMAGE_GET_LOD(const GcnInst& inst);
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
// Export
void EXP(const GcnInst& inst);
@ -167,6 +194,6 @@ private:
static std::array<bool, IR::NumScalarRegs> exec_contexts;
};
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);
} // namespace Shader::Gcn

View file

@ -28,7 +28,8 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
void Translator::V_CVT_F32_F16(const GcnInst& inst) {
const IR::U32 src0 = GetSrc(inst.src[0]);
SetDst(inst.dst[0], ir.ConvertUToF(32, 16, src0));
const IR::U16 src0l = ir.UConvert(16, src0);
SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
}
void Translator::V_MUL_F32(const GcnInst& inst) {
@ -50,11 +51,14 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
};
const bool has_flt_source =
is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field);
const IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source);
if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) {
src1 = ir.BitCast<IR::F32, IR::U32>(src1);
}
if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) {
src0 = ir.BitCast<IR::F32, IR::U32>(src0);
}
const IR::Value result = ir.Select(flag, src1, src0);
ir.SetVectorReg(dst_reg, IR::U32F32{result});
}
@ -502,4 +506,19 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(src0));
}
void Translator::V_LDEXP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
}
void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0)));
}
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
UNREACHABLE();
}
} // namespace Shader::Gcn

View file

@ -212,10 +212,15 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
ASSERT(mimg.dmask == 0xF);
const IR::Value value = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(data_reg), ir.GetVectorReg<IR::F32>(data_reg + 1),
ir.GetVectorReg<IR::F32>(data_reg + 2), ir.GetVectorReg<IR::F32>(data_reg + 3));
boost::container::static_vector<IR::F32, 4> comps;
for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) {
comps.push_back(ir.Imm32(0.f));
continue;
}
comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
}
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
ir.ImageWrite(handle, body, value, {});
}
@ -245,7 +250,10 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
}
const IR::Value value = ir.LoadBuffer(num_dwords, ir.GetScalarReg(sharp), address, info);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::F32{value});
@ -304,7 +312,10 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
break;
}
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
ir.StoreBuffer(num_dwords, handle, address, value, info);
}
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
@ -322,4 +333,48 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
}
void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg val_reg{inst.dst[0].code};
IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
const IR::Value value = ir.GetVectorReg(val_reg);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body =
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
const IR::Value prev = [&] {
switch (op) {
case AtomicOp::Swap:
return ir.ImageAtomicExchange(handle, body, value, {});
case AtomicOp::Add:
return ir.ImageAtomicIAdd(handle, body, value, {});
case AtomicOp::Smin:
return ir.ImageAtomicIMin(handle, body, value, true, {});
case AtomicOp::Umin:
return ir.ImageAtomicUMin(handle, body, value, {});
case AtomicOp::Smax:
return ir.ImageAtomicIMax(handle, body, value, true, {});
case AtomicOp::Umax:
return ir.ImageAtomicUMax(handle, body, value, {});
case AtomicOp::And:
return ir.ImageAtomicAnd(handle, body, value, {});
case AtomicOp::Or:
return ir.ImageAtomicOr(handle, body, value, {});
case AtomicOp::Xor:
return ir.ImageAtomicXor(handle, body, value, {});
case AtomicOp::Inc:
return ir.ImageAtomicInc(handle, body, value, {});
case AtomicOp::Dec:
return ir.ImageAtomicDec(handle, body, value, {});
default:
UNREACHABLE();
}
}();
if (mimg.glc) {
ir.SetVectorReg(val_reg, IR::U32{prev});
}
}
} // namespace Shader::Gcn

View file

@ -4,8 +4,8 @@
#pragma once
#include <fmt/format.h>
#include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/exception.h"
namespace Shader::IR {
@ -88,10 +88,10 @@ constexpr size_t NumParams = 32;
[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
const int result{static_cast<int>(attr) + num};
if (result > static_cast<int>(Attribute::Param31)) {
throw LogicError("Overflow on register arithmetic");
UNREACHABLE_MSG("Overflow on register arithmetic");
}
if (result < static_cast<int>(Attribute::RenderTarget0)) {
throw LogicError("Underflow on register arithmetic");
UNREACHABLE_MSG("Underflow on register arithmetic");
}
return static_cast<Attribute>(result);
}

View file

@ -39,10 +39,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
void Block::AddBranch(Block* block) {
if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
throw LogicError("Successor already inserted");
UNREACHABLE_MSG("Successor already inserted");
}
if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
throw LogicError("Predecessor already inserted");
UNREACHABLE_MSG("Predecessor already inserted");
}
imm_successors.push_back(block);
block->imm_predecessors.push_back(this);

View file

@ -115,6 +115,18 @@ void IREmitter::Discard() {
Inst(Opcode::Discard);
}
void IREmitter::Barrier() {
Inst(Opcode::Barrier);
}
void IREmitter::WorkgroupMemoryBarrier() {
Inst(Opcode::WorkgroupMemoryBarrier);
}
void IREmitter::DeviceMemoryBarrier() {
Inst(Opcode::DeviceMemoryBarrier);
}
U32 IREmitter::GetUserData(IR::ScalarReg reg) {
return Inst<U32>(Opcode::GetUserData, reg);
}
@ -200,6 +212,10 @@ U1 IREmitter::GetVcc() {
return Inst<U1>(Opcode::GetVcc);
}
U32 IREmitter::GetSccLo() {
return Inst<U32>(Opcode::GetSccLo);
}
U32 IREmitter::GetVccLo() {
return Inst<U32>(Opcode::GetVccLo);
}
@ -220,6 +236,10 @@ void IREmitter::SetVcc(const U1& value) {
Inst(Opcode::SetVcc, value);
}
void IREmitter::SetSccLo(const U32& value) {
Inst(Opcode::SetSccLo, value);
}
void IREmitter::SetVccLo(const U32& value) {
Inst(Opcode::SetVccLo, value);
}
@ -240,22 +260,25 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
}
U32U64 IREmitter::ReadShared(int bit_size, bool is_signed, const U32& offset) {
/*switch (bit_size) {
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
switch (bit_size) {
case 8:
return Inst<U32>(is_signed ? Opcode::ReadSharedS8 : Opcode::ReadSharedU8, offset);
return Inst<U32>(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
case 16:
return Inst<U32>(is_signed ? Opcode::ReadSharedS16 : Opcode::ReadSharedU16, offset);
return Inst<U32>(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
case 32:
return Inst<U32>(Opcode::ReadSharedU32, offset);
return Inst<U32>(Opcode::LoadSharedU32, offset);
case 64:
return Inst<U64>(Opcode::ReadSharedU64, offset);
return Inst<U64>(Opcode::LoadSharedU64, offset);
case 128:
return Inst(Opcode::LoadSharedU128, offset);
default:
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
}
UNREACHABLE_MSG("Invalid bit size {}", bit_size);*/
}
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
/*switch (bit_size) {
switch (bit_size) {
case 8:
Inst(Opcode::WriteSharedU8, offset, value);
break;
@ -268,9 +291,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
case 64:
Inst(Opcode::WriteSharedU64, offset, value);
break;
case 128:
Inst(Opcode::WriteSharedU128, offset, value);
break;
default:
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
}*/
}
}
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
@ -603,6 +629,10 @@ F32 IREmitter::FPExp2(const F32& value) {
return Inst<F32>(Opcode::FPExp2, value);
}
F32 IREmitter::FPLdexp(const F32& value, const U32& exp) {
return Inst<F32>(Opcode::FPLdexp, value, exp);
}
F32 IREmitter::FPLog2(const F32& value) {
return Inst<F32>(Opcode::FPLog2, value);
}
@ -810,6 +840,17 @@ U1 IREmitter::FPIsNan(const F32F64& value) {
}
}
U1 IREmitter::FPIsInf(const F32F64& value) {
switch (value.Type()) {
case Type::F32:
return Inst<U1>(Opcode::FPIsInf32, value);
case Type::F64:
return Inst<U1>(Opcode::FPIsInf64, value);
default:
ThrowInvalidType(value.Type());
}
}
U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
@ -866,6 +907,18 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
}
}
Value IREmitter::IAddCary(const U32& a, const U32& b) {
if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
}
switch (a.Type()) {
case Type::U32:
return Inst<U32>(Opcode::IAddCary32, a, b);
default:
ThrowInvalidType(a.Type());
}
}
U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
@ -1142,6 +1195,13 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s
}
U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
switch (result_bitsize) {
case 16:
switch (value.Type()) {
case Type::U32:
return Inst<U16>(Opcode::ConvertU16U32, value);
}
}
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
}
@ -1163,6 +1223,73 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
}
Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicIAdd32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicSMin32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicUMin32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
bool is_signed, TextureInstInfo info) {
return is_signed ? ImageAtomicSMin(handle, coords, value, info)
: ImageAtomicUMin(handle, coords, value, info);
}
Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicSMax32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
bool is_signed, TextureInstInfo info) {
return is_signed ? ImageAtomicSMax(handle, coords, value, info)
: ImageAtomicUMax(handle, coords, value, info);
}
Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicInc32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicDec32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicAnd32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicOr32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicXor32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
const Value& offset, const F32& lod_clamp,
TextureInstInfo info) {

View file

@ -43,6 +43,10 @@ public:
void Epilogue();
void Discard();
void Barrier();
void WorkgroupMemoryBarrier();
void DeviceMemoryBarrier();
[[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
[[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);
@ -60,11 +64,13 @@ public:
[[nodiscard]] U1 GetScc();
[[nodiscard]] U1 GetExec();
[[nodiscard]] U1 GetVcc();
[[nodiscard]] U32 GetSccLo();
[[nodiscard]] U32 GetVccLo();
[[nodiscard]] U32 GetVccHi();
void SetScc(const U1& value);
void SetExec(const U1& value);
void SetVcc(const U1& value);
void SetSccLo(const U32& value);
void SetVccLo(const U32& value);
void SetVccHi(const U32& value);
@ -74,7 +80,7 @@ public:
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
@ -120,6 +126,7 @@ public:
[[nodiscard]] F32 FPSin(const F32& value);
[[nodiscard]] F32 FPExp2(const F32& value);
[[nodiscard]] F32 FPLog2(const F32& value);
[[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
[[nodiscard]] F32F64 FPRecip(const F32F64& value);
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
[[nodiscard]] F32 FPSqrt(const F32& value);
@ -139,14 +146,16 @@ public:
[[nodiscard]] U1 FPLessThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPIsNan(const F32F64& value);
[[nodiscard]] U1 FPIsInf(const F32F64& value);
[[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
[[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32U64 INeg(const U32U64& value);
@ -199,6 +208,33 @@ public:
[[nodiscard]] U16U32U64 UConvert(size_t result_bitsize, const U16U32U64& value);
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
[[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
const Value& value, bool is_signed, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
const Value& value, bool is_signed, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
const F32& bias, const Value& offset,
const F32& lod_clamp, TextureInstInfo info);

View file

@ -40,6 +40,9 @@ Inst::~Inst() {
bool Inst::MayHaveSideEffects() const noexcept {
switch (op) {
case Opcode::Barrier:
case Opcode::WorkgroupMemoryBarrier:
case Opcode::DeviceMemoryBarrier:
case Opcode::ConditionRef:
case Opcode::Reference:
case Opcode::PhiMove:
@ -52,7 +55,23 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::StoreBufferF32x3:
case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferU32:
case Opcode::WriteSharedU128:
case Opcode::WriteSharedU64:
case Opcode::WriteSharedU32:
case Opcode::WriteSharedU16:
case Opcode::WriteSharedU8:
case Opcode::ImageWrite:
case Opcode::ImageAtomicIAdd32:
case Opcode::ImageAtomicSMin32:
case Opcode::ImageAtomicUMin32:
case Opcode::ImageAtomicSMax32:
case Opcode::ImageAtomicUMax32:
case Opcode::ImageAtomicInc32:
case Opcode::ImageAtomicDec32:
case Opcode::ImageAtomicAnd32:
case Opcode::ImageAtomicOr32:
case Opcode::ImageAtomicXor32:
case Opcode::ImageAtomicExchange32:
return true;
default:
return false;
@ -61,7 +80,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
bool Inst::AreAllArgsImmediates() const {
if (op == Opcode::Phi) {
throw LogicError("Testing for all arguments are immediates on phi instruction");
UNREACHABLE_MSG("Testing for all arguments are immediates on phi instruction");
}
return std::all_of(args.begin(), args.begin() + NumArgs(),
[](const IR::Value& value) { return value.IsImmediate(); });
@ -91,7 +110,7 @@ void Inst::SetArg(size_t index, Value value) {
Block* Inst::PhiBlock(size_t index) const {
if (op != Opcode::Phi) {
throw LogicError("{} is not a Phi instruction", op);
UNREACHABLE_MSG("{} is not a Phi instruction", op);
}
if (index >= phi_args.size()) {
throw InvalidArgument("Out of bounds argument index {} in phi instruction");
@ -143,7 +162,7 @@ void Inst::ReplaceUsesWith(Value replacement) {
void Inst::ReplaceOpcode(IR::Opcode opcode) {
if (opcode == IR::Opcode::Phi) {
throw LogicError("Cannot transition into Phi");
UNREACHABLE_MSG("Cannot transition into Phi");
}
if (op == Opcode::Phi) {
// Transition out of phi arguments into non-phi

View file

@ -19,6 +19,25 @@ OPCODE(ReadConst, U32, U32x
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
// Barriers
OPCODE(Barrier, Void, )
OPCODE(WorkgroupMemoryBarrier, Void, )
OPCODE(DeviceMemoryBarrier, Void, )
// Shared memory operations
OPCODE(LoadSharedU8, U32, U32, )
OPCODE(LoadSharedS8, U32, U32, )
OPCODE(LoadSharedU16, U32, U32, )
OPCODE(LoadSharedS16, U32, U32, )
OPCODE(LoadSharedU32, U32, U32, )
OPCODE(LoadSharedU64, U32x2, U32, )
OPCODE(LoadSharedU128, U32x4, U32, )
OPCODE(WriteSharedU8, Void, U32, U32, )
OPCODE(WriteSharedU16, Void, U32, U32, )
OPCODE(WriteSharedU32, Void, U32, U32, )
OPCODE(WriteSharedU64, Void, U32, U32x2, )
OPCODE(WriteSharedU128, Void, U32, U32x4, )
// Context getters/setters
OPCODE(GetUserData, U32, ScalarReg, )
OPCODE(GetThreadBitScalarReg, U1, ScalarReg, )
@ -37,11 +56,13 @@ OPCODE(SetAttribute, Void, Attr
OPCODE(GetScc, U1, Void, )
OPCODE(GetExec, U1, Void, )
OPCODE(GetVcc, U1, Void, )
OPCODE(GetSccLo, U32, Void, )
OPCODE(GetVccLo, U32, Void, )
OPCODE(GetVccHi, U32, Void, )
OPCODE(SetScc, Void, U1, )
OPCODE(SetExec, Void, U1, )
OPCODE(SetVcc, Void, U1, )
OPCODE(SetSccLo, Void, U32, )
OPCODE(SetVccLo, Void, U32, )
OPCODE(SetVccHi, Void, U32, )
@ -148,6 +169,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
OPCODE(FPSqrt, F32, F32, )
OPCODE(FPSin, F32, F32, )
OPCODE(FPExp2, F32, F32, )
OPCODE(FPLdexp, F32, F32, U32, )
OPCODE(FPCos, F32, F32, )
OPCODE(FPLog2, F32, F32, )
OPCODE(FPSaturate32, F32, F32, )
@ -190,10 +212,13 @@ OPCODE(FPUnordGreaterThanEqual32, U1, F32,
OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
OPCODE(FPIsNan32, U1, F32, )
OPCODE(FPIsNan64, U1, F64, )
OPCODE(FPIsInf32, U1, F32, )
OPCODE(FPIsInf64, U1, F64, )
// Integer operations
OPCODE(IAdd32, U32, U32, U32, )
OPCODE(IAdd64, U64, U64, U64, )
OPCODE(IAddCary32, U32x2, U32, U32, )
OPCODE(ISub32, U32, U32, U32, )
OPCODE(ISub64, U64, U64, U64, )
OPCODE(IMul32, U32, U32, U32, )
@ -258,6 +283,7 @@ OPCODE(ConvertF32U32, F32, U32,
OPCODE(ConvertF64S32, F64, U32, )
OPCODE(ConvertF64U32, F64, U32, )
OPCODE(ConvertF32U16, F32, U16, )
OPCODE(ConvertU16U32, U16, U32, )
// Image operations
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
@ -273,6 +299,19 @@ OPCODE(ImageGradient, F32x4, Opaq
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
// Image atomic operations
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
// Warp operations
OPCODE(LaneId, U32, )
OPCODE(QuadShuffle, U32, U32, U32 )

View file

@ -324,8 +324,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::BitFieldUExtract:
FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
base, shift, count);
UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
base, shift, count);
}
return (base >> shift) & ((1U << count) - 1);
});
@ -336,8 +336,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
const size_t left_shift{32 - back_shift};
const size_t right_shift{static_cast<size_t>(32 - count)};
if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
base, shift, count);
UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
base, shift, count);
}
return static_cast<u32>((base << left_shift) >> right_shift);
});
@ -345,8 +345,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::BitFieldInsert:
FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
if (bits >= 32 || offset >= 32) {
throw LogicError("Undefined result in {}({}, {}, {}, {})",
IR::Opcode::BitFieldInsert, base, insert, offset, bits);
UNREACHABLE_MSG("Undefined result in {}({}, {}, {}, {})",
IR::Opcode::BitFieldInsert, base, insert, offset, bits);
}
return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
});

View file

@ -89,6 +89,17 @@ bool IsImageInstruction(const IR::Inst& inst) {
case IR::Opcode::ImageGradient:
case IR::Opcode::ImageRead:
case IR::Opcode::ImageWrite:
case IR::Opcode::ImageAtomicIAdd32:
case IR::Opcode::ImageAtomicSMin32:
case IR::Opcode::ImageAtomicUMin32:
case IR::Opcode::ImageAtomicSMax32:
case IR::Opcode::ImageAtomicUMax32:
case IR::Opcode::ImageAtomicInc32:
case IR::Opcode::ImageAtomicDec32:
case IR::Opcode::ImageAtomicAnd32:
case IR::Opcode::ImageAtomicOr32:
case IR::Opcode::ImageAtomicXor32:
case IR::Opcode::ImageAtomicExchange32:
return true;
default:
return false;
@ -99,6 +110,17 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageWrite:
case IR::Opcode::ImageRead:
case IR::Opcode::ImageAtomicIAdd32:
case IR::Opcode::ImageAtomicSMin32:
case IR::Opcode::ImageAtomicUMin32:
case IR::Opcode::ImageAtomicSMax32:
case IR::Opcode::ImageAtomicUMax32:
case IR::Opcode::ImageAtomicInc32:
case IR::Opcode::ImageAtomicDec32:
case IR::Opcode::ImageAtomicAnd32:
case IR::Opcode::ImageAtomicOr32:
case IR::Opcode::ImageAtomicXor32:
case IR::Opcode::ImageAtomicExchange32:
return true;
default:
return false;
@ -115,7 +137,8 @@ public:
u32 Add(const BufferResource& desc) {
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
return desc.sgpr_base == existing.sgpr_base &&
desc.dword_offset == existing.dword_offset;
desc.dword_offset == existing.dword_offset &&
desc.inline_cbuf == existing.inline_cbuf;
})};
auto& buffer = buffer_resources[index];
ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
@ -196,20 +219,70 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
};
}
static constexpr size_t MaxUboSize = 65536;
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
AmdGpu::Buffer& cbuf) {
// Assuming V# is in UD s[32:35]
// The next pattern:
// s_getpc_b64 s[32:33]
// s_add_u32 s32, <const>, s32
// s_addc_u32 s33, 0, s33
// s_mov_b32 s35, <const>
// s_movk_i32 s34, <const>
// buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
// is used to define an inline constant buffer
IR::Inst* handle = inst.Arg(0).InstRecursive();
IR::Inst* p0 = handle->Arg(0).InstRecursive();
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
!p0->Arg(1).IsImmediate()) {
return -1;
}
IR::Inst* p1 = handle->Arg(1).InstRecursive();
if (p1->GetOpcode() != IR::Opcode::IAdd32) {
return -1;
}
if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
return -1;
}
// We have found this pattern. Build the sharp.
std::array<u64, 2> buffer;
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32;
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
// Assign a binding to this sharp.
return descriptors.Add(BufferResource{
.sgpr_base = std::numeric_limits<u32>::max(),
.dword_offset = 0,
.stride = cbuf.GetStride(),
.num_records = u32(cbuf.num_records),
.used_types = BufferDataType(inst),
.inline_cbuf = cbuf,
.is_storage = IsBufferStore(inst) || cbuf.GetSize() > MaxUboSize,
});
}
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors) {
static constexpr size_t MaxUboSize = 65536;
IR::Inst* producer = inst.Arg(0).InstRecursive();
const auto sharp = TrackSharp(producer);
const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
const u32 binding = descriptors.Add(BufferResource{
.sgpr_base = sharp.sgpr_base,
.dword_offset = sharp.dword_offset,
.stride = buffer.GetStride(),
.num_records = u32(buffer.num_records),
.used_types = BufferDataType(inst),
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
});
s32 binding{};
AmdGpu::Buffer buffer;
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
IR::Inst* handle = inst.Arg(0).InstRecursive();
IR::Inst* producer = handle->Arg(0).InstRecursive();
const auto sharp = TrackSharp(producer);
buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
binding = descriptors.Add(BufferResource{
.sgpr_base = sharp.sgpr_base,
.dword_offset = sharp.dword_offset,
.stride = buffer.GetStride(),
.num_records = u32(buffer.num_records),
.used_types = BufferDataType(inst),
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
});
}
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
// Replace handle with binding index in buffer resource list.
@ -217,7 +290,10 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
if (inst_info.is_typed) {
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
(inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32));
}
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {

View file

@ -16,6 +16,16 @@ void Visit(Info& info, IR::Inst& inst) {
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break;
}
case IR::Opcode::LoadSharedS8:
case IR::Opcode::LoadSharedU8:
case IR::Opcode::WriteSharedU8:
info.uses_shared_u8 = true;
break;
case IR::Opcode::LoadSharedS16:
case IR::Opcode::LoadSharedU16:
case IR::Opcode::WriteSharedU16:
info.uses_shared_u16 = true;
break;
case IR::Opcode::QuadShuffle:
info.uses_group_quad = true;
break;

View file

@ -32,6 +32,7 @@ struct SccFlagTag : FlagTag {};
struct ExecFlagTag : FlagTag {};
struct VccFlagTag : FlagTag {};
struct VccLoTag : FlagTag {};
struct SccLoTag : FlagTag {};
struct VccHiTag : FlagTag {};
struct GotoVariable : FlagTag {
@ -44,7 +45,7 @@ struct GotoVariable : FlagTag {
};
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
VccFlagTag, VccLoTag, VccHiTag>;
VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
struct DefTable {
@ -83,6 +84,13 @@ struct DefTable {
exec_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, SccLoTag) {
return scc_lo_flag[block];
}
void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
scc_lo_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, VccLoTag) {
return vcc_lo_flag[block];
}
@ -108,6 +116,7 @@ struct DefTable {
ValueMap scc_flag;
ValueMap exec_flag;
ValueMap vcc_flag;
ValueMap scc_lo_flag;
ValueMap vcc_lo_flag;
ValueMap vcc_hi_flag;
};
@ -124,6 +133,10 @@ IR::Opcode UndefOpcode(const VccLoTag&) noexcept {
return IR::Opcode::UndefU32;
}
IR::Opcode UndefOpcode(const SccLoTag&) noexcept {
return IR::Opcode::UndefU32;
}
IR::Opcode UndefOpcode(const VccHiTag&) noexcept {
return IR::Opcode::UndefU32;
}
@ -321,6 +334,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::SetVcc:
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
break;
case IR::Opcode::SetSccLo:
pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
break;
case IR::Opcode::SetVccLo:
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
break;
@ -350,6 +366,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::GetVcc:
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
break;
case IR::Opcode::GetSccLo:
inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
break;
case IR::Opcode::GetVccLo:
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
break;

View file

@ -14,7 +14,7 @@ BlockList PostOrder(const AbstractSyntaxNode& root) {
BlockList post_order_blocks;
if (root.type != AbstractSyntaxNode::Type::Block) {
throw LogicError("First node in abstract syntax list root is not a block");
UNREACHABLE_MSG("First node in abstract syntax list root is not a block");
}
Block* const first_block{root.data.block};
visited.insert(first_block);

View file

@ -3,9 +3,9 @@
#pragma once
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/types.h"
#include "shader_recompiler/exception.h"
#include "video_core/amdgpu/pixel_format.h"
namespace Shader::IR {
@ -428,10 +428,10 @@ template <RegT Reg>
[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
const int result{static_cast<int>(reg) + num};
if (result >= static_cast<int>(Reg::Max)) {
throw LogicError("Overflow on register arithmetic");
UNREACHABLE_MSG("Overflow on register arithmetic");
}
if (result < 0) {
throw LogicError("Underflow on register arithmetic");
UNREACHABLE_MSG("Underflow on register arithmetic");
}
return static_cast<Reg>(result);
}

View file

@ -83,7 +83,7 @@ bool Value::operator==(const Value& other) const {
case Type::F64x4:
break;
}
throw LogicError("Invalid type {}", type);
UNREACHABLE_MSG("Invalid type {}", type);
}
bool Value::operator!=(const Value& other) const {

View file

@ -26,44 +26,9 @@ struct Profile {
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
bool support_explicit_workgroup_layout{};
bool support_vote{};
bool support_viewport_mask{};
bool support_typeless_image_loads{};
bool support_derivative_control{};
bool support_geometry_shader_passthrough{};
bool support_native_ndc{};
bool support_scaled_attributes{};
bool support_multi_viewport{};
bool support_geometry_streams{};
bool warp_size_potentially_larger_than_guest{};
bool lower_left_origin_mode{};
/// Fragment outputs have to be declared even if they are not written to avoid undefined values.
/// See Ori and the Blind Forest's main menu for reference.
bool need_declared_frag_colors{};
/// Prevents fast math optimizations that may cause inaccuracies
bool need_fastmath_off{};
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{};
/// The Position builtin needs to be wrapped in a struct when used as an input
bool has_broken_spirv_position_input{};
/// Offset image operands with an unsigned type do not work
bool has_broken_unsigned_image_offsets{};
/// Signed instructions with unsigned data types are misinterpreted
bool has_broken_signed_operations{};
/// Float controls break when fp16 is enabled
bool has_broken_fp16_float_controls{};
/// Ignores SPIR-V ordered vs unordered using GLSL semantics
bool ignore_nan_fp_comparisons{};
/// Maxwell and earlier nVidia architectures have broken robust support
bool has_broken_robust{};
bool lower_left_origin_mode{};
u64 min_ssbo_alignment{};
u32 max_user_clip_distances{};
};
} // namespace Shader

View file

@ -4,7 +4,6 @@
#pragma once
#include <span>
#include <vector>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
@ -42,15 +41,45 @@ enum class TextureType : u32 {
};
constexpr u32 NUM_TEXTURE_TYPES = 7;
enum class VsOutput : u32 {
None,
PointSprite,
EdgeFlag,
KillFlag,
GsCutFlag,
GsMrtIndex,
GsVpIndex,
CullDist0,
CullDist1,
CullDist2,
CullDist3,
CullDist4,
CullDist5,
CullDist6,
CullDist7,
ClipDist0,
ClipDist1,
ClipDist2,
ClipDist3,
ClipDist4,
ClipDist5,
ClipDist6,
ClipDist7,
};
using VsOutputMap = std::array<VsOutput, 4>;
struct Info;
struct BufferResource {
u32 sgpr_base;
u32 dword_offset;
u32 stride;
u32 num_records;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
bool is_storage;
auto operator<=>(const BufferResource&) const = default;
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
};
using BufferResourceList = boost::container::static_vector<BufferResource, 16>;
@ -123,6 +152,7 @@ struct Info {
};
AttributeFlags loads{};
AttributeFlags stores{};
boost::container::static_vector<VsOutputMap, 3> vs_outputs;
BufferResourceList buffers;
ImageResourceList images;
@ -134,7 +164,12 @@ struct Info {
std::span<const u32> user_data;
Stage stage;
uintptr_t pgm_base{};
u64 pgm_hash{};
u32 shared_memory_size{};
bool uses_group_quad{};
bool uses_shared_u8{};
bool uses_shared_u16{};
bool translation_failed{}; // indicates that shader has unsupported instructions
template <typename T>
@ -149,6 +184,10 @@ struct Info {
}
};
constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexcept {
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
}
} // namespace Shader
template <>

View file

@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
break;
}
case PM4ItOpcode::IndexBufferSize: {
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
regs.num_indices = index_size->num_indices;
break;
}
case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
break;

View file

@ -85,14 +85,14 @@ struct Liverpool {
} settings;
UserData user_data;
template <typename T = u8>
const T* Address() const {
template <typename T = u8*>
const T Address() const {
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
return reinterpret_cast<const T*>(addr);
return reinterpret_cast<const T>(addr);
}
std::span<const u32> Code() const {
const u32* code = Address<u32>();
const u32* code = Address<u32*>();
BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
@ -121,20 +121,26 @@ struct Liverpool {
BitField<0, 6, u64> num_vgprs;
BitField<6, 4, u64> num_sgprs;
BitField<33, 5, u64> num_user_regs;
BitField<47, 9, u64> lds_dwords;
} settings;
INSERT_PADDING_WORDS(1);
u32 resource_limits;
INSERT_PADDING_WORDS(0x2A);
UserData user_data;
template <typename T = u8>
const T* Address() const {
template <typename T = u8*>
const T Address() const {
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
return reinterpret_cast<const T*>(addr);
return reinterpret_cast<const T>(addr);
}
u32 SharedMemSize() const noexcept {
// lds_dwords is in units of 128 dwords. We return bytes.
return settings.lds_dwords.Value() * 128 * 4;
}
std::span<const u32> Code() const {
const u32* code = Address<u32>();
const u32* code = Address<u32*>();
BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
@ -144,7 +150,7 @@ struct Liverpool {
template <typename Shader>
static constexpr auto* GetBinaryInfo(const Shader& sh) {
const auto* code = sh.template Address<u32>();
const auto* code = sh.template Address<u32*>();
const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
return bininfo;
@ -208,6 +214,10 @@ struct Liverpool {
BitField<18, 1, u32> use_vtx_render_target_idx;
BitField<19, 1, u32> use_vtx_viewport_idx;
BitField<20, 1, u32> use_vtx_kill_flag;
BitField<21, 1, u32> vs_out_misc_enable;
BitField<22, 1, u32> vs_out_ccdist0_enable;
BitField<23, 1, u32> vs_out_ccdist1_enable;
BitField<25, 1, u32> use_vtx_gs_cut_flag;
bool IsClipDistEnabled(u32 index) const {
return (clip_distance_enable.Value() >> index) & 1;
@ -469,7 +479,7 @@ struct Liverpool {
template <typename T = VAddr>
T Address() const {
return reinterpret_cast<T>(base_addr_lo | u64(base_addr_hi) << 32);
return reinterpret_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
}
};
@ -1021,6 +1031,7 @@ static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);

View file

@ -549,8 +549,8 @@ struct PM4DumpConstRam {
u32 addr_hi;
template <typename T>
T* Address() const {
return reinterpret_cast<T*>((u64(addr_hi) << 32u) | addr_lo);
T Address() const {
return reinterpret_cast<T>((u64(addr_hi) << 32u) | addr_lo);
}
[[nodiscard]] u32 Offset() const {
@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase {
u32 addr_hi;
};
struct PM4CmdDrawIndexBufferSize {
PM4Type3Header header;
u32 num_indices;
};
struct PM4CmdIndirectBuffer {
PM4Type3Header header;
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned

View file

@ -21,32 +21,45 @@ enum class CompSwizzle : u32 {
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
struct Buffer {
union {
BitField<0, 44, u64> base_address;
BitField<48, 14, u64> stride;
BitField<62, 1, u64> cache_swizzle;
BitField<63, 1, u64> swizzle_enable;
};
u64 base_address : 44;
u64 : 4;
u64 stride : 14;
u64 cache_swizzle : 1;
u64 swizzle_enable : 1;
u32 num_records;
union {
BitField<0, 3, u32> dst_sel_x;
BitField<3, 3, u32> dst_sel_y;
BitField<6, 3, u32> dst_sel_z;
BitField<9, 3, u32> dst_sel_w;
BitField<0, 12, u32> dst_sel;
BitField<12, 3, NumberFormat> num_format;
BitField<15, 4, DataFormat> data_format;
BitField<19, 2, u32> element_size;
BitField<21, 2, u32> index_stride;
BitField<23, 1, u32> add_tid_enable;
};
u32 dst_sel_x : 3;
u32 dst_sel_y : 3;
u32 dst_sel_z : 3;
u32 dst_sel_w : 3;
u32 num_format : 3;
u32 data_format : 4;
u32 element_size : 2;
u32 index_stride : 2;
u32 add_tid_enable : 1;
operator bool() const noexcept {
return base_address != 0;
}
bool operator==(const Buffer& other) const noexcept {
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
}
CompSwizzle GetSwizzle(u32 comp) const noexcept {
return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7);
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
return static_cast<CompSwizzle>(select[comp]);
}
NumberFormat GetNumberFmt() const noexcept {
return static_cast<NumberFormat>(num_format);
}
DataFormat GetDataFmt() const noexcept {
return static_cast<DataFormat>(data_format);
}
u32 GetStride() const noexcept {
return stride == 0 ? 1U : stride.Value();
return stride == 0 ? 1U : stride;
}
u32 GetStrideElements(u32 element_size) const noexcept {
@ -61,6 +74,7 @@ struct Buffer {
return GetStride() * num_records;
}
};
static_assert(sizeof(Buffer) == 16); // 128bits
enum class ImageType : u64 {
Buffer = 0,

View file

@ -392,6 +392,36 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR16G16Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eR16G16B16A16Snorm;
}
if (data_format == AmdGpu::DataFormat::Format32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32Uint;
}
if (data_format == AmdGpu::DataFormat::Format4_4_4_4 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR4G4B4A4UnormPack16;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR16G16B16A16Uint;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32A32Uint;
}
if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR8Sint;
}
if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eBc1RgbaSrgbBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR16G16Sint;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}

View file

@ -14,8 +14,8 @@ namespace Vulkan {
ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
vk::PipelineCache pipeline_cache, const Shader::Info* info_,
vk::ShaderModule module)
: instance{instance_}, scheduler{scheduler_}, info{*info_} {
u64 compute_key_, vk::ShaderModule module)
: instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{*info_} {
const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute,
.module = module,
@ -85,15 +85,15 @@ ComputePipeline::~ComputePipeline() = default;
bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures.
boost::container::static_vector<vk::DescriptorBufferInfo, 8> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
u32 binding{};
for (const auto& buffer : info.buffers) {
const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
const auto vsharp = buffer.GetVsharp(info);
const u32 size = vsharp.GetSize();
const VAddr address = vsharp.base_address.Value();
const VAddr address = vsharp.base_address;
texture_cache.OnCpuWrite(address);
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()

View file

@ -24,7 +24,7 @@ class ComputePipeline {
public:
explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
vk::PipelineCache pipeline_cache, const Shader::Info* info,
vk::ShaderModule module);
u64 compute_key, vk::ShaderModule module);
~ComputePipeline();
[[nodiscard]] vk::Pipeline Handle() const noexcept {
@ -40,6 +40,7 @@ private:
vk::UniquePipeline pipeline;
vk::UniquePipelineLayout pipeline_layout;
vk::UniqueDescriptorSetLayout desc_layout;
u64 compute_key;
Shader::Info info{};
};

View file

@ -47,7 +47,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0,
});
bindings.push_back({
@ -326,8 +326,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
for (const auto& stage : stages) {
for (const auto& buffer : stage.buffers) {
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
const VAddr address = vsharp.base_address.Value();
const auto vsharp = buffer.GetVsharp(stage);
const VAddr address = vsharp.base_address;
const u32 size = vsharp.GetSize();
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()
@ -419,8 +419,7 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
continue;
}
guest_buffers.emplace_back(buffer);
ranges.emplace_back(buffer.base_address.Value(),
buffer.base_address.Value() + buffer.GetSize());
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
}
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
return lhv.base_address < rhv.base_address;

View file

@ -74,12 +74,12 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
available_extensions = GetSupportedExtensions(physical_device);
properties = physical_device.getProperties();
CollectDeviceParameters();
ASSERT_MSG(properties.apiVersion >= TargetVulkanApiVersion,
"Vulkan {}.{} is required, but only {}.{} is supported by device!",
VK_VERSION_MAJOR(TargetVulkanApiVersion), VK_VERSION_MINOR(TargetVulkanApiVersion),
VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion));
CollectDeviceParameters();
CreateDevice();
CollectToolingInfo();
}
@ -156,6 +156,7 @@ bool Instance::CreateDevice() {
add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
// The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
@ -208,12 +209,14 @@ bool Instance::CreateDevice() {
.shaderImageGatherExtended = true,
.shaderStorageImageMultisample = true,
.shaderClipDistance = features.shaderClipDistance,
.shaderInt16 = true,
},
},
vk::PhysicalDeviceVulkan11Features{
.shaderDrawParameters = true,
},
vk::PhysicalDeviceVulkan12Features{
.shaderFloat16 = true,
.scalarBlockLayout = true,
.uniformBufferStandardLayout = true,
.hostQueryReset = true,
@ -237,7 +240,12 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceDepthClipControlFeaturesEXT{
.depthClipControl = true,
},
};
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{
.workgroupMemoryExplicitLayout = true,
.workgroupMemoryExplicitLayoutScalarBlockLayout = true,
.workgroupMemoryExplicitLayout8BitAccess = true,
.workgroupMemoryExplicitLayout16BitAccess = true,
}};
if (!color_write_en) {
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();

View file

@ -18,6 +18,52 @@ extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
namespace Vulkan {
using Shader::VsOutput;
void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
w != VsOutput::None) {
info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
}
};
// VS_OUT_MISC_VEC
add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
ctl.use_vtx_edge_flag
? VsOutput::EdgeFlag
: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
ctl.use_vtx_kill_flag
? VsOutput::KillFlag
: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
// VS_OUT_CCDIST0
add_output(ctl.IsClipDistEnabled(0)
? VsOutput::ClipDist0
: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
ctl.IsClipDistEnabled(1)
? VsOutput::ClipDist1
: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
ctl.IsClipDistEnabled(2)
? VsOutput::ClipDist2
: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
ctl.IsClipDistEnabled(3)
? VsOutput::ClipDist3
: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
// VS_OUT_CCDIST1
add_output(ctl.IsClipDistEnabled(4)
? VsOutput::ClipDist4
: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
ctl.IsClipDistEnabled(5)
? VsOutput::ClipDist5
: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
ctl.IsClipDistEnabled(6)
? VsOutput::ClipDist6
: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
ctl.IsClipDistEnabled(7)
? VsOutput::ClipDist7
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
}
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{};
@ -26,6 +72,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
switch (stage) {
case Shader::Stage::Vertex: {
info.num_user_data = regs.vs_program.settings.num_user_regs;
BuildVsOutputs(info, regs.vs_output_control);
break;
}
case Shader::Stage::Fragment: {
@ -45,6 +92,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
info.num_user_data = cs_pgm.settings.num_user_regs;
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
cs_pgm.num_thread_z.full};
info.shared_memory_size = cs_pgm.SharedMemSize();
break;
}
default:
@ -60,6 +108,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
profile = Shader::Profile{
.supported_spirv = 0x00010600U,
.support_explicit_workgroup_layout = true,
};
}
@ -153,7 +202,7 @@ void PipelineCache::RefreshGraphicsKey() {
for (u32 i = 0; i < MaxShaderStages; i++) {
auto* pgm = regs.ProgramForStage(i);
if (!pgm || !pgm->Address<u32>()) {
if (!pgm || !pgm->Address<u32*>()) {
key.stage_hashes[i] = 0;
continue;
}
@ -209,7 +258,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
// Recompile shader to IR.
try {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
info.pgm_base = pgm->Address<uintptr_t>();
info.pgm_hash = hash;
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V
@ -247,8 +298,9 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
// Recompile shader to IR.
try {
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
const Shader::Info info =
Shader::Info info =
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
info.pgm_base = cs_pgm.Address<uintptr_t>();
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V
@ -258,8 +310,11 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
}
const auto module = CompileSPV(spv_code, instance.GetDevice());
// Set module name to hash in renderdoc
const auto name = fmt::format("cs_{:#x}", compute_key);
Vulkan::SetObjectName(instance.GetDevice(), module, name);
return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
&program.info, module);
&program.info, compute_key, module);
} catch (const Shader::Exception& e) {
UNREACHABLE_MSG("{}", e.what());
return nullptr;

View file

@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 512_MB, BufferType::Upload} {
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 3_GB, BufferType::Upload} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
@ -44,11 +44,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
return;
}
UpdateDynamicState(*pipeline);
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
try {
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
} catch (...) {
UNREACHABLE();
}
BeginRendering();
UpdateDynamicState(*pipeline);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
if (is_indexed) {
@ -71,9 +74,14 @@ void Rasterizer::DispatchDirect() {
return;
}
const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
if (!has_resources) {
return;
try {
const auto has_resources =
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
if (!has_resources) {
return;
}
} catch (...) {
UNREACHABLE();
}
scheduler.EndRendering();
@ -163,7 +171,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
// Upload index data to stream buffer.
const auto index_address = regs.index_base_address.Address<const void*>();
const u32 index_buffer_size = regs.num_indices * index_size;
const u32 index_buffer_size = (index_offset + regs.num_indices) * index_size;
const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
std::memcpy(data, index_address, index_buffer_size);
vertex_index_buffer.Commit(index_buffer_size);

View file

@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
// scheduler.Wait(watch.tick);
scheduler.Wait(watch.tick);
++wait_cursor;
}
}

View file

@ -321,12 +321,15 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
// Copy to the image.
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil
? vk::ImageAspectFlagBits::eDepth
: aspect_mask;
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = info.pitch,
.bufferImageHeight = info.size.height,
.imageSubresource{
.aspectMask = aspect_mask,
.aspectMask = aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,

View file

@ -77,7 +77,6 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
if (usage_override) {
usage_ci.usage = usage_override.value();
}
// When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
vk::Format format = info.format;
vk::ImageAspectFlags aspect = image.aspect_mask;

View file

@ -134,13 +134,13 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
RegisterImage(image_id);
} else {
image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0];
image_id = image_ids[0];
}
RegisterMeta(info, image_id);
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::CpuModified)) {
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
RefreshImage(image);
TrackImage(image, image_id);
}
@ -193,7 +193,7 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
const ImageId image_id = FindImage(info, buffer.Address(), false);
const ImageId image_id = FindImage(info, buffer.Address());
Image& image = slot_images[image_id];
image.flags &= ~ImageFlagBits::CpuModified;

View file

@ -179,19 +179,19 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR8Unorm:
return vk::Format::eR8Uint;
case vk::Format::eR8G8Unorm:
case vk::Format::eR16Sfloat:
return vk::Format::eR8G8Uint;
case vk::Format::eR8G8B8A8Srgb:
[[fallthrough]];
case vk::Format::eB8G8R8A8Srgb:
[[fallthrough]];
case vk::Format::eB8G8R8A8Unorm:
[[fallthrough]];
case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eR32Sfloat:
case vk::Format::eR32Uint:
return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eR32G32Sfloat:
return vk::Format::eR32G32Uint;
case vk::Format::eBc3SrgbBlock:
[[fallthrough]];
case vk::Format::eBc3UnormBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock: