mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2024-12-29 11:06:07 +00:00
shader_recompiler: Small instruction parsing refactor/bugfixes (#340)
* translator: Implement f32 to f16 convert * shader_recompiler: Add bit instructions * shader_recompiler: More data share instructions * shader_recompiler: Remove exec contexts, fix S_MOV_B64 * shader_recompiler: Split instruction parsing into categories * shader_recompiler: Better BFS search * shader_recompiler: Constant propagation pass for cmp_class_f32 * shader_recompiler: Partial readfirstlane implementation * shader_recompiler: Stub readlane/writelane only for non-compute * hack: Fix swizzle on RDR * Will properly fix this when merging this * clang format * address_space: Bump user area size to full * shader_recompiler: V_INTERP_MOV_F32 * Should work the same as spirv will emit flat decoration on demand * kernel: Add MAP_OP_MAP_FLEXIBLE * image_view: Attempt to apply storage swizzle on format * vk_scheduler: Barrier attachments on renderpass end * clang format * liverpool: cs state backup * shader_recompiler: More instructions and formats * vector_alu: Proper V_MBCNT_U32_B32 * shader_recompiler: Port some dark souls things * file_system: Implement sceKernelRename * more formats * clang format * resource_tracking_pass: Back to assert * translate: Tracedata * kernel: Remove tracy lock * Solves random crashes in Dark Souls * code: Review comments
This commit is contained in:
parent
ac6dc20c3b
commit
a7c9bfa5c5
|
@ -15,7 +15,7 @@ static u32 screenWidth = 1280;
|
|||
static u32 screenHeight = 720;
|
||||
static s32 gpuId = -1; // Vulkan physical device index. Set to negative for auto select
|
||||
static std::string logFilter;
|
||||
static std::string logType = "sync";
|
||||
static std::string logType = "async";
|
||||
static bool isDebugDump = false;
|
||||
static bool isLibc = true;
|
||||
static bool isShowSplash = false;
|
||||
|
|
|
@ -207,8 +207,8 @@ public:
|
|||
message_queue.EmplaceWait(entry);
|
||||
} else {
|
||||
ForEachBackend([&entry](auto& backend) { backend.Write(entry); });
|
||||
std::fflush(stdout);
|
||||
}
|
||||
std::fflush(stdout);
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
|
@ -34,10 +34,7 @@ constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL;
|
|||
|
||||
static constexpr size_t SystemManagedSize = SYSTEM_MANAGED_MAX - SYSTEM_MANAGED_MIN + 1;
|
||||
static constexpr size_t SystemReservedSize = SYSTEM_RESERVED_MAX - SYSTEM_RESERVED_MIN + 1;
|
||||
// User area size is normally larger than this. However games are unlikely to map to high
|
||||
// regions of that area, so by default we allocate a smaller virtual address space (about 1/4th).
|
||||
// to save space on page tables.
|
||||
static constexpr size_t UserSize = 1ULL << 39;
|
||||
static constexpr size_t UserSize = 1ULL << 40;
|
||||
|
||||
/**
|
||||
* Represents the user virtual address space backed by a dmem memory block
|
||||
|
|
|
@ -70,7 +70,7 @@ std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory)
|
|||
// exist in filesystem but in different case.
|
||||
auto guest_path = current_path;
|
||||
while (!path_parts.empty()) {
|
||||
const auto& part = path_parts.back();
|
||||
const auto part = path_parts.back();
|
||||
const auto add_match = [&](const auto& host_part) {
|
||||
current_path /= host_part;
|
||||
guest_path /= part;
|
||||
|
|
|
@ -957,7 +957,7 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
LOG_DEBUG(Lib_GnmDriver, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -472,6 +472,28 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {
|
|||
return file->f.WriteRaw<u8>(buf, nbytes);
|
||||
}
|
||||
|
||||
/**
 * Renames (moves) the entry at guest path `from` to guest path `to`.
 *
 * Both guest paths are translated to host paths through the MntPoints singleton.
 *
 * @param from Guest path of the existing file or directory.
 * @param to   Guest path the entry should be moved to.
 * @return ORBIS_OK on success, otherwise
 *         ORBIS_KERNEL_ERROR_ENOENT    if the source does not exist,
 *         ORBIS_KERNEL_ERROR_ENOTDIR   if the source is a directory and the destination exists
 *                                      but is not a directory,
 *         ORBIS_KERNEL_ERROR_EISDIR    if the source is not a directory and the destination is,
 *         ORBIS_KERNEL_ERROR_ENOTEMPTY if the destination is a non-empty directory.
 */
s32 PS4_SYSV_ABI sceKernelRename(const char* from, const char* to) {
    namespace fs = std::filesystem;

    auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
    const auto src_path = mnt->GetHostPath(from);
    if (!fs::exists(src_path)) {
        return ORBIS_KERNEL_ERROR_ENOENT;
    }

    const auto dst_path = mnt->GetHostPath(to);
    // Type conflicts can only occur with a destination that already exists. Renaming a
    // directory to a path that does not exist yet is a valid operation.
    if (fs::exists(dst_path)) {
        const bool src_is_dir = fs::is_directory(src_path);
        const bool dst_is_dir = fs::is_directory(dst_path);
        if (src_is_dir && !dst_is_dir) {
            return ORBIS_KERNEL_ERROR_ENOTDIR;
        }
        if (!src_is_dir && dst_is_dir) {
            return ORBIS_KERNEL_ERROR_EISDIR;
        }
        if (dst_is_dir && !fs::is_empty(dst_path)) {
            return ORBIS_KERNEL_ERROR_ENOTEMPTY;
        }
    }

    // A rename must not leave the source behind, so move the entry instead of copying it.
    std::error_code ec;
    fs::rename(src_path, dst_path, ec);
    if (ec) {
        // The host could not move the entry directly (for example across volumes, or onto an
        // existing empty directory on some platforms). Fall back to a full copy followed by
        // removal of the source. These calls throw on failure, as the original copy did.
        fs::copy(src_path, dst_path,
                 fs::copy_options::overwrite_existing | fs::copy_options::recursive);
        fs::remove_all(src_path);
    }
    return ORBIS_OK;
}
|
||||
|
||||
void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
||||
std::srand(std::time(nullptr));
|
||||
LIB_FUNCTION("1G3lF1Gg1k8", "libkernel", 1, "libkernel", 1, 1, sceKernelOpen);
|
||||
|
@ -493,6 +515,7 @@ void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
|||
LIB_FUNCTION("kBwCPsYX-m4", "libkernel", 1, "libkernel", 1, 1, sceKernelFStat);
|
||||
LIB_FUNCTION("mqQMh1zPPT8", "libScePosix", 1, "libkernel", 1, 1, posix_fstat);
|
||||
LIB_FUNCTION("VW3TVZiM4-E", "libkernel", 1, "libkernel", 1, 1, sceKernelFtruncate);
|
||||
LIB_FUNCTION("52NcYU9+lEo", "libkernel", 1, "libkernel", 1, 1, sceKernelRename);
|
||||
|
||||
LIB_FUNCTION("E6ao34wPw+U", "libScePosix", 1, "libkernel", 1, 1, posix_stat);
|
||||
LIB_FUNCTION("+r3rMFwItV4", "libkernel", 1, "libkernel", 1, 1, sceKernelPread);
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <boost/asio/io_context.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/singleton.h"
|
||||
|
@ -84,6 +85,9 @@ static PS4_SYSV_ABI void stack_chk_fail() {
|
|||
|
||||
int PS4_SYSV_ABI sceKernelMunmap(void* addr, size_t len) {
|
||||
LOG_INFO(Kernel_Vmm, "addr = {}, len = {:#x}", fmt::ptr(addr), len);
|
||||
if (len == 0) {
|
||||
return ORBIS_OK;
|
||||
}
|
||||
auto* memory = Core::Memory::Instance();
|
||||
memory->UnmapMemory(std::bit_cast<VAddr>(addr), len);
|
||||
return SCE_OK;
|
||||
|
|
|
@ -262,6 +262,16 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn
|
|||
LOG_INFO(Kernel_Vmm, "BatchMap: entry = {}, operation = {}, len = {:#x}, result = {}",
|
||||
i, entries[i].operation, entries[i].length, result);
|
||||
|
||||
if (result == 0)
|
||||
processed++;
|
||||
} else if (entries[i].operation == MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE) {
|
||||
result = sceKernelMapNamedFlexibleMemory(&entries[i].start, entries[i].length,
|
||||
entries[i].protection, flags, "");
|
||||
LOG_INFO(Kernel_Vmm,
|
||||
"BatchMap: entry = {}, operation = {}, len = {:#x}, type = {}, "
|
||||
"result = {}",
|
||||
i, entries[i].operation, entries[i].length, (u8)entries[i].type, result);
|
||||
|
||||
if (result == 0)
|
||||
processed++;
|
||||
} else {
|
||||
|
|
|
@ -439,11 +439,7 @@ int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMut
|
|||
|
||||
int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr);
|
||||
|
||||
static auto mutex_loc = MUTEX_LOCATION("mutex");
|
||||
(*mutex)->tracy_lock = std::make_unique<tracy::LockableCtx>(&mutex_loc);
|
||||
|
||||
if (name != nullptr) {
|
||||
(*mutex)->tracy_lock->CustomName(name, std::strlen(name));
|
||||
LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
|
||||
}
|
||||
|
||||
|
@ -555,15 +551,11 @@ int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) {
|
|||
return SCE_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
||||
(*mutex)->tracy_lock->BeforeLock();
|
||||
|
||||
int result = pthread_mutex_lock(&(*mutex)->pth_mutex);
|
||||
if (result != 0) {
|
||||
LOG_TRACE(Kernel_Pthread, "Locked name={}, result={}", (*mutex)->name, result);
|
||||
}
|
||||
|
||||
(*mutex)->tracy_lock->AfterLock();
|
||||
|
||||
switch (result) {
|
||||
case 0:
|
||||
return SCE_OK;
|
||||
|
@ -589,8 +581,6 @@ int PS4_SYSV_ABI scePthreadMutexUnlock(ScePthreadMutex* mutex) {
|
|||
LOG_TRACE(Kernel_Pthread, "Unlocking name={}, result={}", (*mutex)->name, result);
|
||||
}
|
||||
|
||||
(*mutex)->tracy_lock->AfterUnlock();
|
||||
|
||||
switch (result) {
|
||||
case 0:
|
||||
return SCE_OK;
|
||||
|
@ -1195,8 +1185,6 @@ int PS4_SYSV_ABI scePthreadMutexTrylock(ScePthreadMutex* mutex) {
|
|||
LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
|
||||
}
|
||||
|
||||
(*mutex)->tracy_lock->AfterTryLock(result == 0);
|
||||
|
||||
switch (result) {
|
||||
case 0:
|
||||
return ORBIS_OK;
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include <vector>
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
#include "common/debug.h"
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Core::Loader {
|
||||
|
@ -74,7 +73,6 @@ struct PthreadMutexInternal {
|
|||
u8 reserved[256];
|
||||
std::string name;
|
||||
pthread_mutex_t pth_mutex;
|
||||
std::unique_ptr<tracy::LockableCtx> tracy_lock;
|
||||
};
|
||||
|
||||
struct PthreadMutexattrInternal {
|
||||
|
|
|
@ -559,7 +559,7 @@ int PS4_SYSV_ABI sceNetEpollDestroy() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI sceNetEpollWait() {
|
||||
LOG_ERROR(Lib_Net, "(STUBBED) called");
|
||||
LOG_TRACE(Lib_Net, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ int PS4_SYSV_ABI sceNetCtlUnregisterCallbackV6() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI sceNetCtlCheckCallback() {
|
||||
LOG_ERROR(Lib_NetCtl, "(STUBBED) called");
|
||||
LOG_TRACE(Lib_NetCtl, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -870,7 +870,7 @@ int PS4_SYSV_ABI sceNpAsmTerminate() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI sceNpCheckCallback() {
|
||||
LOG_ERROR(Lib_NpManager, "(STUBBED) called");
|
||||
LOG_TRACE(Lib_NpManager, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
@ -3510,4 +3510,4 @@ void RegisterlibSceNpManager(Core::Loader::SymbolsResolver* sym) {
|
|||
sceNpUnregisterStateCallbackForToolkit);
|
||||
};
|
||||
|
||||
} // namespace Libraries::NpManager
|
||||
} // namespace Libraries::NpManager
|
||||
|
|
|
@ -183,6 +183,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
ctx.AddCapability(spv::Capability::Float16);
|
||||
ctx.AddCapability(spv::Capability::Int16);
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::Int64);
|
||||
if (info.has_storage_images) {
|
||||
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
|
||||
}
|
||||
|
@ -204,8 +205,8 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
} else {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniform);
|
||||
if (info.uses_group_quad) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniform);
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
||||
}
|
||||
if (info.has_discard) {
|
||||
|
@ -217,9 +218,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
if (info.has_image_query) {
|
||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||
}
|
||||
// if (program.info.stores_frag_depth) {
|
||||
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
// }
|
||||
if (info.stores.Get(IR::Attribute::Depth)) {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Stage {}", u32(program.info.stage));
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
void EmitBitCastU16F16(EmitContext&) {
|
||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
||||
Id EmitBitCastU16F16(EmitContext& ctx, Id value) {
|
||||
return ctx.OpBitcast(ctx.U16, value);
|
||||
}
|
||||
|
||||
Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
|
||||
|
|
|
@ -120,6 +120,7 @@ void EmitGetGotoVariable(EmitContext&) {
|
|||
}
|
||||
|
||||
Id EmitReadConst(EmitContext& ctx) {
|
||||
return ctx.u32_zero_value;
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
|
@ -149,6 +150,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
// Attribute is disabled or varying component is not written
|
||||
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
||||
}
|
||||
if (param.is_default) {
|
||||
return ctx.OpCompositeExtract(param.component_type, param.id, comp);
|
||||
}
|
||||
|
||||
if (param.num_components > 1) {
|
||||
const Id pointer{
|
||||
|
@ -208,7 +212,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
|
||||
const Id pointer{OutputAttrPointer(ctx, attr, element)};
|
||||
ctx.OpStore(pointer, value);
|
||||
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
|
||||
}
|
||||
|
||||
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
|
|
|
@ -259,4 +259,8 @@ Id EmitConvertU16U32(EmitContext& ctx, Id value) {
|
|||
return ctx.OpUConvert(ctx.U16, value);
|
||||
}
|
||||
|
||||
/// Converts `value` to the context's scalar U32 type using OpUConvert.
Id EmitConvertU32U16(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpUConvert(result_type, value);
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -385,4 +385,8 @@ Id EmitFPIsInf64(EmitContext& ctx, Id value) {
|
|||
return ctx.OpIsInf(ctx.U1[1], value);
|
||||
}
|
||||
|
||||
void EmitFPCmpClass32(EmitContext&) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -70,7 +70,6 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id o
|
|||
const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
|
||||
ImageOperands operands;
|
||||
operands.Add(spv::ImageOperandsMask::Offset, offset);
|
||||
operands.Add(spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
|
||||
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask,
|
||||
operands.operands);
|
||||
}
|
||||
|
@ -106,8 +105,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
|
|||
const auto type = ctx.info.images[handle & 0xFFFF].type;
|
||||
const Id zero = ctx.u32_zero_value;
|
||||
const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }};
|
||||
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa &&
|
||||
type != AmdGpu::ImageType::Buffer};
|
||||
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa};
|
||||
const auto query{[&](Id type) {
|
||||
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
|
||||
: ctx.OpImageQuerySize(type, image);
|
||||
|
|
|
@ -42,6 +42,7 @@ void EmitSetVcc(EmitContext& ctx);
|
|||
void EmitSetSccLo(EmitContext& ctx);
|
||||
void EmitSetVccLo(EmitContext& ctx);
|
||||
void EmitSetVccHi(EmitContext& ctx);
|
||||
void EmitFPCmpClass32(EmitContext& ctx);
|
||||
void EmitPrologue(EmitContext& ctx);
|
||||
void EmitEpilogue(EmitContext& ctx);
|
||||
void EmitDiscard(EmitContext& ctx);
|
||||
|
@ -148,7 +149,7 @@ Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
|
|||
Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
|
||||
Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
|
||||
Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
|
||||
void EmitBitCastU16F16(EmitContext& ctx);
|
||||
Id EmitBitCastU16F16(EmitContext& ctx, Id value);
|
||||
Id EmitBitCastU32F32(EmitContext& ctx, Id value);
|
||||
void EmitBitCastU64F64(EmitContext& ctx);
|
||||
Id EmitBitCastF16U16(EmitContext& ctx, Id value);
|
||||
|
@ -282,6 +283,7 @@ Id EmitBitCount32(EmitContext& ctx, Id value);
|
|||
Id EmitBitwiseNot32(EmitContext& ctx, Id value);
|
||||
Id EmitFindSMsb32(EmitContext& ctx, Id value);
|
||||
Id EmitFindUMsb32(EmitContext& ctx, Id value);
|
||||
Id EmitFindILsb32(EmitContext& ctx, Id value);
|
||||
Id EmitSMin32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitUMin32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitSMax32(EmitContext& ctx, Id a, Id b);
|
||||
|
@ -353,6 +355,7 @@ Id EmitConvertF64U16(EmitContext& ctx, Id value);
|
|||
Id EmitConvertF64U32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF64U64(EmitContext& ctx, Id value);
|
||||
Id EmitConvertU16U32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertU32U16(EmitContext& ctx, Id value);
|
||||
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
Id offset);
|
||||
|
@ -387,6 +390,7 @@ Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
|||
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
|
||||
Id EmitLaneId(EmitContext& ctx);
|
||||
Id EmitWarpId(EmitContext& ctx);
|
||||
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -198,6 +198,10 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) {
|
|||
return ctx.OpFindUMsb(ctx.U32[1], value);
|
||||
}
|
||||
|
||||
/// Emits OpFindILsb on `value`, producing a result of the context's scalar U32 type.
Id EmitFindILsb32(EmitContext& ctx, Id value) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpFindILsb(result_type, value);
}
|
||||
|
||||
Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
|
||||
return ctx.OpSMin(ctx.U32[1], a, b);
|
||||
}
|
||||
|
|
|
@ -10,6 +10,10 @@ Id SubgroupScope(EmitContext& ctx) {
|
|||
return ctx.ConstU32(static_cast<u32>(spv::Scope::Subgroup));
|
||||
}
|
||||
|
||||
/// Loads the current value of the context's `subgroup_id` variable as a scalar U32.
Id EmitWarpId(EmitContext& ctx) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpLoad(result_type, ctx.subgroup_id);
}
|
||||
|
||||
/// Loads the current value of the context's `subgroup_local_invocation_id` variable as a
/// scalar U32.
Id EmitLaneId(EmitContext& ctx) {
    const Id result_type{ctx.U32[1]};
    return ctx.OpLoad(result_type, ctx.subgroup_local_invocation_id);
}
|
||||
|
|
|
@ -49,7 +49,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
|
|||
DefineInterfaces(program);
|
||||
DefineBuffers(info);
|
||||
DefineImagesAndSamplers(info);
|
||||
DefineSharedMemory(info);
|
||||
DefineSharedMemory();
|
||||
}
|
||||
|
||||
EmitContext::~EmitContext() = default;
|
||||
|
@ -86,6 +86,7 @@ void EmitContext::DefineArithmeticTypes() {
|
|||
F32[1] = Name(TypeFloat(32), "f32_id");
|
||||
S32[1] = Name(TypeSInt(32), "i32_id");
|
||||
U32[1] = Name(TypeUInt(32), "u32_id");
|
||||
U64 = Name(TypeUInt(64), "u64_id");
|
||||
|
||||
for (u32 i = 2; i <= 4; i++) {
|
||||
if (info.uses_fp16) {
|
||||
|
@ -126,6 +127,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
|||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
return ctx.F32[4];
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return ctx.S32[4];
|
||||
|
@ -146,6 +148,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
return {id, input_f32, F32[1], 4};
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return {id, input_u32, U32[1], 4};
|
||||
|
@ -204,7 +207,9 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
: 1;
|
||||
// Note that we pass index rather than Id
|
||||
input_params[input.binding] = {
|
||||
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
|
||||
rate_idx, input_u32,
|
||||
U32[1], input.num_components,
|
||||
false, input.instance_data_buf,
|
||||
};
|
||||
} else {
|
||||
Id id{DefineInput(type, input.binding)};
|
||||
|
@ -220,19 +225,18 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
if (info.uses_group_quad) {
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
}
|
||||
subgroup_id = DefineVariable(U32[1], spv::BuiltIn::SubgroupId, spv::StorageClass::Input);
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
for (const auto& input : info.ps_inputs) {
|
||||
const u32 semantic = input.param_index;
|
||||
if (input.is_default) {
|
||||
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), input_f32,
|
||||
F32[1]};
|
||||
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1],
|
||||
F32[1], 4, true};
|
||||
continue;
|
||||
}
|
||||
const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
|
||||
|
@ -392,7 +396,16 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
|||
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
||||
return spv::ImageFormat::Rgba8ui;
|
||||
}
|
||||
UNREACHABLE();
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format10_11_11 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::R11fG11fB10f;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::Rgba32f;
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown storage format data_format={}, num_format={}", image.GetDataFmt(),
|
||||
image.GetNumberFmt());
|
||||
}
|
||||
|
||||
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
||||
|
@ -412,8 +425,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
|
||||
case AmdGpu::ImageType::Cube:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format);
|
||||
case AmdGpu::ImageType::Buffer:
|
||||
throw NotImplementedException("Image buffer");
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -471,10 +482,14 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineSharedMemory(const Info& info) {
|
||||
if (info.shared_memory_size == 0) {
|
||||
void EmitContext::DefineSharedMemory() {
|
||||
static constexpr size_t DefaultSharedMemSize = 16_KB;
|
||||
if (!info.uses_shared) {
|
||||
return;
|
||||
}
|
||||
if (info.shared_memory_size == 0) {
|
||||
info.shared_memory_size = DefaultSharedMemSize;
|
||||
}
|
||||
const auto make{[&](Id element_type, u32 element_size) {
|
||||
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
|
||||
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
|
||||
|
|
|
@ -180,6 +180,7 @@ public:
|
|||
|
||||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
Id subgroup_id{};
|
||||
Id subgroup_local_invocation_id{};
|
||||
Id image_u32{};
|
||||
|
||||
|
@ -219,6 +220,7 @@ public:
|
|||
Id pointer_type;
|
||||
Id component_type;
|
||||
u32 num_components;
|
||||
bool is_default{};
|
||||
s32 buffer_handle{-1};
|
||||
};
|
||||
std::array<SpirvAttribute, 32> input_params{};
|
||||
|
@ -231,7 +233,7 @@ private:
|
|||
void DefineOutputs(const Info& info);
|
||||
void DefineBuffers(const Info& info);
|
||||
void DefineImagesAndSamplers(const Info& info);
|
||||
void DefineSharedMemory(const Info& info);
|
||||
void DefineSharedMemory();
|
||||
|
||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
||||
};
|
||||
|
|
|
@ -1479,7 +1479,7 @@ constexpr std::array<InstFormat, 455> InstructionFormatVOP3 = {{
|
|||
{InstClass::VectorFpGraph32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
|
||||
ScalarType::Float32},
|
||||
// 337 = V_MIN3_F32
|
||||
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
|
||||
{InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
|
||||
ScalarType::Float32},
|
||||
// 338 = V_MIN3_I32
|
||||
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32,
|
||||
|
@ -1488,7 +1488,7 @@ constexpr std::array<InstFormat, 455> InstructionFormatVOP3 = {{
|
|||
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 340 = V_MAX3_F32
|
||||
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
|
||||
{InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
|
||||
ScalarType::Float32},
|
||||
// 341 = V_MAX3_I32
|
||||
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32,
|
||||
|
@ -1497,7 +1497,7 @@ constexpr std::array<InstFormat, 455> InstructionFormatVOP3 = {{
|
|||
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32,
|
||||
ScalarType::Uint32},
|
||||
// 343 = V_MED3_F32
|
||||
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
|
||||
{InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
|
||||
ScalarType::Float32},
|
||||
// 344 = V_MED3_I32
|
||||
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32,
|
||||
|
@ -2779,11 +2779,9 @@ constexpr std::array<InstFormat, 256> InstructionFormatDS = {{
|
|||
// 60 = DS_READ_U16
|
||||
{InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32},
|
||||
// 61 = DS_CONSUME
|
||||
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32},
|
||||
// 62 = DS_APPEND
|
||||
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32},
|
||||
// 63 = DS_ORDERED_COUNT
|
||||
{InstClass::GdsOrdCnt, InstCategory::DataShare, 3, 1, ScalarType::Undefined,
|
||||
ScalarType::Undefined},
|
||||
|
|
|
@ -76,11 +76,11 @@ struct SMRD {
|
|||
};
|
||||
|
||||
struct InstControlSOPK {
|
||||
BitField<0, 16, u32> simm;
|
||||
s16 simm;
|
||||
};
|
||||
|
||||
struct InstControlSOPP {
|
||||
BitField<0, 16, u32> simm;
|
||||
s16 simm;
|
||||
};
|
||||
|
||||
struct InstControlVOP3 {
|
||||
|
|
|
@ -600,13 +600,13 @@ public:
|
|||
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
|
||||
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
|
||||
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
|
||||
Info& info_)
|
||||
Info& info_, const Profile& profile_)
|
||||
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
|
||||
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} {
|
||||
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} {
|
||||
Visit(root_stmt, nullptr, nullptr);
|
||||
|
||||
IR::Block& first_block{*syntax_list.front().data.block};
|
||||
Translator{&first_block, info}.EmitPrologue();
|
||||
Translator{&first_block, info, profile}.EmitPrologue();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -635,7 +635,7 @@ private:
|
|||
const u32 start = stmt.block->begin_index;
|
||||
const u32 size = stmt.block->end_index - start + 1;
|
||||
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
|
||||
info);
|
||||
info, profile);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -815,16 +815,18 @@ private:
|
|||
const Block dummy_flow_block{.is_dummy = true};
|
||||
std::span<const GcnInst> inst_list;
|
||||
Info& info;
|
||||
const Profile& profile;
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
CFG& cfg, Info& info) {
|
||||
CFG& cfg, Info& info, const Profile& profile) {
|
||||
ObjectPool<Statement> stmt_pool{64};
|
||||
GotoPass goto_pass{cfg, stmt_pool};
|
||||
Statement& root{goto_pass.RootStatement()};
|
||||
IR::AbstractSyntaxList syntax_list;
|
||||
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
|
||||
TranslatePass{inst_pool, block_pool, stmt_pool, root,
|
||||
syntax_list, cfg.inst_list, info, profile};
|
||||
ASSERT_MSG(!info.translation_failed, "Shader translation has failed");
|
||||
return syntax_list;
|
||||
}
|
||||
|
|
|
@ -11,12 +11,13 @@
|
|||
|
||||
namespace Shader {
|
||||
struct Info;
|
||||
}
|
||||
struct Profile;
|
||||
} // namespace Shader
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
|
||||
ObjectPool<IR::Block>& block_pool, CFG& cfg,
|
||||
Info& info);
|
||||
Info& info, const Profile& profile);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -5,6 +5,31 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::EmitDataShare(const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::DS_SWIZZLE_B32:
|
||||
return DS_SWIZZLE_B32(inst);
|
||||
case Opcode::DS_READ_B32:
|
||||
return DS_READ(32, false, false, inst);
|
||||
case Opcode::DS_READ_B64:
|
||||
return DS_READ(64, false, false, inst);
|
||||
case Opcode::DS_READ2_B32:
|
||||
return DS_READ(32, false, true, inst);
|
||||
case Opcode::DS_READ2_B64:
|
||||
return DS_READ(64, false, true, inst);
|
||||
case Opcode::DS_WRITE_B32:
|
||||
return DS_WRITE(32, false, false, inst);
|
||||
case Opcode::DS_WRITE_B64:
|
||||
return DS_WRITE(64, false, false, inst);
|
||||
case Opcode::DS_WRITE2_B32:
|
||||
return DS_WRITE(32, false, true, inst);
|
||||
case Opcode::DS_WRITE2_B64:
|
||||
return DS_WRITE(64, false, true, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
|
||||
const u8 offset0 = inst.control.ds.offset0;
|
||||
const u8 offset1 = inst.control.ds.offset1;
|
||||
|
@ -20,14 +45,25 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
|
|||
|
||||
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) {
|
||||
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
if (is_pair) {
|
||||
// Pair loads are either 32 or 64-bit. We assume 32-bit for now.
|
||||
ASSERT(bit_size == 32);
|
||||
// Pair loads are either 32 or 64-bit
|
||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
||||
ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)});
|
||||
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
|
||||
if (bit_size == 32) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{data0});
|
||||
} else {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)});
|
||||
}
|
||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
|
||||
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)});
|
||||
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
|
||||
if (bit_size == 32) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{data1});
|
||||
} else {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
|
||||
}
|
||||
} else if (bit_size == 64) {
|
||||
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr);
|
||||
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
|
||||
|
@ -43,11 +79,22 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
|
|||
const IR::VectorReg data0{inst.src[1].code};
|
||||
const IR::VectorReg data1{inst.src[2].code};
|
||||
if (is_pair) {
|
||||
ASSERT(bit_size == 32);
|
||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
|
||||
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
|
||||
if (bit_size == 32) {
|
||||
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
|
||||
} else {
|
||||
ir.WriteShared(
|
||||
64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
|
||||
addr0);
|
||||
}
|
||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
|
||||
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
|
||||
if (bit_size == 32) {
|
||||
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
|
||||
} else {
|
||||
ir.WriteShared(
|
||||
64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)),
|
||||
addr1);
|
||||
}
|
||||
} else if (bit_size == 64) {
|
||||
const IR::Value data =
|
||||
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
|
||||
|
@ -62,7 +109,18 @@ void Translator::S_BARRIER() {
|
|||
}
|
||||
|
||||
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
||||
UNREACHABLE();
|
||||
ASSERT(info.stage != Stage::Compute);
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||
}
|
||||
|
||||
void Translator::V_READLANE_B32(const GcnInst& inst) {
|
||||
ASSERT(info.stage != Stage::Compute);
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||
}
|
||||
|
||||
void Translator::V_WRITELANE_B32(const GcnInst& inst) {
|
||||
ASSERT(info.stage != Stage::Compute);
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::EXP(const GcnInst& inst) {
|
||||
void Translator::EmitExport(const GcnInst& inst) {
|
||||
if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) {
|
||||
LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation");
|
||||
ir.Discard(ir.LogicalNot(ir.GetExec()));
|
||||
|
|
|
@ -5,8 +5,102 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::EmitScalarAlu(const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_MOVK_I32:
|
||||
return S_MOVK(inst);
|
||||
case Opcode::S_MOV_B32:
|
||||
return S_MOV(inst);
|
||||
case Opcode::S_MUL_I32:
|
||||
return S_MUL_I32(inst);
|
||||
case Opcode::S_AND_SAVEEXEC_B64:
|
||||
return S_AND_SAVEEXEC_B64(inst);
|
||||
case Opcode::S_MOV_B64:
|
||||
return S_MOV_B64(inst);
|
||||
case Opcode::S_CMP_LT_U32:
|
||||
return S_CMP(ConditionOp::LT, false, inst);
|
||||
case Opcode::S_CMP_LE_U32:
|
||||
return S_CMP(ConditionOp::LE, false, inst);
|
||||
case Opcode::S_CMP_LG_U32:
|
||||
return S_CMP(ConditionOp::LG, false, inst);
|
||||
case Opcode::S_CMP_LT_I32:
|
||||
return S_CMP(ConditionOp::LT, true, inst);
|
||||
case Opcode::S_CMP_LG_I32:
|
||||
return S_CMP(ConditionOp::LG, true, inst);
|
||||
case Opcode::S_CMP_GT_I32:
|
||||
return S_CMP(ConditionOp::GT, true, inst);
|
||||
case Opcode::S_CMP_GE_I32:
|
||||
return S_CMP(ConditionOp::GE, true, inst);
|
||||
case Opcode::S_CMP_EQ_I32:
|
||||
return S_CMP(ConditionOp::EQ, true, inst);
|
||||
case Opcode::S_CMP_EQ_U32:
|
||||
return S_CMP(ConditionOp::EQ, false, inst);
|
||||
case Opcode::S_CMP_GE_U32:
|
||||
return S_CMP(ConditionOp::GE, false, inst);
|
||||
case Opcode::S_CMP_GT_U32:
|
||||
return S_CMP(ConditionOp::GT, false, inst);
|
||||
case Opcode::S_OR_B64:
|
||||
return S_OR_B64(NegateMode::None, false, inst);
|
||||
case Opcode::S_NOR_B64:
|
||||
return S_OR_B64(NegateMode::Result, false, inst);
|
||||
case Opcode::S_XOR_B64:
|
||||
return S_OR_B64(NegateMode::None, true, inst);
|
||||
case Opcode::S_ORN2_B64:
|
||||
return S_OR_B64(NegateMode::Src1, false, inst);
|
||||
case Opcode::S_AND_B64:
|
||||
return S_AND_B64(NegateMode::None, inst);
|
||||
case Opcode::S_NAND_B64:
|
||||
return S_AND_B64(NegateMode::Result, inst);
|
||||
case Opcode::S_ANDN2_B64:
|
||||
return S_AND_B64(NegateMode::Src1, inst);
|
||||
case Opcode::S_NOT_B64:
|
||||
return S_NOT_B64(inst);
|
||||
case Opcode::S_ADD_I32:
|
||||
return S_ADD_I32(inst);
|
||||
case Opcode::S_AND_B32:
|
||||
return S_AND_B32(inst);
|
||||
case Opcode::S_ASHR_I32:
|
||||
return S_ASHR_I32(inst);
|
||||
case Opcode::S_OR_B32:
|
||||
return S_OR_B32(inst);
|
||||
case Opcode::S_LSHL_B32:
|
||||
return S_LSHL_B32(inst);
|
||||
case Opcode::S_LSHR_B32:
|
||||
return S_LSHR_B32(inst);
|
||||
case Opcode::S_CSELECT_B32:
|
||||
return S_CSELECT_B32(inst);
|
||||
case Opcode::S_CSELECT_B64:
|
||||
return S_CSELECT_B64(inst);
|
||||
case Opcode::S_BFE_U32:
|
||||
return S_BFE_U32(inst);
|
||||
case Opcode::S_BFM_B32:
|
||||
return S_BFM_B32(inst);
|
||||
case Opcode::S_BREV_B32:
|
||||
return S_BREV_B32(inst);
|
||||
case Opcode::S_ADD_U32:
|
||||
return S_ADD_U32(inst);
|
||||
case Opcode::S_ADDC_U32:
|
||||
return S_ADDC_U32(inst);
|
||||
case Opcode::S_ADDK_I32:
|
||||
return S_ADDK_I32(inst);
|
||||
case Opcode::S_MULK_I32:
|
||||
return S_MULK_I32(inst);
|
||||
case Opcode::S_SUB_U32:
|
||||
case Opcode::S_SUB_I32:
|
||||
return S_SUB_U32(inst);
|
||||
case Opcode::S_MIN_U32:
|
||||
return S_MIN_U32(inst);
|
||||
case Opcode::S_MAX_U32:
|
||||
return S_MAX_U32(inst);
|
||||
case Opcode::S_WQM_B64:
|
||||
break;
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_MOVK(const GcnInst& inst) {
|
||||
const auto simm16 = inst.control.sopk.simm.Value();
|
||||
const auto simm16 = inst.control.sopk.simm;
|
||||
if (simm16 & (1 << 15)) {
|
||||
// TODO: need to verify the case of imm sign extension
|
||||
UNREACHABLE();
|
||||
|
@ -14,6 +108,16 @@ void Translator::S_MOVK(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.Imm32(simm16));
|
||||
}
|
||||
|
||||
void Translator::S_ADDK_I32(const GcnInst& inst) {
|
||||
const s32 simm16 = inst.control.sopk.simm;
|
||||
SetDst(inst.dst[0], ir.IAdd(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
|
||||
}
|
||||
|
||||
void Translator::S_MULK_I32(const GcnInst& inst) {
|
||||
const s32 simm16 = inst.control.sopk.simm;
|
||||
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
|
||||
}
|
||||
|
||||
void Translator::S_MOV(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||
}
|
||||
|
@ -62,15 +166,10 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
|||
}
|
||||
}();
|
||||
|
||||
// Mark destination SPGR as an EXEC context. This means we will use 1-bit
|
||||
// IR instruction whenever it's loaded.
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::ScalarGPR: {
|
||||
const u32 reg = inst.dst[0].code;
|
||||
exec_contexts[reg] = true;
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(reg), exec);
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), exec);
|
||||
break;
|
||||
}
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(exec);
|
||||
break;
|
||||
|
@ -79,27 +178,37 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
// Update EXEC.
|
||||
ir.SetExec(ir.LogicalAnd(exec, src));
|
||||
const IR::U1 result = ir.LogicalAnd(exec, src);
|
||||
ir.SetExec(result);
|
||||
ir.SetScc(result);
|
||||
}
|
||||
|
||||
void Translator::S_MOV_B64(const GcnInst& inst) {
|
||||
// TODO: Using VCC as EXEC context.
|
||||
if (inst.src[0].field == OperandField::VccLo || inst.dst[0].field == OperandField::VccLo) {
|
||||
return;
|
||||
}
|
||||
if (inst.dst[0].field == OperandField::ScalarGPR && inst.src[0].field == OperandField::ExecLo) {
|
||||
// Exec context push
|
||||
exec_contexts[inst.dst[0].code] = true;
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), ir.GetExec());
|
||||
} else if (inst.dst[0].field == OperandField::ExecLo &&
|
||||
inst.src[0].field == OperandField::ScalarGPR) {
|
||||
// Exec context pop
|
||||
exec_contexts[inst.src[0].code] = false;
|
||||
ir.SetExec(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)));
|
||||
} else if (inst.dst[0].field == OperandField::ExecLo &&
|
||||
inst.src[0].field == OperandField::ConstZero) {
|
||||
ir.SetExec(ir.Imm1(false));
|
||||
} else {
|
||||
const IR::U1 src = [&] {
|
||||
switch (inst.src[0].field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
||||
case OperandField::ConstZero:
|
||||
return ir.Imm1(false);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), src);
|
||||
break;
|
||||
case OperandField::ExecLo:
|
||||
ir.SetExec(src);
|
||||
break;
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(src);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
@ -338,4 +447,20 @@ void Translator::S_ADDC_U32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
|
||||
}
|
||||
|
||||
void Translator::S_MAX_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result = ir.UMax(src0, src1);
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.IEqual(result, src0));
|
||||
}
|
||||
|
||||
void Translator::S_MIN_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result = ir.UMin(src0, src1);
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.IEqual(result, src0));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -7,6 +7,29 @@ namespace Shader::Gcn {
|
|||
|
||||
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
|
||||
|
||||
void Translator::EmitScalarMemory(const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_LOAD_DWORDX4:
|
||||
return S_LOAD_DWORD(4, inst);
|
||||
case Opcode::S_LOAD_DWORDX8:
|
||||
return S_LOAD_DWORD(8, inst);
|
||||
case Opcode::S_LOAD_DWORDX16:
|
||||
return S_LOAD_DWORD(16, inst);
|
||||
case Opcode::S_BUFFER_LOAD_DWORD:
|
||||
return S_BUFFER_LOAD_DWORD(1, inst);
|
||||
case Opcode::S_BUFFER_LOAD_DWORDX2:
|
||||
return S_BUFFER_LOAD_DWORD(2, inst);
|
||||
case Opcode::S_BUFFER_LOAD_DWORDX4:
|
||||
return S_BUFFER_LOAD_DWORD(4, inst);
|
||||
case Opcode::S_BUFFER_LOAD_DWORDX8:
|
||||
return S_BUFFER_LOAD_DWORD(8, inst);
|
||||
case Opcode::S_BUFFER_LOAD_DWORDX16:
|
||||
return S_BUFFER_LOAD_DWORD(16, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||
const auto& smrd = inst.control.smrd;
|
||||
const u32 dword_offset = [&] -> u32 {
|
||||
|
|
|
@ -16,13 +16,10 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
std::array<bool, IR::NumScalarRegs> Translator::exec_contexts{};
|
||||
|
||||
Translator::Translator(IR::Block* block_, Info& info_)
|
||||
: ir{*block_, block_->begin()}, info{info_} {}
|
||||
Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_)
|
||||
: ir{*block_, block_->begin()}, info{info_}, profile{profile_} {}
|
||||
|
||||
void Translator::EmitPrologue() {
|
||||
exec_contexts.fill(false);
|
||||
ir.Prologue();
|
||||
ir.SetExec(ir.Imm1(true));
|
||||
|
||||
|
@ -97,7 +94,7 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
|||
}
|
||||
break;
|
||||
case OperandField::ConstZero:
|
||||
if (force_flt) {
|
||||
if (is_float) {
|
||||
value = ir.Imm32(0.f);
|
||||
} else {
|
||||
value = ir.Imm32(0U);
|
||||
|
@ -112,14 +109,14 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
|||
value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1);
|
||||
break;
|
||||
case OperandField::LiteralConst:
|
||||
if (force_flt) {
|
||||
if (is_float) {
|
||||
value = ir.Imm32(std::bit_cast<float>(operand.code));
|
||||
} else {
|
||||
value = ir.Imm32(operand.code);
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstFloatPos_1_0:
|
||||
if (force_flt) {
|
||||
if (is_float) {
|
||||
value = ir.Imm32(1.f);
|
||||
} else {
|
||||
value = ir.Imm32(std::bit_cast<u32>(1.f));
|
||||
|
@ -138,7 +135,11 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
|||
value = ir.Imm32(-0.5f);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_1_0:
|
||||
value = ir.Imm32(-1.0f);
|
||||
if (is_float) {
|
||||
value = ir.Imm32(-1.0f);
|
||||
} else {
|
||||
value = ir.Imm32(std::bit_cast<u32>(-1.0f));
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_2_0:
|
||||
value = ir.Imm32(-2.0f);
|
||||
|
@ -160,6 +161,8 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
|||
value = ir.GetVccHi();
|
||||
}
|
||||
break;
|
||||
case OperandField::M0:
|
||||
return m0_value;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -336,6 +339,7 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
|
|||
case OperandField::VccHi:
|
||||
return ir.SetVccHi(result);
|
||||
case OperandField::M0:
|
||||
m0_value = result;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
@ -458,712 +462,84 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info) {
|
||||
void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_BARRIER:
|
||||
return S_BARRIER();
|
||||
case Opcode::S_TTRACEDATA:
|
||||
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||
return;
|
||||
case Opcode::S_GETPC_B64:
|
||||
return S_GETPC_B64(pc, inst);
|
||||
case Opcode::S_WAITCNT:
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_ENDPGM:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
case Opcode::S_CBRANCH_SCC1:
|
||||
case Opcode::S_CBRANCH_VCCNZ:
|
||||
case Opcode::S_CBRANCH_VCCZ:
|
||||
case Opcode::S_BRANCH:
|
||||
return;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::LogMissingOpcode(const GcnInst& inst) {
|
||||
const u32 opcode = u32(inst.opcode);
|
||||
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})",
|
||||
magic_enum::enum_name(inst.opcode), u32(inst.opcode),
|
||||
magic_enum::enum_name(inst.category));
|
||||
info.translation_failed = true;
|
||||
}
|
||||
|
||||
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
|
||||
const Profile& profile) {
|
||||
if (inst_list.empty()) {
|
||||
return;
|
||||
}
|
||||
Translator translator{block, info};
|
||||
Translator translator{block, info, profile};
|
||||
for (const auto& inst : inst_list) {
|
||||
block_base += inst.length;
|
||||
switch (inst.opcode) {
|
||||
case Opcode::S_MOVK_I32:
|
||||
translator.S_MOVK(inst);
|
||||
break;
|
||||
case Opcode::S_MOV_B32:
|
||||
translator.S_MOV(inst);
|
||||
break;
|
||||
case Opcode::S_MUL_I32:
|
||||
translator.S_MUL_I32(inst);
|
||||
break;
|
||||
case Opcode::V_MAD_F32:
|
||||
translator.V_MAD_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MOV_B32:
|
||||
translator.V_MOV(inst);
|
||||
break;
|
||||
case Opcode::V_MAC_F32:
|
||||
translator.V_MAC_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MUL_F32:
|
||||
translator.V_MUL_F32(inst);
|
||||
break;
|
||||
case Opcode::V_AND_B32:
|
||||
translator.V_AND_B32(inst);
|
||||
break;
|
||||
case Opcode::V_OR_B32:
|
||||
translator.V_OR_B32(false, inst);
|
||||
break;
|
||||
case Opcode::V_XOR_B32:
|
||||
translator.V_OR_B32(true, inst);
|
||||
break;
|
||||
case Opcode::V_LSHLREV_B32:
|
||||
translator.V_LSHLREV_B32(inst);
|
||||
break;
|
||||
case Opcode::V_ADD_I32:
|
||||
translator.V_ADD_I32(inst);
|
||||
break;
|
||||
case Opcode::V_ADDC_U32:
|
||||
translator.V_ADDC_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_F32_I32:
|
||||
translator.V_CVT_F32_I32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_F32_U32:
|
||||
translator.V_CVT_F32_U32(inst);
|
||||
break;
|
||||
case Opcode::V_RCP_F32:
|
||||
translator.V_RCP_F32(inst);
|
||||
break;
|
||||
case Opcode::S_SWAPPC_B64:
|
||||
pc += inst.length;
|
||||
|
||||
// Special case for emitting fetch shader.
|
||||
if (inst.opcode == Opcode::S_SWAPPC_B64) {
|
||||
ASSERT(info.stage == Stage::Vertex);
|
||||
translator.EmitFetch(inst);
|
||||
break;
|
||||
case Opcode::S_WAITCNT:
|
||||
break;
|
||||
case Opcode::S_LOAD_DWORDX4:
|
||||
translator.S_LOAD_DWORD(4, inst);
|
||||
break;
|
||||
case Opcode::S_LOAD_DWORDX8:
|
||||
translator.S_LOAD_DWORD(8, inst);
|
||||
break;
|
||||
case Opcode::S_LOAD_DWORDX16:
|
||||
translator.S_LOAD_DWORD(16, inst);
|
||||
break;
|
||||
case Opcode::S_BUFFER_LOAD_DWORD:
|
||||
translator.S_BUFFER_LOAD_DWORD(1, inst);
|
||||
break;
|
||||
case Opcode::S_BUFFER_LOAD_DWORDX2:
|
||||
translator.S_BUFFER_LOAD_DWORD(2, inst);
|
||||
break;
|
||||
case Opcode::S_BUFFER_LOAD_DWORDX4:
|
||||
translator.S_BUFFER_LOAD_DWORD(4, inst);
|
||||
break;
|
||||
case Opcode::S_BUFFER_LOAD_DWORDX8:
|
||||
translator.S_BUFFER_LOAD_DWORD(8, inst);
|
||||
break;
|
||||
case Opcode::S_BUFFER_LOAD_DWORDX16:
|
||||
translator.S_BUFFER_LOAD_DWORD(16, inst);
|
||||
break;
|
||||
case Opcode::EXP:
|
||||
translator.EXP(inst);
|
||||
break;
|
||||
case Opcode::V_INTERP_P2_F32:
|
||||
translator.V_INTERP_P2_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_PKRTZ_F16_F32:
|
||||
translator.V_CVT_PKRTZ_F16_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_F32_F16:
|
||||
translator.V_CVT_F32_F16(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_F32_UBYTE0:
|
||||
translator.V_CVT_F32_UBYTE(0, inst);
|
||||
break;
|
||||
case Opcode::V_CVT_F32_UBYTE1:
|
||||
translator.V_CVT_F32_UBYTE(1, inst);
|
||||
break;
|
||||
case Opcode::V_CVT_F32_UBYTE2:
|
||||
translator.V_CVT_F32_UBYTE(2, inst);
|
||||
break;
|
||||
case Opcode::V_CVT_F32_UBYTE3:
|
||||
translator.V_CVT_F32_UBYTE(3, inst);
|
||||
break;
|
||||
case Opcode::V_BFREV_B32:
|
||||
translator.V_BFREV_B32(inst);
|
||||
break;
|
||||
case Opcode::V_LDEXP_F32:
|
||||
translator.V_LDEXP_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FRACT_F32:
|
||||
translator.V_FRACT_F32(inst);
|
||||
break;
|
||||
case Opcode::V_ADD_F32:
|
||||
translator.V_ADD_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_OFF_F32_I4:
|
||||
translator.V_CVT_OFF_F32_I4(inst);
|
||||
break;
|
||||
case Opcode::V_MED3_F32:
|
||||
translator.V_MED3_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FLOOR_F32:
|
||||
translator.V_FLOOR_F32(inst);
|
||||
break;
|
||||
case Opcode::V_SUB_F32:
|
||||
translator.V_SUB_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FMA_F32:
|
||||
case Opcode::V_MADAK_F32: // Yes these can share the opcode
|
||||
translator.V_FMA_F32(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_SAMPLE_LZ_O:
|
||||
case Opcode::IMAGE_SAMPLE_O:
|
||||
case Opcode::IMAGE_SAMPLE_C:
|
||||
case Opcode::IMAGE_SAMPLE_C_LZ:
|
||||
case Opcode::IMAGE_SAMPLE_LZ:
|
||||
case Opcode::IMAGE_SAMPLE:
|
||||
case Opcode::IMAGE_SAMPLE_L:
|
||||
case Opcode::IMAGE_SAMPLE_C_O:
|
||||
case Opcode::IMAGE_SAMPLE_B:
|
||||
case Opcode::IMAGE_SAMPLE_C_LZ_O:
|
||||
translator.IMAGE_SAMPLE(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_ADD:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Add, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_AND:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::And, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_OR:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Or, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_XOR:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Xor, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_UMAX:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Umax, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_SMAX:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Smax, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_UMIN:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Umin, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_SMIN:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Smin, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_INC:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Inc, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_ATOMIC_DEC:
|
||||
translator.IMAGE_ATOMIC(AtomicOp::Dec, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_GET_LOD:
|
||||
translator.IMAGE_GET_LOD(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_GATHER4_C:
|
||||
case Opcode::IMAGE_GATHER4_LZ:
|
||||
case Opcode::IMAGE_GATHER4_LZ_O:
|
||||
translator.IMAGE_GATHER(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_STORE:
|
||||
translator.IMAGE_STORE(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_LOAD_MIP:
|
||||
translator.IMAGE_LOAD(true, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_LOAD:
|
||||
translator.IMAGE_LOAD(false, inst);
|
||||
break;
|
||||
case Opcode::V_MAD_U64_U32:
|
||||
translator.V_MAD_U64_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GE_I32:
|
||||
translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_EQ_I32:
|
||||
translator.V_CMP_U32(ConditionOp::EQ, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LE_I32:
|
||||
translator.V_CMP_U32(ConditionOp::LE, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NE_I32:
|
||||
translator.V_CMP_U32(ConditionOp::LG, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NE_U32:
|
||||
translator.V_CMP_U32(ConditionOp::LG, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_EQ_U32:
|
||||
translator.V_CMP_U32(ConditionOp::EQ, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_F_U32:
|
||||
translator.V_CMP_U32(ConditionOp::F, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LT_U32:
|
||||
translator.V_CMP_U32(ConditionOp::LT, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GT_U32:
|
||||
translator.V_CMP_U32(ConditionOp::GT, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GE_U32:
|
||||
translator.V_CMP_U32(ConditionOp::GE, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_TRU_U32:
|
||||
translator.V_CMP_U32(ConditionOp::TRU, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NEQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_F_F32:
|
||||
translator.V_CMP_F32(ConditionOp::F, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_EQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::EQ, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LG_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NLE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NLT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NGT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NGE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LT_U32:
|
||||
translator.S_CMP(ConditionOp::LT, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LE_U32:
|
||||
translator.S_CMP(ConditionOp::LE, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LG_U32:
|
||||
translator.S_CMP(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LT_I32:
|
||||
translator.S_CMP(ConditionOp::LT, true, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LG_I32:
|
||||
translator.S_CMP(ConditionOp::LG, true, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_GT_I32:
|
||||
translator.S_CMP(ConditionOp::GT, true, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_GE_I32:
|
||||
translator.S_CMP(ConditionOp::GE, true, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_EQ_I32:
|
||||
translator.S_CMP(ConditionOp::EQ, true, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_EQ_U32:
|
||||
translator.S_CMP(ConditionOp::EQ, false, inst);
|
||||
break;
|
||||
case Opcode::S_LSHL_B32:
|
||||
translator.S_LSHL_B32(inst);
|
||||
break;
|
||||
case Opcode::V_CNDMASK_B32:
|
||||
translator.V_CNDMASK_B32(inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_X:
|
||||
translator.BUFFER_LOAD_FORMAT(1, true, true, inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XY:
|
||||
translator.BUFFER_LOAD_FORMAT(2, true, true, inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
|
||||
translator.BUFFER_LOAD_FORMAT(3, true, true, inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
|
||||
translator.BUFFER_LOAD_FORMAT(4, true, true, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_X:
|
||||
translator.BUFFER_LOAD_FORMAT(1, false, true, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XY:
|
||||
translator.BUFFER_LOAD_FORMAT(2, false, true, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XYZ:
|
||||
translator.BUFFER_LOAD_FORMAT(3, false, true, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
|
||||
translator.BUFFER_LOAD_FORMAT(4, false, true, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_DWORD:
|
||||
translator.BUFFER_LOAD_FORMAT(1, false, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_DWORDX2:
|
||||
translator.BUFFER_LOAD_FORMAT(2, false, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_DWORDX3:
|
||||
translator.BUFFER_LOAD_FORMAT(3, false, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_DWORDX4:
|
||||
translator.BUFFER_LOAD_FORMAT(4, false, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_FORMAT_X:
|
||||
case Opcode::BUFFER_STORE_DWORD:
|
||||
translator.BUFFER_STORE_FORMAT(1, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_DWORDX2:
|
||||
translator.BUFFER_STORE_FORMAT(2, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_DWORDX3:
|
||||
translator.BUFFER_STORE_FORMAT(3, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
||||
case Opcode::BUFFER_STORE_DWORDX4:
|
||||
translator.BUFFER_STORE_FORMAT(4, false, inst);
|
||||
break;
|
||||
case Opcode::V_MAX_F32:
|
||||
translator.V_MAX_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MAX_I32:
|
||||
translator.V_MAX_U32(true, inst);
|
||||
break;
|
||||
case Opcode::V_MAX_U32:
|
||||
translator.V_MAX_U32(false, inst);
|
||||
break;
|
||||
case Opcode::V_NOT_B32:
|
||||
translator.V_NOT_B32(inst);
|
||||
break;
|
||||
case Opcode::V_RSQ_F32:
|
||||
translator.V_RSQ_F32(inst);
|
||||
break;
|
||||
case Opcode::S_ANDN2_B64:
|
||||
translator.S_AND_B64(NegateMode::Src1, inst);
|
||||
break;
|
||||
case Opcode::S_ORN2_B64:
|
||||
translator.S_OR_B64(NegateMode::Src1, false, inst);
|
||||
break;
|
||||
case Opcode::V_SIN_F32:
|
||||
translator.V_SIN_F32(inst);
|
||||
break;
|
||||
case Opcode::V_COS_F32:
|
||||
translator.V_COS_F32(inst);
|
||||
break;
|
||||
case Opcode::V_LOG_F32:
|
||||
translator.V_LOG_F32(inst);
|
||||
break;
|
||||
case Opcode::V_EXP_F32:
|
||||
translator.V_EXP_F32(inst);
|
||||
break;
|
||||
case Opcode::V_SQRT_F32:
|
||||
translator.V_SQRT_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MIN_F32:
|
||||
translator.V_MIN_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MIN_I32:
|
||||
translator.V_MIN_I32(inst);
|
||||
break;
|
||||
case Opcode::V_MIN3_F32:
|
||||
translator.V_MIN3_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MIN_LEGACY_F32:
|
||||
translator.V_MIN_F32(inst, true);
|
||||
break;
|
||||
case Opcode::V_MADMK_F32:
|
||||
translator.V_MADMK_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CUBEMA_F32:
|
||||
translator.V_CUBEMA_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CUBESC_F32:
|
||||
translator.V_CUBESC_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CUBETC_F32:
|
||||
translator.V_CUBETC_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CUBEID_F32:
|
||||
translator.V_CUBEID_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_U32_F32:
|
||||
translator.V_CVT_U32_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_I32_F32:
|
||||
translator.V_CVT_I32_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_FLR_I32_F32:
|
||||
translator.V_CVT_FLR_I32_F32(inst);
|
||||
break;
|
||||
case Opcode::V_SUBREV_F32:
|
||||
translator.V_SUBREV_F32(inst);
|
||||
break;
|
||||
case Opcode::S_AND_SAVEEXEC_B64:
|
||||
translator.S_AND_SAVEEXEC_B64(inst);
|
||||
break;
|
||||
case Opcode::S_MOV_B64:
|
||||
translator.S_MOV_B64(inst);
|
||||
break;
|
||||
case Opcode::V_SUBREV_I32:
|
||||
translator.V_SUBREV_I32(inst);
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
case Opcode::V_CMPX_F_F32:
|
||||
translator.V_CMP_F32(ConditionOp::F, true, inst);
|
||||
// Emit instructions for each category.
|
||||
switch (inst.category) {
|
||||
case InstCategory::DataShare:
|
||||
translator.EmitDataShare(inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, true, inst);
|
||||
case InstCategory::VectorInterpolation:
|
||||
translator.EmitVectorInterpolation(inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_EQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::EQ, true, inst);
|
||||
case InstCategory::ScalarMemory:
|
||||
translator.EmitScalarMemory(inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, true, inst);
|
||||
case InstCategory::VectorMemory:
|
||||
translator.EmitVectorMemory(inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_GT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, true, inst);
|
||||
case InstCategory::Export:
|
||||
translator.EmitExport(inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LG_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, true, inst);
|
||||
case InstCategory::FlowControl:
|
||||
translator.EmitFlowControl(pc, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_GE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, true, inst);
|
||||
case InstCategory::ScalarALU:
|
||||
translator.EmitScalarAlu(inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NGE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, true, inst);
|
||||
case InstCategory::VectorALU:
|
||||
translator.EmitVectorAlu(inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NLG_F32:
|
||||
translator.V_CMP_F32(ConditionOp::EQ, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NGT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NLE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NEQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NLT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_TRU_F32:
|
||||
translator.V_CMP_F32(ConditionOp::TRU, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LE_U32:
|
||||
translator.V_CMP_U32(ConditionOp::LE, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GT_I32:
|
||||
translator.V_CMP_U32(ConditionOp::GT, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LT_I32:
|
||||
translator.V_CMP_U32(ConditionOp::LT, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LT_I32:
|
||||
translator.V_CMP_U32(ConditionOp::LT, true, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_F_U32:
|
||||
translator.V_CMP_U32(ConditionOp::F, false, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LT_U32:
|
||||
translator.V_CMP_U32(ConditionOp::LT, false, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_EQ_U32:
|
||||
translator.V_CMP_U32(ConditionOp::EQ, false, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LE_U32:
|
||||
translator.V_CMP_U32(ConditionOp::LE, false, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_GT_U32:
|
||||
translator.V_CMP_U32(ConditionOp::GT, false, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NE_U32:
|
||||
translator.V_CMP_U32(ConditionOp::LG, false, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_GE_U32:
|
||||
translator.V_CMP_U32(ConditionOp::GE, false, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_TRU_U32:
|
||||
translator.V_CMP_U32(ConditionOp::TRU, false, true, inst);
|
||||
break;
|
||||
case Opcode::S_OR_B64:
|
||||
translator.S_OR_B64(NegateMode::None, false, inst);
|
||||
break;
|
||||
case Opcode::S_NOR_B64:
|
||||
translator.S_OR_B64(NegateMode::Result, false, inst);
|
||||
break;
|
||||
case Opcode::S_XOR_B64:
|
||||
translator.S_OR_B64(NegateMode::None, true, inst);
|
||||
break;
|
||||
case Opcode::S_AND_B64:
|
||||
translator.S_AND_B64(NegateMode::None, inst);
|
||||
break;
|
||||
case Opcode::S_NOT_B64:
|
||||
translator.S_NOT_B64(inst);
|
||||
break;
|
||||
case Opcode::S_NAND_B64:
|
||||
translator.S_AND_B64(NegateMode::Result, inst);
|
||||
break;
|
||||
case Opcode::V_LSHRREV_B32:
|
||||
translator.V_LSHRREV_B32(inst);
|
||||
break;
|
||||
case Opcode::S_ADD_I32:
|
||||
translator.S_ADD_I32(inst);
|
||||
break;
|
||||
case Opcode::V_MUL_HI_U32:
|
||||
translator.V_MUL_HI_U32(false, inst);
|
||||
break;
|
||||
case Opcode::V_MUL_LO_I32:
|
||||
translator.V_MUL_LO_U32(inst);
|
||||
break;
|
||||
case Opcode::V_SAD_U32:
|
||||
translator.V_SAD_U32(inst);
|
||||
break;
|
||||
case Opcode::V_BFE_U32:
|
||||
translator.V_BFE_U32(false, inst);
|
||||
break;
|
||||
case Opcode::V_BFE_I32:
|
||||
translator.V_BFE_U32(true, inst);
|
||||
break;
|
||||
case Opcode::V_MAD_I32_I24:
|
||||
translator.V_MAD_I32_I24(inst);
|
||||
break;
|
||||
case Opcode::V_MUL_I32_I24:
|
||||
case Opcode::V_MUL_U32_U24:
|
||||
translator.V_MUL_I32_I24(inst);
|
||||
break;
|
||||
case Opcode::V_SUB_I32:
|
||||
translator.V_SUB_I32(inst);
|
||||
break;
|
||||
case Opcode::V_LSHR_B32:
|
||||
translator.V_LSHR_B32(inst);
|
||||
break;
|
||||
case Opcode::V_ASHRREV_I32:
|
||||
translator.V_ASHRREV_I32(inst);
|
||||
break;
|
||||
case Opcode::V_MAD_U32_U24:
|
||||
translator.V_MAD_U32_U24(inst);
|
||||
break;
|
||||
case Opcode::S_AND_B32:
|
||||
translator.S_AND_B32(inst);
|
||||
break;
|
||||
case Opcode::S_ASHR_I32:
|
||||
translator.S_ASHR_I32(inst);
|
||||
break;
|
||||
case Opcode::S_OR_B32:
|
||||
translator.S_OR_B32(inst);
|
||||
break;
|
||||
case Opcode::S_LSHR_B32:
|
||||
translator.S_LSHR_B32(inst);
|
||||
break;
|
||||
case Opcode::S_CSELECT_B32:
|
||||
translator.S_CSELECT_B32(inst);
|
||||
break;
|
||||
case Opcode::S_CSELECT_B64:
|
||||
translator.S_CSELECT_B64(inst);
|
||||
break;
|
||||
case Opcode::S_BFE_U32:
|
||||
translator.S_BFE_U32(inst);
|
||||
break;
|
||||
case Opcode::V_RNDNE_F32:
|
||||
translator.V_RNDNE_F32(inst);
|
||||
break;
|
||||
case Opcode::V_BCNT_U32_B32:
|
||||
translator.V_BCNT_U32_B32(inst);
|
||||
break;
|
||||
case Opcode::V_MAX3_F32:
|
||||
translator.V_MAX3_F32(inst);
|
||||
break;
|
||||
case Opcode::DS_SWIZZLE_B32:
|
||||
translator.DS_SWIZZLE_B32(inst);
|
||||
break;
|
||||
case Opcode::V_MUL_LO_U32:
|
||||
translator.V_MUL_LO_U32(inst);
|
||||
break;
|
||||
case Opcode::S_BFM_B32:
|
||||
translator.S_BFM_B32(inst);
|
||||
break;
|
||||
case Opcode::V_MIN_U32:
|
||||
translator.V_MIN_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NE_U64:
|
||||
translator.V_CMP_NE_U64(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_CLASS_F32:
|
||||
translator.V_CMP_CLASS_F32(inst);
|
||||
break;
|
||||
case Opcode::V_TRUNC_F32:
|
||||
translator.V_TRUNC_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CEIL_F32:
|
||||
translator.V_CEIL_F32(inst);
|
||||
break;
|
||||
case Opcode::V_BFI_B32:
|
||||
translator.V_BFI_B32(inst);
|
||||
break;
|
||||
case Opcode::S_BREV_B32:
|
||||
translator.S_BREV_B32(inst);
|
||||
break;
|
||||
case Opcode::S_ADD_U32:
|
||||
translator.S_ADD_U32(inst);
|
||||
break;
|
||||
case Opcode::S_ADDC_U32:
|
||||
translator.S_ADDC_U32(inst);
|
||||
break;
|
||||
case Opcode::S_SUB_U32:
|
||||
case Opcode::S_SUB_I32:
|
||||
translator.S_SUB_U32(inst);
|
||||
break;
|
||||
// TODO: Separate implementation for legacy variants.
|
||||
case Opcode::V_MUL_LEGACY_F32:
|
||||
translator.V_MUL_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MAC_LEGACY_F32:
|
||||
translator.V_MAC_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MAD_LEGACY_F32:
|
||||
translator.V_MAD_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MAX_LEGACY_F32:
|
||||
translator.V_MAX_F32(inst, true);
|
||||
break;
|
||||
case Opcode::V_RSQ_LEGACY_F32:
|
||||
case Opcode::V_RSQ_CLAMP_F32:
|
||||
translator.V_RSQ_F32(inst);
|
||||
break;
|
||||
case Opcode::V_RCP_IFLAG_F32:
|
||||
translator.V_RCP_F32(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_GET_RESINFO:
|
||||
translator.IMAGE_GET_RESINFO(inst);
|
||||
break;
|
||||
case Opcode::S_BARRIER:
|
||||
translator.S_BARRIER();
|
||||
break;
|
||||
case Opcode::S_TTRACEDATA:
|
||||
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||
break;
|
||||
case Opcode::DS_READ_B32:
|
||||
translator.DS_READ(32, false, false, inst);
|
||||
break;
|
||||
case Opcode::DS_READ2_B32:
|
||||
translator.DS_READ(32, false, true, inst);
|
||||
break;
|
||||
case Opcode::DS_WRITE_B32:
|
||||
translator.DS_WRITE(32, false, false, inst);
|
||||
break;
|
||||
case Opcode::DS_WRITE2_B32:
|
||||
translator.DS_WRITE(32, false, true, inst);
|
||||
break;
|
||||
case Opcode::V_READFIRSTLANE_B32:
|
||||
translator.V_READFIRSTLANE_B32(inst);
|
||||
break;
|
||||
case Opcode::S_GETPC_B64:
|
||||
translator.S_GETPC_B64(block_base, inst);
|
||||
break;
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
case Opcode::S_CBRANCH_SCC1:
|
||||
case Opcode::S_CBRANCH_VCCNZ:
|
||||
case Opcode::S_CBRANCH_VCCZ:
|
||||
case Opcode::S_BRANCH:
|
||||
case Opcode::S_WQM_B64:
|
||||
case Opcode::V_INTERP_P1_F32:
|
||||
case Opcode::S_ENDPGM:
|
||||
case InstCategory::DebugProfile:
|
||||
break;
|
||||
default:
|
||||
const u32 opcode = u32(inst.opcode);
|
||||
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({})",
|
||||
magic_enum::enum_name(inst.opcode), opcode);
|
||||
info.translation_failed = true;
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,7 +11,8 @@
|
|||
|
||||
// Forward declarations so this header can refer to these types by reference
// without including their definitions.
namespace Shader {
struct Info;
struct Profile;
} // namespace Shader
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
|
@ -24,6 +25,7 @@ enum class ConditionOp : u32 {
|
|||
LT,
|
||||
LE,
|
||||
TRU,
|
||||
U,
|
||||
};
|
||||
|
||||
enum class AtomicOp : u32 {
|
||||
|
@ -53,10 +55,19 @@ enum class NegateMode : u32 {
|
|||
|
||||
class Translator {
|
||||
public:
|
||||
explicit Translator(IR::Block* block_, Info& info);
|
||||
explicit Translator(IR::Block* block_, Info& info, const Profile& profile);
|
||||
|
||||
// Instruction categories
|
||||
void EmitPrologue();
|
||||
void EmitFetch(const GcnInst& inst);
|
||||
void EmitDataShare(const GcnInst& inst);
|
||||
void EmitVectorInterpolation(const GcnInst& inst);
|
||||
void EmitScalarMemory(const GcnInst& inst);
|
||||
void EmitVectorMemory(const GcnInst& inst);
|
||||
void EmitExport(const GcnInst& inst);
|
||||
void EmitFlowControl(u32 pc, const GcnInst& inst);
|
||||
void EmitScalarAlu(const GcnInst& inst);
|
||||
void EmitVectorAlu(const GcnInst& inst);
|
||||
|
||||
// Scalar ALU
|
||||
void S_MOVK(const GcnInst& inst);
|
||||
|
@ -83,6 +94,10 @@ public:
|
|||
void S_SUB_U32(const GcnInst& inst);
|
||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||
void S_ADDC_U32(const GcnInst& inst);
|
||||
void S_MULK_I32(const GcnInst& inst);
|
||||
void S_ADDK_I32(const GcnInst& inst);
|
||||
void S_MAX_U32(const GcnInst& inst);
|
||||
void S_MIN_U32(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
|
@ -94,11 +109,13 @@ public:
|
|||
void V_MAC_F32(const GcnInst& inst);
|
||||
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
|
||||
void V_CVT_F32_F16(const GcnInst& inst);
|
||||
void V_CVT_F16_F32(const GcnInst& inst);
|
||||
void V_MUL_F32(const GcnInst& inst);
|
||||
void V_CNDMASK_B32(const GcnInst& inst);
|
||||
void V_OR_B32(bool is_xor, const GcnInst& inst);
|
||||
void V_AND_B32(const GcnInst& inst);
|
||||
void V_LSHLREV_B32(const GcnInst& inst);
|
||||
void V_LSHL_B32(const GcnInst& inst);
|
||||
void V_ADD_I32(const GcnInst& inst);
|
||||
void V_ADDC_U32(const GcnInst& inst);
|
||||
void V_CVT_F32_I32(const GcnInst& inst);
|
||||
|
@ -122,6 +139,7 @@ public:
|
|||
void V_SQRT_F32(const GcnInst& inst);
|
||||
void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
|
||||
void V_MIN3_F32(const GcnInst& inst);
|
||||
void V_MIN3_I32(const GcnInst& inst);
|
||||
void V_MADMK_F32(const GcnInst& inst);
|
||||
void V_CUBEMA_F32(const GcnInst& inst);
|
||||
void V_CUBESC_F32(const GcnInst& inst);
|
||||
|
@ -146,6 +164,7 @@ public:
|
|||
void V_BCNT_U32_B32(const GcnInst& inst);
|
||||
void V_COS_F32(const GcnInst& inst);
|
||||
void V_MAX3_F32(const GcnInst& inst);
|
||||
void V_MAX3_U32(const GcnInst& inst);
|
||||
void V_CVT_I32_F32(const GcnInst& inst);
|
||||
void V_MIN_I32(const GcnInst& inst);
|
||||
void V_MUL_LO_U32(const GcnInst& inst);
|
||||
|
@ -160,6 +179,8 @@ public:
|
|||
void V_LDEXP_F32(const GcnInst& inst);
|
||||
void V_CVT_FLR_I32_F32(const GcnInst& inst);
|
||||
void V_CMP_CLASS_F32(const GcnInst& inst);
|
||||
void V_FFBL_B32(const GcnInst& inst);
|
||||
void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
|
||||
|
@ -167,12 +188,15 @@ public:
|
|||
|
||||
// Vector interpolation
|
||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
void V_INTERP_MOV_F32(const GcnInst& inst);
|
||||
|
||||
// Data share
|
||||
void DS_SWIZZLE_B32(const GcnInst& inst);
|
||||
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
void V_READFIRSTLANE_B32(const GcnInst& inst);
|
||||
void V_READLANE_B32(const GcnInst& inst);
|
||||
void V_WRITELANE_B32(const GcnInst& inst);
|
||||
void S_BARRIER();
|
||||
|
||||
// MIMG
|
||||
|
@ -184,9 +208,6 @@ public:
|
|||
void IMAGE_GET_LOD(const GcnInst& inst);
|
||||
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
|
||||
|
||||
// Export
|
||||
void EXP(const GcnInst& inst);
|
||||
|
||||
private:
|
||||
template <typename T = IR::U32F32>
|
||||
[[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false);
|
||||
|
@ -195,12 +216,17 @@ private:
|
|||
void SetDst(const InstOperand& operand, const IR::U32F32& value);
|
||||
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
|
||||
|
||||
void LogMissingOpcode(const GcnInst& inst);
|
||||
|
||||
private:
|
||||
IR::IREmitter ir;
|
||||
Info& info;
|
||||
static std::array<bool, IR::NumScalarRegs> exec_contexts;
|
||||
const Profile& profile;
|
||||
IR::U32 m0_value;
|
||||
bool opcode_missing = false;
|
||||
};
|
||||
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
|
||||
const Profile& profile);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -2,9 +2,311 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::EmitVectorAlu(const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::V_LSHLREV_B32:
|
||||
return V_LSHLREV_B32(inst);
|
||||
case Opcode::V_LSHL_B32:
|
||||
return V_LSHL_B32(inst);
|
||||
case Opcode::V_BFREV_B32:
|
||||
return V_BFREV_B32(inst);
|
||||
case Opcode::V_BFE_U32:
|
||||
return V_BFE_U32(false, inst);
|
||||
case Opcode::V_BFE_I32:
|
||||
return V_BFE_U32(true, inst);
|
||||
case Opcode::V_BFI_B32:
|
||||
return V_BFI_B32(inst);
|
||||
case Opcode::V_LSHR_B32:
|
||||
return V_LSHR_B32(inst);
|
||||
case Opcode::V_ASHRREV_I32:
|
||||
return V_ASHRREV_I32(inst);
|
||||
case Opcode::V_LSHRREV_B32:
|
||||
return V_LSHRREV_B32(inst);
|
||||
case Opcode::V_NOT_B32:
|
||||
return V_NOT_B32(inst);
|
||||
case Opcode::V_AND_B32:
|
||||
return V_AND_B32(inst);
|
||||
case Opcode::V_OR_B32:
|
||||
return V_OR_B32(false, inst);
|
||||
case Opcode::V_XOR_B32:
|
||||
return V_OR_B32(true, inst);
|
||||
case Opcode::V_FFBL_B32:
|
||||
return V_FFBL_B32(inst);
|
||||
|
||||
case Opcode::V_MOV_B32:
|
||||
return V_MOV(inst);
|
||||
case Opcode::V_ADD_I32:
|
||||
return V_ADD_I32(inst);
|
||||
case Opcode::V_ADDC_U32:
|
||||
return V_ADDC_U32(inst);
|
||||
case Opcode::V_CVT_F32_I32:
|
||||
return V_CVT_F32_I32(inst);
|
||||
case Opcode::V_CVT_F32_U32:
|
||||
return V_CVT_F32_U32(inst);
|
||||
case Opcode::V_CVT_PKRTZ_F16_F32:
|
||||
return V_CVT_PKRTZ_F16_F32(inst);
|
||||
case Opcode::V_CVT_F32_F16:
|
||||
return V_CVT_F32_F16(inst);
|
||||
case Opcode::V_CVT_F16_F32:
|
||||
return V_CVT_F16_F32(inst);
|
||||
case Opcode::V_CVT_F32_UBYTE0:
|
||||
return V_CVT_F32_UBYTE(0, inst);
|
||||
case Opcode::V_CVT_F32_UBYTE1:
|
||||
return V_CVT_F32_UBYTE(1, inst);
|
||||
case Opcode::V_CVT_F32_UBYTE2:
|
||||
return V_CVT_F32_UBYTE(2, inst);
|
||||
case Opcode::V_CVT_F32_UBYTE3:
|
||||
return V_CVT_F32_UBYTE(3, inst);
|
||||
case Opcode::V_CVT_OFF_F32_I4:
|
||||
return V_CVT_OFF_F32_I4(inst);
|
||||
case Opcode::V_MAD_U64_U32:
|
||||
return V_MAD_U64_U32(inst);
|
||||
case Opcode::V_CMP_GE_I32:
|
||||
return V_CMP_U32(ConditionOp::GE, true, false, inst);
|
||||
case Opcode::V_CMP_EQ_I32:
|
||||
return V_CMP_U32(ConditionOp::EQ, true, false, inst);
|
||||
case Opcode::V_CMP_LE_I32:
|
||||
return V_CMP_U32(ConditionOp::LE, true, false, inst);
|
||||
case Opcode::V_CMP_NE_I32:
|
||||
return V_CMP_U32(ConditionOp::LG, true, false, inst);
|
||||
case Opcode::V_CMP_NE_U32:
|
||||
return V_CMP_U32(ConditionOp::LG, false, false, inst);
|
||||
case Opcode::V_CMP_EQ_U32:
|
||||
return V_CMP_U32(ConditionOp::EQ, false, false, inst);
|
||||
case Opcode::V_CMP_F_U32:
|
||||
return V_CMP_U32(ConditionOp::F, false, false, inst);
|
||||
case Opcode::V_CMP_LT_U32:
|
||||
return V_CMP_U32(ConditionOp::LT, false, false, inst);
|
||||
case Opcode::V_CMP_GT_U32:
|
||||
return V_CMP_U32(ConditionOp::GT, false, false, inst);
|
||||
case Opcode::V_CMP_GE_U32:
|
||||
return V_CMP_U32(ConditionOp::GE, false, false, inst);
|
||||
case Opcode::V_CMP_TRU_U32:
|
||||
return V_CMP_U32(ConditionOp::TRU, false, false, inst);
|
||||
case Opcode::V_CMP_NEQ_F32:
|
||||
return V_CMP_F32(ConditionOp::LG, false, inst);
|
||||
case Opcode::V_CMP_F_F32:
|
||||
return V_CMP_F32(ConditionOp::F, false, inst);
|
||||
case Opcode::V_CMP_LT_F32:
|
||||
return V_CMP_F32(ConditionOp::LT, false, inst);
|
||||
case Opcode::V_CMP_EQ_F32:
|
||||
return V_CMP_F32(ConditionOp::EQ, false, inst);
|
||||
case Opcode::V_CMP_LE_F32:
|
||||
return V_CMP_F32(ConditionOp::LE, false, inst);
|
||||
case Opcode::V_CMP_GT_F32:
|
||||
return V_CMP_F32(ConditionOp::GT, false, inst);
|
||||
case Opcode::V_CMP_LG_F32:
|
||||
return V_CMP_F32(ConditionOp::LG, false, inst);
|
||||
case Opcode::V_CMP_GE_F32:
|
||||
return V_CMP_F32(ConditionOp::GE, false, inst);
|
||||
case Opcode::V_CMP_NLE_F32:
|
||||
return V_CMP_F32(ConditionOp::GT, false, inst);
|
||||
case Opcode::V_CMP_NLT_F32:
|
||||
return V_CMP_F32(ConditionOp::GE, false, inst);
|
||||
case Opcode::V_CMP_NGT_F32:
|
||||
return V_CMP_F32(ConditionOp::LE, false, inst);
|
||||
case Opcode::V_CMP_NGE_F32:
|
||||
return V_CMP_F32(ConditionOp::LT, false, inst);
|
||||
case Opcode::V_CMP_U_F32:
|
||||
return V_CMP_F32(ConditionOp::U, false, inst);
|
||||
case Opcode::V_CNDMASK_B32:
|
||||
return V_CNDMASK_B32(inst);
|
||||
case Opcode::V_MAX_I32:
|
||||
return V_MAX_U32(true, inst);
|
||||
case Opcode::V_MAX_U32:
|
||||
return V_MAX_U32(false, inst);
|
||||
case Opcode::V_MIN_I32:
|
||||
return V_MIN_I32(inst);
|
||||
case Opcode::V_CUBEMA_F32:
|
||||
return V_CUBEMA_F32(inst);
|
||||
case Opcode::V_CUBESC_F32:
|
||||
return V_CUBESC_F32(inst);
|
||||
case Opcode::V_CUBETC_F32:
|
||||
return V_CUBETC_F32(inst);
|
||||
case Opcode::V_CUBEID_F32:
|
||||
return V_CUBEID_F32(inst);
|
||||
case Opcode::V_CVT_U32_F32:
|
||||
return V_CVT_U32_F32(inst);
|
||||
case Opcode::V_CVT_I32_F32:
|
||||
return V_CVT_I32_F32(inst);
|
||||
case Opcode::V_CVT_FLR_I32_F32:
|
||||
return V_CVT_FLR_I32_F32(inst);
|
||||
case Opcode::V_SUBREV_I32:
|
||||
return V_SUBREV_I32(inst);
|
||||
case Opcode::V_MUL_HI_U32:
|
||||
return V_MUL_HI_U32(false, inst);
|
||||
case Opcode::V_MUL_LO_I32:
|
||||
return V_MUL_LO_U32(inst);
|
||||
case Opcode::V_SAD_U32:
|
||||
return V_SAD_U32(inst);
|
||||
case Opcode::V_SUB_I32:
|
||||
return V_SUB_I32(inst);
|
||||
case Opcode::V_MAD_I32_I24:
|
||||
return V_MAD_I32_I24(inst);
|
||||
case Opcode::V_MUL_I32_I24:
|
||||
case Opcode::V_MUL_U32_U24:
|
||||
return V_MUL_I32_I24(inst);
|
||||
case Opcode::V_MAD_U32_U24:
|
||||
return V_MAD_U32_U24(inst);
|
||||
case Opcode::V_BCNT_U32_B32:
|
||||
return V_BCNT_U32_B32(inst);
|
||||
case Opcode::V_MUL_LO_U32:
|
||||
return V_MUL_LO_U32(inst);
|
||||
case Opcode::V_MIN_U32:
|
||||
return V_MIN_U32(inst);
|
||||
case Opcode::V_CMP_NE_U64:
|
||||
return V_CMP_NE_U64(inst);
|
||||
case Opcode::V_READFIRSTLANE_B32:
|
||||
return V_READFIRSTLANE_B32(inst);
|
||||
case Opcode::V_READLANE_B32:
|
||||
return V_READLANE_B32(inst);
|
||||
case Opcode::V_WRITELANE_B32:
|
||||
return V_WRITELANE_B32(inst);
|
||||
|
||||
case Opcode::V_MAD_F32:
|
||||
return V_MAD_F32(inst);
|
||||
case Opcode::V_MAC_F32:
|
||||
return V_MAC_F32(inst);
|
||||
case Opcode::V_MUL_F32:
|
||||
return V_MUL_F32(inst);
|
||||
case Opcode::V_RCP_F32:
|
||||
return V_RCP_F32(inst);
|
||||
case Opcode::V_LDEXP_F32:
|
||||
return V_LDEXP_F32(inst);
|
||||
case Opcode::V_FRACT_F32:
|
||||
return V_FRACT_F32(inst);
|
||||
case Opcode::V_ADD_F32:
|
||||
return V_ADD_F32(inst);
|
||||
case Opcode::V_MED3_F32:
|
||||
return V_MED3_F32(inst);
|
||||
case Opcode::V_FLOOR_F32:
|
||||
return V_FLOOR_F32(inst);
|
||||
case Opcode::V_SUB_F32:
|
||||
return V_SUB_F32(inst);
|
||||
case Opcode::V_FMA_F32:
|
||||
case Opcode::V_MADAK_F32:
|
||||
return V_FMA_F32(inst);
|
||||
case Opcode::V_MAX_F32:
|
||||
return V_MAX_F32(inst);
|
||||
case Opcode::V_RSQ_F32:
|
||||
return V_RSQ_F32(inst);
|
||||
case Opcode::V_SIN_F32:
|
||||
return V_SIN_F32(inst);
|
||||
case Opcode::V_COS_F32:
|
||||
return V_COS_F32(inst);
|
||||
case Opcode::V_LOG_F32:
|
||||
return V_LOG_F32(inst);
|
||||
case Opcode::V_EXP_F32:
|
||||
return V_EXP_F32(inst);
|
||||
case Opcode::V_SQRT_F32:
|
||||
return V_SQRT_F32(inst);
|
||||
case Opcode::V_MIN_F32:
|
||||
return V_MIN_F32(inst, false);
|
||||
case Opcode::V_MIN3_F32:
|
||||
return V_MIN3_F32(inst);
|
||||
case Opcode::V_MIN3_I32:
|
||||
return V_MIN3_I32(inst);
|
||||
case Opcode::V_MIN_LEGACY_F32:
|
||||
return V_MIN_F32(inst, true);
|
||||
case Opcode::V_MADMK_F32:
|
||||
return V_MADMK_F32(inst);
|
||||
case Opcode::V_SUBREV_F32:
|
||||
return V_SUBREV_F32(inst);
|
||||
case Opcode::V_RNDNE_F32:
|
||||
return V_RNDNE_F32(inst);
|
||||
case Opcode::V_MAX3_F32:
|
||||
return V_MAX3_F32(inst);
|
||||
case Opcode::V_MAX3_U32:
|
||||
return V_MAX3_U32(inst);
|
||||
case Opcode::V_TRUNC_F32:
|
||||
return V_TRUNC_F32(inst);
|
||||
case Opcode::V_CEIL_F32:
|
||||
return V_CEIL_F32(inst);
|
||||
case Opcode::V_MUL_LEGACY_F32:
|
||||
return V_MUL_F32(inst);
|
||||
case Opcode::V_MAC_LEGACY_F32:
|
||||
return V_MAC_F32(inst);
|
||||
case Opcode::V_MAD_LEGACY_F32:
|
||||
return V_MAD_F32(inst);
|
||||
case Opcode::V_MAX_LEGACY_F32:
|
||||
return V_MAX_F32(inst, true);
|
||||
case Opcode::V_RSQ_LEGACY_F32:
|
||||
case Opcode::V_RSQ_CLAMP_F32:
|
||||
return V_RSQ_F32(inst);
|
||||
case Opcode::V_RCP_IFLAG_F32:
|
||||
return V_RCP_F32(inst);
|
||||
|
||||
case Opcode::V_CMPX_F_F32:
|
||||
return V_CMP_F32(ConditionOp::F, true, inst);
|
||||
case Opcode::V_CMPX_LT_F32:
|
||||
return V_CMP_F32(ConditionOp::LT, true, inst);
|
||||
case Opcode::V_CMPX_EQ_F32:
|
||||
return V_CMP_F32(ConditionOp::EQ, true, inst);
|
||||
case Opcode::V_CMPX_LE_F32:
|
||||
return V_CMP_F32(ConditionOp::LE, true, inst);
|
||||
case Opcode::V_CMPX_GT_F32:
|
||||
return V_CMP_F32(ConditionOp::GT, true, inst);
|
||||
case Opcode::V_CMPX_LG_F32:
|
||||
return V_CMP_F32(ConditionOp::LG, true, inst);
|
||||
case Opcode::V_CMPX_GE_F32:
|
||||
return V_CMP_F32(ConditionOp::GE, true, inst);
|
||||
case Opcode::V_CMPX_NGE_F32:
|
||||
return V_CMP_F32(ConditionOp::LT, true, inst);
|
||||
case Opcode::V_CMPX_NLG_F32:
|
||||
return V_CMP_F32(ConditionOp::EQ, true, inst);
|
||||
case Opcode::V_CMPX_NGT_F32:
|
||||
return V_CMP_F32(ConditionOp::LE, true, inst);
|
||||
case Opcode::V_CMPX_NLE_F32:
|
||||
return V_CMP_F32(ConditionOp::GT, true, inst);
|
||||
case Opcode::V_CMPX_NEQ_F32:
|
||||
return V_CMP_F32(ConditionOp::LG, true, inst);
|
||||
case Opcode::V_CMPX_NLT_F32:
|
||||
return V_CMP_F32(ConditionOp::GE, true, inst);
|
||||
case Opcode::V_CMPX_TRU_F32:
|
||||
return V_CMP_F32(ConditionOp::TRU, true, inst);
|
||||
case Opcode::V_CMP_CLASS_F32:
|
||||
return V_CMP_CLASS_F32(inst);
|
||||
|
||||
case Opcode::V_CMP_LE_U32:
|
||||
return V_CMP_U32(ConditionOp::LE, false, false, inst);
|
||||
case Opcode::V_CMP_GT_I32:
|
||||
return V_CMP_U32(ConditionOp::GT, true, false, inst);
|
||||
case Opcode::V_CMP_LT_I32:
|
||||
return V_CMP_U32(ConditionOp::LT, true, false, inst);
|
||||
case Opcode::V_CMPX_LT_I32:
|
||||
return V_CMP_U32(ConditionOp::LT, true, true, inst);
|
||||
case Opcode::V_CMPX_F_U32:
|
||||
return V_CMP_U32(ConditionOp::F, false, true, inst);
|
||||
case Opcode::V_CMPX_LT_U32:
|
||||
return V_CMP_U32(ConditionOp::LT, false, true, inst);
|
||||
case Opcode::V_CMPX_EQ_U32:
|
||||
return V_CMP_U32(ConditionOp::EQ, false, true, inst);
|
||||
case Opcode::V_CMPX_LE_U32:
|
||||
return V_CMP_U32(ConditionOp::LE, false, true, inst);
|
||||
case Opcode::V_CMPX_GT_U32:
|
||||
return V_CMP_U32(ConditionOp::GT, false, true, inst);
|
||||
case Opcode::V_CMPX_NE_U32:
|
||||
return V_CMP_U32(ConditionOp::LG, false, true, inst);
|
||||
case Opcode::V_CMPX_GE_U32:
|
||||
return V_CMP_U32(ConditionOp::GE, false, true, inst);
|
||||
case Opcode::V_CMPX_TRU_U32:
|
||||
return V_CMP_U32(ConditionOp::TRU, false, true, inst);
|
||||
case Opcode::V_CMPX_LG_I32:
|
||||
return V_CMP_U32(ConditionOp::LG, true, true, inst);
|
||||
|
||||
case Opcode::V_MBCNT_LO_U32_B32:
|
||||
return V_MBCNT_U32_B32(true, inst);
|
||||
case Opcode::V_MBCNT_HI_U32_B32:
|
||||
return V_MBCNT_U32_B32(false, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies the first source operand into the first destination operand.
void Translator::V_MOV(const GcnInst& inst) {
    const auto value = GetSrc(inst.src[0]);
    SetDst(inst.dst[0], value);
}
|
||||
|
@ -32,6 +334,12 @@ void Translator::V_CVT_F32_F16(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_F16_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0 = GetSrc(inst.src[0], true);
|
||||
const IR::F16 src0fp16 = ir.FPConvert(16, src0);
|
||||
SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast<IR::U16>(src0fp16)));
|
||||
}
|
||||
|
||||
/// Multiplies the first two source operands (fetched with the flt_zero flag set)
/// and writes the product to the destination.
void Translator::V_MUL_F32(const GcnInst& inst) {
    const auto product = ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true));
    SetDst(inst.dst[0], product);
}
|
||||
|
@ -85,6 +393,12 @@ void Translator::V_LSHLREV_B32(const GcnInst& inst) {
|
|||
ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
|
||||
}
|
||||
|
||||
void Translator::V_LSHL_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
|
||||
}
|
||||
|
||||
void Translator::V_ADD_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
|
||||
|
@ -208,6 +522,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
|
|||
return ir.FPLessThanEqual(src0, src1);
|
||||
case ConditionOp::GE:
|
||||
return ir.FPGreaterThanEqual(src0, src1);
|
||||
case ConditionOp::U:
|
||||
return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1)));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -278,6 +594,13 @@ void Translator::V_MIN3_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_MIN3_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.SMin(src0, ir.SMin(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_MADMK_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
|
@ -320,12 +643,13 @@ void Translator::V_SUBREV_I32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MAD_U64_U32(const GcnInst& inst) {
|
||||
|
||||
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
|
||||
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
|
||||
const auto src2 = GetSrc64<IR::U64>(inst.src[2]);
|
||||
|
||||
const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
|
||||
// const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
|
||||
const IR::U64 mul_result =
|
||||
ir.PackUint2x32(ir.CompositeConstruct(ir.IMul(src0, src1), ir.Imm32(0U)));
|
||||
const IR::U64 sum_result = ir.IAdd(mul_result, src2);
|
||||
|
||||
SetDst64(inst.dst[0], sum_result);
|
||||
|
@ -463,6 +787,13 @@ void Translator::V_MAX3_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_MAX3_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
|
||||
|
@ -561,38 +892,58 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
|
||||
constexpr u32 SIGNALING_NAN = 1 << 0;
|
||||
constexpr u32 QUIET_NAN = 1 << 1;
|
||||
constexpr u32 NEGATIVE_INFINITY = 1 << 2;
|
||||
constexpr u32 NEGATIVE_NORMAL = 1 << 3;
|
||||
constexpr u32 NEGATIVE_DENORM = 1 << 4;
|
||||
constexpr u32 NEGATIVE_ZERO = 1 << 5;
|
||||
constexpr u32 POSITIVE_ZERO = 1 << 6;
|
||||
constexpr u32 POSITIVE_DENORM = 1 << 7;
|
||||
constexpr u32 POSITIVE_NORMAL = 1 << 8;
|
||||
constexpr u32 POSITIVE_INFINITY = 1 << 9;
|
||||
|
||||
const IR::F32F64 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
IR::U1 value;
|
||||
if (src1.IsImmediate()) {
|
||||
const u32 class_mask = src1.U32();
|
||||
IR::U1 value;
|
||||
if ((class_mask & (SIGNALING_NAN | QUIET_NAN)) == (SIGNALING_NAN | QUIET_NAN)) {
|
||||
const auto class_mask = static_cast<IR::FloatClassFunc>(src1.U32());
|
||||
if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
|
||||
value = ir.FPIsNan(src0);
|
||||
} else if ((class_mask & (POSITIVE_INFINITY | NEGATIVE_INFINITY)) ==
|
||||
(POSITIVE_INFINITY | NEGATIVE_INFINITY)) {
|
||||
} else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
|
||||
value = ir.FPIsInf(src0);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
if (inst.dst[1].field == OperandField::VccLo) {
|
||||
return ir.SetVcc(value);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else {
|
||||
// We don't know the type yet, delay its resolution.
|
||||
value = ir.FPCmpClass32(src0, src1);
|
||||
}
|
||||
|
||||
switch (inst.dst[1].field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.SetVcc(value);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::V_FFBL_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FindILsb(src0));
|
||||
}
|
||||
|
||||
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 lane_id = ir.LaneId();
|
||||
|
||||
const auto [warp_half, mask_shift] = [&]() -> std::pair<IR::U32, IR::U32> {
|
||||
if (profile.subgroup_size == 32) {
|
||||
const IR::U32 warp_half = ir.BitwiseAnd(ir.WarpId(), ir.Imm32(1));
|
||||
return std::make_pair(warp_half, lane_id);
|
||||
}
|
||||
const IR::U32 warp_half = ir.ShiftRightLogical(lane_id, ir.Imm32(5));
|
||||
const IR::U32 mask_shift = ir.BitwiseAnd(lane_id, ir.Imm32(0x1F));
|
||||
return std::make_pair(warp_half, mask_shift);
|
||||
}();
|
||||
|
||||
const IR::U32 thread_mask = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), mask_shift), ir.Imm32(1));
|
||||
const IR::U1 is_odd_warp = ir.INotEqual(warp_half, ir.Imm32(0));
|
||||
const IR::U32 mask = IR::U32{ir.Select(is_odd_warp, is_low ? ir.Imm32(~0U) : thread_mask,
|
||||
is_low ? thread_mask : ir.Imm32(0))};
|
||||
const IR::U32 masked_value = ir.BitwiseAnd(src0, mask);
|
||||
const IR::U32 result = ir.IAdd(src1, ir.BitCount(masked_value));
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -12,4 +12,24 @@ void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
|
|||
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||
}
|
||||
|
||||
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
|
||||
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
||||
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||
}
|
||||
|
||||
void Translator::EmitVectorInterpolation(const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::V_INTERP_P1_F32:
|
||||
return;
|
||||
case Opcode::V_INTERP_P2_F32:
|
||||
return V_INTERP_P2_F32(inst);
|
||||
case Opcode::V_INTERP_MOV_F32:
|
||||
return V_INTERP_MOV_F32(inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -5,9 +5,96 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::EmitVectorMemory(const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
case Opcode::IMAGE_SAMPLE_LZ_O:
|
||||
case Opcode::IMAGE_SAMPLE_O:
|
||||
case Opcode::IMAGE_SAMPLE_C:
|
||||
case Opcode::IMAGE_SAMPLE_C_LZ:
|
||||
case Opcode::IMAGE_SAMPLE_LZ:
|
||||
case Opcode::IMAGE_SAMPLE:
|
||||
case Opcode::IMAGE_SAMPLE_L:
|
||||
case Opcode::IMAGE_SAMPLE_C_O:
|
||||
case Opcode::IMAGE_SAMPLE_B:
|
||||
case Opcode::IMAGE_SAMPLE_C_LZ_O:
|
||||
return IMAGE_SAMPLE(inst);
|
||||
case Opcode::IMAGE_GATHER4_C:
|
||||
case Opcode::IMAGE_GATHER4_LZ:
|
||||
case Opcode::IMAGE_GATHER4_LZ_O:
|
||||
return IMAGE_GATHER(inst);
|
||||
case Opcode::IMAGE_ATOMIC_ADD:
|
||||
return IMAGE_ATOMIC(AtomicOp::Add, inst);
|
||||
case Opcode::IMAGE_ATOMIC_AND:
|
||||
return IMAGE_ATOMIC(AtomicOp::And, inst);
|
||||
case Opcode::IMAGE_ATOMIC_OR:
|
||||
return IMAGE_ATOMIC(AtomicOp::Or, inst);
|
||||
case Opcode::IMAGE_ATOMIC_XOR:
|
||||
return IMAGE_ATOMIC(AtomicOp::Xor, inst);
|
||||
case Opcode::IMAGE_ATOMIC_UMAX:
|
||||
return IMAGE_ATOMIC(AtomicOp::Umax, inst);
|
||||
case Opcode::IMAGE_ATOMIC_SMAX:
|
||||
return IMAGE_ATOMIC(AtomicOp::Smax, inst);
|
||||
case Opcode::IMAGE_ATOMIC_UMIN:
|
||||
return IMAGE_ATOMIC(AtomicOp::Umin, inst);
|
||||
case Opcode::IMAGE_ATOMIC_SMIN:
|
||||
return IMAGE_ATOMIC(AtomicOp::Smin, inst);
|
||||
case Opcode::IMAGE_ATOMIC_INC:
|
||||
return IMAGE_ATOMIC(AtomicOp::Inc, inst);
|
||||
case Opcode::IMAGE_ATOMIC_DEC:
|
||||
return IMAGE_ATOMIC(AtomicOp::Dec, inst);
|
||||
case Opcode::IMAGE_GET_LOD:
|
||||
return IMAGE_GET_LOD(inst);
|
||||
case Opcode::IMAGE_STORE:
|
||||
return IMAGE_STORE(inst);
|
||||
case Opcode::IMAGE_LOAD_MIP:
|
||||
return IMAGE_LOAD(true, inst);
|
||||
case Opcode::IMAGE_LOAD:
|
||||
return IMAGE_LOAD(false, inst);
|
||||
case Opcode::IMAGE_GET_RESINFO:
|
||||
return IMAGE_GET_RESINFO(inst);
|
||||
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_X:
|
||||
return BUFFER_LOAD_FORMAT(1, true, true, inst);
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XY:
|
||||
return BUFFER_LOAD_FORMAT(2, true, true, inst);
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
|
||||
return BUFFER_LOAD_FORMAT(3, true, true, inst);
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
|
||||
return BUFFER_LOAD_FORMAT(4, true, true, inst);
|
||||
case Opcode::BUFFER_LOAD_FORMAT_X:
|
||||
return BUFFER_LOAD_FORMAT(1, false, true, inst);
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XY:
|
||||
return BUFFER_LOAD_FORMAT(2, false, true, inst);
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XYZ:
|
||||
return BUFFER_LOAD_FORMAT(3, false, true, inst);
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
|
||||
return BUFFER_LOAD_FORMAT(4, false, true, inst);
|
||||
case Opcode::BUFFER_LOAD_DWORD:
|
||||
return BUFFER_LOAD_FORMAT(1, false, false, inst);
|
||||
case Opcode::BUFFER_LOAD_DWORDX2:
|
||||
return BUFFER_LOAD_FORMAT(2, false, false, inst);
|
||||
case Opcode::BUFFER_LOAD_DWORDX3:
|
||||
return BUFFER_LOAD_FORMAT(3, false, false, inst);
|
||||
case Opcode::BUFFER_LOAD_DWORDX4:
|
||||
return BUFFER_LOAD_FORMAT(4, false, false, inst);
|
||||
case Opcode::BUFFER_STORE_FORMAT_X:
|
||||
case Opcode::BUFFER_STORE_DWORD:
|
||||
return BUFFER_STORE_FORMAT(1, false, inst);
|
||||
case Opcode::BUFFER_STORE_DWORDX2:
|
||||
return BUFFER_STORE_FORMAT(2, false, inst);
|
||||
case Opcode::BUFFER_STORE_DWORDX3:
|
||||
return BUFFER_STORE_FORMAT(3, false, inst);
|
||||
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
||||
case Opcode::BUFFER_STORE_DWORDX4:
|
||||
return BUFFER_STORE_FORMAT(4, false, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||
const auto flags = ImageResFlags(inst.control.mimg.dmask);
|
||||
const bool has_mips = flags.test(ImageResComponent::MipCount);
|
||||
const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code));
|
||||
|
@ -157,7 +244,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
|
|||
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
||||
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
||||
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||
info.explicit_lod.Assign(explicit_lod);
|
||||
// info.explicit_lod.Assign(explicit_lod);
|
||||
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
|
||||
|
||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||
|
|
|
@ -12,16 +12,16 @@
|
|||
namespace Shader::IR {
|
||||
|
||||
template <typename Pred>
|
||||
auto BreadthFirstSearch(const Value& value, Pred&& pred)
|
||||
-> std::invoke_result_t<Pred, const Inst*> {
|
||||
if (value.IsImmediate()) {
|
||||
// Nothing to do with immediates
|
||||
return std::nullopt;
|
||||
auto BreadthFirstSearch(const Inst* inst, Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> {
|
||||
// In the most common case the instruction itself is already the desired one.
|
||||
if (const std::optional result = pred(inst)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Breadth-first search visiting the right most arguments first
|
||||
boost::container::small_vector<const Inst*, 2> visited;
|
||||
std::queue<const Inst*> queue;
|
||||
queue.push(value.InstRecursive());
|
||||
queue.push(inst);
|
||||
|
||||
while (!queue.empty()) {
|
||||
// Pop one instruction from the queue
|
||||
|
@ -49,4 +49,14 @@ auto BreadthFirstSearch(const Value& value, Pred&& pred)
|
|||
return std::nullopt;
|
||||
}
|
||||
|
||||
template <typename Pred>
|
||||
auto BreadthFirstSearch(const Value& value, Pred&& pred)
|
||||
-> std::invoke_result_t<Pred, const Inst*> {
|
||||
if (value.IsImmediate()) {
|
||||
// Nothing to do with immediates
|
||||
return std::nullopt;
|
||||
}
|
||||
return BreadthFirstSearch(value.InstRecursive(), pred);
|
||||
}
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
|
|
@ -278,7 +278,7 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
|||
case 32:
|
||||
return Inst<U32>(Opcode::LoadSharedU32, offset);
|
||||
case 64:
|
||||
return Inst<U64>(Opcode::LoadSharedU64, offset);
|
||||
return Inst(Opcode::LoadSharedU64, offset);
|
||||
case 128:
|
||||
return Inst(Opcode::LoadSharedU128, offset);
|
||||
default:
|
||||
|
@ -373,6 +373,10 @@ U32 IREmitter::LaneId() {
|
|||
return Inst<U32>(Opcode::LaneId);
|
||||
}
|
||||
|
||||
// Emits a WarpId instruction (no operands) and returns its U32 result.
U32 IREmitter::WarpId() {
    constexpr Opcode op = Opcode::WarpId;
    return Inst<U32>(op);
}
|
||||
|
||||
// Emits a QuadShuffle instruction with operands (value, index) and returns its
// U32 result.
U32 IREmitter::QuadShuffle(const U32& value, const U32& index) {
    constexpr Opcode op = Opcode::QuadShuffle;
    return Inst<U32>(op, value, index);
}
|
||||
|
@ -876,6 +880,10 @@ U1 IREmitter::FPIsInf(const F32F64& value) {
|
|||
}
|
||||
}
|
||||
|
||||
// Emits an FPCmpClass32 instruction with operands (value, op) and returns its U1
// result. `op` carries the float class mask tested against `value`.
U1 IREmitter::FPCmpClass32(const F32& value, const U32& op) {
    constexpr Opcode opcode = Opcode::FPCmpClass32;
    return Inst<U1>(opcode, value, op);
}
|
||||
|
||||
U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) {
|
||||
if (lhs.Type() != rhs.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
|
||||
|
@ -1088,6 +1096,10 @@ U32 IREmitter::FindUMsb(const U32& value) {
|
|||
return Inst<U32>(Opcode::FindUMsb32, value);
|
||||
}
|
||||
|
||||
// Emits a FindILsb32 instruction on `value` and returns its U32 result.
U32 IREmitter::FindILsb(const U32& value) {
    constexpr Opcode op = Opcode::FindILsb32;
    return Inst<U32>(op, value);
}
|
||||
|
||||
// Emits an SMin32 instruction with operands (a, b) and returns its U32 result.
U32 IREmitter::SMin(const U32& a, const U32& b) {
    constexpr Opcode op = Opcode::SMin32;
    return Inst<U32>(op, a, b);
}
|
||||
|
@ -1274,6 +1286,11 @@ U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
case 32:
|
||||
switch (value.Type()) {
|
||||
case Type::U16:
|
||||
return Inst<U32>(Opcode::ConvertU32U16, value);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -95,6 +95,7 @@ public:
|
|||
BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] U32 LaneId();
|
||||
[[nodiscard]] U32 WarpId();
|
||||
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
|
||||
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
|
||||
|
@ -150,6 +151,7 @@ public:
|
|||
[[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||
[[nodiscard]] U1 FPIsNan(const F32F64& value);
|
||||
[[nodiscard]] U1 FPIsInf(const F32F64& value);
|
||||
[[nodiscard]] U1 FPCmpClass32(const F32& value, const U32& op);
|
||||
[[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
|
||||
[[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
|
||||
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
|
||||
|
@ -179,6 +181,7 @@ public:
|
|||
|
||||
[[nodiscard]] U32 FindSMsb(const U32& value);
|
||||
[[nodiscard]] U32 FindUMsb(const U32& value);
|
||||
[[nodiscard]] U32 FindILsb(const U32& value);
|
||||
[[nodiscard]] U32 SMin(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32 UMin(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
|
||||
|
|
|
@ -219,6 +219,7 @@ OPCODE(FPIsNan32, U1, F32,
|
|||
OPCODE(FPIsNan64, U1, F64, )
|
||||
OPCODE(FPIsInf32, U1, F32, )
|
||||
OPCODE(FPIsInf64, U1, F64, )
|
||||
OPCODE(FPCmpClass32, U1, F32, U32 )
|
||||
|
||||
// Integer operations
|
||||
OPCODE(IAdd32, U32, U32, U32, )
|
||||
|
@ -254,6 +255,7 @@ OPCODE(BitwiseNot32, U32, U32,
|
|||
|
||||
OPCODE(FindSMsb32, U32, U32, )
|
||||
OPCODE(FindUMsb32, U32, U32, )
|
||||
OPCODE(FindILsb32, U32, U32, )
|
||||
OPCODE(SMin32, U32, U32, U32, )
|
||||
OPCODE(UMin32, U32, U32, U32, )
|
||||
OPCODE(SMax32, U32, U32, U32, )
|
||||
|
@ -293,6 +295,7 @@ OPCODE(ConvertF64S32, F64, U32,
|
|||
OPCODE(ConvertF64U32, F64, U32, )
|
||||
OPCODE(ConvertF32U16, F32, U16, )
|
||||
OPCODE(ConvertU16U32, U16, U32, )
|
||||
OPCODE(ConvertU32U16, U32, U16, )
|
||||
|
||||
// Image operations
|
||||
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
|
||||
|
@ -323,4 +326,5 @@ OPCODE(ImageAtomicExchange32, U32, Opaq
|
|||
|
||||
// Warp operations
|
||||
OPCODE(LaneId, U32, )
|
||||
OPCODE(WarpId, U32, )
|
||||
OPCODE(QuadShuffle, U32, U32, U32 )
|
||||
|
|
|
@ -238,6 +238,18 @@ void FoldBooleanConvert(IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
// Resolves an FPCmpClass32 whose class mask (argument 1) is an immediate by
// replacing the opcode with a dedicated check:
//   * mask contains both NaN bits      -> FPIsNan32
//   * mask contains both infinity bits -> FPIsInf32
// The NaN test is tried first. A non-immediate mask trips the assertion, and any
// other mask is treated as unreachable.
void FoldCmpClass(IR::Inst& inst) {
    const IR::Value mask_arg = inst.Arg(1);
    ASSERT_MSG(mask_arg.IsImmediate(), "Unable to resolve compare operation");

    const auto requested = static_cast<IR::FloatClassFunc>(mask_arg.U32());
    const auto covers = [requested](IR::FloatClassFunc bits) {
        return (requested & bits) == bits;
    };

    if (covers(IR::FloatClassFunc::NaN)) {
        inst.ReplaceOpcode(IR::Opcode::FPIsNan32);
        return;
    }
    if (covers(IR::FloatClassFunc::Infinity)) {
        inst.ReplaceOpcode(IR::Opcode::FPIsInf32);
        return;
    }
    UNREACHABLE();
}
|
||||
|
||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::IAdd32:
|
||||
|
@ -251,6 +263,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
case IR::Opcode::IMul32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
|
||||
return;
|
||||
case IR::Opcode::FPCmpClass32:
|
||||
FoldCmpClass(inst);
|
||||
return;
|
||||
case IR::Opcode::ShiftRightArithmetic32:
|
||||
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
|
||||
return;
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <deque>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
|
@ -273,9 +272,18 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
|
|||
}
|
||||
|
||||
SharpLocation TrackSharp(const IR::Inst* inst) {
|
||||
while (inst->GetOpcode() == IR::Opcode::Phi) {
|
||||
inst = inst->Arg(0).InstRecursive();
|
||||
}
|
||||
// Search until we find a potential sharp source.
|
||||
const auto pred0 = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData ||
|
||||
inst->GetOpcode() == IR::Opcode::ReadConst) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto result = IR::BreadthFirstSearch(inst, pred0);
|
||||
ASSERT_MSG(result, "Unable to track sharp source");
|
||||
inst = result.value();
|
||||
// If it's from user data there is not much else to do.
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return SharpLocation{
|
||||
.sgpr_base = u32(IR::ScalarReg::Max),
|
||||
|
@ -289,14 +297,14 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
|||
const IR::Inst* spgpr_base = inst->Arg(0).InstRecursive();
|
||||
|
||||
// Retrieve SGPR pair that holds sbase
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
|
||||
const auto pred1 = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst->Arg(0).ScalarReg();
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto base0 = IR::BreadthFirstSearch(spgpr_base->Arg(0), pred);
|
||||
const auto base1 = IR::BreadthFirstSearch(spgpr_base->Arg(1), pred);
|
||||
const auto base0 = IR::BreadthFirstSearch(spgpr_base->Arg(0), pred1);
|
||||
const auto base1 = IR::BreadthFirstSearch(spgpr_base->Arg(1), pred1);
|
||||
ASSERT_MSG(base0 && base1, "Nested resource loads not supported");
|
||||
|
||||
// Return retrieved location.
|
||||
|
@ -456,36 +464,26 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
|||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
std::deque<IR::Inst*> insts{&inst};
|
||||
const auto& pred = [](auto opcode) -> bool {
|
||||
return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData);
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
const auto opcode = inst->GetOpcode();
|
||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
IR::Inst* producer{};
|
||||
while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) {
|
||||
for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) {
|
||||
const auto arg = producer->Arg(arg_idx);
|
||||
if (arg.TryInstRecursive()) {
|
||||
insts.push_back(arg.InstRecursive());
|
||||
}
|
||||
}
|
||||
insts.pop_front();
|
||||
}
|
||||
ASSERT(pred(producer->GetOpcode()));
|
||||
auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
||||
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
||||
return std::make_pair(producer->Arg(0).InstRecursive(),
|
||||
producer->Arg(1).InstRecursive());
|
||||
}
|
||||
return std::make_pair(producer, nullptr);
|
||||
}();
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||
const IR::Inst* producer = result.value();
|
||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(tsharp_handle);
|
||||
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sgpr_base = tsharp.sgpr_base,
|
||||
.dword_offset = tsharp.dword_offset,
|
||||
|
@ -496,17 +494,32 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
});
|
||||
|
||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||
if (ssharp_handle) {
|
||||
const u32 sampler_binding = [&] {
|
||||
if (!has_sampler) {
|
||||
return 0U;
|
||||
}
|
||||
const IR::Value& handle = producer->Arg(1);
|
||||
// Inline sampler resource.
|
||||
if (handle.IsImmediate()) {
|
||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||
return descriptors.Add(SamplerResource{
|
||||
.sgpr_base = std::numeric_limits<u32>::max(),
|
||||
.dword_offset = 0,
|
||||
.inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
|
||||
});
|
||||
}
|
||||
// Normal sampler resource.
|
||||
const auto ssharp_handle = handle.InstRecursive();
|
||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||
const auto ssharp = TrackSharp(ssharp_ud);
|
||||
const u32 sampler_binding = descriptors.Add(SamplerResource{
|
||||
return descriptors.Add(SamplerResource{
|
||||
.sgpr_base = ssharp.sgpr_base,
|
||||
.dword_offset = ssharp.dword_offset,
|
||||
.associated_image = image_binding,
|
||||
.disable_aniso = disable_aniso,
|
||||
});
|
||||
image_binding |= (sampler_binding << 16);
|
||||
}
|
||||
}();
|
||||
image_binding |= (sampler_binding << 16);
|
||||
|
||||
// Patch image handle
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
@ -607,7 +620,7 @@ void ResourceTrackingPass(IR::Program& program) {
|
|||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
Descriptors descriptors{info.buffers, info.images, info.samplers};
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||
|
|
|
@ -20,11 +20,19 @@ void Visit(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::LoadSharedU8:
|
||||
case IR::Opcode::WriteSharedU8:
|
||||
info.uses_shared_u8 = true;
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::LoadSharedS16:
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
info.uses_shared_u16 = true;
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::ConvertF32F16:
|
||||
case IR::Opcode::BitCastF16U16:
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/enum.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
|
@ -24,6 +25,23 @@ enum class FpDenormMode : u32 {
|
|||
InOutAllow = 3,
|
||||
};
|
||||
|
||||
enum class FloatClassFunc : u32 {
|
||||
SignalingNan = 1 << 0,
|
||||
QuietNan = 1 << 1,
|
||||
NegativeInfinity = 1 << 2,
|
||||
NegativeNormal = 1 << 3,
|
||||
NegativeDenorm = 1 << 4,
|
||||
NegativeZero = 1 << 5,
|
||||
PositiveZero = 1 << 6,
|
||||
PositiveDenorm = 1 << 7,
|
||||
PositiveNormal = 1 << 8,
|
||||
PositiveInfinity = 1 << 9,
|
||||
|
||||
NaN = SignalingNan | QuietNan,
|
||||
Infinity = PositiveInfinity | NegativeInfinity,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc)
|
||||
|
||||
union Mode {
|
||||
BitField<0, 4, FpRoundMode> fp_round;
|
||||
BitField<4, 2, FpDenormMode> fp_denorm_single;
|
||||
|
|
|
@ -9,6 +9,7 @@ namespace Shader {
|
|||
|
||||
struct Profile {
|
||||
u32 supported_spirv{0x00010000};
|
||||
u32 subgroup_size{};
|
||||
bool unified_descriptor_binding{};
|
||||
bool support_descriptor_aliasing{};
|
||||
bool support_int8{};
|
||||
|
|
|
@ -28,7 +28,8 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
|
|||
}
|
||||
|
||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
std::span<const u32> token, const Info&& info) {
|
||||
std::span<const u32> token, const Info&& info,
|
||||
const Profile& profile) {
|
||||
// Ensure first instruction is expected.
|
||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
|
||||
|
@ -49,7 +50,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
|
||||
// Structurize control flow graph and create program.
|
||||
program.info = std::move(info);
|
||||
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
|
||||
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile);
|
||||
program.blocks = GenerateBlocks(program.syntax_list);
|
||||
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
|
||||
|
||||
|
@ -60,9 +61,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||
Shader::Optimization::CollectShaderInfoPass(program);
|
||||
|
||||
fmt::print("Post passes\n\n{}\n", Shader::IR::DumpProgram(program));
|
||||
std::fflush(stdout);
|
||||
LOG_INFO(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
|
||||
|
||||
return program;
|
||||
}
|
||||
|
|
|
@ -9,8 +9,11 @@
|
|||
|
||||
namespace Shader {
|
||||
|
||||
struct Profile;
|
||||
|
||||
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
|
||||
ObjectPool<IR::Block>& block_pool,
|
||||
std::span<const u32> code, const Info&& info);
|
||||
std::span<const u32> code, const Info&& info,
|
||||
const Profile& profile);
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -97,8 +97,11 @@ using ImageResourceList = boost::container::static_vector<ImageResource, 16>;
|
|||
struct SamplerResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
AmdGpu::Sampler inline_sampler{};
|
||||
u32 associated_image : 4;
|
||||
u32 disable_aniso : 1;
|
||||
|
||||
constexpr AmdGpu::Sampler GetSsharp(const Info& info) const noexcept;
|
||||
};
|
||||
using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>;
|
||||
|
||||
|
@ -175,6 +178,7 @@ struct Info {
|
|||
bool has_image_gather{};
|
||||
bool has_image_query{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_shared{};
|
||||
bool uses_shared_u8{};
|
||||
bool uses_shared_u16{};
|
||||
bool uses_fp16{};
|
||||
|
@ -196,6 +200,10 @@ constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexc
|
|||
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
// Returns the sampler sharp for this resource: the inline sampler when it is
// non-zero, otherwise the sharp read from user data at (sgpr_base, dword_offset).
constexpr AmdGpu::Sampler SamplerResource::GetSsharp(const Info& info) const noexcept {
    if (inline_sampler) {
        return inline_sampler;
    }
    return info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
}
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
template <>
|
||||
|
|
|
@ -403,9 +403,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
||||
}
|
||||
while (!wait_reg_mem->Test()) {
|
||||
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -506,9 +508,11 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
while (!wait_reg_mem->Test()) {
|
||||
mapped_queues[vqid].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(acb_task_name);
|
||||
regs.cs_program = mapped_queues[vqid].cs_state;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -529,7 +533,6 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||
}
|
||||
|
||||
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||
static constexpr u32 GfxQueueId = 0u;
|
||||
auto& queue = mapped_queues[GfxQueueId];
|
||||
|
||||
auto task = ProcessGraphics(dcb, ccb);
|
||||
|
|
|
@ -36,6 +36,7 @@ namespace AmdGpu {
|
|||
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
|
||||
|
||||
struct Liverpool {
|
||||
static constexpr u32 GfxQueueId = 0u;
|
||||
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
|
||||
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
|
||||
static constexpr u32 NumQueuesPerPipe = 8u;
|
||||
|
@ -1061,6 +1062,7 @@ private:
|
|||
struct GpuQueue {
|
||||
std::mutex m_access{};
|
||||
std::queue<Task::Handle> submits{};
|
||||
ComputeProgram cs_state{};
|
||||
};
|
||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||
|
||||
|
|
|
@ -7,6 +7,77 @@
|
|||
|
||||
namespace AmdGpu {
|
||||
|
||||
// Returns the enumerator name of `fmt` as text. Values without a case below are
// treated as unreachable.
std::string_view NameOf(DataFormat fmt) {
    // clang-format off
    switch (fmt) {
    case DataFormat::FormatInvalid:     return "FormatInvalid";
    case DataFormat::Format8:           return "Format8";
    case DataFormat::Format16:          return "Format16";
    case DataFormat::Format8_8:         return "Format8_8";
    case DataFormat::Format32:          return "Format32";
    case DataFormat::Format16_16:       return "Format16_16";
    case DataFormat::Format10_11_11:    return "Format10_11_11";
    case DataFormat::Format11_11_10:    return "Format11_11_10";
    case DataFormat::Format10_10_10_2:  return "Format10_10_10_2";
    case DataFormat::Format2_10_10_10:  return "Format2_10_10_10";
    case DataFormat::Format8_8_8_8:     return "Format8_8_8_8";
    case DataFormat::Format32_32:       return "Format32_32";
    case DataFormat::Format16_16_16_16: return "Format16_16_16_16";
    case DataFormat::Format32_32_32:    return "Format32_32_32";
    case DataFormat::Format32_32_32_32: return "Format32_32_32_32";
    case DataFormat::Format5_6_5:       return "Format5_6_5";
    case DataFormat::Format1_5_5_5:     return "Format1_5_5_5";
    case DataFormat::Format5_5_5_1:     return "Format5_5_5_1";
    case DataFormat::Format4_4_4_4:     return "Format4_4_4_4";
    case DataFormat::Format8_24:        return "Format8_24";
    case DataFormat::Format24_8:        return "Format24_8";
    case DataFormat::FormatX24_8_32:    return "FormatX24_8_32";
    case DataFormat::FormatGB_GR:       return "FormatGB_GR";
    case DataFormat::FormatBG_RG:       return "FormatBG_RG";
    case DataFormat::Format5_9_9_9:     return "Format5_9_9_9";
    case DataFormat::FormatBc1:         return "FormatBc1";
    case DataFormat::FormatBc2:         return "FormatBc2";
    case DataFormat::FormatBc3:         return "FormatBc3";
    case DataFormat::FormatBc4:         return "FormatBc4";
    case DataFormat::FormatBc5:         return "FormatBc5";
    case DataFormat::FormatBc6:         return "FormatBc6";
    case DataFormat::FormatBc7:         return "FormatBc7";
    default:
        UNREACHABLE();
    }
    // clang-format on
}
|
||||
|
||||
std::string_view NameOf(NumberFormat fmt) {
|
||||
switch (fmt) {
|
||||
case NumberFormat::Unorm:
|
||||
|
|
|
@ -61,6 +61,7 @@ enum class NumberFormat : u32 {
|
|||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
[[nodiscard]] std::string_view NameOf(DataFormat fmt);
|
||||
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
|
||||
|
||||
int NumComponents(DataFormat format);
|
||||
|
@ -70,6 +71,16 @@ s32 ComponentOffset(DataFormat format, u32 comp);
|
|||
|
||||
} // namespace AmdGpu
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<AmdGpu::DataFormat> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(AmdGpu::DataFormat fmt, format_context& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<AmdGpu::NumberFormat> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
|
|
|
@ -75,7 +75,7 @@ struct Buffer {
|
|||
static_assert(sizeof(Buffer) == 16); // 128bits
|
||||
|
||||
enum class ImageType : u64 {
|
||||
Buffer = 0,
|
||||
Invalid = 0,
|
||||
Color1D = 8,
|
||||
Color2D = 9,
|
||||
Color3D = 10,
|
||||
|
@ -88,8 +88,8 @@ enum class ImageType : u64 {
|
|||
|
||||
constexpr std::string_view NameOf(ImageType type) {
|
||||
switch (type) {
|
||||
case ImageType::Buffer:
|
||||
return "Buffer";
|
||||
case ImageType::Invalid:
|
||||
return "Invalid";
|
||||
case ImageType::Color1D:
|
||||
return "Color1D";
|
||||
case ImageType::Color2D:
|
||||
|
@ -179,6 +179,40 @@ struct Image {
|
|||
return base_address << 8;
|
||||
}
|
||||
|
||||
// Packs the four destination selectors into one value, three bits each:
// x in bits 0-2, y in bits 3-5, z in bits 6-8, w in bits 9-11.
u32 DstSelect() const {
    u32 packed = dst_sel_x;
    packed |= dst_sel_y << 3;
    packed |= dst_sel_z << 6;
    packed |= dst_sel_w << 9;
    return packed;
}
|
||||
|
||||
// Maps a 3-bit destination selector to a display character:
// 0 -> '0', 1 -> '1', 4..7 -> 'R', 'G', 'B', 'A'.
// Any other value is treated as unreachable.
static char SelectComp(u32 sel) {
    if (sel == 0) {
        return '0';
    }
    if (sel == 1) {
        return '1';
    }
    if (sel >= 4 && sel <= 7) {
        return "RGBA"[sel - 4];
    }
    UNREACHABLE();
}
|
||||
|
||||
std::string DstSelectName() const {
|
||||
std::string result = "[";
|
||||
u32 dst_sel = DstSelect();
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
result += SelectComp(dst_sel & 7);
|
||||
dst_sel >>= 3;
|
||||
}
|
||||
result += ']';
|
||||
return result;
|
||||
}
|
||||
|
||||
u32 Pitch() const {
|
||||
return pitch + 1;
|
||||
}
|
||||
|
@ -290,6 +324,7 @@ enum class BorderColor : u64 {
|
|||
// Table 8.12 Sampler Resource Definition
|
||||
struct Sampler {
|
||||
union {
|
||||
u64 raw0;
|
||||
BitField<0, 3, ClampMode> clamp_x;
|
||||
BitField<3, 3, ClampMode> clamp_y;
|
||||
BitField<6, 3, ClampMode> clamp_z;
|
||||
|
@ -309,6 +344,7 @@ struct Sampler {
|
|||
BitField<60, 4, u64> perf_z;
|
||||
};
|
||||
union {
|
||||
u64 raw1;
|
||||
BitField<0, 14, u64> lod_bias;
|
||||
BitField<14, 6, u64> lod_bias_sec;
|
||||
BitField<20, 2, Filter> xy_mag_filter;
|
||||
|
@ -323,6 +359,10 @@ struct Sampler {
|
|||
BitField<62, 2, BorderColor> border_color_type;
|
||||
};
|
||||
|
||||
// True when at least one bit of either raw descriptor word is set,
// i.e. the sampler descriptor is not entirely zero.
// NOTE(review): this conversion is implicit; check call sites before making it explicit.
operator bool() const noexcept {
    return (raw0 | raw1) != 0;
}
|
||||
|
||||
float LodBias() const noexcept {
|
||||
return static_cast<float>(static_cast<int16_t>((lod_bias.Value() ^ 0x2000u) - 0x2000u)) /
|
||||
256.0f;
|
||||
|
|
|
@ -297,6 +297,7 @@ std::span<const vk::Format> GetAllFormats() {
|
|||
vk::Format::eBc3UnormBlock,
|
||||
vk::Format::eBc4UnormBlock,
|
||||
vk::Format::eBc5UnormBlock,
|
||||
vk::Format::eBc5SnormBlock,
|
||||
vk::Format::eBc7SrgbBlock,
|
||||
vk::Format::eBc7UnormBlock,
|
||||
vk::Format::eD16Unorm,
|
||||
|
@ -308,6 +309,7 @@ std::span<const vk::Format> GetAllFormats() {
|
|||
vk::Format::eR8G8B8A8Srgb,
|
||||
vk::Format::eR8G8B8A8Uint,
|
||||
vk::Format::eR8G8B8A8Unorm,
|
||||
vk::Format::eR8G8B8A8Snorm,
|
||||
vk::Format::eR8G8B8A8Uscaled,
|
||||
vk::Format::eR8G8Snorm,
|
||||
vk::Format::eR8G8Uint,
|
||||
|
@ -335,6 +337,10 @@ std::span<const vk::Format> GetAllFormats() {
|
|||
vk::Format::eR32Sfloat,
|
||||
vk::Format::eR32Sint,
|
||||
vk::Format::eR32Uint,
|
||||
vk::Format::eBc6HUfloatBlock,
|
||||
vk::Format::eR16G16Unorm,
|
||||
vk::Format::eR16G16B16A16Sscaled,
|
||||
vk::Format::eR16G16Sscaled,
|
||||
};
|
||||
return formats;
|
||||
}
|
||||
|
@ -384,10 +390,17 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc5UnormBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Snorm) {
|
||||
return vk::Format::eBc5SnormBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Sint) {
|
||||
return vk::Format::eR16G16B16A16Sint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Sscaled) {
|
||||
return vk::Format::eR16G16B16A16Sscaled;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Float) {
|
||||
return vk::Format::eR16G16Sfloat;
|
||||
|
@ -496,6 +509,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
num_format == AmdGpu::NumberFormat::Sint) {
|
||||
return vk::Format::eR16G16Sint;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Sscaled) {
|
||||
return vk::Format::eR16G16Sscaled;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||
num_format == AmdGpu::NumberFormat::Uscaled) {
|
||||
return vk::Format::eR8G8B8A8Uscaled;
|
||||
|
@ -518,6 +535,13 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
num_format == AmdGpu::NumberFormat::SnormNz) {
|
||||
return vk::Format::eR16G16B16A16Snorm;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||
num_format == AmdGpu::NumberFormat::Snorm) {
|
||||
return vk::Format::eR8G8B8A8Snorm;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc6HUfloatBlock;
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||
}
|
||||
|
||||
|
|
|
@ -148,7 +148,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
|||
}
|
||||
}
|
||||
for (const auto& sampler : info.samplers) {
|
||||
const auto ssharp = info.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
|
||||
const auto ssharp = sampler.GetSsharp(info);
|
||||
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
||||
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
set_writes.push_back({
|
||||
|
|
|
@ -386,7 +386,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||
}
|
||||
}
|
||||
for (const auto& sampler : stage.samplers) {
|
||||
auto ssharp = stage.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
|
||||
auto ssharp = sampler.GetSsharp(stage);
|
||||
if (sampler.disable_aniso) {
|
||||
const auto& tsharp = tsharps[sampler.associated_image];
|
||||
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
|
||||
|
|
|
@ -164,10 +164,11 @@ bool Instance::CreateDevice() {
|
|||
vk::PhysicalDeviceVulkan13Features,
|
||||
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR,
|
||||
vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
|
||||
const vk::StructureChain properties_chain =
|
||||
physical_device.getProperties2<vk::PhysicalDeviceProperties2,
|
||||
vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
|
||||
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
|
||||
const vk::StructureChain properties_chain = physical_device.getProperties2<
|
||||
vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
|
||||
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>();
|
||||
subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
|
||||
LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
|
||||
|
||||
features = feature_chain.get().features;
|
||||
if (available_extensions.empty()) {
|
||||
|
@ -261,6 +262,7 @@ bool Instance::CreateDevice() {
|
|||
.shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats,
|
||||
.shaderStorageImageMultisample = features.shaderStorageImageMultisample,
|
||||
.shaderClipDistance = features.shaderClipDistance,
|
||||
.shaderInt64 = features.shaderInt64,
|
||||
.shaderInt16 = features.shaderInt16,
|
||||
},
|
||||
},
|
||||
|
|
|
@ -188,6 +188,11 @@ public:
|
|||
return properties.limits.nonCoherentAtomSize;
|
||||
}
|
||||
|
||||
/// Returns the subgroup size of the selected physical device.
/// The value is the cached `subgroup_size` member, which is zero-initialized and assigned in
/// CreateDevice() from vk::PhysicalDeviceVulkan11Properties::subgroupSize.
|
||||
u32 SubgroupSize() const {
|
||||
return subgroup_size;
|
||||
}
|
||||
|
||||
/// Returns the maximum supported elements in a texel buffer
|
||||
u32 MaxTexelBufferElements() const {
|
||||
return properties.limits.maxTexelBufferElements;
|
||||
|
@ -249,6 +254,7 @@ private:
|
|||
bool workgroup_memory_explicit_layout{};
|
||||
bool color_write_en{};
|
||||
u64 min_imported_host_pointer_alignment{};
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
bool debug_utils_supported{};
|
||||
bool has_nsight_graphics{};
|
||||
|
|
|
@ -109,6 +109,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
|
||||
profile = Shader::Profile{
|
||||
.supported_spirv = 0x00010600U,
|
||||
.subgroup_size = instance.SubgroupSize(),
|
||||
.support_explicit_workgroup_layout = true,
|
||||
};
|
||||
}
|
||||
|
@ -268,7 +269,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
info.pgm_base = pgm->Address<uintptr_t>();
|
||||
info.pgm_hash = hash;
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
programs[i] =
|
||||
Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
|
||||
|
@ -308,7 +310,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
|||
Shader::Info info =
|
||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||
info.pgm_base = cs_pgm.Address<uintptr_t>();
|
||||
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
auto program =
|
||||
Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
u32 binding{};
|
||||
|
|
|
@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
|||
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
||||
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
||||
pipeline_cache{instance, scheduler, liverpool},
|
||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} {
|
||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 2_GB, BufferType::Upload} {
|
||||
if (!Config::nullGpu()) {
|
||||
liverpool->BindRasterizer(this);
|
||||
}
|
||||
|
@ -128,6 +128,7 @@ void Rasterizer::BeginRendering() {
|
|||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
|
||||
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
|
||||
state.color_images[state.num_color_attachments] = image.image;
|
||||
state.color_attachments[state.num_color_attachments++] = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eGeneral,
|
||||
|
@ -152,6 +153,7 @@ void Rasterizer::BeginRendering() {
|
|||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
state.depth_image = image.image;
|
||||
state.depth_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = image.layout,
|
||||
|
|
|
@ -50,7 +50,32 @@ void Scheduler::EndRendering() {
|
|||
return;
|
||||
}
|
||||
is_rendering = false;
|
||||
boost::container::static_vector<vk::ImageMemoryBarrier, 9> barriers;
|
||||
for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
|
||||
barriers.push_back(vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
|
||||
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = render_state.color_images[i],
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
});
|
||||
}
|
||||
current_cmdbuf.endRendering();
|
||||
if (!barriers.empty()) {
|
||||
current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
||||
vk::PipelineStageFlagBits::eFragmentShader,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
|
||||
}
|
||||
}
|
||||
|
||||
void Scheduler::Flush(SubmitInfo& info) {
|
||||
|
|
|
@ -15,7 +15,9 @@ class Instance;
|
|||
|
||||
struct RenderState {
|
||||
std::array<vk::RenderingAttachmentInfo, 8> color_attachments{};
|
||||
std::array<vk::Image, 8> color_images{};
|
||||
vk::RenderingAttachmentInfo depth_attachment{};
|
||||
vk::Image depth_image{};
|
||||
u32 num_color_attachments{};
|
||||
u32 num_depth_attachments{};
|
||||
u32 width = std::numeric_limits<u32>::max();
|
||||
|
|
|
@ -47,6 +47,20 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Reports whether the packed destination select `dst_sel` (three bits per component, X in the
/// lowest bits) is exactly the in-order R, G, B, A selection for the first `num_components`
/// components, with all higher selector bits zero.
/// Component counts outside 1..4 never match.
bool IsIdentityMapping(u32 dst_sel, u32 num_components) {
    switch (num_components) {
    case 1:
        return dst_sel == 0b100;
    case 2:
        return dst_sel == 0b101'100;
    case 3:
        return dst_sel == 0b110'101'100;
    case 4:
        return dst_sel == 0b111'110'101'100;
    default:
        return false;
    }
}
|
||||
|
||||
/// Tries to express a destination swizzle through the Vulkan format itself.
/// Only one case is handled: eR8G8B8A8Unorm with selectors X=6, Y=5, Z=4, W=7 becomes
/// eB8G8R8A8Unorm. Any other combination returns `format` unchanged, which callers can use to
/// detect that no substitution was made.
vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
    // Packed as W:Z:Y:X, three bits each.
    constexpr u32 kSwappedRedBlue = 0b111'100'101'110;
    const bool is_swapped_rgba8 =
        format == vk::Format::eR8G8B8A8Unorm && dst_sel == kSwappedRedBlue;
    return is_swapped_rgba8 ? vk::Format::eB8G8R8A8Unorm : format;
}
|
||||
|
||||
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept
|
||||
: is_storage{is_storage} {
|
||||
type = ConvertImageViewType(image.GetType());
|
||||
|
@ -60,9 +74,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexce
|
|||
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
|
||||
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
|
||||
// Check for unfortunate case of storage images being swizzled
|
||||
if (is_storage && (mapping != vk::ComponentMapping{})) {
|
||||
LOG_ERROR(Render_Vulkan, "Storage image requires swizzling");
|
||||
const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt());
|
||||
const u32 dst_sel = image.DstSelect();
|
||||
if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) {
|
||||
mapping = vk::ComponentMapping{};
|
||||
if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) {
|
||||
format = new_format;
|
||||
return;
|
||||
}
|
||||
LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}", num_comps,
|
||||
image.DstSelectName());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ struct ImageViewInfo {
|
|||
|
||||
struct Image;
|
||||
|
||||
constexpr Common::SlotId NULL_IMAGE_VIEW_ID{0};
|
||||
|
||||
struct ImageView {
|
||||
explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image,
|
||||
ImageId image_id, std::optional<vk::ImageUsageFlags> usage_override = {});
|
||||
|
|
|
@ -142,14 +142,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
|
|||
image_ids.push_back(image_id);
|
||||
});
|
||||
|
||||
ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
|
||||
// ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
|
||||
|
||||
ImageId image_id{};
|
||||
if (image_ids.empty()) {
|
||||
image_id = slot_images.insert(instance, scheduler, info);
|
||||
RegisterImage(image_id);
|
||||
} else {
|
||||
image_id = image_ids[0];
|
||||
image_id = image_ids[image_ids.size() > 1 ? 1 : 0];
|
||||
}
|
||||
|
||||
Image& image = slot_images[image_id];
|
||||
|
@ -183,12 +183,17 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
|
|||
}
|
||||
|
||||
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
|
||||
if (info.guest_address == 0) [[unlikely]] {
|
||||
return slot_image_views[NULL_IMAGE_VIEW_ID];
|
||||
}
|
||||
|
||||
const ImageId image_id = FindImage(info);
|
||||
Image& image = slot_images[image_id];
|
||||
auto& usage = image.info.usage;
|
||||
|
||||
if (view_info.is_storage) {
|
||||
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
|
||||
usage.storage = true;
|
||||
} else {
|
||||
const auto new_layout = image.info.IsDepthStencil()
|
||||
|
@ -206,7 +211,7 @@ ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo&
|
|||
view_info_tmp.range.extent.levels > image.info.resources.levels ||
|
||||
view_info_tmp.range.extent.layers > image.info.resources.layers) {
|
||||
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
LOG_DEBUG(Render_Vulkan,
|
||||
"Subresource range ({}~{},{}~{}) exceeds base image extents ({},{})",
|
||||
view_info_tmp.range.base.level, view_info_tmp.range.extent.levels,
|
||||
view_info_tmp.range.base.layer, view_info_tmp.range.extent.layers,
|
||||
|
@ -341,7 +346,7 @@ void TextureCache::RefreshImage(Image& image) {
|
|||
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||
vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead);
|
||||
}
|
||||
|
||||
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
|
||||
|
|
Loading…
Reference in a new issue