video_core/shader: Refactor JIT-Engines into JitEngine type (#7210)
This commit is contained in:
parent
db7b929e47
commit
83b329f6e1
|
@ -37,7 +37,7 @@ static inline u64 ComputeStructHash64(const T& data) noexcept {
|
||||||
* Combines the seed parameter with the provided hash, producing a new unique hash
|
* Combines the seed parameter with the provided hash, producing a new unique hash
|
||||||
* Implementation from: http://boost.sourceforge.net/doc/html/boost/hash_combine.html
|
* Implementation from: http://boost.sourceforge.net/doc/html/boost/hash_combine.html
|
||||||
*/
|
*/
|
||||||
inline u64 HashCombine(std::size_t seed, const u64 hash) {
|
inline u64 HashCombine(const u64 seed, const u64 hash) {
|
||||||
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -156,13 +156,11 @@ add_library(video_core STATIC
|
||||||
shader/shader.h
|
shader/shader.h
|
||||||
shader/shader_interpreter.cpp
|
shader/shader_interpreter.cpp
|
||||||
shader/shader_interpreter.h
|
shader/shader_interpreter.h
|
||||||
shader/shader_jit_a64.cpp
|
shader/shader_jit.cpp
|
||||||
|
shader/shader_jit.h
|
||||||
shader/shader_jit_a64_compiler.cpp
|
shader/shader_jit_a64_compiler.cpp
|
||||||
shader/shader_jit_a64.h
|
|
||||||
shader/shader_jit_a64_compiler.h
|
shader/shader_jit_a64_compiler.h
|
||||||
shader/shader_jit_x64.cpp
|
|
||||||
shader/shader_jit_x64_compiler.cpp
|
shader/shader_jit_x64_compiler.cpp
|
||||||
shader/shader_jit_x64.h
|
|
||||||
shader/shader_jit_x64_compiler.h
|
shader/shader_jit_x64_compiler.h
|
||||||
texture/etc1.cpp
|
texture/etc1.cpp
|
||||||
texture/etc1.h
|
texture/etc1.h
|
||||||
|
|
|
@ -13,17 +13,15 @@
|
||||||
#include "video_core/regs_shader.h"
|
#include "video_core/regs_shader.h"
|
||||||
#include "video_core/shader/shader.h"
|
#include "video_core/shader/shader.h"
|
||||||
#include "video_core/shader/shader_interpreter.h"
|
#include "video_core/shader/shader_interpreter.h"
|
||||||
#if CITRA_ARCH(x86_64)
|
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
||||||
#include "video_core/shader/shader_jit_x64.h"
|
#include "video_core/shader/shader_jit.h"
|
||||||
#elif CITRA_ARCH(arm64)
|
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
||||||
#include "video_core/shader/shader_jit_a64.h"
|
|
||||||
#endif
|
|
||||||
#include "video_core/video_core.h"
|
#include "video_core/video_core.h"
|
||||||
|
|
||||||
namespace Pica::Shader {
|
namespace Pica::Shader {
|
||||||
|
|
||||||
void OutputVertex::ValidateSemantics(const RasterizerRegs& regs) {
|
void OutputVertex::ValidateSemantics(const RasterizerRegs& regs) {
|
||||||
unsigned int num_attributes = regs.vs_output_total;
|
u32 num_attributes = regs.vs_output_total;
|
||||||
ASSERT(num_attributes <= 7);
|
ASSERT(num_attributes <= 7);
|
||||||
for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) {
|
for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) {
|
||||||
u32 output_register_map = regs.vs_output_attributes[attrib].raw;
|
u32 output_register_map = regs.vs_output_attributes[attrib].raw;
|
||||||
|
@ -54,7 +52,7 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
|
||||||
static_assert(sizeof(std::array<f24, 24>) == sizeof(ret),
|
static_assert(sizeof(std::array<f24, 24>) == sizeof(ret),
|
||||||
"Struct and array have different sizes.");
|
"Struct and array have different sizes.");
|
||||||
|
|
||||||
unsigned int num_attributes = regs.vs_output_total & 7;
|
u32 num_attributes = regs.vs_output_total & 7;
|
||||||
for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) {
|
for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) {
|
||||||
const auto output_register_map = regs.vs_output_attributes[attrib];
|
const auto output_register_map = regs.vs_output_attributes[attrib];
|
||||||
vertex_slots_overflow[output_register_map.map_x] = input.attr[attrib][0];
|
vertex_slots_overflow[output_register_map.map_x] = input.attr[attrib][0];
|
||||||
|
@ -65,7 +63,7 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
|
||||||
|
|
||||||
// The hardware takes the absolute and saturates vertex colors like this, *before* doing
|
// The hardware takes the absolute and saturates vertex colors like this, *before* doing
|
||||||
// interpolation
|
// interpolation
|
||||||
for (unsigned i = 0; i < 4; ++i) {
|
for (u32 i = 0; i < 4; ++i) {
|
||||||
float c = std::fabs(ret.color[i].ToFloat32());
|
float c = std::fabs(ret.color[i].ToFloat32());
|
||||||
ret.color[i] = f24::FromFloat32(c < 1.0f ? c : 1.0f);
|
ret.color[i] = f24::FromFloat32(c < 1.0f ? c : 1.0f);
|
||||||
}
|
}
|
||||||
|
@ -84,10 +82,10 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input) {
|
void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input) {
|
||||||
const unsigned max_attribute = config.max_input_attribute_index;
|
const u32 max_attribute = config.max_input_attribute_index;
|
||||||
|
|
||||||
for (unsigned attr = 0; attr <= max_attribute; ++attr) {
|
for (u32 attr = 0; attr <= max_attribute; ++attr) {
|
||||||
unsigned reg = config.GetRegisterForAttribute(attr);
|
u32 reg = config.GetRegisterForAttribute(attr);
|
||||||
registers.input[reg] = input.attr[attr];
|
registers.input[reg] = input.attr[attr];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -141,11 +139,9 @@ void GSUnitState::ConfigOutput(const ShaderRegs& config) {
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
|
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
|
||||||
|
|
||||||
#if CITRA_ARCH(x86_64)
|
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
||||||
static std::unique_ptr<JitX64Engine> jit_engine;
|
static std::unique_ptr<JitEngine> jit_engine;
|
||||||
#elif CITRA_ARCH(arm64)
|
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
||||||
static std::unique_ptr<JitA64Engine> jit_engine;
|
|
||||||
#endif
|
|
||||||
static InterpreterEngine interpreter_engine;
|
static InterpreterEngine interpreter_engine;
|
||||||
|
|
||||||
ShaderEngine* GetEngine() {
|
ShaderEngine* GetEngine() {
|
||||||
|
@ -153,7 +149,7 @@ ShaderEngine* GetEngine() {
|
||||||
// TODO(yuriks): Re-initialize on each change rather than being persistent
|
// TODO(yuriks): Re-initialize on each change rather than being persistent
|
||||||
if (VideoCore::g_shader_jit_enabled) {
|
if (VideoCore::g_shader_jit_enabled) {
|
||||||
if (jit_engine == nullptr) {
|
if (jit_engine == nullptr) {
|
||||||
jit_engine = std::make_unique<decltype(jit_engine)::element_type>();
|
jit_engine = std::make_unique<JitEngine>();
|
||||||
}
|
}
|
||||||
return jit_engine.get();
|
return jit_engine.get();
|
||||||
}
|
}
|
||||||
|
@ -164,7 +160,7 @@ ShaderEngine* GetEngine() {
|
||||||
|
|
||||||
void Shutdown() {
|
void Shutdown() {
|
||||||
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
||||||
jit_engine = nullptr;
|
jit_engine.reset();
|
||||||
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,8 +22,8 @@
|
||||||
|
|
||||||
namespace Pica::Shader {
|
namespace Pica::Shader {
|
||||||
|
|
||||||
constexpr unsigned MAX_PROGRAM_CODE_LENGTH = 4096;
|
constexpr u32 MAX_PROGRAM_CODE_LENGTH = 4096;
|
||||||
constexpr unsigned MAX_SWIZZLE_DATA_LENGTH = 4096;
|
constexpr u32 MAX_SWIZZLE_DATA_LENGTH = 4096;
|
||||||
using ProgramCode = std::array<u32, MAX_PROGRAM_CODE_LENGTH>;
|
using ProgramCode = std::array<u32, MAX_PROGRAM_CODE_LENGTH>;
|
||||||
using SwizzleData = std::array<u32, MAX_SWIZZLE_DATA_LENGTH>;
|
using SwizzleData = std::array<u32, MAX_SWIZZLE_DATA_LENGTH>;
|
||||||
|
|
||||||
|
@ -33,7 +33,7 @@ struct AttributeBuffer {
|
||||||
private:
|
private:
|
||||||
friend class boost::serialization::access;
|
friend class boost::serialization::access;
|
||||||
template <class Archive>
|
template <class Archive>
|
||||||
void serialize(Archive& ar, const unsigned int file_version) {
|
void serialize(Archive& ar, const u32 file_version) {
|
||||||
ar& attr;
|
ar& attr;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -62,7 +62,7 @@ struct OutputVertex {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <class Archive>
|
template <class Archive>
|
||||||
void serialize(Archive& ar, const unsigned int) {
|
void serialize(Archive& ar, const u32) {
|
||||||
ar& pos;
|
ar& pos;
|
||||||
ar& quat;
|
ar& quat;
|
||||||
ar& color;
|
ar& color;
|
||||||
|
@ -113,7 +113,7 @@ struct GSEmitter {
|
||||||
private:
|
private:
|
||||||
friend class boost::serialization::access;
|
friend class boost::serialization::access;
|
||||||
template <class Archive>
|
template <class Archive>
|
||||||
void serialize(Archive& ar, const unsigned int file_version) {
|
void serialize(Archive& ar, const u32 file_version) {
|
||||||
ar& buffer;
|
ar& buffer;
|
||||||
ar& vertex_id;
|
ar& vertex_id;
|
||||||
ar& prim_emit;
|
ar& prim_emit;
|
||||||
|
@ -142,7 +142,7 @@ struct UnitState {
|
||||||
private:
|
private:
|
||||||
friend class boost::serialization::access;
|
friend class boost::serialization::access;
|
||||||
template <class Archive>
|
template <class Archive>
|
||||||
void serialize(Archive& ar, const unsigned int file_version) {
|
void serialize(Archive& ar, const u32 file_version) {
|
||||||
ar& input;
|
ar& input;
|
||||||
ar& temporary;
|
ar& temporary;
|
||||||
ar& output;
|
ar& output;
|
||||||
|
@ -158,15 +158,15 @@ struct UnitState {
|
||||||
|
|
||||||
GSEmitter* emitter_ptr;
|
GSEmitter* emitter_ptr;
|
||||||
|
|
||||||
static std::size_t InputOffset(int register_index) {
|
static std::size_t InputOffset(s32 register_index) {
|
||||||
return offsetof(UnitState, registers.input) + register_index * sizeof(Common::Vec4<f24>);
|
return offsetof(UnitState, registers.input) + register_index * sizeof(Common::Vec4<f24>);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::size_t OutputOffset(int register_index) {
|
static std::size_t OutputOffset(s32 register_index) {
|
||||||
return offsetof(UnitState, registers.output) + register_index * sizeof(Common::Vec4<f24>);
|
return offsetof(UnitState, registers.output) + register_index * sizeof(Common::Vec4<f24>);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::size_t TemporaryOffset(int register_index) {
|
static std::size_t TemporaryOffset(s32 register_index) {
|
||||||
return offsetof(UnitState, registers.temporary) +
|
return offsetof(UnitState, registers.temporary) +
|
||||||
register_index * sizeof(Common::Vec4<f24>);
|
register_index * sizeof(Common::Vec4<f24>);
|
||||||
}
|
}
|
||||||
|
@ -184,7 +184,7 @@ struct UnitState {
|
||||||
private:
|
private:
|
||||||
friend class boost::serialization::access;
|
friend class boost::serialization::access;
|
||||||
template <class Archive>
|
template <class Archive>
|
||||||
void serialize(Archive& ar, const unsigned int file_version) {
|
void serialize(Archive& ar, const u32 file_version) {
|
||||||
ar& registers;
|
ar& registers;
|
||||||
ar& conditional_code;
|
ar& conditional_code;
|
||||||
ar& address_registers;
|
ar& address_registers;
|
||||||
|
@ -207,7 +207,7 @@ struct GSUnitState : public UnitState {
|
||||||
private:
|
private:
|
||||||
friend class boost::serialization::access;
|
friend class boost::serialization::access;
|
||||||
template <class Archive>
|
template <class Archive>
|
||||||
void serialize(Archive& ar, const unsigned int file_version) {
|
void serialize(Archive& ar, const u32 file_version) {
|
||||||
ar& boost::serialization::base_object<UnitState>(*this);
|
ar& boost::serialization::base_object<UnitState>(*this);
|
||||||
ar& emitter;
|
ar& emitter;
|
||||||
}
|
}
|
||||||
|
@ -221,22 +221,22 @@ struct Uniforms {
|
||||||
std::array<bool, 16> b;
|
std::array<bool, 16> b;
|
||||||
std::array<Common::Vec4<u8>, 4> i;
|
std::array<Common::Vec4<u8>, 4> i;
|
||||||
|
|
||||||
static std::size_t GetFloatUniformOffset(unsigned index) {
|
static std::size_t GetFloatUniformOffset(u32 index) {
|
||||||
return offsetof(Uniforms, f) + index * sizeof(Common::Vec4<f24>);
|
return offsetof(Uniforms, f) + index * sizeof(Common::Vec4<f24>);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::size_t GetBoolUniformOffset(unsigned index) {
|
static std::size_t GetBoolUniformOffset(u32 index) {
|
||||||
return offsetof(Uniforms, b) + index * sizeof(bool);
|
return offsetof(Uniforms, b) + index * sizeof(bool);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::size_t GetIntUniformOffset(unsigned index) {
|
static std::size_t GetIntUniformOffset(u32 index) {
|
||||||
return offsetof(Uniforms, i) + index * sizeof(Common::Vec4<u8>);
|
return offsetof(Uniforms, i) + index * sizeof(Common::Vec4<u8>);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class boost::serialization::access;
|
friend class boost::serialization::access;
|
||||||
template <class Archive>
|
template <class Archive>
|
||||||
void serialize(Archive& ar, const unsigned int file_version) {
|
void serialize(Archive& ar, const u32 file_version) {
|
||||||
ar& f;
|
ar& f;
|
||||||
ar& b;
|
ar& b;
|
||||||
ar& i;
|
ar& i;
|
||||||
|
@ -251,7 +251,7 @@ struct ShaderSetup {
|
||||||
|
|
||||||
/// Data private to ShaderEngines
|
/// Data private to ShaderEngines
|
||||||
struct EngineData {
|
struct EngineData {
|
||||||
unsigned int entry_point;
|
u32 entry_point;
|
||||||
/// Used by the JIT, points to a compiled shader object.
|
/// Used by the JIT, points to a compiled shader object.
|
||||||
const void* cached_shader = nullptr;
|
const void* cached_shader = nullptr;
|
||||||
} engine_data;
|
} engine_data;
|
||||||
|
@ -288,7 +288,7 @@ private:
|
||||||
|
|
||||||
friend class boost::serialization::access;
|
friend class boost::serialization::access;
|
||||||
template <class Archive>
|
template <class Archive>
|
||||||
void serialize(Archive& ar, const unsigned int file_version) {
|
void serialize(Archive& ar, const u32 file_version) {
|
||||||
ar& uniforms;
|
ar& uniforms;
|
||||||
ar& program_code;
|
ar& program_code;
|
||||||
ar& swizzle_data;
|
ar& swizzle_data;
|
||||||
|
@ -307,7 +307,7 @@ public:
|
||||||
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once
|
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once
|
||||||
* per vertex, which would happen within the `Run` function).
|
* per vertex, which would happen within the `Run` function).
|
||||||
*/
|
*/
|
||||||
virtual void SetupBatch(ShaderSetup& setup, unsigned int entry_point) = 0;
|
virtual void SetupBatch(ShaderSetup& setup, u32 entry_point) = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runs the currently setup shader.
|
* Runs the currently setup shader.
|
||||||
|
|
|
@ -3,27 +3,32 @@
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include "common/arch.h"
|
#include "common/arch.h"
|
||||||
#if CITRA_ARCH(x86_64)
|
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "video_core/shader/shader.h"
|
#include "video_core/shader/shader.h"
|
||||||
#include "video_core/shader/shader_jit_x64.h"
|
#include "video_core/shader/shader_jit.h"
|
||||||
|
#if CITRA_ARCH(arm64)
|
||||||
|
#include "video_core/shader/shader_jit_a64_compiler.h"
|
||||||
|
#endif
|
||||||
|
#if CITRA_ARCH(x86_64)
|
||||||
#include "video_core/shader/shader_jit_x64_compiler.h"
|
#include "video_core/shader/shader_jit_x64_compiler.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Pica::Shader {
|
namespace Pica::Shader {
|
||||||
|
|
||||||
JitX64Engine::JitX64Engine() = default;
|
JitEngine::JitEngine() = default;
|
||||||
JitX64Engine::~JitX64Engine() = default;
|
JitEngine::~JitEngine() = default;
|
||||||
|
|
||||||
void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
|
void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
|
||||||
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
|
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
|
||||||
setup.engine_data.entry_point = entry_point;
|
setup.engine_data.entry_point = entry_point;
|
||||||
|
|
||||||
u64 code_hash = setup.GetProgramCodeHash();
|
const u64 code_hash = setup.GetProgramCodeHash();
|
||||||
u64 swizzle_hash = setup.GetSwizzleDataHash();
|
const u64 swizzle_hash = setup.GetSwizzleDataHash();
|
||||||
|
|
||||||
u64 cache_key = code_hash ^ swizzle_hash;
|
const u64 cache_key = Common::HashCombine(code_hash, swizzle_hash);
|
||||||
auto iter = cache.find(cache_key);
|
auto iter = cache.find(cache_key);
|
||||||
if (iter != cache.end()) {
|
if (iter != cache.end()) {
|
||||||
setup.engine_data.cached_shader = iter->second.get();
|
setup.engine_data.cached_shader = iter->second.get();
|
||||||
|
@ -37,7 +42,7 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
|
||||||
|
|
||||||
MICROPROFILE_DECLARE(GPU_Shader);
|
MICROPROFILE_DECLARE(GPU_Shader);
|
||||||
|
|
||||||
void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state) const {
|
void JitEngine::Run(const ShaderSetup& setup, UnitState& state) const {
|
||||||
ASSERT(setup.engine_data.cached_shader != nullptr);
|
ASSERT(setup.engine_data.cached_shader != nullptr);
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(GPU_Shader);
|
MICROPROFILE_SCOPE(GPU_Shader);
|
||||||
|
@ -48,4 +53,4 @@ void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state) const {
|
||||||
|
|
||||||
} // namespace Pica::Shader
|
} // namespace Pica::Shader
|
||||||
|
|
||||||
#endif // CITRA_ARCH(x86_64)
|
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
|
@ -5,7 +5,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "common/arch.h"
|
#include "common/arch.h"
|
||||||
#if CITRA_ARCH(x86_64)
|
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
@ -16,12 +16,12 @@ namespace Pica::Shader {
|
||||||
|
|
||||||
class JitShader;
|
class JitShader;
|
||||||
|
|
||||||
class JitX64Engine final : public ShaderEngine {
|
class JitEngine final : public ShaderEngine {
|
||||||
public:
|
public:
|
||||||
JitX64Engine();
|
JitEngine();
|
||||||
~JitX64Engine() override;
|
~JitEngine() override;
|
||||||
|
|
||||||
void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override;
|
void SetupBatch(ShaderSetup& setup, u32 entry_point) override;
|
||||||
void Run(const ShaderSetup& setup, UnitState& state) const override;
|
void Run(const ShaderSetup& setup, UnitState& state) const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -30,4 +30,4 @@ private:
|
||||||
|
|
||||||
} // namespace Pica::Shader
|
} // namespace Pica::Shader
|
||||||
|
|
||||||
#endif // CITRA_ARCH(x86_64)
|
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
|
@ -1,51 +0,0 @@
|
||||||
// Copyright 2023 Citra Emulator Project
|
|
||||||
// Licensed under GPLv2 or any later version
|
|
||||||
// Refer to the license.txt file included.
|
|
||||||
|
|
||||||
#include "common/arch.h"
|
|
||||||
#if CITRA_ARCH(arm64)
|
|
||||||
|
|
||||||
#include "common/assert.h"
|
|
||||||
#include "common/microprofile.h"
|
|
||||||
#include "video_core/shader/shader.h"
|
|
||||||
#include "video_core/shader/shader_jit_a64.h"
|
|
||||||
#include "video_core/shader/shader_jit_a64_compiler.h"
|
|
||||||
|
|
||||||
namespace Pica::Shader {
|
|
||||||
|
|
||||||
JitA64Engine::JitA64Engine() = default;
|
|
||||||
JitA64Engine::~JitA64Engine() = default;
|
|
||||||
|
|
||||||
void JitA64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
|
|
||||||
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
|
|
||||||
setup.engine_data.entry_point = entry_point;
|
|
||||||
|
|
||||||
u64 code_hash = setup.GetProgramCodeHash();
|
|
||||||
u64 swizzle_hash = setup.GetSwizzleDataHash();
|
|
||||||
|
|
||||||
u64 cache_key = code_hash ^ swizzle_hash;
|
|
||||||
auto iter = cache.find(cache_key);
|
|
||||||
if (iter != cache.end()) {
|
|
||||||
setup.engine_data.cached_shader = iter->second.get();
|
|
||||||
} else {
|
|
||||||
auto shader = std::make_unique<JitShader>();
|
|
||||||
shader->Compile(&setup.program_code, &setup.swizzle_data);
|
|
||||||
setup.engine_data.cached_shader = shader.get();
|
|
||||||
cache.emplace_hint(iter, cache_key, std::move(shader));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MICROPROFILE_DECLARE(GPU_Shader);
|
|
||||||
|
|
||||||
void JitA64Engine::Run(const ShaderSetup& setup, UnitState& state) const {
|
|
||||||
ASSERT(setup.engine_data.cached_shader != nullptr);
|
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(GPU_Shader);
|
|
||||||
|
|
||||||
const JitShader* shader = static_cast<const JitShader*>(setup.engine_data.cached_shader);
|
|
||||||
shader->Run(setup, state, setup.engine_data.entry_point);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Pica::Shader
|
|
||||||
|
|
||||||
#endif // CITRA_ARCH(arm64)
|
|
|
@ -1,33 +0,0 @@
|
||||||
// Copyright 2023 Citra Emulator Project
|
|
||||||
// Licensed under GPLv2 or any later version
|
|
||||||
// Refer to the license.txt file included.
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "common/arch.h"
|
|
||||||
#if CITRA_ARCH(arm64)
|
|
||||||
|
|
||||||
#include <memory>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include "common/common_types.h"
|
|
||||||
#include "video_core/shader/shader.h"
|
|
||||||
|
|
||||||
namespace Pica::Shader {
|
|
||||||
|
|
||||||
class JitShader;
|
|
||||||
|
|
||||||
class JitA64Engine final : public ShaderEngine {
|
|
||||||
public:
|
|
||||||
JitA64Engine();
|
|
||||||
~JitA64Engine() override;
|
|
||||||
|
|
||||||
void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override;
|
|
||||||
void Run(const ShaderSetup& setup, UnitState& state) const override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::unordered_map<u64, std::unique_ptr<JitShader>> cache;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace Pica::Shader
|
|
||||||
|
|
||||||
#endif // CITRA_ARCH(arm64)
|
|
|
@ -163,7 +163,7 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {}
|
||||||
* @param src_reg SourceRegister object corresponding to the source register to load
|
* @param src_reg SourceRegister object corresponding to the source register to load
|
||||||
* @param dest Destination QReg register to store the loaded, swizzled source register
|
* @param dest Destination QReg register to store the loaded, swizzled source register
|
||||||
*/
|
*/
|
||||||
void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
|
void JitShader::Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegister src_reg,
|
||||||
QReg dest) {
|
QReg dest) {
|
||||||
XReg src_ptr = XZR;
|
XReg src_ptr = XZR;
|
||||||
std::size_t src_offset;
|
std::size_t src_offset;
|
||||||
|
@ -855,7 +855,7 @@ void JitShader::Compile_SETE(Instruction instr) {
|
||||||
l(end);
|
l(end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitShader::Compile_Block(unsigned end) {
|
void JitShader::Compile_Block(u32 end) {
|
||||||
while (program_counter < end) {
|
while (program_counter < end) {
|
||||||
Compile_NextInstr();
|
Compile_NextInstr();
|
||||||
}
|
}
|
||||||
|
@ -957,7 +957,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||||
BR(ABI_PARAM3);
|
BR(ABI_PARAM3);
|
||||||
|
|
||||||
// Compile entire program
|
// Compile entire program
|
||||||
Compile_Block(static_cast<unsigned>(program_code->size()));
|
Compile_Block(static_cast<u32>(program_code->size()));
|
||||||
|
|
||||||
// Free memory that's no longer needed
|
// Free memory that's no longer needed
|
||||||
program_code = nullptr;
|
program_code = nullptr;
|
||||||
|
|
|
@ -37,7 +37,7 @@ class JitShader : private oaknut::CodeBlock, public oaknut::CodeGenerator {
|
||||||
public:
|
public:
|
||||||
JitShader();
|
JitShader();
|
||||||
|
|
||||||
void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
|
void Run(const ShaderSetup& setup, UnitState& state, u32 offset) const {
|
||||||
program(&setup.uniforms, &state, instruction_labels[offset].ptr<const std::byte*>());
|
program(&setup.uniforms, &state, instruction_labels[offset].ptr<const std::byte*>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,10 +75,10 @@ public:
|
||||||
void Compile_SETE(Instruction instr);
|
void Compile_SETE(Instruction instr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Compile_Block(unsigned end);
|
void Compile_Block(u32 end);
|
||||||
void Compile_NextInstr();
|
void Compile_NextInstr();
|
||||||
|
|
||||||
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
|
void Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegister src_reg,
|
||||||
oaknut::QReg dest);
|
oaknut::QReg dest);
|
||||||
void Compile_DestEnable(Instruction instr, oaknut::QReg dest);
|
void Compile_DestEnable(Instruction instr, oaknut::QReg dest);
|
||||||
|
|
||||||
|
@ -129,9 +129,9 @@ private:
|
||||||
std::vector<oaknut::Label> loop_break_labels;
|
std::vector<oaknut::Label> loop_break_labels;
|
||||||
|
|
||||||
/// Offsets in code where a return needs to be inserted
|
/// Offsets in code where a return needs to be inserted
|
||||||
std::vector<unsigned> return_offsets;
|
std::vector<u32> return_offsets;
|
||||||
|
|
||||||
unsigned program_counter = 0; ///< Offset of the next instruction to decode
|
u32 program_counter = 0; ///< Offset of the next instruction to decode
|
||||||
u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
|
u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
|
||||||
|
|
||||||
using CompiledShader = void(const void* setup, void* state, const std::byte* start_addr);
|
using CompiledShader = void(const void* setup, void* state, const std::byte* start_addr);
|
||||||
|
|
|
@ -187,7 +187,7 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {
|
||||||
* @param src_reg SourceRegister object corresponding to the source register to load
|
* @param src_reg SourceRegister object corresponding to the source register to load
|
||||||
* @param dest Destination XMM register to store the loaded, swizzled source register
|
* @param dest Destination XMM register to store the loaded, swizzled source register
|
||||||
*/
|
*/
|
||||||
void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
|
void JitShader::Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegister src_reg,
|
||||||
Xmm dest) {
|
Xmm dest) {
|
||||||
Reg64 src_ptr;
|
Reg64 src_ptr;
|
||||||
std::size_t src_offset;
|
std::size_t src_offset;
|
||||||
|
@ -213,13 +213,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
||||||
ASSERT_MSG(src_offset == static_cast<std::size_t>(src_offset_disp),
|
ASSERT_MSG(src_offset == static_cast<std::size_t>(src_offset_disp),
|
||||||
"Source register offset too large for int type");
|
"Source register offset too large for int type");
|
||||||
|
|
||||||
unsigned operand_desc_id;
|
u32 operand_desc_id;
|
||||||
|
|
||||||
const bool is_inverted =
|
const bool is_inverted =
|
||||||
(0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
|
(0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
|
||||||
|
|
||||||
unsigned address_register_index;
|
u32 address_register_index;
|
||||||
unsigned offset_src;
|
u32 offset_src;
|
||||||
|
|
||||||
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
|
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
|
||||||
instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
|
instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
|
||||||
|
@ -254,7 +254,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
||||||
|
|
||||||
// First we add 128 to address_reg so the first comparison is turned to
|
// First we add 128 to address_reg so the first comparison is turned to
|
||||||
// address_reg >= 0 && address_reg < 256 which can be performed with
|
// address_reg >= 0 && address_reg < 256 which can be performed with
|
||||||
// a single unsigned comparison (cmovb)
|
// a single unsigned comparison (cmovb)
|
||||||
lea(eax, ptr[address_reg + 128]);
|
lea(eax, ptr[address_reg + 128]);
|
||||||
mov(ebx, src_reg.GetIndex());
|
mov(ebx, src_reg.GetIndex());
|
||||||
mov(ecx, address_reg.cvt32());
|
mov(ecx, address_reg.cvt32());
|
||||||
|
@ -297,7 +297,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
||||||
|
|
||||||
void JitShader::Compile_DestEnable(Instruction instr, Xmm src) {
|
void JitShader::Compile_DestEnable(Instruction instr, Xmm src) {
|
||||||
DestRegister dest;
|
DestRegister dest;
|
||||||
unsigned operand_desc_id;
|
u32 operand_desc_id;
|
||||||
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
|
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
|
||||||
instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
|
instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
|
||||||
operand_desc_id = instr.mad.operand_desc_id;
|
operand_desc_id = instr.mad.operand_desc_id;
|
||||||
|
@ -915,7 +915,7 @@ void JitShader::Compile_SETE(Instruction instr) {
|
||||||
L(end);
|
L(end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitShader::Compile_Block(unsigned end) {
|
void JitShader::Compile_Block(u32 end) {
|
||||||
while (program_counter < end) {
|
while (program_counter < end) {
|
||||||
Compile_NextInstr();
|
Compile_NextInstr();
|
||||||
}
|
}
|
||||||
|
@ -943,7 +943,7 @@ void JitShader::Compile_NextInstr() {
|
||||||
Instruction instr = {(*program_code)[program_counter++]};
|
Instruction instr = {(*program_code)[program_counter++]};
|
||||||
|
|
||||||
OpCode::Id opcode = instr.opcode.Value();
|
OpCode::Id opcode = instr.opcode.Value();
|
||||||
auto instr_func = instr_table[static_cast<unsigned>(opcode)];
|
auto instr_func = instr_table[static_cast<u32>(opcode)];
|
||||||
|
|
||||||
if (instr_func) {
|
if (instr_func) {
|
||||||
// JIT the instruction!
|
// JIT the instruction!
|
||||||
|
@ -1023,7 +1023,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||||
jmp(ABI_PARAM3);
|
jmp(ABI_PARAM3);
|
||||||
|
|
||||||
// Compile entire program
|
// Compile entire program
|
||||||
Compile_Block(static_cast<unsigned>(program_code->size()));
|
Compile_Block(static_cast<u32>(program_code->size()));
|
||||||
|
|
||||||
// Free memory that's no longer needed
|
// Free memory that's no longer needed
|
||||||
program_code = nullptr;
|
program_code = nullptr;
|
||||||
|
|
|
@ -36,7 +36,7 @@ class JitShader : public Xbyak::CodeGenerator {
|
||||||
public:
|
public:
|
||||||
JitShader();
|
JitShader();
|
||||||
|
|
||||||
void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
|
void Run(const ShaderSetup& setup, UnitState& state, u32 offset) const {
|
||||||
program(&setup.uniforms, &state, instruction_labels[offset].getAddress());
|
program(&setup.uniforms, &state, instruction_labels[offset].getAddress());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,10 +74,10 @@ public:
|
||||||
void Compile_SETE(Instruction instr);
|
void Compile_SETE(Instruction instr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Compile_Block(unsigned end);
|
void Compile_Block(u32 end);
|
||||||
void Compile_NextInstr();
|
void Compile_NextInstr();
|
||||||
|
|
||||||
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
|
void Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegister src_reg,
|
||||||
Xbyak::Xmm dest);
|
Xbyak::Xmm dest);
|
||||||
void Compile_DestEnable(Instruction instr, Xbyak::Xmm dest);
|
void Compile_DestEnable(Instruction instr, Xbyak::Xmm dest);
|
||||||
|
|
||||||
|
@ -128,9 +128,9 @@ private:
|
||||||
std::vector<Xbyak::Label> loop_break_labels;
|
std::vector<Xbyak::Label> loop_break_labels;
|
||||||
|
|
||||||
/// Offsets in code where a return needs to be inserted
|
/// Offsets in code where a return needs to be inserted
|
||||||
std::vector<unsigned> return_offsets;
|
std::vector<u32> return_offsets;
|
||||||
|
|
||||||
unsigned program_counter = 0; ///< Offset of the next instruction to decode
|
u32 program_counter = 0; ///< Offset of the next instruction to decode
|
||||||
u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
|
u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
|
||||||
|
|
||||||
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
|
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
|
||||||
|
|
Loading…
Reference in a new issue