Merge pull request #4168 from ReinUsesLisp/global-memory
gl_arb_decompiler: Use NV_shader_buffer_{load,store} on assembly shaders
This commit is contained in:
commit
61e4c0f83d
|
@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) {
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GlobalMemoryName(const GlobalMemoryBase& base) {
|
|
||||||
return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
class ARBDecompiler final {
|
class ARBDecompiler final {
|
||||||
public:
|
public:
|
||||||
explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||||
|
@ -199,6 +195,8 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void DefineGlobalMemory();
|
||||||
|
|
||||||
void DeclareHeader();
|
void DeclareHeader();
|
||||||
void DeclareVertex();
|
void DeclareVertex();
|
||||||
void DeclareGeometry();
|
void DeclareGeometry();
|
||||||
|
@ -228,6 +226,7 @@ private:
|
||||||
|
|
||||||
std::pair<std::string, std::size_t> BuildCoords(Operation);
|
std::pair<std::string, std::size_t> BuildCoords(Operation);
|
||||||
std::string BuildAoffi(Operation);
|
std::string BuildAoffi(Operation);
|
||||||
|
std::string GlobalMemoryPointer(const GmemNode& gmem);
|
||||||
void Exit();
|
void Exit();
|
||||||
|
|
||||||
std::string Assign(Operation);
|
std::string Assign(Operation);
|
||||||
|
@ -378,10 +377,8 @@ private:
|
||||||
std::string address;
|
std::string address;
|
||||||
std::string_view opname;
|
std::string_view opname;
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
||||||
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
|
address = GlobalMemoryPointer(*gmem);
|
||||||
Visit(gmem->GetBaseAddress()));
|
opname = "ATOM";
|
||||||
address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
|
|
||||||
opname = "ATOMB";
|
|
||||||
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||||
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
|
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
|
||||||
opname = "ATOMS";
|
opname = "ATOMS";
|
||||||
|
@ -456,9 +453,13 @@ private:
|
||||||
shader_source += '\n';
|
shader_source += '\n';
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string AllocTemporary() {
|
std::string AllocLongVectorTemporary() {
|
||||||
max_temporaries = std::max(max_temporaries, num_temporaries + 1);
|
max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
|
||||||
return fmt::format("T{}.x", num_temporaries++);
|
return fmt::format("L{}", num_long_temporaries++);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string AllocLongTemporary() {
|
||||||
|
return fmt::format("{}.x", AllocLongVectorTemporary());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string AllocVectorTemporary() {
|
std::string AllocVectorTemporary() {
|
||||||
|
@ -466,8 +467,13 @@ private:
|
||||||
return fmt::format("T{}", num_temporaries++);
|
return fmt::format("T{}", num_temporaries++);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string AllocTemporary() {
|
||||||
|
return fmt::format("{}.x", AllocVectorTemporary());
|
||||||
|
}
|
||||||
|
|
||||||
void ResetTemporaries() noexcept {
|
void ResetTemporaries() noexcept {
|
||||||
num_temporaries = 0;
|
num_temporaries = 0;
|
||||||
|
num_long_temporaries = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Device& device;
|
const Device& device;
|
||||||
|
@ -478,6 +484,11 @@ private:
|
||||||
std::size_t num_temporaries = 0;
|
std::size_t num_temporaries = 0;
|
||||||
std::size_t max_temporaries = 0;
|
std::size_t max_temporaries = 0;
|
||||||
|
|
||||||
|
std::size_t num_long_temporaries = 0;
|
||||||
|
std::size_t max_long_temporaries = 0;
|
||||||
|
|
||||||
|
std::map<GlobalMemoryBase, u32> global_memory_names;
|
||||||
|
|
||||||
std::string shader_source;
|
std::string shader_source;
|
||||||
|
|
||||||
static constexpr std::string_view ADD_F32 = "ADD.F32";
|
static constexpr std::string_view ADD_F32 = "ADD.F32";
|
||||||
|
@ -784,6 +795,8 @@ private:
|
||||||
ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||||
ShaderType stage, std::string_view identifier)
|
ShaderType stage, std::string_view identifier)
|
||||||
: device{device}, ir{ir}, registry{registry}, stage{stage} {
|
: device{device}, ir{ir}, registry{registry}, stage{stage} {
|
||||||
|
DefineGlobalMemory();
|
||||||
|
|
||||||
AddLine("TEMP RC;");
|
AddLine("TEMP RC;");
|
||||||
AddLine("TEMP FSWZA[4];");
|
AddLine("TEMP FSWZA[4];");
|
||||||
AddLine("TEMP FSWZB[4];");
|
AddLine("TEMP FSWZB[4];");
|
||||||
|
@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ARBDecompiler::DefineGlobalMemory() {
|
||||||
|
u32 binding = 0;
|
||||||
|
for (const auto& pair : ir.GetGlobalMemory()) {
|
||||||
|
const GlobalMemoryBase base = pair.first;
|
||||||
|
global_memory_names.emplace(base, binding);
|
||||||
|
++binding;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void ARBDecompiler::DeclareHeader() {
|
void ARBDecompiler::DeclareHeader() {
|
||||||
AddLine("!!NV{}5.0", HeaderStageName(stage));
|
AddLine("!!NV{}5.0", HeaderStageName(stage));
|
||||||
// Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
|
// Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
|
||||||
AddLine("OPTION NV_internal;");
|
AddLine("OPTION NV_internal;");
|
||||||
AddLine("OPTION NV_gpu_program_fp64;");
|
AddLine("OPTION NV_gpu_program_fp64;");
|
||||||
AddLine("OPTION NV_shader_storage_buffer;");
|
|
||||||
AddLine("OPTION NV_shader_thread_group;");
|
AddLine("OPTION NV_shader_thread_group;");
|
||||||
if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
|
if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
|
||||||
AddLine("OPTION NV_shader_thread_shuffle;");
|
AddLine("OPTION NV_shader_thread_shuffle;");
|
||||||
|
@ -951,11 +972,10 @@ void ARBDecompiler::DeclareLocalMemory() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARBDecompiler::DeclareGlobalMemory() {
|
void ARBDecompiler::DeclareGlobalMemory() {
|
||||||
u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
|
const std::size_t num_entries = ir.GetGlobalMemory().size();
|
||||||
for (const auto& pair : ir.GetGlobalMemory()) {
|
if (num_entries > 0) {
|
||||||
const auto& base = pair.first;
|
const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
|
||||||
AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
|
AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
|
||||||
++binding;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -977,6 +997,9 @@ void ARBDecompiler::DeclareTemporaries() {
|
||||||
for (std::size_t i = 0; i < max_temporaries; ++i) {
|
for (std::size_t i = 0; i < max_temporaries; ++i) {
|
||||||
AddLine("TEMP T{};", i);
|
AddLine("TEMP T{};", i);
|
||||||
}
|
}
|
||||||
|
for (std::size_t i = 0; i < max_long_temporaries; ++i) {
|
||||||
|
AddLine("LONG TEMP L{};", i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARBDecompiler::DeclarePredicates() {
|
void ARBDecompiler::DeclarePredicates() {
|
||||||
|
@ -1339,10 +1362,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
|
||||||
|
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||||
std::string temporary = AllocTemporary();
|
std::string temporary = AllocTemporary();
|
||||||
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
|
AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
|
||||||
Visit(gmem->GetBaseAddress()));
|
|
||||||
AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
|
|
||||||
temporary);
|
|
||||||
return temporary;
|
return temporary;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1419,6 +1439,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
|
||||||
return fmt::format(", offset({})", temporary);
|
return fmt::format(", offset({})", temporary);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
|
||||||
|
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
|
||||||
|
const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
|
||||||
|
|
||||||
|
const std::string pointer = AllocLongVectorTemporary();
|
||||||
|
std::string temporary = AllocTemporary();
|
||||||
|
|
||||||
|
const u32 local_index = binding / 2;
|
||||||
|
AddLine("PK64.U {}, c[{}];", pointer, local_index);
|
||||||
|
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
|
||||||
|
Visit(gmem.GetBaseAddress()));
|
||||||
|
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
|
||||||
|
AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
|
||||||
|
return fmt::format("{}.x", pointer);
|
||||||
|
}
|
||||||
|
|
||||||
void ARBDecompiler::Exit() {
|
void ARBDecompiler::Exit() {
|
||||||
if (stage != ShaderType::Fragment) {
|
if (stage != ShaderType::Fragment) {
|
||||||
AddLine("RET;");
|
AddLine("RET;");
|
||||||
|
@ -1515,11 +1551,7 @@ std::string ARBDecompiler::Assign(Operation operation) {
|
||||||
ResetTemporaries();
|
ResetTemporaries();
|
||||||
return {};
|
return {};
|
||||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||||
const std::string temporary = AllocTemporary();
|
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
|
||||||
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
|
|
||||||
Visit(gmem->GetBaseAddress()));
|
|
||||||
AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
|
|
||||||
temporary);
|
|
||||||
ResetTemporaries();
|
ResetTemporaries();
|
||||||
return {};
|
return {};
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
|
||||||
: VideoCommon::BufferBlock{cpu_addr, size} {
|
: VideoCommon::BufferBlock{cpu_addr, size} {
|
||||||
gl_buffer.Create();
|
gl_buffer.Create();
|
||||||
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
||||||
if (device.HasVertexBufferUnifiedMemory()) {
|
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
|
||||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
|
||||||
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
}
|
}
|
||||||
|
|
|
@ -139,6 +139,18 @@ void oglEnable(GLenum cap, bool state) {
|
||||||
(state ? glEnable : glDisable)(cap);
|
(state ? glEnable : glDisable)(cap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Uploads buffer GPU addresses as unsigned integer local parameters of an assembly program.
///
/// Every four-component parameter vector carries two 64-bit addresses. When num_entries is odd
/// the element at pointers[num_entries] is zeroed to pad the last vector, so the caller's
/// storage must have room for that extra element.
///
/// @param target      Assembly program target whose local parameters are updated.
/// @param pointers    Array of addresses; may be written one element past num_entries.
/// @param num_entries Number of valid addresses; nothing is done when it is zero.
void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
    if (num_entries != 0) {
        const bool has_odd_count = num_entries % 2 == 1;
        if (has_odd_count) {
            pointers[num_entries] = 0;
        }
        const auto* const words = reinterpret_cast<const GLuint*>(pointers);
        const auto vector_count = static_cast<GLsizei>((num_entries + 1) / 2);
        glProgramLocalParametersI4uivNV(target, 0, vector_count, words);
    }
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||||
|
@ -324,7 +336,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
||||||
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Shader);
|
MICROPROFILE_SCOPE(OpenGL_Shader);
|
||||||
auto& gpu = system.GPU().Maxwell3D();
|
auto& gpu = system.GPU().Maxwell3D();
|
||||||
std::size_t num_ssbos = 0;
|
|
||||||
u32 clip_distances = 0;
|
u32 clip_distances = 0;
|
||||||
|
|
||||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||||
|
@ -347,29 +358,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Currently this stages are not supported in the OpenGL backend.
|
// Currently this stages are not supported in the OpenGL backend.
|
||||||
// Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
|
// TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
|
||||||
if (program == Maxwell::ShaderProgram::TesselationControl) {
|
if (program == Maxwell::ShaderProgram::TesselationControl ||
|
||||||
continue;
|
program == Maxwell::ShaderProgram::TesselationEval) {
|
||||||
} else if (program == Maxwell::ShaderProgram::TesselationEval) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader* shader = shader_cache.GetStageProgram(program, async_shaders);
|
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
|
||||||
|
|
||||||
if (device.UseAssemblyShaders()) {
|
|
||||||
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
|
|
||||||
// all stages share the same bindings.
|
|
||||||
const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
|
|
||||||
ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
|
|
||||||
num_ssbos += num_stage_ssbos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stage indices are 0 - 5
|
|
||||||
const std::size_t stage = index == 0 ? 0 : index - 1;
|
|
||||||
SetupDrawConstBuffers(stage, shader);
|
|
||||||
SetupDrawGlobalMemory(stage, shader);
|
|
||||||
SetupDrawTextures(stage, shader);
|
|
||||||
SetupDrawImages(stage, shader);
|
|
||||||
|
|
||||||
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
|
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
|
||||||
switch (program) {
|
switch (program) {
|
||||||
|
@ -388,6 +383,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
shader_config.enable.Value(), shader_config.offset);
|
shader_config.enable.Value(), shader_config.offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stage indices are 0 - 5
|
||||||
|
const std::size_t stage = index == 0 ? 0 : index - 1;
|
||||||
|
SetupDrawConstBuffers(stage, shader);
|
||||||
|
SetupDrawGlobalMemory(stage, shader);
|
||||||
|
SetupDrawTextures(stage, shader);
|
||||||
|
SetupDrawImages(stage, shader);
|
||||||
|
|
||||||
// Workaround for Intel drivers.
|
// Workaround for Intel drivers.
|
||||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||||
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
|
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
|
||||||
|
@ -749,6 +751,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
current_cbuf = 0;
|
current_cbuf = 0;
|
||||||
|
|
||||||
auto kernel = shader_cache.GetComputeKernel(code_addr);
|
auto kernel = shader_cache.GetComputeKernel(code_addr);
|
||||||
|
program_manager.BindCompute(kernel->GetHandle());
|
||||||
|
|
||||||
SetupComputeTextures(kernel);
|
SetupComputeTextures(kernel);
|
||||||
SetupComputeImages(kernel);
|
SetupComputeImages(kernel);
|
||||||
|
|
||||||
|
@ -763,7 +767,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
buffer_cache.Unmap();
|
buffer_cache.Unmap();
|
||||||
|
|
||||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||||
program_manager.BindCompute(kernel->GetHandle());
|
|
||||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
}
|
}
|
||||||
|
@ -1023,40 +1026,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
|
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
|
||||||
|
static constexpr std::array TARGET_LUT = {
|
||||||
|
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||||
|
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||||
|
};
|
||||||
|
|
||||||
auto& gpu{system.GPU()};
|
auto& gpu{system.GPU()};
|
||||||
auto& memory_manager{gpu.MemoryManager()};
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
||||||
|
const auto& entries{shader->GetEntries().global_memory_entries};
|
||||||
|
|
||||||
u32 binding =
|
std::array<GLuint64EXT, 32> pointers;
|
||||||
device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
ASSERT(entries.size() < pointers.size());
|
||||||
for (const auto& entry : shader->GetEntries().global_memory_entries) {
|
|
||||||
|
const bool assembly_shaders = device.UseAssemblyShaders();
|
||||||
|
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
||||||
|
for (const auto& entry : entries) {
|
||||||
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
||||||
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
|
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
|
||||||
const u32 size{memory_manager.Read<u32>(addr + 8)};
|
const u32 size{memory_manager.Read<u32>(addr + 8)};
|
||||||
SetupGlobalMemory(binding++, entry, gpu_addr, size);
|
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
|
||||||
|
++binding;
|
||||||
|
}
|
||||||
|
if (assembly_shaders) {
|
||||||
|
UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Uploads and binds the global memory regions used by a compute kernel.
///
/// Each region's 64-bit GPU address and 32-bit size are read from the launch description's
/// constant buffers. With assembly shaders the collected buffer addresses are uploaded as local
/// parameters of the compute program.
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
    auto& gpu = system.GPU();
    auto& memory_manager = gpu.MemoryManager();
    const auto& cbufs = gpu.KeplerCompute().launch_description.const_buffer_config;
    const auto& entries = kernel->GetEntries().global_memory_entries;

    // Strictly less than: UpdateBindlessPointers may write one padding element past the end.
    std::array<GLuint64EXT, 32> pointers;
    ASSERT(entries.size() < pointers.size());

    u32 binding = 0;
    for (const auto& entry : entries) {
        const GPUVAddr descriptor_addr = cbufs[entry.cbuf_index].Address() + entry.cbuf_offset;
        const GPUVAddr gpu_addr = memory_manager.Read<u64>(descriptor_addr);
        const u32 size = memory_manager.Read<u32>(descriptor_addr + 8);
        GLuint64EXT* const slot = &pointers[binding];
        SetupGlobalMemory(binding, entry, gpu_addr, size, slot);
        ++binding;
    }
    if (!device.UseAssemblyShaders()) {
        return;
    }
    UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
||||||
GPUVAddr gpu_addr, std::size_t size) {
|
GPUVAddr gpu_addr, std::size_t size,
|
||||||
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
GLuint64EXT* pointer) {
|
||||||
|
const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
|
||||||
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
*pointer = info.address + info.offset;
|
||||||
|
} else {
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
||||||
static_cast<GLsizeiptr>(size));
|
static_cast<GLsizeiptr>(size));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
|
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
|
||||||
|
|
|
@ -124,9 +124,9 @@ private:
|
||||||
/// Configures the current global memory entries to use for the kernel invocation.
|
/// Configures the current global memory entries to use for the kernel invocation.
|
||||||
void SetupComputeGlobalMemory(Shader* kernel);
|
void SetupComputeGlobalMemory(Shader* kernel);
|
||||||
|
|
||||||
/// Configures a constant buffer.
|
/// Configures a global memory buffer.
|
||||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||||
std::size_t size);
|
std::size_t size, GLuint64EXT* pointer);
|
||||||
|
|
||||||
/// Configures the current textures to use for the draw command.
|
/// Configures the current textures to use for the draw command.
|
||||||
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||||
|
|
|
@ -11,8 +11,30 @@
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
ProgramManager::ProgramManager(const Device& device) {
|
namespace {
|
||||||
use_assembly_programs = device.UseAssemblyShaders();
|
|
||||||
|
/// Binds an assembly program to a stage, skipping the work when nothing changed.
///
/// @param stage   Assembly program target to enable, disable or bind.
/// @param current Program to bind; zero disables the stage.
/// @param old     Program previously bound to the stage; nothing happens when it equals current.
/// @param enabled Tracks whether the stage is currently enabled; updated in place.
void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
    if (current == old) {
        return;
    }
    const bool wants_stage = current != 0;
    if (enabled != wants_stage) {
        // Toggle the stage only when its tracked state differs from the requested one.
        enabled = wants_stage;
        if (wants_stage) {
            glEnable(stage);
        } else {
            glDisable(stage);
        }
    }
    if (wants_stage) {
        glBindProgramARB(stage, current);
    }
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
ProgramManager::ProgramManager(const Device& device)
|
||||||
|
: use_assembly_programs{device.UseAssemblyShaders()} {
|
||||||
if (use_assembly_programs) {
|
if (use_assembly_programs) {
|
||||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||||
} else {
|
} else {
|
||||||
|
@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProgramManager::BindGraphicsPipeline() {
|
void ProgramManager::BindGraphicsPipeline() {
|
||||||
if (use_assembly_programs) {
|
if (!use_assembly_programs) {
|
||||||
UpdateAssemblyPrograms();
|
|
||||||
} else {
|
|
||||||
UpdateSourcePrograms();
|
UpdateSourcePrograms();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProgramManager::UpdateAssemblyPrograms() {
|
void ProgramManager::UseVertexShader(GLuint program) {
|
||||||
const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
|
if (use_assembly_programs) {
|
||||||
if (current == old) {
|
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if (current == 0) {
|
current_state.vertex = program;
|
||||||
if (enabled) {
|
}
|
||||||
enabled = false;
|
|
||||||
glDisable(stage);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!enabled) {
|
|
||||||
enabled = true;
|
|
||||||
glEnable(stage);
|
|
||||||
}
|
|
||||||
glBindProgramARB(stage, current);
|
|
||||||
};
|
|
||||||
|
|
||||||
update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
|
void ProgramManager::UseGeometryShader(GLuint program) {
|
||||||
update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
|
if (use_assembly_programs) {
|
||||||
old_state.geometry);
|
BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
|
||||||
update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
|
}
|
||||||
old_state.fragment);
|
current_state.geometry = program;
|
||||||
|
}
|
||||||
|
|
||||||
old_state = current_state;
|
void ProgramManager::UseFragmentShader(GLuint program) {
|
||||||
|
if (use_assembly_programs) {
|
||||||
|
BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
|
||||||
|
}
|
||||||
|
current_state.fragment = program;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProgramManager::UpdateSourcePrograms() {
|
void ProgramManager::UpdateSourcePrograms() {
|
||||||
|
|
|
@ -45,17 +45,9 @@ public:
|
||||||
/// Rewinds BindHostPipeline state changes.
|
/// Rewinds BindHostPipeline state changes.
|
||||||
void RestoreGuestPipeline();
|
void RestoreGuestPipeline();
|
||||||
|
|
||||||
void UseVertexShader(GLuint program) {
|
void UseVertexShader(GLuint program);
|
||||||
current_state.vertex = program;
|
void UseGeometryShader(GLuint program);
|
||||||
}
|
void UseFragmentShader(GLuint program);
|
||||||
|
|
||||||
void UseGeometryShader(GLuint program) {
|
|
||||||
current_state.geometry = program;
|
|
||||||
}
|
|
||||||
|
|
||||||
void UseFragmentShader(GLuint program) {
|
|
||||||
current_state.fragment = program;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct PipelineState {
|
struct PipelineState {
|
||||||
|
@ -64,9 +56,6 @@ private:
|
||||||
GLuint fragment = 0;
|
GLuint fragment = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Update NV_gpu_program5 programs.
|
|
||||||
void UpdateAssemblyPrograms();
|
|
||||||
|
|
||||||
/// Update GLSL programs.
|
/// Update GLSL programs.
|
||||||
void UpdateSourcePrograms();
|
void UpdateSourcePrograms();
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver
|
||||||
mapped_ptr = static_cast<u8*>(
|
mapped_ptr = static_cast<u8*>(
|
||||||
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||||
|
|
||||||
if (device.HasVertexBufferUnifiedMemory()) {
|
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
|
||||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
||||||
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue