gl_arb_decompiler: Use NV_shader_buffer_{load,store} on assembly shaders
NV_shader_buffer_{load,store} is a 2010 extension that allows GL applications to use what Vulkan calls physical pointers; these are basically C pointers. In GLASM this is exposed through the LOAD/STORE/ATOM instructions. Up until now, assembly shaders were using NV_shader_storage_buffer_object. This works fine, but it has a (probably unintended) limitation that forces all shader stages to share the binding limit of a single stage. In contrast, with NV_shader_buffer_{load,store} we can pass GPU addresses to the shader through local parameters (the GLASM equivalent of uniform constants, or of push constants in Vulkan). Local parameters have the advantage of being per stage, allowing us to generate code without worrying about binding overlaps.
This commit is contained in:
parent
90cbcaa44a
commit
a8a2526128
|
@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) {
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GlobalMemoryName(const GlobalMemoryBase& base) {
|
|
||||||
return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
class ARBDecompiler final {
|
class ARBDecompiler final {
|
||||||
public:
|
public:
|
||||||
explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||||
|
@ -199,6 +195,8 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void DefineGlobalMemory();
|
||||||
|
|
||||||
void DeclareHeader();
|
void DeclareHeader();
|
||||||
void DeclareVertex();
|
void DeclareVertex();
|
||||||
void DeclareGeometry();
|
void DeclareGeometry();
|
||||||
|
@ -228,6 +226,7 @@ private:
|
||||||
|
|
||||||
std::pair<std::string, std::size_t> BuildCoords(Operation);
|
std::pair<std::string, std::size_t> BuildCoords(Operation);
|
||||||
std::string BuildAoffi(Operation);
|
std::string BuildAoffi(Operation);
|
||||||
|
std::string GlobalMemoryPointer(const GmemNode& gmem);
|
||||||
void Exit();
|
void Exit();
|
||||||
|
|
||||||
std::string Assign(Operation);
|
std::string Assign(Operation);
|
||||||
|
@ -378,10 +377,8 @@ private:
|
||||||
std::string address;
|
std::string address;
|
||||||
std::string_view opname;
|
std::string_view opname;
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
||||||
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
|
address = GlobalMemoryPointer(*gmem);
|
||||||
Visit(gmem->GetBaseAddress()));
|
opname = "ATOM";
|
||||||
address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
|
|
||||||
opname = "ATOMB";
|
|
||||||
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||||
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
|
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
|
||||||
opname = "ATOMS";
|
opname = "ATOMS";
|
||||||
|
@ -456,9 +453,13 @@ private:
|
||||||
shader_source += '\n';
|
shader_source += '\n';
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string AllocTemporary() {
|
std::string AllocLongVectorTemporary() {
|
||||||
max_temporaries = std::max(max_temporaries, num_temporaries + 1);
|
max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
|
||||||
return fmt::format("T{}.x", num_temporaries++);
|
return fmt::format("L{}", num_long_temporaries++);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string AllocLongTemporary() {
|
||||||
|
return fmt::format("{}.x", AllocLongVectorTemporary());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string AllocVectorTemporary() {
|
std::string AllocVectorTemporary() {
|
||||||
|
@ -466,8 +467,13 @@ private:
|
||||||
return fmt::format("T{}", num_temporaries++);
|
return fmt::format("T{}", num_temporaries++);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string AllocTemporary() {
|
||||||
|
return fmt::format("{}.x", AllocVectorTemporary());
|
||||||
|
}
|
||||||
|
|
||||||
void ResetTemporaries() noexcept {
|
void ResetTemporaries() noexcept {
|
||||||
num_temporaries = 0;
|
num_temporaries = 0;
|
||||||
|
num_long_temporaries = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Device& device;
|
const Device& device;
|
||||||
|
@ -478,6 +484,11 @@ private:
|
||||||
std::size_t num_temporaries = 0;
|
std::size_t num_temporaries = 0;
|
||||||
std::size_t max_temporaries = 0;
|
std::size_t max_temporaries = 0;
|
||||||
|
|
||||||
|
std::size_t num_long_temporaries = 0;
|
||||||
|
std::size_t max_long_temporaries = 0;
|
||||||
|
|
||||||
|
std::map<GlobalMemoryBase, u32> global_memory_names;
|
||||||
|
|
||||||
std::string shader_source;
|
std::string shader_source;
|
||||||
|
|
||||||
static constexpr std::string_view ADD_F32 = "ADD.F32";
|
static constexpr std::string_view ADD_F32 = "ADD.F32";
|
||||||
|
@ -784,6 +795,8 @@ private:
|
||||||
ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||||
ShaderType stage, std::string_view identifier)
|
ShaderType stage, std::string_view identifier)
|
||||||
: device{device}, ir{ir}, registry{registry}, stage{stage} {
|
: device{device}, ir{ir}, registry{registry}, stage{stage} {
|
||||||
|
DefineGlobalMemory();
|
||||||
|
|
||||||
AddLine("TEMP RC;");
|
AddLine("TEMP RC;");
|
||||||
AddLine("TEMP FSWZA[4];");
|
AddLine("TEMP FSWZA[4];");
|
||||||
AddLine("TEMP FSWZB[4];");
|
AddLine("TEMP FSWZB[4];");
|
||||||
|
@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Assigns a sequential index, starting at 0, to every global memory base reported by
/// ir.GetGlobalMemory() and records the mapping in global_memory_names.
/// GlobalMemoryPointer() reads this index back to choose which local parameter vector
/// (index / 2) and which component (even or odd index) to use.
void ARBDecompiler::DefineGlobalMemory() {
|
||||||
|
u32 binding = 0;
|
||||||
|
for (const auto& pair : ir.GetGlobalMemory()) {
|
||||||
|
const GlobalMemoryBase base = pair.first;
|
||||||
|
global_memory_names.emplace(base, binding);
|
||||||
|
++binding;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void ARBDecompiler::DeclareHeader() {
|
void ARBDecompiler::DeclareHeader() {
|
||||||
AddLine("!!NV{}5.0", HeaderStageName(stage));
|
AddLine("!!NV{}5.0", HeaderStageName(stage));
|
||||||
// Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
|
// Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
|
||||||
AddLine("OPTION NV_internal;");
|
AddLine("OPTION NV_internal;");
|
||||||
AddLine("OPTION NV_gpu_program_fp64;");
|
AddLine("OPTION NV_gpu_program_fp64;");
|
||||||
AddLine("OPTION NV_shader_storage_buffer;");
|
|
||||||
AddLine("OPTION NV_shader_thread_group;");
|
AddLine("OPTION NV_shader_thread_group;");
|
||||||
if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
|
if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
|
||||||
AddLine("OPTION NV_shader_thread_shuffle;");
|
AddLine("OPTION NV_shader_thread_shuffle;");
|
||||||
|
@ -951,11 +972,10 @@ void ARBDecompiler::DeclareLocalMemory() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARBDecompiler::DeclareGlobalMemory() {
|
void ARBDecompiler::DeclareGlobalMemory() {
|
||||||
u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
|
const std::size_t num_entries = ir.GetGlobalMemory().size();
|
||||||
for (const auto& pair : ir.GetGlobalMemory()) {
|
if (num_entries > 0) {
|
||||||
const auto& base = pair.first;
|
const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
|
||||||
AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
|
AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
|
||||||
++binding;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -977,6 +997,9 @@ void ARBDecompiler::DeclareTemporaries() {
|
||||||
for (std::size_t i = 0; i < max_temporaries; ++i) {
|
for (std::size_t i = 0; i < max_temporaries; ++i) {
|
||||||
AddLine("TEMP T{};", i);
|
AddLine("TEMP T{};", i);
|
||||||
}
|
}
|
||||||
|
for (std::size_t i = 0; i < max_long_temporaries; ++i) {
|
||||||
|
AddLine("LONG TEMP L{};", i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARBDecompiler::DeclarePredicates() {
|
void ARBDecompiler::DeclarePredicates() {
|
||||||
|
@ -1339,10 +1362,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
|
||||||
|
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||||
std::string temporary = AllocTemporary();
|
std::string temporary = AllocTemporary();
|
||||||
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
|
AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
|
||||||
Visit(gmem->GetBaseAddress()));
|
|
||||||
AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
|
|
||||||
temporary);
|
|
||||||
return temporary;
|
return temporary;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1419,6 +1439,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
|
||||||
return fmt::format(", offset({})", temporary);
|
return fmt::format(", offset({})", temporary);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits the GLASM instructions that compute the address of a global memory access and
/// returns the operand ("L<n>.x") that holds it.
///
/// Emitted sequence:
///   1. PK64.U    L, c[index / 2]   - NOTE(review): presumably packs the 32-bit components of
///                                    the local parameter into 64-bit values in L.x and L.y;
///                                    confirm against the NV_gpu_program5 specification.
///   2. SUB.U     T, real, base     - 32-bit offset of the access from the buffer base.
///   3. CVT.U64.U32 L.z, T          - converts that offset to a 64-bit value.
///   4. ADD.U64   L.x, L.{x|y}, L.z - adds the offset to the component selected by the index
///                                    parity (x for even indices, y for odd indices).
///
/// Allocates one long vector temporary and one regular temporary.
/// Uses std::map::at, so a descriptor that was not registered in global_memory_names throws.
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
|
||||||
|
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
|
||||||
|
const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
|
||||||
|
|
||||||
|
const std::string pointer = AllocLongVectorTemporary();
|
||||||
|
std::string temporary = AllocTemporary();
|
||||||
|
|
||||||
|
const u32 local_index = binding / 2;
|
||||||
|
AddLine("PK64.U {}, c[{}];", pointer, local_index);
|
||||||
|
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
|
||||||
|
Visit(gmem.GetBaseAddress()));
|
||||||
|
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
|
||||||
|
AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
|
||||||
|
return fmt::format("{}.x", pointer);
|
||||||
|
}
|
||||||
|
|
||||||
void ARBDecompiler::Exit() {
|
void ARBDecompiler::Exit() {
|
||||||
if (stage != ShaderType::Fragment) {
|
if (stage != ShaderType::Fragment) {
|
||||||
AddLine("RET;");
|
AddLine("RET;");
|
||||||
|
@ -1515,11 +1551,7 @@ std::string ARBDecompiler::Assign(Operation operation) {
|
||||||
ResetTemporaries();
|
ResetTemporaries();
|
||||||
return {};
|
return {};
|
||||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||||
const std::string temporary = AllocTemporary();
|
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
|
||||||
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
|
|
||||||
Visit(gmem->GetBaseAddress()));
|
|
||||||
AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
|
|
||||||
temporary);
|
|
||||||
ResetTemporaries();
|
ResetTemporaries();
|
||||||
return {};
|
return {};
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
|
||||||
: VideoCommon::BufferBlock{cpu_addr, size} {
|
: VideoCommon::BufferBlock{cpu_addr, size} {
|
||||||
gl_buffer.Create();
|
gl_buffer.Create();
|
||||||
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
||||||
if (device.HasVertexBufferUnifiedMemory()) {
|
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
|
||||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
|
||||||
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
}
|
}
|
||||||
|
|
|
@ -139,6 +139,18 @@ void oglEnable(GLenum cap, bool state) {
|
||||||
(state ? glEnable : glDisable)(cap);
|
(state ? glEnable : glDisable)(cap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Uploads an array of 64-bit values as program local parameters, starting at local
/// parameter 0 of the given program target.
///
/// Two 64-bit entries are sent per 4-component parameter vector, with the array
/// reinterpreted as 32-bit unsigned integers. Does nothing when num_entries is 0.
///
/// @param target      Program target passed to glProgramLocalParametersI4uivNV.
/// @param pointers    Values to upload. When num_entries is odd, pointers[num_entries] is
///                    overwritten with 0 as padding, so the buffer must have room for at least
///                    num_entries + 1 elements.
/// @param num_entries Number of valid entries in pointers.
void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
|
||||||
|
if (num_entries == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (num_entries % 2 == 1) {
|
||||||
|
pointers[num_entries] = 0;
|
||||||
|
}
|
||||||
|
const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
|
||||||
|
glProgramLocalParametersI4uivNV(target, 0, num_vectors,
|
||||||
|
reinterpret_cast<const GLuint*>(pointers));
|
||||||
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||||
|
@ -324,7 +336,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
||||||
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Shader);
|
MICROPROFILE_SCOPE(OpenGL_Shader);
|
||||||
auto& gpu = system.GPU().Maxwell3D();
|
auto& gpu = system.GPU().Maxwell3D();
|
||||||
std::size_t num_ssbos = 0;
|
|
||||||
u32 clip_distances = 0;
|
u32 clip_distances = 0;
|
||||||
|
|
||||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||||
|
@ -347,29 +358,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Currently this stages are not supported in the OpenGL backend.
|
// Currently this stages are not supported in the OpenGL backend.
|
||||||
// Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
|
// TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
|
||||||
if (program == Maxwell::ShaderProgram::TesselationControl) {
|
if (program == Maxwell::ShaderProgram::TesselationControl ||
|
||||||
continue;
|
program == Maxwell::ShaderProgram::TesselationEval) {
|
||||||
} else if (program == Maxwell::ShaderProgram::TesselationEval) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader* shader = shader_cache.GetStageProgram(program, async_shaders);
|
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
|
||||||
|
|
||||||
if (device.UseAssemblyShaders()) {
|
|
||||||
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
|
|
||||||
// all stages share the same bindings.
|
|
||||||
const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
|
|
||||||
ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
|
|
||||||
num_ssbos += num_stage_ssbos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stage indices are 0 - 5
|
|
||||||
const std::size_t stage = index == 0 ? 0 : index - 1;
|
|
||||||
SetupDrawConstBuffers(stage, shader);
|
|
||||||
SetupDrawGlobalMemory(stage, shader);
|
|
||||||
SetupDrawTextures(stage, shader);
|
|
||||||
SetupDrawImages(stage, shader);
|
|
||||||
|
|
||||||
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
|
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
|
||||||
switch (program) {
|
switch (program) {
|
||||||
|
@ -388,6 +383,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
shader_config.enable.Value(), shader_config.offset);
|
shader_config.enable.Value(), shader_config.offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stage indices are 0 - 5
|
||||||
|
const std::size_t stage = index == 0 ? 0 : index - 1;
|
||||||
|
SetupDrawConstBuffers(stage, shader);
|
||||||
|
SetupDrawGlobalMemory(stage, shader);
|
||||||
|
SetupDrawTextures(stage, shader);
|
||||||
|
SetupDrawImages(stage, shader);
|
||||||
|
|
||||||
// Workaround for Intel drivers.
|
// Workaround for Intel drivers.
|
||||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||||
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
|
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
|
||||||
|
@ -749,6 +751,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
current_cbuf = 0;
|
current_cbuf = 0;
|
||||||
|
|
||||||
auto kernel = shader_cache.GetComputeKernel(code_addr);
|
auto kernel = shader_cache.GetComputeKernel(code_addr);
|
||||||
|
program_manager.BindCompute(kernel->GetHandle());
|
||||||
|
|
||||||
SetupComputeTextures(kernel);
|
SetupComputeTextures(kernel);
|
||||||
SetupComputeImages(kernel);
|
SetupComputeImages(kernel);
|
||||||
|
|
||||||
|
@ -763,7 +767,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
buffer_cache.Unmap();
|
buffer_cache.Unmap();
|
||||||
|
|
||||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||||
program_manager.BindCompute(kernel->GetHandle());
|
|
||||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
}
|
}
|
||||||
|
@ -1023,41 +1026,67 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
|
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
|
||||||
|
static constexpr std::array TARGET_LUT = {
|
||||||
|
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||||
|
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||||
|
};
|
||||||
|
|
||||||
auto& gpu{system.GPU()};
|
auto& gpu{system.GPU()};
|
||||||
auto& memory_manager{gpu.MemoryManager()};
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
||||||
|
const auto& entries{shader->GetEntries().global_memory_entries};
|
||||||
|
|
||||||
u32 binding =
|
std::array<GLuint64EXT, 32> pointers;
|
||||||
device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
ASSERT(entries.size() < pointers.size());
|
||||||
for (const auto& entry : shader->GetEntries().global_memory_entries) {
|
|
||||||
|
const bool assembly_shaders = device.UseAssemblyShaders();
|
||||||
|
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
||||||
|
for (const auto& entry : entries) {
|
||||||
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
||||||
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
|
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
|
||||||
const u32 size{memory_manager.Read<u32>(addr + 8)};
|
const u32 size{memory_manager.Read<u32>(addr + 8)};
|
||||||
SetupGlobalMemory(binding++, entry, gpu_addr, size);
|
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
|
||||||
|
++binding;
|
||||||
|
}
|
||||||
|
if (assembly_shaders) {
|
||||||
|
UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
||||||
auto& gpu{system.GPU()};
|
auto& gpu{system.GPU()};
|
||||||
auto& memory_manager{gpu.MemoryManager()};
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
|
const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
|
||||||
|
const auto& entries{kernel->GetEntries().global_memory_entries};
|
||||||
|
|
||||||
|
std::array<GLuint64EXT, 32> pointers;
|
||||||
|
ASSERT(entries.size() < pointers.size());
|
||||||
|
|
||||||
u32 binding = 0;
|
u32 binding = 0;
|
||||||
for (const auto& entry : kernel->GetEntries().global_memory_entries) {
|
for (const auto& entry : entries) {
|
||||||
const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
|
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
|
||||||
const auto gpu_addr{memory_manager.Read<u64>(addr)};
|
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
|
||||||
const auto size{memory_manager.Read<u32>(addr + 8)};
|
const u32 size{memory_manager.Read<u32>(addr + 8)};
|
||||||
SetupGlobalMemory(binding++, entry, gpu_addr, size);
|
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
|
||||||
|
++binding;
|
||||||
|
}
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
||||||
GPUVAddr gpu_addr, std::size_t size) {
|
GPUVAddr gpu_addr, std::size_t size,
|
||||||
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
GLuint64EXT* pointer) {
|
||||||
|
const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
|
||||||
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
*pointer = info.address + info.offset;
|
||||||
|
} else {
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
||||||
static_cast<GLsizeiptr>(size));
|
static_cast<GLsizeiptr>(size));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
|
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||||
|
|
|
@ -124,9 +124,9 @@ private:
|
||||||
/// Configures the current global memory entries to use for the kernel invocation.
|
/// Configures the current global memory entries to use for the kernel invocation.
|
||||||
void SetupComputeGlobalMemory(Shader* kernel);
|
void SetupComputeGlobalMemory(Shader* kernel);
|
||||||
|
|
||||||
/// Configures a constant buffer.
|
/// Configures a global memory buffer.
|
||||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||||
std::size_t size);
|
std::size_t size, GLuint64EXT* pointer);
|
||||||
|
|
||||||
/// Configures the current textures to use for the draw command.
|
/// Configures the current textures to use for the draw command.
|
||||||
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||||
|
|
|
@ -11,8 +11,30 @@
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
ProgramManager::ProgramManager(const Device& device) {
|
namespace {
|
||||||
use_assembly_programs = device.UseAssemblyShaders();
|
|
||||||
|
/// Binds an assembly program to a stage, skipping the work when nothing changed.
///
/// - If current equals old, nothing is done.
/// - If current is 0, the stage is disabled with glDisable (only when it was enabled).
/// - Otherwise the stage is enabled with glEnable if needed and current is bound with
///   glBindProgramARB.
///
/// @param stage   Program target, used for glEnable/glDisable and for glBindProgramARB.
/// @param current Program handle to bind; 0 means no program for this stage.
/// @param old     Handle compared against current to detect an unchanged binding.
/// @param enabled In/out flag tracking whether the stage is currently enabled.
void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
|
||||||
|
if (current == old) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (current == 0) {
|
||||||
|
if (enabled) {
|
||||||
|
enabled = false;
|
||||||
|
glDisable(stage);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!enabled) {
|
||||||
|
enabled = true;
|
||||||
|
glEnable(stage);
|
||||||
|
}
|
||||||
|
glBindProgramARB(stage, current);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
ProgramManager::ProgramManager(const Device& device)
|
||||||
|
: use_assembly_programs{device.UseAssemblyShaders()} {
|
||||||
if (use_assembly_programs) {
|
if (use_assembly_programs) {
|
||||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||||
} else {
|
} else {
|
||||||
|
@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProgramManager::BindGraphicsPipeline() {
|
void ProgramManager::BindGraphicsPipeline() {
|
||||||
if (use_assembly_programs) {
|
if (!use_assembly_programs) {
|
||||||
UpdateAssemblyPrograms();
|
|
||||||
} else {
|
|
||||||
UpdateSourcePrograms();
|
UpdateSourcePrograms();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProgramManager::UpdateAssemblyPrograms() {
|
void ProgramManager::UseVertexShader(GLuint program) {
|
||||||
const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
|
if (use_assembly_programs) {
|
||||||
if (current == old) {
|
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if (current == 0) {
|
current_state.vertex = program;
|
||||||
if (enabled) {
|
|
||||||
enabled = false;
|
|
||||||
glDisable(stage);
|
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!enabled) {
|
|
||||||
enabled = true;
|
|
||||||
glEnable(stage);
|
|
||||||
}
|
|
||||||
glBindProgramARB(stage, current);
|
|
||||||
};
|
|
||||||
|
|
||||||
update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
|
void ProgramManager::UseGeometryShader(GLuint program) {
|
||||||
update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
|
if (use_assembly_programs) {
|
||||||
old_state.geometry);
|
// Compare against the previously bound geometry program, not the vertex one. Otherwise
// redundant binds are never skipped, and a change to program 0 is ignored (leaving the stage
// enabled) whenever the vertex handle is also 0.
BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.geometry, geometry_enabled);
|
||||||
update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
|
}
|
||||||
old_state.fragment);
|
current_state.geometry = program;
|
||||||
|
}
|
||||||
|
|
||||||
old_state = current_state;
|
void ProgramManager::UseFragmentShader(GLuint program) {
|
||||||
|
if (use_assembly_programs) {
|
||||||
|
// Compare against the previously bound fragment program, not the vertex one. Otherwise
// redundant binds are never skipped, and a change to program 0 is ignored (leaving the stage
// enabled) whenever the vertex handle is also 0.
BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled);
|
||||||
|
}
|
||||||
|
current_state.fragment = program;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProgramManager::UpdateSourcePrograms() {
|
void ProgramManager::UpdateSourcePrograms() {
|
||||||
|
|
|
@ -45,17 +45,9 @@ public:
|
||||||
/// Rewinds BindHostPipeline state changes.
|
/// Rewinds BindHostPipeline state changes.
|
||||||
void RestoreGuestPipeline();
|
void RestoreGuestPipeline();
|
||||||
|
|
||||||
void UseVertexShader(GLuint program) {
|
void UseVertexShader(GLuint program);
|
||||||
current_state.vertex = program;
|
void UseGeometryShader(GLuint program);
|
||||||
}
|
void UseFragmentShader(GLuint program);
|
||||||
|
|
||||||
void UseGeometryShader(GLuint program) {
|
|
||||||
current_state.geometry = program;
|
|
||||||
}
|
|
||||||
|
|
||||||
void UseFragmentShader(GLuint program) {
|
|
||||||
current_state.fragment = program;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct PipelineState {
|
struct PipelineState {
|
||||||
|
@ -64,9 +56,6 @@ private:
|
||||||
GLuint fragment = 0;
|
GLuint fragment = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Update NV_gpu_program5 programs.
|
|
||||||
void UpdateAssemblyPrograms();
|
|
||||||
|
|
||||||
/// Update GLSL programs.
|
/// Update GLSL programs.
|
||||||
void UpdateSourcePrograms();
|
void UpdateSourcePrograms();
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver
|
||||||
mapped_ptr = static_cast<u8*>(
|
mapped_ptr = static_cast<u8*>(
|
||||||
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||||
|
|
||||||
if (device.HasVertexBufferUnifiedMemory()) {
|
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
|
||||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
||||||
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue