vk_pipeline_cache: Unify pipeline cache keys into a single operation
This allows us to call Common::CityHash and std::memcmp only once for GraphicsPipelineCacheKey. While we are at it, do the same for compute.
This commit is contained in:
parent
f665c92114
commit
8c37cd1af6
|
@ -140,6 +140,12 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
|
||||||
enable.Assign(1);
|
enable.Assign(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void FixedPipelineState::Fill(const Maxwell& regs) {
|
||||||
|
rasterizer.Fill(regs);
|
||||||
|
depth_stencil.Fill(regs);
|
||||||
|
color_blending.Fill(regs);
|
||||||
|
}
|
||||||
|
|
||||||
std::size_t FixedPipelineState::Hash() const noexcept {
|
std::size_t FixedPipelineState::Hash() const noexcept {
|
||||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
|
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
|
||||||
return static_cast<std::size_t>(hash);
|
return static_cast<std::size_t>(hash);
|
||||||
|
@ -149,15 +155,6 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep
|
||||||
return std::memcmp(this, &rhs, sizeof *this) == 0;
|
return std::memcmp(this, &rhs, sizeof *this) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
|
|
||||||
FixedPipelineState fixed_state;
|
|
||||||
fixed_state.rasterizer.Fill(regs);
|
|
||||||
fixed_state.depth_stencil.Fill(regs);
|
|
||||||
fixed_state.color_blending.Fill(regs);
|
|
||||||
fixed_state.padding = {};
|
|
||||||
return fixed_state;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
|
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
|
||||||
// OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
|
// OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
|
||||||
// If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
|
// If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
|
||||||
|
|
|
@ -17,7 +17,7 @@ namespace Vulkan {
|
||||||
|
|
||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
struct alignas(32) FixedPipelineState {
|
struct FixedPipelineState {
|
||||||
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
|
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
|
||||||
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
|
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
|
||||||
|
|
||||||
|
@ -237,7 +237,8 @@ struct alignas(32) FixedPipelineState {
|
||||||
Rasterizer rasterizer;
|
Rasterizer rasterizer;
|
||||||
DepthStencil depth_stencil;
|
DepthStencil depth_stencil;
|
||||||
ColorBlending color_blending;
|
ColorBlending color_blending;
|
||||||
std::array<u8, 20> padding;
|
|
||||||
|
void Fill(const Maxwell& regs);
|
||||||
|
|
||||||
std::size_t Hash() const noexcept;
|
std::size_t Hash() const noexcept;
|
||||||
|
|
||||||
|
@ -250,9 +251,6 @@ struct alignas(32) FixedPipelineState {
|
||||||
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
|
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
|
||||||
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
|
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
|
||||||
static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
|
static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
|
||||||
static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned");
|
|
||||||
|
|
||||||
FixedPipelineState GetFixedPipelineState(const Maxwell& regs);
|
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
||||||
|
|
|
@ -161,6 +161,24 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
|
||||||
|
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
|
||||||
|
return static_cast<std::size_t>(hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
|
||||||
|
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t ComputePipelineCacheKey::Hash() const noexcept {
|
||||||
|
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
|
||||||
|
return static_cast<std::size_t>(hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
|
||||||
|
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
|
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
|
||||||
GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
|
GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
|
||||||
u32 main_offset)
|
u32 main_offset)
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <tuple>
|
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
@ -51,42 +50,38 @@ using ProgramCode = std::vector<u64>;
|
||||||
|
|
||||||
struct GraphicsPipelineCacheKey {
|
struct GraphicsPipelineCacheKey {
|
||||||
FixedPipelineState fixed_state;
|
FixedPipelineState fixed_state;
|
||||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
|
|
||||||
RenderPassParams renderpass_params;
|
RenderPassParams renderpass_params;
|
||||||
|
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
|
||||||
|
u64 padding; // This is necessary for unique object representations
|
||||||
|
|
||||||
std::size_t Hash() const noexcept {
|
std::size_t Hash() const noexcept;
|
||||||
std::size_t hash = fixed_state.Hash();
|
|
||||||
for (const auto& shader : shaders) {
|
|
||||||
boost::hash_combine(hash, shader);
|
|
||||||
}
|
|
||||||
boost::hash_combine(hash, renderpass_params.Hash());
|
|
||||||
return hash;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
|
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
|
||||||
return std::tie(fixed_state, shaders, renderpass_params) ==
|
|
||||||
std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
|
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
|
||||||
|
return !operator==(rhs);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
|
||||||
|
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
|
||||||
|
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
|
||||||
|
|
||||||
struct ComputePipelineCacheKey {
|
struct ComputePipelineCacheKey {
|
||||||
GPUVAddr shader{};
|
GPUVAddr shader;
|
||||||
u32 shared_memory_size{};
|
u32 shared_memory_size;
|
||||||
std::array<u32, 3> workgroup_size{};
|
std::array<u32, 3> workgroup_size;
|
||||||
|
|
||||||
std::size_t Hash() const noexcept {
|
std::size_t Hash() const noexcept;
|
||||||
return static_cast<std::size_t>(shader) ^
|
|
||||||
((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
|
|
||||||
static_cast<std::size_t>(workgroup_size[0]) ^
|
|
||||||
(static_cast<std::size_t>(workgroup_size[1]) << 16) ^
|
|
||||||
(static_cast<std::size_t>(workgroup_size[2]) << 24);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
|
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
|
||||||
return std::tie(shader, shared_memory_size, workgroup_size) ==
|
|
||||||
std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
|
bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
|
||||||
|
return !operator==(rhs);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
|
||||||
|
static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
|
||||||
|
static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
||||||
|
|
|
@ -316,7 +316,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||||
query_cache.UpdateCounters();
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
const auto& gpu = system.GPU().Maxwell3D();
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
|
GraphicsPipelineCacheKey key;
|
||||||
|
key.fixed_state.Fill(gpu.regs);
|
||||||
|
|
||||||
buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
|
buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
|
||||||
|
|
||||||
|
@ -334,10 +335,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||||
|
|
||||||
buffer_cache.Unmap();
|
buffer_cache.Unmap();
|
||||||
|
|
||||||
const auto texceptions = UpdateAttachments();
|
const Texceptions texceptions = UpdateAttachments();
|
||||||
SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
|
SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
|
||||||
|
|
||||||
key.renderpass_params = GetRenderPassParams(texceptions);
|
key.renderpass_params = GetRenderPassParams(texceptions);
|
||||||
|
key.padding = 0;
|
||||||
|
|
||||||
auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
|
auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
|
||||||
scheduler.BindGraphicsPipeline(pipeline.GetHandle());
|
scheduler.BindGraphicsPipeline(pipeline.GetHandle());
|
||||||
|
@ -453,10 +455,12 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||||
query_cache.UpdateCounters();
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||||
const ComputePipelineCacheKey key{
|
ComputePipelineCacheKey key;
|
||||||
code_addr,
|
key.shader = code_addr;
|
||||||
launch_desc.shared_alloc,
|
key.shared_memory_size = launch_desc.shared_alloc;
|
||||||
{launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
|
key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
|
||||||
|
launch_desc.block_dim_z};
|
||||||
|
|
||||||
auto& pipeline = pipeline_cache.GetComputePipeline(key);
|
auto& pipeline = pipeline_cache.GetComputePipeline(key);
|
||||||
|
|
||||||
// Compute dispatches can't be executed inside a renderpass
|
// Compute dispatches can't be executed inside a renderpass
|
||||||
|
|
Loading…
Reference in a new issue