graphics_pipeline: Move some depth configuration to dynamic state (#931)

* graphics_pipeline: More proper masking

* pipeline_cache: Skip setting depth/stencil fields when test is disabled

* pipeline_cache: More fixes to depth stencil state

* vk_rasterizer: Use dynamic state for depth bounds and bias

* pipeline_cache: Missed depth bias enable

* vk_rasterizer: Add stencil dynamic states

* thread: Reduce spammy log

* Remove some leftover state

* pipeline_cache: Allocate pipelines from pools

* vk_graphics_pipeline: Remove bindings member

Saves about 1KB from each pipeline
This commit is contained in:
TheTurtle 2024-09-15 23:42:14 +03:00 committed by GitHub
parent c8d0d5637a
commit b09b28c7f3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 104 additions and 87 deletions

View file

@ -654,7 +654,7 @@ int PS4_SYSV_ABI scePthreadCondInit(ScePthreadCond* cond, const ScePthreadCondat
int result = pthread_cond_init(&(*cond)->cond, &(*attr)->cond_attr);
if (name != nullptr) {
LOG_INFO(Kernel_Pthread, "name={}, result={}", (*cond)->name, result);
LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*cond)->name, result);
}
switch (result) {

View file

@ -473,6 +473,11 @@ struct Liverpool {
/// Builds the CullMode value from the two cull flags: `cull_front` OR'd with
/// `cull_back` shifted left by one bit.
CullMode CullingMode() const {
    const auto packed = cull_front | (cull_back << 1);
    return static_cast<CullMode>(packed);
}
/// Reports whether any of the polygon-offset enables (back, front or para)
/// is set. Checked in the same order as a short-circuiting `||` chain.
bool NeedsBias() const {
    if (enable_polygon_offset_back) {
        return true;
    }
    if (enable_polygon_offset_front) {
        return true;
    }
    if (enable_polygon_offset_para) {
        return true;
    }
    return false;
}
};
union VsOutputConfig {
@ -506,6 +511,11 @@ struct Liverpool {
/// Returns the 4-bit mask stored in `raw` for buffer `buf_id`
/// (bits [buf_id * 4, buf_id * 4 + 3]).
u32 GetMask(int buf_id) const {
    const int shift = buf_id * 4;
    return (raw >> shift) & 0xfu;
}
/// Replaces the 4-bit mask stored in `raw` for buffer `buf_id` with `mask`,
/// leaving the nibbles of every other buffer untouched.
///
/// @param buf_id Index of the buffer whose nibble is rewritten.
/// @param mask   New mask value; only its low four bits are used.
///
/// NOTE(review): assumes `raw` is an unsigned 32-bit word (eight 4-bit slots),
/// as the matching GetMask suggests - confirm against the declaration.
void SetMask(int buf_id, u32 mask) {
    const int shift = buf_id * 4;
    // Unsigned literal, matching GetMask, so the top slot does not shift a
    // signed int into its sign bit.
    raw &= ~(0xfu << shift);
    // Drop any bits above the nibble so an over-wide mask cannot spill into
    // the neighbouring buffers' slots.
    raw |= (mask & 0xfu) << shift;
}
};
struct IndexBufferBase {

View file

@ -95,9 +95,6 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
? vk::FrontFace::eClockwise
: vk::FrontFace::eCounterClockwise,
.depthBiasEnable = bool(key.depth_bias_enable),
.depthBiasConstantFactor = key.depth_bias_const_factor,
.depthBiasClamp = key.depth_bias_clamp,
.depthBiasSlopeFactor = key.depth_bias_slope_factor,
.lineWidth = 1.0f,
};
@ -134,9 +131,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
};
boost::container::static_vector<vk::DynamicState, 14> dynamic_states = {
vk::DynamicState::eViewport,
vk::DynamicState::eScissor,
vk::DynamicState::eBlendConstants,
vk::DynamicState::eViewport, vk::DynamicState::eScissor,
vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthBounds,
vk::DynamicState::eDepthBias, vk::DynamicState::eStencilReference,
vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask,
};
if (instance.IsColorWriteEnableSupported()) {
@ -153,39 +151,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
};
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
.depthTestEnable = key.depth.depth_enable,
.depthWriteEnable = key.depth.depth_write_enable,
.depthCompareOp = LiverpoolToVK::CompareOp(key.depth.depth_func),
.depthBoundsTestEnable = key.depth.depth_bounds_enable,
.stencilTestEnable = key.depth.stencil_enable,
.depthTestEnable = key.depth_stencil.depth_enable,
.depthWriteEnable = key.depth_stencil.depth_write_enable,
.depthCompareOp = LiverpoolToVK::CompareOp(key.depth_stencil.depth_func),
.depthBoundsTestEnable = key.depth_stencil.depth_bounds_enable,
.stencilTestEnable = key.depth_stencil.stencil_enable,
.front{
.failOp = LiverpoolToVK::StencilOp(key.stencil.stencil_fail_front),
.passOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zpass_front),
.depthFailOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zfail_front),
.compareOp = LiverpoolToVK::CompareOp(key.depth.stencil_ref_func),
.compareMask = key.stencil_ref_front.stencil_mask,
.writeMask = key.stencil_ref_front.stencil_write_mask,
.reference = key.stencil_ref_front.stencil_test_val,
.compareOp = LiverpoolToVK::CompareOp(key.depth_stencil.stencil_ref_func),
},
.back{
.failOp = LiverpoolToVK::StencilOp(key.depth.backface_enable
.failOp = LiverpoolToVK::StencilOp(key.depth_stencil.backface_enable
? key.stencil.stencil_fail_back.Value()
: key.stencil.stencil_fail_front.Value()),
.passOp = LiverpoolToVK::StencilOp(key.depth.backface_enable
.passOp = LiverpoolToVK::StencilOp(key.depth_stencil.backface_enable
? key.stencil.stencil_zpass_back.Value()
: key.stencil.stencil_zpass_front.Value()),
.depthFailOp = LiverpoolToVK::StencilOp(key.depth.backface_enable
.depthFailOp = LiverpoolToVK::StencilOp(key.depth_stencil.backface_enable
? key.stencil.stencil_zfail_back.Value()
: key.stencil.stencil_zfail_front.Value()),
.compareOp = LiverpoolToVK::CompareOp(key.depth.backface_enable
? key.depth.stencil_bf_func.Value()
: key.depth.stencil_ref_func.Value()),
.compareMask = key.stencil_ref_back.stencil_mask,
.writeMask = key.stencil_ref_back.stencil_write_mask,
.reference = key.stencil_ref_back.stencil_test_val,
.compareOp = LiverpoolToVK::CompareOp(key.depth_stencil.backface_enable
? key.depth_stencil.stencil_bf_func.Value()
: key.depth_stencil.stencil_ref_func.Value()),
},
.minDepthBounds = key.depth_bounds_min,
.maxDepthBounds = key.depth_bounds_max,
};
auto stage = u32(Shader::Stage::Vertex);
@ -302,6 +292,7 @@ GraphicsPipeline::~GraphicsPipeline() = default;
void GraphicsPipeline::BuildDescSetLayout() {
u32 binding{};
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
for (const auto* stage : stages) {
if (!stage) {
continue;

View file

@ -30,17 +30,10 @@ struct GraphicsPipelineKey {
vk::Format depth_format;
vk::Format stencil_format;
Liverpool::DepthControl depth;
float depth_bounds_min;
float depth_bounds_max;
float depth_bias_const_factor;
float depth_bias_slope_factor;
float depth_bias_clamp;
Liverpool::DepthControl depth_stencil;
u32 depth_bias_enable;
u32 num_samples = 1;
u32 num_samples;
Liverpool::StencilControl stencil;
Liverpool::StencilRefMask stencil_ref_front;
Liverpool::StencilRefMask stencil_ref_back;
Liverpool::PrimitiveType prim_type;
u32 enable_primitive_restart;
u32 primitive_restart_index;
@ -48,7 +41,7 @@ struct GraphicsPipelineKey {
Liverpool::CullMode cull_mode;
Liverpool::FrontFace front_face;
Liverpool::ClipSpace clip_space;
Liverpool::ColorBufferMask cb_shader_mask{};
Liverpool::ColorBufferMask cb_shader_mask;
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
@ -91,7 +84,7 @@ public:
}
bool IsDepthEnabled() const {
return key.depth.depth_enable.Value();
return key.depth_stencil.depth_enable.Value();
}
private:
@ -107,7 +100,6 @@ private:
std::array<const Shader::Info*, MaxShaderStages> stages{};
GraphicsPipelineKey key;
bool uses_push_descriptors{};
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
};
} // namespace Vulkan

View file

@ -167,11 +167,10 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
}
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
if (is_new) {
it.value() = std::make_unique<GraphicsPipeline>(
instance, scheduler, desc_heap, graphics_key, *pipeline_cache, infos, modules);
it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key,
*pipeline_cache, infos, modules);
}
const GraphicsPipeline* pipeline = it->second.get();
return pipeline;
return it->second;
}
const ComputePipeline* PipelineCache::GetComputePipeline() {
@ -180,11 +179,10 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
}
const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
if (is_new) {
it.value() = std::make_unique<ComputePipeline>(
instance, scheduler, desc_heap, *pipeline_cache, compute_key, *infos[0], modules[0]);
it.value() = compute_pipeline_pool.Create(instance, scheduler, desc_heap, *pipeline_cache,
compute_key, *infos[0], modules[0]);
}
const ComputePipeline* pipeline = it->second.get();
return pipeline;
return it->second;
}
bool ShouldSkipShader(u64 shader_hash, const char* shader_type) {
@ -197,28 +195,36 @@ bool ShouldSkipShader(u64 shader_hash, const char* shader_type) {
}
bool PipelineCache::RefreshGraphicsKey() {
std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey));
auto& regs = liverpool->regs;
auto& key = graphics_key;
key.depth = regs.depth_control;
key.depth.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() &&
!regs.depth_render_control.depth_clear_enable);
key.depth_bounds_min = regs.depth_bounds_min;
key.depth_bounds_max = regs.depth_bounds_max;
key.depth_bias_enable = regs.polygon_control.enable_polygon_offset_back ||
regs.polygon_control.enable_polygon_offset_front ||
regs.polygon_control.enable_polygon_offset_para;
if (regs.polygon_control.enable_polygon_offset_front) {
key.depth_bias_const_factor = regs.poly_offset.front_offset;
key.depth_bias_slope_factor = regs.poly_offset.front_scale;
key.depth_stencil = regs.depth_control;
key.depth_stencil.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() &&
!regs.depth_render_control.depth_clear_enable);
key.depth_bias_enable = regs.polygon_control.NeedsBias();
const auto& db = regs.depth_buffer;
const auto ds_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
if (db.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid) {
key.depth_format = ds_format;
} else {
key.depth_bias_const_factor = regs.poly_offset.back_offset;
key.depth_bias_slope_factor = regs.poly_offset.back_scale;
key.depth_format = vk::Format::eUndefined;
}
if (regs.depth_control.depth_enable) {
key.depth_stencil.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
}
key.depth_bias_clamp = regs.poly_offset.depth_bias;
key.stencil = regs.stencil_control;
key.stencil_ref_front = regs.stencil_ref_front;
key.stencil_ref_back = regs.stencil_ref_back;
if (db.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid) {
key.stencil_format = key.depth_format;
} else {
key.stencil_format = vk::Format::eUndefined;
}
if (key.depth_stencil.stencil_enable) {
key.depth_stencil.stencil_enable.Assign(key.stencil_format != vk::Format::eUndefined);
}
key.prim_type = regs.primitive_type;
key.enable_primitive_restart = regs.enable_primitive_restart & 1;
key.primitive_restart_index = regs.primitive_restart_index;
@ -228,27 +234,6 @@ bool PipelineCache::RefreshGraphicsKey() {
key.front_face = regs.polygon_control.front_face;
key.num_samples = regs.aa_config.NumSamples();
const auto& db = regs.depth_buffer;
const auto ds_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
if (db.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid) {
key.depth_format = ds_format;
} else {
key.depth_format = vk::Format::eUndefined;
}
if (key.depth.depth_enable) {
key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
}
if (db.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid) {
key.stencil_format = key.depth_format;
} else {
key.stencil_format = vk::Format::eUndefined;
}
if (key.depth.stencil_enable) {
key.depth.stencil_enable.Assign(key.stencil_format != vk::Format::eUndefined);
}
const auto skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
@ -277,7 +262,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
!col_buf.info.blend_bypass);
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
key.cb_shader_mask = regs.color_shader_mask;
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
++remapped_cb;
}

View file

@ -74,8 +74,10 @@ private:
Shader::Pools pools;
tsl::robin_map<size_t, Program*> program_cache;
Common::ObjectPool<Program> program_pool;
tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
Common::ObjectPool<GraphicsPipeline> graphics_pipeline_pool;
Common::ObjectPool<ComputePipeline> compute_pipeline_pool;
tsl::robin_map<size_t, ComputePipeline*> compute_pipelines;
tsl::robin_map<GraphicsPipelineKey, GraphicsPipeline*> graphics_pipelines;
std::array<const Shader::Info*, MaxShaderStages> infos{};
std::array<vk::ShaderModule, MaxShaderStages> modules{};
GraphicsPipelineKey graphics_key{};

View file

@ -297,6 +297,43 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) {
cmdbuf.setColorWriteEnableEXT(write_ens);
cmdbuf.setColorWriteMaskEXT(0, write_masks);
}
if (regs.depth_control.depth_bounds_enable) {
cmdbuf.setDepthBounds(regs.depth_bounds_min, regs.depth_bounds_max);
}
if (regs.polygon_control.NeedsBias()) {
if (regs.polygon_control.enable_polygon_offset_front) {
cmdbuf.setDepthBias(regs.poly_offset.front_offset, regs.poly_offset.depth_bias,
regs.poly_offset.front_scale);
} else {
cmdbuf.setDepthBias(regs.poly_offset.back_offset, regs.poly_offset.depth_bias,
regs.poly_offset.back_scale);
}
}
if (regs.depth_control.stencil_enable) {
const auto front = regs.stencil_ref_front;
const auto back = regs.stencil_ref_back;
if (front.stencil_test_val == back.stencil_test_val) {
cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
front.stencil_test_val);
} else {
cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front.stencil_test_val);
cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back.stencil_test_val);
}
if (front.stencil_write_mask == back.stencil_write_mask) {
cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
front.stencil_write_mask);
} else {
cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front.stencil_write_mask);
cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back.stencil_write_mask);
}
if (front.stencil_mask == back.stencil_mask) {
cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
front.stencil_mask);
} else {
cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front.stencil_mask);
cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back.stencil_mask);
}
}
}
void Rasterizer::UpdateViewportScissorState() {