diff --git a/src/xrt/compositor/CMakeLists.txt b/src/xrt/compositor/CMakeLists.txt index 5c81f1cdb..df10d350a 100644 --- a/src/xrt/compositor/CMakeLists.txt +++ b/src/xrt/compositor/CMakeLists.txt @@ -120,6 +120,7 @@ if(XRT_HAVE_VULKAN) set(SHADERS shaders/clear.comp shaders/distortion.comp + shaders/layer.comp shaders/mesh.frag shaders/mesh.vert shaders/layer.frag diff --git a/src/xrt/compositor/render/render_compute.c b/src/xrt/compositor/render/render_compute.c index 22c9fac06..17a0b423a 100644 --- a/src/xrt/compositor/render/render_compute.c +++ b/src/xrt/compositor/render/render_compute.c @@ -85,6 +85,74 @@ calc_dispatch_dims(const struct render_viewport_data views[2], uint32_t *out_w, * */ +XRT_MAYBE_UNUSED static void +update_compute_layer_descriptor_set(struct vk_bundle *vk, + uint32_t src_binding, + VkSampler src_samplers[COMP_MAX_IMAGES], + VkImageView src_image_views[COMP_MAX_IMAGES], + uint32_t image_count, + uint32_t target_binding, + VkImageView target_image_view, + uint32_t ubo_binding, + VkBuffer ubo_buffer, + VkDeviceSize ubo_size, + VkDescriptorSet descriptor_set) +{ + assert(image_count <= COMP_MAX_IMAGES); + + VkDescriptorImageInfo src_image_info[COMP_MAX_IMAGES]; + for (uint32_t i = 0; i < image_count; i++) { + src_image_info[i].sampler = src_samplers[i]; + src_image_info[i].imageView = src_image_views[i]; + src_image_info[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + + VkDescriptorImageInfo target_image_info = { + .imageView = target_image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + VkDescriptorBufferInfo buffer_info = { + .buffer = ubo_buffer, + .offset = 0, + .range = ubo_size, + }; + + VkWriteDescriptorSet write_descriptor_sets[3] = { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptor_set, + .dstBinding = src_binding, + .descriptorCount = image_count, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = src_image_info, + }, + { + .sType = 
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptor_set, + .dstBinding = target_binding, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &target_image_info, + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptor_set, + .dstBinding = ubo_binding, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .pBufferInfo = &buffer_info, + }, + }; + + vk->vkUpdateDescriptorSets( // + vk->device, // + ARRAY_SIZE(write_descriptor_sets), // descriptorWriteCount + write_descriptor_sets, // pDescriptorWrites + 0, // descriptorCopyCount + NULL); // pDescriptorCopies +} + XRT_MAYBE_UNUSED static void update_compute_distortion_descriptor_set(struct vk_bundle *vk, uint32_t src_binding, @@ -262,6 +330,12 @@ render_compute_init(struct render_compute *crc, struct render_resources *r) struct vk_bundle *vk = r->vk; crc->r = r; + C(vk_create_descriptor_set( // + vk, // + r->compute.descriptor_pool, // descriptor_pool + r->compute.layer.descriptor_set_layout, // descriptor_set_layout + &crc->descriptor_set)); // descriptor_set + C(vk_create_descriptor_set( // vk, // r->compute.descriptor_pool, // descriptor_pool @@ -326,6 +400,7 @@ render_compute_close(struct render_compute *crc) struct vk_bundle *vk = vk_from_crc(crc); // Reclaimed by vkResetDescriptorPool. 
+ crc->descriptor_set = VK_NULL_HANDLE; crc->distortion_descriptor_set = VK_NULL_HANDLE; vk->vkResetDescriptorPool(vk->device, crc->r->compute.descriptor_pool, 0); @@ -333,6 +408,110 @@ render_compute_close(struct render_compute *crc) crc->r = NULL; } +void +render_compute_layers(struct render_compute *crc, + VkSampler src_samplers[COMP_MAX_IMAGES], + VkImageView src_image_views[COMP_MAX_IMAGES], + uint32_t image_count, + VkImage target_image, + VkImageView target_image_view, + VkImageLayout transition_to, + bool timewarp) +{ + assert(crc->r != NULL); + + struct vk_bundle *vk = vk_from_crc(crc); + struct render_resources *r = crc->r; + + struct render_compute_layer_ubo_data *ubo_data = + (struct render_compute_layer_ubo_data *)crc->r->compute.layer.ubo.mapped; + + /* + * Source, target and distortion images. + */ + + VkImageSubresourceRange subresource_range = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + + vk_cmd_image_barrier_gpu_locked( // + vk, // + r->cmd, // + target_image, // + 0, // + VK_ACCESS_SHADER_WRITE_BIT, // + VK_IMAGE_LAYOUT_UNDEFINED, // + VK_IMAGE_LAYOUT_GENERAL, // + subresource_range); // + + update_compute_layer_descriptor_set( // + vk, // + r->compute.src_binding, // + src_samplers, // + src_image_views, // + image_count, // + r->compute.target_binding, // + target_image_view, // + r->compute.ubo_binding, // + r->compute.layer.ubo.buffer, // + VK_WHOLE_SIZE, // + crc->descriptor_set); // + + vk->vkCmdBindPipeline( // + crc->r->cmd, // commandBuffer + VK_PIPELINE_BIND_POINT_COMPUTE, // pipelineBindPoint + timewarp ? 
r->compute.layer.timewarp_pipeline : r->compute.layer.non_timewarp_pipeline); // pipeline + + vk->vkCmdBindDescriptorSets( // + r->cmd, // commandBuffer + VK_PIPELINE_BIND_POINT_COMPUTE, // pipelineBindPoint + r->compute.layer.pipeline_layout, // layout + 0, // firstSet + 1, // descriptorSetCount + &crc->descriptor_set, // pDescriptorSets + 0, // dynamicOffsetCount + NULL); // pDynamicOffsets + + + uint32_t w = 0, h = 0; + calc_dispatch_dims(ubo_data->views, &w, &h); + assert(w != 0 && h != 0); + + vk->vkCmdDispatch( // + r->cmd, // commandBuffer + w, // groupCountX + h, // groupCountY + 2); // groupCountZ + + VkImageMemoryBarrier memoryBarrier = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = transition_to, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = target_image, + .subresourceRange = subresource_range, + }; + + vk->vkCmdPipelineBarrier( // + r->cmd, // + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, // + 0, // + 0, // + NULL, // + 0, // + NULL, // + 1, // + &memoryBarrier); // +} + void render_compute_projection_timewarp(struct render_compute *crc, VkSampler src_samplers[2], diff --git a/src/xrt/compositor/render/render_interface.h b/src/xrt/compositor/render/render_interface.h index 789239934..b3c6562b7 100644 --- a/src/xrt/compositor/render/render_interface.h +++ b/src/xrt/compositor/render/render_interface.h @@ -11,6 +11,9 @@ #pragma once #define COMP_MAX_LAYERS 16 +#define COMP_VIEWS_PER_LAYER 2 +#define COMP_MAX_IMAGES 32 + #include "xrt/xrt_compiler.h" #include "xrt/xrt_defines.h" @@ -76,6 +79,7 @@ render_calc_time_warp_matrix(const struct xrt_pose *src_pose, struct render_shaders { VkShaderModule clear_comp; + VkShaderModule layer_comp; VkShaderModule distortion_comp; VkShaderModule mesh_vert; @@ -308,6 
+312,27 @@ struct render_resources //! Default sampler for null images. VkSampler default_sampler; + struct + { + //! Descriptor set layout for compute. + VkDescriptorSetLayout descriptor_set_layout; + + //! Pipeline layout used for compute distortion. + VkPipelineLayout pipeline_layout; + + //! Doesn't depend on target so is static. + VkPipeline non_timewarp_pipeline; + + //! Doesn't depend on target so is static. + VkPipeline timewarp_pipeline; + + //! Size of combined image sampler array + uint32_t image_array_size; + + //! Target info. + struct render_buffer ubo; + } layer; + struct { //! Descriptor set layout for compute distortion. @@ -333,6 +358,8 @@ struct render_resources //! Target info. struct render_buffer ubo; + + //! @todo other resources } clear; } compute; @@ -673,9 +700,73 @@ struct render_compute struct render_resources *r; //! Shared descriptor set between clear, projection and timewarp. + VkDescriptorSet descriptor_set; + + //! Descriptor set for distortion. VkDescriptorSet distortion_descriptor_set; }; +/*! + * UBO data that is sent to the compute layer shaders. + * + * Used in @ref render_compute + */ +struct render_compute_layer_ubo_data +{ + struct render_viewport_data views[2]; + struct xrt_normalized_rect pre_transforms[2]; + struct xrt_normalized_rect post_transforms[COMP_MAX_LAYERS * COMP_VIEWS_PER_LAYER]; + + //! std140 uvec2, corresponds to enum xrt_layer_type and unpremultiplied alpha. + struct + { + uint32_t val; + uint32_t unpremultiplied; + uint32_t padding[2]; + } layer_type[COMP_MAX_LAYERS]; + + //! Which image/sampler(s) correspond to each layer. + struct + { + uint32_t images[2]; + //! @todo Implement separated samplers and images (and change to samplers[2]) + uint32_t padding[2]; + } images_samplers[COMP_MAX_LAYERS * 2]; + + + /*! + * For projection layers + */ + + //! Timewarp matrices + struct xrt_matrix_4x4 transforms[COMP_MAX_LAYERS * COMP_VIEWS_PER_LAYER]; + + + /*! + * For quad layers + */ + + //! 
All quad transforms and coordinates are in view space + struct + { + struct xrt_vec3 val; + float padding; + } quad_position[COMP_MAX_LAYERS * 2]; + struct + { + struct xrt_vec3 val; + float padding; + } quad_normal[COMP_MAX_LAYERS * 2]; + struct xrt_matrix_4x4 inverse_quad_transform[COMP_MAX_LAYERS * 2]; + + //! Quad extent in world scale + struct + { + struct xrt_vec2 val; + float padding[2]; + } quad_extent[COMP_MAX_LAYERS]; +}; + /*! * UBO data that is sent to the compute distortion shaders. * @@ -723,6 +814,19 @@ render_compute_begin(struct render_compute *crc); bool render_compute_end(struct render_compute *crc); +/*! + * @public @memberof render_compute + */ +void +render_compute_layers(struct render_compute *crc, // + VkSampler src_samplers[COMP_MAX_IMAGES], // + VkImageView src_image_views[COMP_MAX_IMAGES], // + uint32_t image_count, // + VkImage target_image, // + VkImageView target_image_view, // + VkImageLayout transition_to, // + bool timewarp); // + /*! * @public @memberof render_compute */ diff --git a/src/xrt/compositor/render/render_resources.c b/src/xrt/compositor/render/render_resources.c index d8b8e92d9..63b074fad 100644 --- a/src/xrt/compositor/render/render_resources.c +++ b/src/xrt/compositor/render/render_resources.c @@ -181,6 +181,59 @@ init_mesh_ubo_buffers(struct vk_bundle *vk, struct render_buffer *l_ubo, struct * */ +static VkResult +create_compute_layer_descriptor_set_layout(struct vk_bundle *vk, + uint32_t src_binding, + uint32_t target_binding, + uint32_t ubo_binding, + uint32_t source_images_count, + VkDescriptorSetLayout *out_descriptor_set_layout) +{ + VkResult ret; + + VkDescriptorSetLayoutBinding set_layout_bindings[3] = { + { + .binding = src_binding, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = source_images_count, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .binding = target_binding, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = 
VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .binding = ubo_binding, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + VkDescriptorSetLayoutCreateInfo set_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = ARRAY_SIZE(set_layout_bindings), + .pBindings = set_layout_bindings, + }; + + VkDescriptorSetLayout descriptor_set_layout = VK_NULL_HANDLE; + ret = vk->vkCreateDescriptorSetLayout( // + vk->device, // + &set_layout_info, // + NULL, // + &descriptor_set_layout); // + if (ret != VK_SUCCESS) { + VK_ERROR(vk, "vkCreateDescriptorSetLayout failed: %s", vk_result_string(ret)); + return ret; + } + + *out_descriptor_set_layout = descriptor_set_layout; + + return VK_SUCCESS; +} + static VkResult create_compute_distortion_descriptor_set_layout(struct vk_bundle *vk, uint32_t src_binding, @@ -240,12 +293,61 @@ create_compute_distortion_descriptor_set_layout(struct vk_bundle *vk, return VK_SUCCESS; } +struct compute_layer_params +{ + VkBool32 do_timewarp; + VkBool32 do_color_correction; + uint32_t max_layers; + uint32_t views_per_layer; + uint32_t image_array_size; +}; + struct compute_distortion_params { uint32_t distortion_texel_count; VkBool32 do_timewarp; }; + +static VkResult +create_compute_layer_pipeline(struct vk_bundle *vk, + VkPipelineCache pipeline_cache, + VkShaderModule shader, + VkPipelineLayout pipeline_layout, + const struct compute_layer_params *params, + VkPipeline *out_compute_pipeline) +{ +#define ENTRY(ID, FIELD) \ + { \ + .constantID = ID, \ + .offset = offsetof(struct compute_layer_params, FIELD), \ + .size = sizeof(params->FIELD), \ + } + + VkSpecializationMapEntry entries[] = { + ENTRY(1, do_timewarp), // + ENTRY(2, do_color_correction), // + ENTRY(3, max_layers), // + ENTRY(4, views_per_layer), // + ENTRY(5, image_array_size), // + }; +#undef ENTRY + + VkSpecializationInfo specialization_info = { + .mapEntryCount = 
ARRAY_SIZE(entries), + .pMapEntries = entries, + .dataSize = sizeof(*params), + .pData = params, + }; + + return vk_create_compute_pipeline( // + vk, // vk_bundle + pipeline_cache, // pipeline_cache + shader, // shader + pipeline_layout, // pipeline_layout + &specialization_info, // specialization_info + out_compute_pipeline); // out_compute_pipeline +} + static VkResult create_compute_distortion_pipeline(struct vk_bundle *vk, VkPipelineCache pipeline_cache, @@ -705,6 +807,11 @@ render_resources_init(struct render_resources *r, r->compute.target_binding = 2; r->compute.ubo_binding = 3; + r->compute.layer.image_array_size = vk->features.max_per_stage_descriptor_sampled_images; + if (r->compute.layer.image_array_size > COMP_MAX_IMAGES) { + r->compute.layer.image_array_size = COMP_MAX_IMAGES; + } + /* * Mock, used as a default image empty image. @@ -841,12 +948,14 @@ render_resources_init(struct render_resources *r, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // clamp_mode &r->compute.default_sampler)); // out_sampler + struct vk_descriptor_pool_info compute_pool_info = { .uniform_per_descriptor_count = 1, - .sampler_per_descriptor_count = 8, + // layer images + .sampler_per_descriptor_count = r->compute.layer.image_array_size + 6, .storage_image_per_descriptor_count = 1, .storage_buffer_per_descriptor_count = 0, - .descriptor_count = 1, + .descriptor_count = 2, .freeable = false, }; @@ -856,6 +965,68 @@ render_resources_init(struct render_resources *r, &r->compute.descriptor_pool)); // out_descriptor_pool + /* + * Layer pipeline + */ + + C(create_compute_layer_descriptor_set_layout( // + vk, // vk_bundle + r->compute.src_binding, // src_binding, + r->compute.target_binding, // target_binding, + r->compute.ubo_binding, // ubo_binding, + r->compute.layer.image_array_size, // source_images_count, + &r->compute.layer.descriptor_set_layout)); // out_descriptor_set_layout + + C(vk_create_pipeline_layout( // + vk, // vk_bundle + r->compute.layer.descriptor_set_layout, // 
descriptor_set_layout + &r->compute.layer.pipeline_layout)); // out_pipeline_layout + + struct compute_layer_params layer_params = { + .do_timewarp = false, + .do_color_correction = true, + .max_layers = COMP_MAX_LAYERS, + .views_per_layer = COMP_VIEWS_PER_LAYER, + .image_array_size = r->compute.layer.image_array_size, + }; + + C(create_compute_layer_pipeline( // + vk, // vk_bundle + r->pipeline_cache, // pipeline_cache + r->shaders->layer_comp, // shader + r->compute.layer.pipeline_layout, // pipeline_layout + &layer_params, // params + &r->compute.layer.non_timewarp_pipeline)); // out_compute_pipeline + + struct compute_layer_params layer_timewarp_params = { + .do_timewarp = true, + .do_color_correction = true, + .max_layers = COMP_MAX_LAYERS, + .views_per_layer = COMP_VIEWS_PER_LAYER, + .image_array_size = r->compute.layer.image_array_size, + }; + + C(create_compute_layer_pipeline( // + vk, // vk_bundle + r->pipeline_cache, // pipeline_cache + r->shaders->layer_comp, // shader + r->compute.layer.pipeline_layout, // pipeline_layout + &layer_timewarp_params, // params + &r->compute.layer.timewarp_pipeline)); // out_compute_pipeline + + size_t layer_ubo_size = sizeof(struct render_compute_layer_ubo_data); + + C(render_buffer_init( // + vk, // vk_bundle + &r->compute.layer.ubo, // buffer + ubo_usage_flags, // usage_flags + memory_property_flags, // memory_property_flags + layer_ubo_size)); // size + C(render_buffer_map( // + vk, // vk_bundle + &r->compute.layer.ubo)); // buffer + + /* * Distortion pipeline */ @@ -1108,6 +1279,12 @@ render_resources_close(struct render_resources *r) render_buffer_close(vk, &r->mesh.ubos[1]); D(DescriptorPool, r->compute.descriptor_pool); + + D(DescriptorSetLayout, r->compute.layer.descriptor_set_layout); + D(Pipeline, r->compute.layer.non_timewarp_pipeline); + D(Pipeline, r->compute.layer.timewarp_pipeline); + D(PipelineLayout, r->compute.layer.pipeline_layout); + D(DescriptorSetLayout, r->compute.distortion.descriptor_set_layout); 
D(Pipeline, r->compute.distortion.pipeline); D(Pipeline, r->compute.distortion.timewarp_pipeline); @@ -1119,6 +1296,7 @@ render_resources_close(struct render_resources *r) render_distortion_buffer_close(r); render_buffer_close(vk, &r->compute.clear.ubo); + render_buffer_close(vk, &r->compute.layer.ubo); render_buffer_close(vk, &r->compute.distortion.ubo); teardown_scratch_image(r); diff --git a/src/xrt/compositor/render/render_shaders.c b/src/xrt/compositor/render/render_shaders.c index f1d9f7dfb..22b1ddbc3 100644 --- a/src/xrt/compositor/render/render_shaders.c +++ b/src/xrt/compositor/render/render_shaders.c @@ -24,6 +24,7 @@ #include "xrt/xrt_config_build.h" #include "shaders/clear.comp.h" +#include "shaders/layer.comp.h" #include "shaders/distortion.comp.h" #include "shaders/layer.frag.h" #include "shaders/layer.vert.h" @@ -93,6 +94,11 @@ render_shaders_load(struct render_shaders *s, struct vk_bundle *vk) sizeof(shaders_clear_comp), // size &s->clear_comp)); // out + C(shader_load(vk, // vk_bundle + shaders_layer_comp, // data + sizeof(shaders_layer_comp), // size + &s->layer_comp)); // out + C(shader_load(vk, // vk_bundle shaders_distortion_comp, // data sizeof(shaders_distortion_comp), // size @@ -161,6 +167,7 @@ render_shaders_close(struct render_shaders *s, struct vk_bundle *vk) { D(clear_comp); D(distortion_comp); + D(layer_comp); D(mesh_vert); D(mesh_frag); D(equirect1_vert); diff --git a/src/xrt/compositor/shaders/layer.comp b/src/xrt/compositor/shaders/layer.comp new file mode 100644 index 000000000..a1024ca4a --- /dev/null +++ b/src/xrt/compositor/shaders/layer.comp @@ -0,0 +1,283 @@ +// Copyright 2021-2022, Collabora Ltd. +// Author: Jakob Bornecrantz <jakob@collabora.com> +// Author: Christoph Haag <christoph.haag@collabora.com> +// SPDX-License-Identifier: BSL-1.0 + +#version 460 +#extension GL_GOOGLE_include_directive : require + +#include "srgb.inc.glsl" + +//! @todo should this be a specialization const? 
+#define XRT_LAYER_STEREO_PROJECTION 0 +#define XRT_LAYER_STEREO_PROJECTION_DEPTH 1 +#define XRT_LAYER_QUAD 2 +#define XRT_LAYER_CUBE 3 +#define XRT_LAYER_CYLINDER 4 +#define XRT_LAYER_EQUIRECT1 5 +#define XRT_LAYER_EQUIRECT2 6 + +// Should we do timewarp. +layout(constant_id = 1) const bool do_timewarp = false; +layout(constant_id = 2) const bool do_color_correction = true; +layout(constant_id = 3) const int COMP_MAX_LAYERS = 16; +layout(constant_id = 4) const int COMP_VIEWS_PER_LAYER = 2; +layout(constant_id = 5) const int SAMPLER_ARRAY_SIZE = 16; + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +// layer 0 left color, layer 0 right color, [optional: layer 0 left depth, layer 0 right depth], layer 1 left, layer 1 right, ... +layout(set = 0, binding = 0) uniform sampler2D source[SAMPLER_ARRAY_SIZE]; +layout(set = 0, binding = 2) uniform writeonly restrict image2D target; +layout(set = 0, binding = 3, std140) uniform restrict Config +{ + ivec4 views[2]; + vec4 pre_transform[2]; + vec4 post_transform[COMP_MAX_LAYERS][2]; + + // corresponds to enum xrt_layer_type + uvec2 layer_type_and_unpremultiplied[COMP_MAX_LAYERS]; + + // which image/sampler(s) correspond to each layer + ivec2 images_samplers[COMP_MAX_LAYERS][2]; + + // for projection layers + + // timewarp matrices + mat4 transform[COMP_MAX_LAYERS][2]; + + + // for quad layers + + // all quad transforms and coordinates are in view space + vec4 quad_position[COMP_MAX_LAYERS][2]; + vec4 quad_normal[COMP_MAX_LAYERS][2]; + mat4 inverse_quad_transform[COMP_MAX_LAYERS][2]; + + // quad extent in world scale + vec2 quad_extent[COMP_MAX_LAYERS]; +} ubo; + + +vec2 position_to_view_uv(ivec2 extent, uint ix, uint iy) +{ + // Turn the index into floating point. + vec2 xy = vec2(float(ix), float(iy)); + + // The inverse of the extent of a view image is the pixel size in [0 .. 1] space. 
+ vec2 extent_pixel_size = vec2(1.0 / float(extent.x), 1.0 / float(extent.y)); + + // Per-target pixel we move the size of the pixels. + vec2 view_uv = xy * extent_pixel_size; + + // Emulate a triangle sample position by offset half target pixel size. + view_uv = view_uv + extent_pixel_size / 2.0; + + return view_uv; +} + +vec2 transform_uv_subimage(vec2 uv, uint iz, uint layer) +{ + vec2 values = uv; + + // To deal with OpenGL flip and sub image view. + values.xy = values.xy * ubo.post_transform[layer][iz].zw + ubo.post_transform[layer][iz].xy; + + // Ready to be used. + return values.xy; +} + +vec2 transform_uv_timewarp(vec2 uv, uint view_index, uint layer) +{ + vec4 values = vec4(uv, -1, 1); + + // From uv to tan angle (tangent space). + values.xy = values.xy * ubo.pre_transform[view_index].zw + ubo.pre_transform[view_index].xy; + values.y = -values.y; // Flip to OpenXR coordinate system. + + // Timewarp. + values = ubo.transform[layer][view_index] * values; + values.xy = values.xy * (1.0 / max(values.w, 0.00001)); + + // From [-1, 1] to [0, 1] + values.xy = values.xy * 0.5 + 0.5; + + // To deal with OpenGL flip and sub image view. + values.xy = values.xy * ubo.post_transform[layer][view_index].zw + ubo.post_transform[layer][view_index].xy; + + // Done. + return values.xy; +} + +vec2 transform_uv(vec2 uv, uint view_index, uint layer) +{ + if (do_timewarp) { + return transform_uv_timewarp(uv, view_index, layer); + } else { + return transform_uv_subimage(uv, view_index, layer); + } +} + +vec4 do_projection(uint view_index, vec2 view_uv, uint layer) +{ + uint source_image_index = ubo.images_samplers[layer][view_index].x; + + // Do any transformation needed. + vec2 uv = transform_uv(view_uv, view_index, layer); + + // Sample the source. 
+ vec4 colour = vec4(texture(source[source_image_index], uv).rgba); + + return colour; +} + +vec3 get_direction(vec2 uv, uint view_index) +{ + // Skip the DIM/STRETCH/OFFSET stuff and go directly to values + vec4 values = vec4(uv, -1, 1); + + // From uv to tan angle (tangent space). + values.xy = values.xy * ubo.pre_transform[view_index].zw + ubo.pre_transform[view_index].xy; + values.y = -values.y; // Flip to OpenXR coordinate system. + + vec3 direction = normalize(values.xyz); + return direction; +} + +vec4 do_quad(uint view_index, vec2 view_uv, uint layer) +{ + uint source_image_index = ubo.images_samplers[layer][view_index].x; + + // center point of the plane in view space. + vec3 quad_position = ubo.quad_position[layer][view_index].xyz; + + // normal vector of the plane. + vec3 normal = ubo.quad_normal[layer][view_index].xyz; + normal = normalize(normal); + + // coordinate system is the view space, therefore the camera/eye position is in the origin. + vec3 camera = vec3(0.0, 0.0, 0.0); + + // default color white should never be visible + vec4 colour = vec4(1.0, 1.0, 1.0, 1.0); + + //! @todo can we get better "pixel stuck" on projection layers with timewarp uv? + // never use the timewarp uv here because it depends on the projection layer pose + vec2 uv = view_uv; + + /* + * To fill in the view_uv texel on the target texture, an imaginary ray is shot through texels on the target + * texture. When this imaginary ray hits a quad layer, it means that when the respective color at the hit + * intersection is picked for the current view_uv texel, the final image as seen through the headset will + * show this view_uv texel at the respective location. + */ + vec3 direction = get_direction(uv, view_index); + direction = normalize(direction); + + float denominator = dot(direction, normal); + + // denominator is negative when vectors point towards each other, 0 when perpendicular, + // and positive when vectors point in a similar direction, i.e. 
direction vector faces quad backface, which we don't render. + if (denominator < 0.00001) { + // shortest distance between origin and plane defined by normal + quad_position + float dist = dot(camera - quad_position, normal); + + // distance between origin and intersection point on the plane. + float intersection_dist = (dot(camera, normal) + dist) / -denominator; + + // layer is behind camera as defined by direction vector + if (intersection_dist < 0) { + colour = vec4(0.0, 0.0, 0.0, 0.0); + return colour; + } + + vec3 intersection = camera + intersection_dist * direction; + + // ps for "plane space" + vec2 intersection_ps = (ubo.inverse_quad_transform[layer][view_index] * vec4(intersection.xyz, 1.0)).xy; + + bool in_plane_bounds = + intersection_ps.x >= - ubo.quad_extent[layer].x / 2. && // + intersection_ps.x <= ubo.quad_extent[layer].x / 2. && // + intersection_ps.y >= - ubo.quad_extent[layer].y / 2. && // + intersection_ps.y <= ubo.quad_extent[layer].y / 2.; + + if (in_plane_bounds) { + // intersection_ps is in [-quad_extent .. quad_extent]. Transform to [0 .. quad_extent], then scale to [ 0 .. 1 ] for sampling + vec2 plane_uv = (intersection_ps.xy + ubo.quad_extent[layer] / 2.) 
/ ubo.quad_extent[layer]; + + // sample on the desired subimage, not the entire texture + plane_uv = plane_uv * ubo.post_transform[layer][view_index].zw + ubo.post_transform[layer][view_index].xy; + + colour = texture(source[source_image_index], plane_uv); + } else { + // intersection on infinite plane outside of plane bounds + colour = vec4(0.0, 0.0, 0.0, 0.0); + return colour; + } + } else { + // no intersection with front face of infinite plane or perpendicular + colour = vec4(0.0, 0.0, 0.0, 0.0); + return colour; + } + + return vec4(colour); +} + +vec4 do_layers(vec2 view_uv, uint view_index) +{ + vec4 accum = vec4(0, 0, 0, 0); + for (uint layer = 0; layer < COMP_MAX_LAYERS; layer++) { + bool use_layer = false; + + vec4 rgba = vec4(0, 0, 0, 0); + switch (ubo.layer_type_and_unpremultiplied[layer].x) { + case XRT_LAYER_STEREO_PROJECTION: + case XRT_LAYER_STEREO_PROJECTION_DEPTH: + rgba = do_projection(view_index, view_uv, layer); + use_layer = true; + break; + case XRT_LAYER_QUAD: + rgba = do_quad(view_index, view_uv, layer); + use_layer = true; + break; + default: break; + } + + if (use_layer) { + if (ubo.layer_type_and_unpremultiplied[layer].y != 0) { + // Unpremultiplied blend factor of src.a. + accum.rgb = mix(accum.rgb, rgba.rgb, rgba.a); + } else { + // Premultiplied blend factor of 1. + accum.rgb = (accum.rgb * (1 - rgba.a)) + rgba.rgb; + } + } + } + return accum; +} + +void main() +{ + uint ix = gl_GlobalInvocationID.x; + uint iy = gl_GlobalInvocationID.y; + uint iz = gl_GlobalInvocationID.z; + + ivec2 offset = ivec2(ubo.views[iz].xy); + ivec2 extent = ivec2(ubo.views[iz].zw); + + if (ix >= extent.x || iy >= extent.y) { + return; + } + + vec2 view_uv = position_to_view_uv(extent, ix, iy); + + vec4 colour = do_layers(view_uv, iz); + + if (do_color_correction) { + // Do colour correction here since there is no automatic conversion available in hardware. 
+ colour = vec4(from_linear_to_srgb(colour.rgb), 1); + } + + imageStore(target, ivec2(offset.x + ix, offset.y + iy), colour); +}