diff --git a/src/xrt/drivers/CMakeLists.txt b/src/xrt/drivers/CMakeLists.txt index 789e7610b..74c80f625 100644 --- a/src/xrt/drivers/CMakeLists.txt +++ b/src/xrt/drivers/CMakeLists.txt @@ -215,14 +215,8 @@ endif() if(XRT_BUILD_DRIVER_HANDTRACKING) add_library( drv_ht STATIC - ht/ht_algorithm.cpp - ht/ht_driver.cpp - ht/ht_driver.hpp + ht/ht_driver.c ht/ht_interface.h - ht/ht_model.cpp - ht/ht_hand_math.cpp - ht/ht_image_math.cpp - ht/ht_nms.cpp ) target_link_libraries( drv_ht @@ -234,6 +228,8 @@ if(XRT_BUILD_DRIVER_HANDTRACKING) aux_gstreamer ONNXRuntime::ONNXRuntime ${OpenCV_LIBRARIES} + t_ht_old_rgb + hand_async ) target_include_directories(drv_ht PRIVATE ${OpenCV_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR}) list(APPEND ENABLED_DRIVERS ht) diff --git a/src/xrt/drivers/ht/ht_algorithm.hpp b/src/xrt/drivers/ht/ht_algorithm.hpp deleted file mode 100644 index 0b8ab5925..000000000 --- a/src/xrt/drivers/ht/ht_algorithm.hpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021, Collabora, Ltd. -// SPDX-License-Identifier: BSL-1.0 -/*! - * @file - * @brief Camera based hand tracking mainloop algorithm. - * @author Moses Turner - * @ingroup drv_ht - */ - -#pragma once - -struct ht_device; - -void -htRunAlgorithm(struct ht_device *htd); diff --git a/src/xrt/drivers/ht/ht_driver.c b/src/xrt/drivers/ht/ht_driver.c new file mode 100644 index 000000000..5f7deceb1 --- /dev/null +++ b/src/xrt/drivers/ht/ht_driver.c @@ -0,0 +1,321 @@ +// Copyright 2021, Collabora, Ltd. +// SPDX-License-Identifier: BSL-1.0 +/*! + * @file + * @brief Camera based hand tracking driver code. + * @author Moses Turner + * @author Jakob Bornecrantz + * @ingroup drv_ht + */ + +#include "gstreamer/gst_pipeline.h" +#include "gstreamer/gst_sink.h" +#include "ht_interface.h" + +#include "../depthai/depthai_interface.h" + +#include "util/u_var.h" +#include "xrt/xrt_defines.h" +#include "xrt/xrt_frame.h" +#include "xrt/xrt_frameserver.h" +#include "xrt/xrt_prober.h" + +#include "os/os_time.h" +#include "os/os_threading.h" + +#include "math/m_api.h" + +#include "util/u_device.h" +#include "util/u_frame.h" +#include "util/u_sink.h" +#include "util/u_format.h" +#include "util/u_logging.h" +#include "util/u_time.h" +#include "util/u_trace_marker.h" +#include "util/u_time.h" +#include "util/u_json.h" +#include "util/u_config_json.h" +#include "util/u_debug.h" + + +// #include "tracking/t_frame_cv_mat_wrapper.hpp" +// #include "tracking/t_calibration_opencv.hpp" +#include "tracking/t_hand_tracking.h" + +// Save me, Obi-Wan! +#include "../../tracking/hand/old_rgb/rgb_interface.h" + + +#include + +DEBUG_GET_ONCE_LOG_OPTION(ht_log, "HT_LOG", U_LOGGING_WARN) + + +#define HT_TRACE(htd, ...) U_LOG_XDEV_IFL_T(&htd->base, htd->log_level, __VA_ARGS__) +#define HT_DEBUG(htd, ...) U_LOG_XDEV_IFL_D(&htd->base, htd->log_level, __VA_ARGS__) +#define HT_INFO(htd, ...) U_LOG_XDEV_IFL_I(&htd->base, htd->log_level, __VA_ARGS__) +#define HT_WARN(htd, ...) U_LOG_XDEV_IFL_W(&htd->base, htd->log_level, __VA_ARGS__) +#define HT_ERROR(htd, ...) U_LOG_XDEV_IFL_E(&htd->base, htd->log_level, __VA_ARGS__) + + + +struct ht_device +{ + struct xrt_device base; + + struct xrt_tracking_origin tracking_origin; // probably cargo-culted + + enum xrt_format desired_format; + + struct xrt_frame_context xfctx; + + struct xrt_fs *xfs; + + struct xrt_fs_mode mode; + + struct xrt_prober *prober; + + struct t_hand_tracking_sync *sync; + struct t_hand_tracking_async *async; + + enum u_logging_level log_level; +}; + +static inline struct ht_device * +ht_device(struct xrt_device *xdev) +{ + return (struct ht_device *)xdev; +} + +#if 0 +static void +getStartupConfig(struct ht_device *htd, const cJSON *startup_config) +{ + const cJSON *uvc_wire_format = u_json_get(startup_config, "uvc_wire_format"); + + if (cJSON_IsString(uvc_wire_format)) { + bool is_yuv = (strcmp(cJSON_GetStringValue(uvc_wire_format), "yuv") == 0); + bool is_mjpeg = (strcmp(cJSON_GetStringValue(uvc_wire_format), "mjpeg") == 0); + if (!is_yuv && !is_mjpeg) { + HT_WARN(htd, "Unknown wire format type %s - should be \"yuv\" or \"mjpeg\"", + cJSON_GetStringValue(uvc_wire_format)); + } + if (is_yuv) { + HT_DEBUG(htd, "Using YUYV422!"); + htd->desired_format = XRT_FORMAT_YUYV422; + } else { + HT_DEBUG(htd, "Using MJPEG!"); + htd->desired_format = XRT_FORMAT_MJPEG; + } + } +} + +static void +getUserConfig(struct ht_device *htd) +{ + // The game here is to avoid bugs + be paranoid, not to be fast. If you see something that seems "slow" - don't + // fix it. Any of the tracking code is way stickier than this could ever be. + + struct u_config_json config_json = {0}; + + u_config_json_open_or_create_main_file(&config_json); + if (!config_json.file_loaded) { + return; + } + + cJSON *ht_config_json = cJSON_GetObjectItemCaseSensitive(config_json.root, "config_ht"); + if (ht_config_json == NULL) { + return; + } + + // Don't get it twisted: initializing these to NULL is not cargo-culting. + // Uninitialized values on the stack aren't guaranteed to be 0, so these could end up pointing to what we + // *think* is a valid address but what is *not* one. + char *startup_config_string = NULL; + + { + const cJSON *startup_config_string_json = u_json_get(ht_config_json, "startup_config_index"); + if (cJSON_IsString(startup_config_string_json)) { + startup_config_string = cJSON_GetStringValue(startup_config_string_json); + } + } + + if (startup_config_string != NULL) { + const cJSON *startup_config_obj = + u_json_get(u_json_get(ht_config_json, "startup_configs"), startup_config_string); + getStartupConfig(htd, startup_config_obj); + } + + cJSON_Delete(config_json.root); + return; +} + +static void +userConfigSetDefaults(struct ht_device *htd) +{ + htd->desired_format = XRT_FORMAT_YUYV422; +} +#endif + +static void +on_video_device(struct xrt_prober *xp, + struct xrt_prober_device *pdev, + const char *product, + const char *manufacturer, + const char *serial, + void *ptr) +{ + // Stolen from gui_scene_record + + struct ht_device *htd = (struct ht_device *)ptr; + + // Hardcoded for the Index. + if (product != NULL && manufacturer != NULL) { + if ((strcmp(product, "3D Camera") == 0) && (strcmp(manufacturer, "Etron Technology, Inc.") == 0)) { + xrt_prober_open_video_device(xp, pdev, &htd->xfctx, &htd->xfs); + return; + } + } +} + +/*! + * xrt_device function implementations + */ + +static void +ht_device_update_inputs(struct xrt_device *xdev) +{ + // Empty +} + +static void +ht_device_get_hand_tracking(struct xrt_device *xdev, + enum xrt_input_name name, + uint64_t at_timestamp_ns, + struct xrt_hand_joint_set *out_value, + uint64_t *out_timestamp_ns) +{ + struct ht_device *htd = ht_device(xdev); + + if (name != XRT_INPUT_GENERIC_HAND_TRACKING_LEFT && name != XRT_INPUT_GENERIC_HAND_TRACKING_RIGHT) { + HT_ERROR(htd, "unknown input name for hand tracker"); + return; + } + + htd->async->get_hand(htd->async, name, at_timestamp_ns, out_value, out_timestamp_ns); +} + +static void +ht_device_destroy(struct xrt_device *xdev) +{ + struct ht_device *htd = ht_device(xdev); + HT_DEBUG(htd, "called!"); + + + xrt_frame_context_destroy_nodes(&htd->xfctx); + // Remove the variable tracking. + u_var_remove_root(htd); + + u_device_free(&htd->base); +} + +struct xrt_device * +ht_device_create(struct xrt_prober *xp, struct t_stereo_camera_calibration *calib) +{ + XRT_TRACE_MARKER(); + assert(calib != NULL); + + enum u_device_alloc_flags flags = U_DEVICE_ALLOC_NO_FLAGS; + + //! @todo 2 hands hardcoded + int num_hands = 2; + + // Allocate device + struct ht_device *htd = U_DEVICE_ALLOCATE(struct ht_device, flags, num_hands, 0); + + // Setup logging first. We like logging. + htd->log_level = debug_get_log_option_ht_log(); + + // Set defaults - most people won't have a config json and it won't get past here. + htd->desired_format = XRT_FORMAT_YUYV422; + + htd->prober = xp; + htd->xfs = NULL; + + xrt_prober_list_video_devices(htd->prober, on_video_device, htd); + + if (htd->xfs == NULL) { + return NULL; + } + + htd->base.tracking_origin = &htd->tracking_origin; + htd->base.tracking_origin->type = XRT_TRACKING_TYPE_RGB; + htd->base.tracking_origin->offset.position.x = 0.0f; + htd->base.tracking_origin->offset.position.y = 0.0f; + htd->base.tracking_origin->offset.position.z = 0.0f; + htd->base.tracking_origin->offset.orientation.w = 1.0f; + + + htd->base.update_inputs = ht_device_update_inputs; + htd->base.get_hand_tracking = ht_device_get_hand_tracking; + htd->base.destroy = ht_device_destroy; + + snprintf(htd->base.str, XRT_DEVICE_NAME_LEN, "Camera based Hand Tracker"); + snprintf(htd->base.serial, XRT_DEVICE_NAME_LEN, "Camera based Hand Tracker"); + + htd->base.inputs[0].name = XRT_INPUT_GENERIC_HAND_TRACKING_LEFT; + htd->base.inputs[1].name = XRT_INPUT_GENERIC_HAND_TRACKING_RIGHT; + + // Yes, you need all of these. Yes, I tried disabling them all one at a time. You need all of these. + htd->base.name = XRT_DEVICE_HAND_TRACKER; + htd->base.device_type = XRT_DEVICE_TYPE_HAND_TRACKER; + htd->base.orientation_tracking_supported = true; + htd->base.position_tracking_supported = true; + htd->base.hand_tracking_supported = true; + + htd->sync = t_hand_tracking_sync_old_rgb_create(calib); + htd->async = t_hand_tracking_async_default_create(&htd->xfctx, htd->sync); + + struct xrt_frame_sink *tmp = NULL; + + u_sink_stereo_sbs_to_slam_sbs_create(&htd->xfctx, &htd->async->left, &htd->async->right, &tmp); + + // Converts images (we'd expect YUV422 or MJPEG) to R8G8B8. Can take a long time, especially on unoptimized + // builds. If it's really slow, triple-check that you built Monado with optimizations! + //!@todo + u_sink_create_format_converter(&htd->xfctx, XRT_FORMAT_R8G8B8, tmp, &tmp); + + // This puts u_sink_create_to_r8g8b8_or_l8 on its own thread, so that nothing gets backed up if it runs slower + // than the native camera framerate. + u_sink_queue_create(&htd->xfctx, 1, tmp, &tmp); + + struct xrt_fs_mode *modes; + uint32_t count; + + xrt_fs_enumerate_modes(htd->xfs, &modes, &count); + + // Index should only have XRT_FORMAT_YUYV422 or XRT_FORMAT_MJPEG. + + bool found_mode = false; + uint32_t selected_mode = 0; + + for (; selected_mode < count; selected_mode++) { + if (modes[selected_mode].format == htd->desired_format) { + found_mode = true; + break; + } + } + + if (!found_mode) { + selected_mode = 0; + HT_WARN(htd, "Couldn't find desired camera mode! Something's probably wrong."); + } + + free(modes); + + xrt_fs_stream_start(htd->xfs, tmp, XRT_FS_CAPTURE_TYPE_TRACKING, selected_mode); + + HT_DEBUG(htd, "Hand Tracker initialized!"); + + return &htd->base; +} diff --git a/src/xrt/drivers/ht/ht_driver.cpp b/src/xrt/drivers/ht/ht_driver.cpp deleted file mode 100644 index f5b1c325a..000000000 --- a/src/xrt/drivers/ht/ht_driver.cpp +++ /dev/null @@ -1,782 +0,0 @@ -// Copyright 2021, Collabora, Ltd. -// SPDX-License-Identifier: BSL-1.0 -/*! - * @file - * @brief Camera based hand tracking driver code. - * @author Moses Turner - * @author Jakob Bornecrantz - * @ingroup drv_ht - */ - -#include "gstreamer/gst_pipeline.h" -#include "gstreamer/gst_sink.h" -#include "ht_interface.h" -#include "ht_driver.hpp" - -#include "../depthai/depthai_interface.h" - -#include "xrt/xrt_defines.h" -#include "xrt/xrt_frame.h" -#include "xrt/xrt_frameserver.h" - -#include "os/os_time.h" -#include "os/os_threading.h" - -#include "math/m_api.h" -#include "math/m_eigen_interop.hpp" - -#include "util/u_device.h" -#include "util/u_frame.h" -#include "util/u_sink.h" -#include "util/u_format.h" -#include "util/u_logging.h" -#include "util/u_time.h" -#include "util/u_trace_marker.h" -#include "util/u_time.h" -#include "util/u_json.h" -#include "util/u_config_json.h" - -#include "tracking/t_frame_cv_mat_wrapper.hpp" -#include "tracking/t_calibration_opencv.hpp" - -#include "ht_algorithm.hpp" -#include "ht_model.hpp" - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -DEBUG_GET_ONCE_LOG_OPTION(ht_log, "HT_LOG", U_LOGGING_WARN) - -/*! - * Setup helper functions. - */ - -static bool -getCalibration(struct ht_device *htd, t_stereo_camera_calibration *calibration) -{ - xrt::auxiliary::tracking::StereoCameraCalibrationWrapper wrap(calibration); - xrt_vec3 trans = {(float)wrap.camera_translation_mat(0, 0), (float)wrap.camera_translation_mat(1, 0), - (float)wrap.camera_translation_mat(2, 0)}; - htd->baseline = m_vec3_len(trans); - -#if 0 - std::cout << "\n\nTRANSLATION VECTOR IS\n" << wrap.camera_translation_mat; - std::cout << "\n\nROTATION FROM LEFT TO RIGHT IS\n" << wrap.camera_rotation_mat << "\n"; -#endif - - cv::Matx34d P1; - cv::Matx34d P2; - - cv::Matx44d Q; - - // The only reason we're calling stereoRectify is because we want R1 and R2 for the - cv::stereoRectify(wrap.view[0].intrinsics_mat, // cameraMatrix1 - wrap.view[0].distortion_mat, // distCoeffs1 - wrap.view[1].intrinsics_mat, // cameraMatrix2 - wrap.view[1].distortion_mat, // distCoeffs2 - wrap.view[0].image_size_pixels_cv, // imageSize* - wrap.camera_rotation_mat, // R - wrap.camera_translation_mat, // T - htd->views[0].rotate_camera_to_stereo_camera, // R1 - htd->views[1].rotate_camera_to_stereo_camera, // R2 - P1, // P1 - P2, // P2 - Q, // Q - 0, // flags - -1.0f, // alpha - cv::Size(), // newImageSize - NULL, // validPixROI1 - NULL); // validPixROI2 - - //* Good enough guess that view 0 and view 1 are the same size. - - for (int i = 0; i < 2; i++) { - htd->views[i].cameraMatrix = wrap.view[i].intrinsics_mat; - - htd->views[i].distortion = wrap.view[i].distortion_fisheye_mat; - } - - htd->camera.one_view_size_px.w = wrap.view[0].image_size_pixels.w; - htd->camera.one_view_size_px.h = wrap.view[0].image_size_pixels.h; - - - cv::Matx33d rotate_stereo_camera_to_left_camera = htd->views[0].rotate_camera_to_stereo_camera.inv(); - - xrt_matrix_3x3 s; - s.v[0] = rotate_stereo_camera_to_left_camera(0, 0); - s.v[1] = rotate_stereo_camera_to_left_camera(0, 1); - s.v[2] = rotate_stereo_camera_to_left_camera(0, 2); - - s.v[3] = rotate_stereo_camera_to_left_camera(1, 0); - s.v[4] = rotate_stereo_camera_to_left_camera(1, 1); - s.v[5] = rotate_stereo_camera_to_left_camera(1, 2); - - s.v[6] = rotate_stereo_camera_to_left_camera(2, 0); - s.v[7] = rotate_stereo_camera_to_left_camera(2, 1); - s.v[8] = rotate_stereo_camera_to_left_camera(2, 2); - - xrt_quat tmp; - - math_quat_from_matrix_3x3(&s, &tmp); - - // Weird that I have to invert this quat, right? I think at some point - like probably just above this - I must - // have swapped row-major and col-major - remember, if you transpose a rotation matrix, you get its inverse. - // Doesn't matter that I don't understand - non-inverted looks definitely wrong, inverted looks definitely - // right. - math_quat_invert(&tmp, &htd->stereo_camera_to_left_camera); - -#if 0 - U_LOG_E("%f %f %f %f", htd->stereo_camera_to_left_camera.w, htd->stereo_camera_to_left_camera.x, - htd->stereo_camera_to_left_camera.y, htd->stereo_camera_to_left_camera.z); -#endif - - return true; -} - -static void -getStartupConfig(struct ht_device *htd, const cJSON *startup_config) -{ - const cJSON *palm_detection_type = u_json_get(startup_config, "palm_detection_model"); - const cJSON *keypoint_estimation_type = u_json_get(startup_config, "keypoint_estimation_model"); - const cJSON *uvc_wire_format = u_json_get(startup_config, "uvc_wire_format"); - - // IsString does its own null-checking - if (cJSON_IsString(palm_detection_type)) { - bool is_collabora = (strcmp(cJSON_GetStringValue(palm_detection_type), "collabora") == 0); - bool is_mediapipe = (strcmp(cJSON_GetStringValue(palm_detection_type), "mediapipe") == 0); - if (!is_collabora && !is_mediapipe) { - HT_WARN(htd, "Unknown palm detection type %s - should be \"collabora\" or \"mediapipe\"", - cJSON_GetStringValue(palm_detection_type)); - } - htd->startup_config.palm_detection_use_mediapipe = is_mediapipe; - } - - if (cJSON_IsString(keypoint_estimation_type)) { - bool is_collabora = (strcmp(cJSON_GetStringValue(keypoint_estimation_type), "collabora") == 0); - bool is_mediapipe = (strcmp(cJSON_GetStringValue(keypoint_estimation_type), "mediapipe") == 0); - if (!is_collabora && !is_mediapipe) { - HT_WARN(htd, "Unknown keypoint estimation type %s - should be \"collabora\" or \"mediapipe\"", - cJSON_GetStringValue(keypoint_estimation_type)); - } - htd->startup_config.keypoint_estimation_use_mediapipe = is_mediapipe; - } - - if (cJSON_IsString(uvc_wire_format)) { - bool is_yuv = (strcmp(cJSON_GetStringValue(uvc_wire_format), "yuv") == 0); - bool is_mjpeg = (strcmp(cJSON_GetStringValue(uvc_wire_format), "mjpeg") == 0); - if (!is_yuv && !is_mjpeg) { - HT_WARN(htd, "Unknown wire format type %s - should be \"yuv\" or \"mjpeg\"", - cJSON_GetStringValue(uvc_wire_format)); - } - if (is_yuv) { - HT_DEBUG(htd, "Using YUYV422!"); - htd->startup_config.desired_format = XRT_FORMAT_YUYV422; - } else { - HT_DEBUG(htd, "Using MJPEG!"); - htd->startup_config.desired_format = XRT_FORMAT_MJPEG; - } - } -} - -static void -getUserConfig(struct ht_device *htd) -{ - // The game here is to avoid bugs + be paranoid, not to be fast. If you see something that seems "slow" - don't - // fix it. Any of the tracking code is way stickier than this could ever be. - - struct u_config_json config_json = {}; - - u_config_json_open_or_create_main_file(&config_json); - if (!config_json.file_loaded) { - return; - } - - cJSON *ht_config_json = cJSON_GetObjectItemCaseSensitive(config_json.root, "config_ht"); - if (ht_config_json == NULL) { - return; - } - - // Don't get it twisted: initializing these to NULL is not cargo-culting. - // Uninitialized values on the stack aren't guaranteed to be 0, so these could end up pointing to what we - // *think* is a valid address but what is *not* one. - char *startup_config_string = NULL; - char *dynamic_config_string = NULL; - - { - const cJSON *startup_config_string_json = u_json_get(ht_config_json, "startup_config_index"); - if (cJSON_IsString(startup_config_string_json)) { - startup_config_string = cJSON_GetStringValue(startup_config_string_json); - } - - const cJSON *dynamic_config_string_json = u_json_get(ht_config_json, "dynamic_config_index"); - if (cJSON_IsString(dynamic_config_string_json)) { - dynamic_config_string = cJSON_GetStringValue(dynamic_config_string_json); - } - } - - if (startup_config_string != NULL) { - const cJSON *startup_config_obj = - u_json_get(u_json_get(ht_config_json, "startup_configs"), startup_config_string); - getStartupConfig(htd, startup_config_obj); - } - - if (dynamic_config_string != NULL) { - const cJSON *dynamic_config_obj = - u_json_get(u_json_get(ht_config_json, "dynamic_configs"), dynamic_config_string); - { - ht_dynamic_config *hdc = &htd->dynamic_config; - // Do the thing - u_json_get_string_into_array(u_json_get(dynamic_config_obj, "name"), hdc->name, 64); - - u_json_get_float(u_json_get(dynamic_config_obj, "hand_fc_min"), &hdc->hand_fc_min.val); - u_json_get_float(u_json_get(dynamic_config_obj, "hand_fc_min_d"), &hdc->hand_fc_min_d.val); - u_json_get_float(u_json_get(dynamic_config_obj, "hand_beta"), &hdc->hand_beta.val); - - u_json_get_float(u_json_get(dynamic_config_obj, "nms_iou"), &hdc->nms_iou.val); - u_json_get_float(u_json_get(dynamic_config_obj, "nms_threshold"), &hdc->nms_threshold.val); - - u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_nms_detections"), - &hdc->scribble_nms_detections); - u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_raw_detections"), - &hdc->scribble_raw_detections); - u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_2d_keypoints"), - &hdc->scribble_2d_keypoints); - u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_bounding_box"), - &hdc->scribble_bounding_box); - - char *dco_str = cJSON_Print(dynamic_config_obj); - U_LOG_D("Config %s %s", dynamic_config_string, dco_str); - free(dco_str); - } - } - - - - cJSON_Delete(config_json.root); - return; -} - - -static void -userConfigSetDefaults(struct ht_device *htd) -{ - // Admit defeat: for now, Mediapipe's are still better than ours. - htd->startup_config.palm_detection_use_mediapipe = true; - htd->startup_config.keypoint_estimation_use_mediapipe = true; - - // Make sure you build DebugOptimized! - htd->startup_config.desired_format = XRT_FORMAT_YUYV422; - - - ht_dynamic_config *hdc = &htd->dynamic_config; - - hdc->scribble_nms_detections = true; - hdc->scribble_raw_detections = false; - hdc->scribble_2d_keypoints = true; - hdc->scribble_bounding_box = false; - - hdc->hand_fc_min.min = 0.0f; - hdc->hand_fc_min.max = 50.0f; - hdc->hand_fc_min.step = 0.05f; - hdc->hand_fc_min.val = FCMIN_HAND; - - hdc->hand_fc_min_d.min = 0.0f; - hdc->hand_fc_min_d.max = 50.0f; - hdc->hand_fc_min_d.step = 0.05f; - hdc->hand_fc_min_d.val = FCMIN_D_HAND; - - - hdc->hand_beta.min = 0.0f; - hdc->hand_beta.max = 50.0f; - hdc->hand_beta.step = 0.05f; - hdc->hand_beta.val = BETA_HAND; - - hdc->max_vel.min = 0.0f; - hdc->max_vel.max = 50.0f; - hdc->max_vel.step = 0.05f; - hdc->max_vel.val = 30.0f; // 30 m/s; about 108 kph. If your hand is going this fast, our tracking failing is the - // least of your problems. - - hdc->max_acc.min = 0.0f; - hdc->max_acc.max = 100.0f; - hdc->max_acc.step = 0.1f; - hdc->max_acc.val = 100.0f; // 100 m/s^2; about 10 Gs. Ditto. - - hdc->nms_iou.min = 0.0f; - hdc->nms_iou.max = 1.0f; - hdc->nms_iou.step = 0.01f; - - - hdc->nms_threshold.min = 0.0f; - hdc->nms_threshold.max = 1.0f; - hdc->nms_threshold.step = 0.01f; - - hdc->new_detection_threshold.min = 0.0f; - hdc->new_detection_threshold.max = 1.0f; - hdc->new_detection_threshold.step = 0.01f; - - - hdc->nms_iou.val = 0.05f; - hdc->nms_threshold.val = 0.3f; - hdc->new_detection_threshold.val = 0.6f; -} - - -static void -getModelsFolder(struct ht_device *htd) -{ -// Please bikeshed me on this! I don't know where is the best place to put this stuff! -#if 0 - char exec_location[1024] = {}; - readlink("/proc/self/exe", exec_location, 1024); - - HT_DEBUG(htd, "Exec at %s\n", exec_location); - - int end = 0; - while (exec_location[end] != '\0') { - HT_DEBUG(htd, "%d", end); - end++; - } - - while (exec_location[end] != '/' && end != 0) { - HT_DEBUG(htd, "%d %c", end, exec_location[end]); - exec_location[end] = '\0'; - end--; - } - - strcat(exec_location, "../share/monado/hand-tracking-models/"); - strcpy(htd->startup_config.model_slug, exec_location); -#else - const char *xdg_home = getenv("XDG_CONFIG_HOME"); - const char *home = getenv("HOME"); - if (xdg_home != NULL) { - strcpy(htd->startup_config.model_slug, xdg_home); - } else if (home != NULL) { - strcpy(htd->startup_config.model_slug, home); - } else { - assert(false); - } - strcat(htd->startup_config.model_slug, "/.local/share/monado/hand-tracking-models/"); -#endif -} - -#if defined(EXPERIMENTAL_DATASET_RECORDING) - -static void -htStartJsonCB(void *ptr) -{ - struct ht_device *htd = (struct ht_device *)ptr; - HT_INFO(htd, "Magic button pressed!"); - - // Wait for the hand tracker to be totally done with the current frame, then make it wait trying to relock this - // mutex for us to be done. - os_mutex_lock(&htd->unlocked_between_frames); - - if (htd->tracking_should_record_dataset == false) { - // Then we're starting up the pipeline. - HT_INFO(htd, "Starting dataset recording!"); - - - const char *source_name = "source_name"; - char pipeline_string[2048]; - - /* - None (0) – No preset - ultrafast (1) – ultrafast - superfast (2) – superfast - veryfast (3) – veryfast - faster (4) – faster - fast (5) – fast - medium (6) – medium - slow (7) – slow - slower (8) – slower - veryslow (9) – veryslow - placebo (10) – placebo - */ - -#if 0 - snprintf(pipeline_string, // - sizeof(pipeline_string), // - "appsrc name=\"%s\" ! " - "queue ! " - "videoconvert ! " - "queue ! " - "x264enc pass=qual quantizer=0 tune=film bitrate=\"%s\" speed-preset=\"%s\" ! " - "h264parse ! " - "queue ! " - "mp4mux ! " - "filesink location=\"%s\"", - source_name, "16384", "fast", "/tmp/moses.mp4"); -#elif 1 - snprintf(pipeline_string, // - sizeof(pipeline_string), // - "appsrc name=\"%s\" ! " - "queue ! " - "videoconvert ! " - "queue ! " - "x264enc pass=quant quantizer=20 tune=\"film\" speed-preset=\"veryfast\" ! " - "h264parse ! " - "queue ! " - "matroskamux ! " - "filesink location=\"%s\"", - source_name, "/tmp/moses.mkv"); -#elif 1 - snprintf(pipeline_string, // - sizeof(pipeline_string), // - "appsrc name=\"%s\" ! " - "queue ! " - "videoconvert ! " - "x265enc ! " - "h265parse ! " - "matroskamux ! " - "filesink location=\"%s\"", - source_name, "/tmp/moses.mkv"); -#endif - - gstreamer_pipeline_create_from_string(&htd->gst.xfctx, pipeline_string, &htd->gst.gp); - - gstreamer_sink_create_with_pipeline(htd->gst.gp, 2560, 800, XRT_FORMAT_R8G8B8, source_name, - &htd->gst.gs, &htd->gst.sink); - gstreamer_pipeline_play(htd->gst.gp); - - - htd->gst.output_root = cJSON_CreateObject(); - htd->gst.output_array = cJSON_CreateArray(); - cJSON_AddItemToObject(htd->gst.output_root, "hand_array", htd->gst.output_array); - - strcpy(htd->gui.start_json_record.label, "Stop recording and save dataset!"); - htd->gst.current_index = 0; - htd->tracking_should_record_dataset = true; - - } else { - // Then the pipeline was created sometime in the past and we have to destroy it + save everything to a - // file. - - gstreamer_pipeline_stop(htd->gst.gp); - - xrt_frame_context_destroy_nodes(&htd->gst.xfctx); - - - cJSON_AddNumberToObject(htd->gst.output_root, "num_frames", htd->gst.current_index); - cJSON_AddNumberToObject(htd->gst.output_root, "length_ns", htd->gst.last_frame_ns); - const char *string = cJSON_Print(htd->gst.output_root); - FILE *fp; - fp = fopen("/tmp/moses.json", "w"); - fprintf(fp, "%s", string); - fclose(fp); - cJSON_Delete(htd->gst.output_root); - - strcpy(htd->gui.start_json_record.label, "Start recording dataset!"); - htd->tracking_should_record_dataset = false; - } - - // We're done; let the hand tracker go about its business - os_mutex_unlock(&htd->unlocked_between_frames); -} -#endif - -static void -on_video_device(struct xrt_prober *xp, - struct xrt_prober_device *pdev, - const char *product, - const char *manufacturer, - const char *serial, - void *ptr) -{ - // Stolen from gui_scene_record - - struct ht_device *htd = (struct ht_device *)ptr; - - // Hardcoded for the Index. - if (product != NULL && manufacturer != NULL) { - if ((strcmp(product, "3D Camera") == 0) && (strcmp(manufacturer, "Etron Technology, Inc.") == 0)) { - xrt_prober_open_video_device(xp, pdev, &htd->camera.xfctx, &htd->camera.xfs); - return; - } - } -} - -/*! - * xrt_frame_sink function implementations - */ - -static void -ht_sink_push_frame(struct xrt_frame_sink *xs, struct xrt_frame *xf) -{ - XRT_TRACE_MARKER(); - struct ht_device *htd = container_of(xs, struct ht_device, sink); - assert(xf != NULL); - - if (!htd->tracking_should_die) { - os_mutex_lock(&htd->unlocked_between_frames); - - xrt_frame_reference(&htd->frame_for_process, xf); - htRunAlgorithm(htd); - xrt_frame_reference(&htd->frame_for_process, NULL); // Could let go of it a little earlier but nah - - os_mutex_unlock(&htd->unlocked_between_frames); - } -} - -/*! - * xrt_frame_node function implementations - */ - -static void -ht_node_break_apart(struct xrt_frame_node *node) -{ - struct ht_device *htd = container_of(node, struct ht_device, node); - HT_DEBUG(htd, "called!"); - // wrong but don't care -} - -static void -ht_node_destroy(struct xrt_frame_node *node) -{ - struct ht_device *htd = container_of(node, struct ht_device, node); - - HT_DEBUG(htd, "called!"); -} - -/*! - * xrt_device function implementations - */ - -static void -ht_device_update_inputs(struct xrt_device *xdev) -{ - // Empty -} - -static void -ht_device_get_hand_tracking(struct xrt_device *xdev, - enum xrt_input_name name, - uint64_t at_timestamp_ns, - struct xrt_hand_joint_set *out_value, - uint64_t *out_timestamp_ns) -{ - struct ht_device *htd = ht_device(xdev); - - if (name != XRT_INPUT_GENERIC_HAND_TRACKING_LEFT && name != XRT_INPUT_GENERIC_HAND_TRACKING_RIGHT) { - HT_ERROR(htd, "unknown input name for hand tracker"); - return; - } - bool hand_index = (name == XRT_INPUT_GENERIC_HAND_TRACKING_RIGHT); // left=0, right=1 - - - - os_mutex_lock(&htd->openxr_hand_data_mediator); - memcpy(out_value, &htd->hands_for_openxr[hand_index], sizeof(struct xrt_hand_joint_set)); - // Instead of pose-predicting, we tell the caller that this joint set is a little old - *out_timestamp_ns = htd->hands_for_openxr_timestamp; - os_mutex_unlock(&htd->openxr_hand_data_mediator); -} - -static void -ht_device_destroy(struct xrt_device *xdev) -{ - struct ht_device *htd = ht_device(xdev); - HT_DEBUG(htd, "called!"); - - - xrt_frame_context_destroy_nodes(&htd->camera.xfctx); -#ifdef EXPERIMENTAL_DATASET_RECORDING - xrt_frame_context_destroy_nodes(&htd->gst.xfctx); -#endif - htd->tracking_should_die = true; - - // Lock this mutex so we don't try to free things as they're being used on the last iteration - os_mutex_lock(&htd->unlocked_between_frames); - // Remove the variable tracking. - u_var_remove_root(htd); - - // Shhhhhhhhhhh, it's okay. It'll all be okay. - htd->histories_3d.~vector(); - htd->views[0].bbox_histories.~vector(); - htd->views[1].bbox_histories.~vector(); - // Okay, fine, since we're mixing C and C++ idioms here, I couldn't find a clean way to implicitly - // call the destructors on these (ht_device doesn't have a destructor; neither do most of its members; and if - // you read u_device_allocate and u_device_free you'll agree it'd be somewhat annoying to write a - // constructor/destructor for ht_device), so we just manually call the destructors for things like std::vector's - // that need their destructors to be called to not leak. - - delete htd->views[0].htm; - delete htd->views[1].htm; - - u_device_free(&htd->base); -} - -extern "C" struct xrt_device * -ht_device_create(struct xrt_prober *xp, struct t_stereo_camera_calibration *calib) -{ - enum ht_run_type run_type = HT_RUN_TYPE_VALVE_INDEX; - XRT_TRACE_MARKER(); - enum u_device_alloc_flags flags = U_DEVICE_ALLOC_NO_FLAGS; - - //! @todo 2 hands hardcoded - int num_hands = 2; - - // Allocate device - struct ht_device *htd = U_DEVICE_ALLOCATE(struct ht_device, flags, num_hands, 0); - - // Setup logging first. We like logging. - htd->log_level = debug_get_log_option_ht_log(); - - /* - * Get configuration - */ - - assert(calib != NULL); - htd->run_type = run_type; - getCalibration(htd, calib); - // Set defaults - most people won't have a config json and it won't get past here. - userConfigSetDefaults(htd); - getUserConfig(htd); - getModelsFolder(htd); - - /* - * Add our xrt_frame_sink and xrt_frame_node implementations to ourselves - */ - - htd->sink.push_frame = &ht_sink_push_frame; - htd->node.break_apart = &ht_node_break_apart; - htd->node.destroy = &ht_node_destroy; - // Add ourselves to the frame context - xrt_frame_context_add(&htd->camera.xfctx, &htd->node); - - - - htd->camera.prober = xp; - htd->camera.xfs = NULL; // paranoia - - xrt_prober_list_video_devices(htd->camera.prober, on_video_device, htd); - - if (htd->camera.xfs == NULL) { - return NULL; - } - - - htd->views[0].htd = htd; - htd->views[1].htd = htd; // :) - - htd->views[0].htm = new ht_model(htd); - htd->views[1].htm = new ht_model(htd); - - htd->views[0].view = 0; - htd->views[1].view = 1; - - htd->base.tracking_origin = &htd->tracking_origin; - htd->base.tracking_origin->type = XRT_TRACKING_TYPE_RGB; - htd->base.tracking_origin->offset.position.x = 0.0f; - htd->base.tracking_origin->offset.position.y = 0.0f; - htd->base.tracking_origin->offset.position.z = 0.0f; - htd->base.tracking_origin->offset.orientation.w = 1.0f; - - os_mutex_init(&htd->openxr_hand_data_mediator); - os_mutex_init(&htd->unlocked_between_frames); - - htd->base.update_inputs = ht_device_update_inputs; - htd->base.get_hand_tracking = ht_device_get_hand_tracking; - htd->base.destroy = ht_device_destroy; - - snprintf(htd->base.str, XRT_DEVICE_NAME_LEN, "Camera based Hand Tracker"); - snprintf(htd->base.serial, XRT_DEVICE_NAME_LEN, "Camera based Hand Tracker"); - - htd->base.inputs[0].name = XRT_INPUT_GENERIC_HAND_TRACKING_LEFT; - htd->base.inputs[1].name = XRT_INPUT_GENERIC_HAND_TRACKING_RIGHT; - - // Yes, you need all of these. Yes, I tried disabling them all one at a time. You need all of these. - htd->base.name = XRT_DEVICE_HAND_TRACKER; - htd->base.device_type = XRT_DEVICE_TYPE_HAND_TRACKER; - htd->base.orientation_tracking_supported = true; - htd->base.position_tracking_supported = true; - htd->base.hand_tracking_supported = true; - - struct xrt_frame_sink *tmp = &htd->sink; - - - // This puts u_sink_create_to_r8g8b8_or_l8 on its own thread, so that nothing gets backed up if it runs slower - // than the native camera framerate. - u_sink_queue_create(&htd->camera.xfctx, 1, tmp, &tmp); - - // Converts images (we'd expect YUV422 or MJPEG) to R8G8B8. Can take a long time, especially on unoptimized - // builds. If it's really slow, triple-check that you built Monado with optimizations! - u_sink_create_format_converter(&htd->camera.xfctx, XRT_FORMAT_R8G8B8, tmp, &tmp); - - // Puts the hand tracking code on its own thread, so that nothing upstream of it gets backed up if the hand - // tracking code runs slower than the upstream framerate. - u_sink_queue_create(&htd->camera.xfctx, 1, tmp, &tmp); - - xrt_fs_mode *modes; - uint32_t count; - - xrt_fs_enumerate_modes(htd->camera.xfs, &modes, &count); - - // Index should only have XRT_FORMAT_YUYV422 or XRT_FORMAT_MJPEG. - - bool found_mode = false; - uint32_t selected_mode = 0; - - for (; selected_mode < count; selected_mode++) { - if (modes[selected_mode].format == htd->startup_config.desired_format) { - found_mode = true; - break; - } - } - - if (!found_mode) { - selected_mode = 0; - HT_WARN(htd, "Couldn't find desired camera mode! Something's probably wrong."); - } - - free(modes); - - u_var_add_root(htd, "Camera-based Hand Tracker", true); - - u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_fc_min, "hand_fc_min"); - u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_fc_min_d, "hand_fc_min_d"); - u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_beta, "hand_beta"); - u_var_add_draggable_f32(htd, &htd->dynamic_config.nms_iou, "nms_iou"); - u_var_add_draggable_f32(htd, &htd->dynamic_config.nms_threshold, "nms_threshold"); - u_var_add_draggable_f32(htd, &htd->dynamic_config.new_detection_threshold, "new_detection_threshold"); - - u_var_add_bool(htd, &htd->dynamic_config.scribble_raw_detections, "Scribble raw detections"); - u_var_add_bool(htd, &htd->dynamic_config.scribble_nms_detections, "Scribble NMS detections"); - u_var_add_bool(htd, &htd->dynamic_config.scribble_2d_keypoints, "Scribble 2D keypoints"); - u_var_add_bool(htd, &htd->dynamic_config.scribble_bounding_box, "Scribble bounding box"); - -#ifdef EXPERIMENTAL_DATASET_RECORDING - htd->gui.start_json_record.ptr = htd; - htd->gui.start_json_record.cb = htStartJsonCB; - strcpy(htd->gui.start_json_record.label, "Start recording dataset!"); - u_var_add_button(htd, &htd->gui.start_json_record, ""); -#endif - - u_var_add_sink_debug(htd, &htd->debug_sink, "i"); - - xrt_fs_stream_start(htd->camera.xfs, tmp, XRT_FS_CAPTURE_TYPE_TRACKING, selected_mode); - - HT_DEBUG(htd, "Hand Tracker initialized!"); - - - return &htd->base; -} diff --git a/src/xrt/drivers/ht/ht_hand_math.hpp b/src/xrt/drivers/ht/ht_hand_math.hpp deleted file mode 100644 index b3e414bb8..000000000 --- a/src/xrt/drivers/ht/ht_hand_math.hpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2021, Collabora, Ltd. -// SPDX-License-Identifier: BSL-1.0 -/*! - * @file - * @brief Helper math to do things with 3D hands for the camera-based hand tracker - * @author Moses Turner - * @author Nick Klingensmith - * @ingroup drv_ht - */ - -#pragma once - -struct Hand2D; -struct Hand3D; -struct HandHistory3D; -struct ht_device; -struct xrt_hand_joint_set; - -float -sumOfHandJointDistances(const Hand3D &one, const Hand3D &two); - -float -errHandHistory(const HandHistory3D &history_hand, const Hand3D &present_hand); -float -errHandDisparity(const Hand2D &left_rays, const Hand2D &right_rays); - -void -applyJointWidths(struct xrt_hand_joint_set *set); -void -applyThumbIndexDrag(Hand3D *hand); -void -applyJointOrientations(struct xrt_hand_joint_set *set, bool is_right); - -float -handednessJointSet(Hand3D *set); -void -handednessHandHistory3D(HandHistory3D *history); - -void -handEuroFiltersInit(HandHistory3D *history, double fc_min, double fc_min_d, double beta); -void -handEuroFiltersRun(struct ht_device *htd, HandHistory3D *f, Hand3D *out_hand); - -bool -rejectTooFar(struct ht_device *htd, Hand3D *hand); -bool -rejectTooClose(struct ht_device *htd, Hand3D *hand); -bool -rejectTinyPalm(struct ht_device *htd, Hand3D *hand); diff --git a/src/xrt/drivers/ht/ht_image_math.hpp b/src/xrt/drivers/ht/ht_image_math.hpp deleted file mode 100644 index 15fe9bb74..000000000 --- a/src/xrt/drivers/ht/ht_image_math.hpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2021, Collabora, Ltd. -// SPDX-License-Identifier: BSL-1.0 -/*! - * @file - * @brief Helper math to do things with images for the camera-based hand tracker - * @author Moses Turner - * @ingroup drv_ht - */ - -#pragma once - -#include "math/m_vec3.h" - -#include "ht_driver.hpp" - -#include -#include - -struct ht_view; - -cv::Scalar -hsv2rgb(float fH, float fS, float fV); - -struct xrt_vec3 -raycoord(struct ht_view *htv, struct xrt_vec3 model_out); - -/*! - * Returns a 2x3 transform matrix that takes you back from the blackbarred image to the original image. - */ -cv::Matx23f -blackbar(const cv::Mat &in, cv::Mat &out, xrt_size out_size); - -/*! - * This is a template so that we can use xrt_vec3 or xrt_vec2. - * Please don't use this for anything other than xrt_vec3 or xrt_vec2! - */ -template -T -transformVecBy2x3(T in, cv::Matx23f warp_back) -{ - T rr; - rr.x = (in.x * warp_back(0, 0)) + (in.y * warp_back(0, 1)) + warp_back(0, 2); - rr.y = (in.x * warp_back(1, 0)) + (in.y * warp_back(1, 1)) + warp_back(1, 2); - return rr; -} - -//! Draw some dots. Factors out some boilerplate. -void -handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type); - -void -centerAndRotationFromJoints(struct ht_view *htv, - const xrt_vec2 *wrist, - const xrt_vec2 *index, - const xrt_vec2 *middle, - const xrt_vec2 *little, - xrt_vec2 *out_center, - xrt_vec2 *out_wrist_to_middle); - -struct DetectionModelOutput -rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 center, xrt_vec2 wrist_to_middle, DetectionModelOutput *out); - -void -planarize(const cv::Mat &input, uint8_t *output); diff --git a/src/xrt/drivers/ht/ht_interface.h b/src/xrt/drivers/ht/ht_interface.h index 7a84c8d54..5f3a92eea 100644 --- a/src/xrt/drivers/ht/ht_interface.h +++ b/src/xrt/drivers/ht/ht_interface.h @@ -13,27 +13,13 @@ #include "xrt/xrt_device.h" #include "tracking/t_tracking.h" +#include "xrt/xrt_prober.h" #ifdef __cplusplus extern "C" { #endif - -enum ht_run_type -{ - HT_RUN_TYPE_VALVE_INDEX, - HT_RUN_TYPE_NORTH_STAR, -}; -// YES this is stupid. PLEASE bikeshed me on this when the time comes, this is terrible. - -// With Valve Index, we use the frameserver prober and look for the Valve Index camera, and we give the joint poses out -// in the space of the left (unrectified) camera. - -// With North Star, (really just Moses's headset :)) we hard-code to opening up a depthai_fs_stereo_rgb and give the -// joint poses out in the space of the "center" of the stereo camera. (Why? Because I don't have exact extrinsics from -// the NS "eyes" to the cameras. Less code this way.) - -/*! +/* * @defgroup drv_ht Camera based hand tracking * @ingroup drv * diff --git a/src/xrt/drivers/ht/ht_model.hpp b/src/xrt/drivers/ht/ht_model.hpp deleted file mode 100644 index b22666a3b..000000000 --- a/src/xrt/drivers/ht/ht_model.hpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2021, Collabora, Ltd. -// SPDX-License-Identifier: BSL-1.0 -/*! - * @file - * @brief Code to run machine learning models for camera-based hand tracker. - * @author Moses Turner - * @author Marcus Edel - * @author Simon Zeni - * @ingroup drv_ht - */ - -#pragma once - -#include "ht_driver.hpp" - -#include - -#include -#include - -// forward-declare -struct OrtApi; -struct OrtEnv; -struct OrtMemoryInfo; -struct OrtSession; -struct OrtSessionOptions; -struct OrtValue; -struct ht_device; - -class ht_model -{ - struct ht_device *device = nullptr; - - const OrtApi *api = nullptr; - OrtEnv *env = nullptr; - - OrtMemoryInfo *palm_detection_meminfo = nullptr; - OrtSession *palm_detection_session = nullptr; - OrtValue *palm_detection_tensor = nullptr; - std::array palm_detection_data; - - std::mutex hand_landmark_lock; - OrtMemoryInfo *hand_landmark_meminfo = nullptr; - OrtSession *hand_landmark_session = nullptr; - OrtValue *hand_landmark_tensor = nullptr; - std::array hand_landmark_data; - - void - init_palm_detection(OrtSessionOptions *opts); - void - init_hand_landmark(OrtSessionOptions *opts); - -public: - ht_model(struct ht_device *htd); - ~ht_model(); - - std::vector - palm_detection(ht_view *htv, const cv::Mat &input); - Hand2D - hand_landmark(const cv::Mat input); -}; diff --git a/src/xrt/drivers/ht/ht_nms.hpp b/src/xrt/drivers/ht/ht_nms.hpp deleted file mode 100644 index d41933385..000000000 --- a/src/xrt/drivers/ht/ht_nms.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2021, Collabora, Ltd. -// SPDX-License-Identifier: BSL-1.0 -/*! - * @file - * @brief Code to deal with bounding boxes for camera-based hand-tracking. - * @author Moses Turner - * @author Marcus Edel - * @ingroup drv_ht - */ - -#pragma once - -#include "xrt/xrt_defines.h" - -#include - -struct Box -{ - float cx; - float cy; - float w; - float h; -}; - -struct NMSPalm -{ - Box bbox; - struct xrt_vec2 keypoints[7]; - float confidence; -}; - -std::vector -filterBoxesWeightedAvg(const std::vector &detections, float min_iou = 0.1f); diff --git a/src/xrt/drivers/meson.build b/src/xrt/drivers/meson.build index eb12efc91..740531c9b 100644 --- a/src/xrt/drivers/meson.build +++ b/src/xrt/drivers/meson.build @@ -84,22 +84,19 @@ lib_drv_ulv2 = static_library( build_by_default: 'ulv2' in drivers, ) -lib_drv_ht = static_library( - 'drv_ht', - files( - 'ht/ht_algorithm.cpp', - 'ht/ht_driver.cpp', - 'ht/ht_driver.hpp', - 'ht/ht_interface.h', - 'ht/ht_model.cpp', - 'ht/ht_hand_math.cpp', - 'ht/ht_image_math.cpp', - 'ht/ht_nms.cpp', - ), - include_directories: [xrt_include, cjson_include], - dependencies: [aux, opencv, onnxruntime, eigen3], - build_by_default: 'handtracking' in drivers, -) +if 'handtracking' in drivers + lib_drv_ht = static_library( + 'drv_ht', + files( + 'ht/ht_driver.c', + 'ht/ht_interface.h', + ), + include_directories: [xrt_include, cjson_include], + dependencies: [aux], + link_with: [lib_t_hand_async, lib_t_ht_old_rgb], + # build_by_default: 'handtracking' in drivers, + ) +endif lib_drv_cemu = static_library( 'drv_cemu', diff --git a/src/xrt/tracking/hand/CMakeLists.txt b/src/xrt/tracking/hand/CMakeLists.txt index d5ab92d6a..17b12fa88 100644 --- a/src/xrt/tracking/hand/CMakeLists.txt +++ b/src/xrt/tracking/hand/CMakeLists.txt @@ -1,6 +1,8 @@ # Copyright 2022, Collabora, Ltd. # SPDX-License-Identifier: BSL-1.0 +add_subdirectory(old_rgb) + ### # Async wrapper around sync helper. diff --git a/src/xrt/tracking/hand/meson.build b/src/xrt/tracking/hand/meson.build index f06119542..f09461ec3 100644 --- a/src/xrt/tracking/hand/meson.build +++ b/src/xrt/tracking/hand/meson.build @@ -1,6 +1,8 @@ # Copyright 2022, Collabora, Ltd. # SPDX-License-Identifier: BSL-1.0 +subdir('old_rgb') + ### # Async wrapper around sync helper. diff --git a/src/xrt/tracking/hand/old_rgb/CMakeLists.txt b/src/xrt/tracking/hand/old_rgb/CMakeLists.txt new file mode 100644 index 000000000..525c3d6cb --- /dev/null +++ b/src/xrt/tracking/hand/old_rgb/CMakeLists.txt @@ -0,0 +1,31 @@ +# Copyright 2019-2022, Collabora, Ltd. +# SPDX-License-Identifier: BSL-1.0 + + +# Old RGB hand tracking library. +add_library( + t_ht_old_rgb STATIC + rgb_hand_math.hpp + rgb_image_math.hpp + rgb_interface.h + rgb_model.hpp + rgb_nms.hpp + rgb_sync.cpp + rgb_sync.hpp + ) +target_link_libraries( + t_ht_old_rgb + PUBLIC aux-includes xrt-external-cjson + PRIVATE + aux_math + aux_tracking + aux_os + aux_util + aux_gstreamer + ONNXRuntime::ONNXRuntime + ${OpenCV_LIBRARIES} + ) +if(XRT_HAVE_OPENCV) + target_include_directories(t_ht_old_rgb SYSTEM PRIVATE ${OpenCV_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR}) + target_link_libraries(t_ht_old_rgb PUBLIC ${OpenCV_LIBRARIES}) +endif() diff --git a/src/xrt/tracking/hand/old_rgb/meson.build b/src/xrt/tracking/hand/old_rgb/meson.build index 7a67a0d8a..11e534e2a 100644 --- a/src/xrt/tracking/hand/old_rgb/meson.build +++ b/src/xrt/tracking/hand/old_rgb/meson.build @@ -5,15 +5,15 @@ # Old RGB hand tracking library. lib_t_ht_old_rgb = static_library( 't_ht_old_rgb', - files( - 'rgb_hand_math.hpp', - 'rgb_image_math.hpp', - 'rgb_interface.h', - 'rgb_model.hpp', - 'rgb_nms.hpp', - 'rgb_sync.cpp', - 'rgb_sync.hpp', - ), - include_directories: [xrt_include, cjson_include], - dependencies: [aux, onnxruntime, opencv, eigen3] + files( + 'rgb_hand_math.hpp', + 'rgb_image_math.hpp', + 'rgb_interface.h', + 'rgb_model.hpp', + 'rgb_nms.hpp', + 'rgb_sync.cpp', + 'rgb_sync.hpp', + ), + include_directories: [xrt_include, cjson_include], + dependencies: [aux, onnxruntime, opencv, eigen3] ) diff --git a/src/xrt/drivers/ht/readme.md b/src/xrt/tracking/hand/old_rgb/readme.md similarity index 92% rename from src/xrt/drivers/ht/readme.md rename to src/xrt/tracking/hand/old_rgb/readme.md index b77cb8cab..1d9e6434b 100644 --- a/src/xrt/drivers/ht/readme.md +++ b/src/xrt/tracking/hand/old_rgb/readme.md @@ -8,7 +8,7 @@ SPDX-License-Identifier: BSL-1.0 # What is this? This is a driver to do optical hand tracking. The actual code mostly written by Moses Turner, with tons of help from Marcus Edel, Jakob Bornecrantz, Ryan Pavlik, and Christoph Haag. Jakob Bornecrantz and Marcus Edel are the main people who gathered training data for the initial Collabora models. -Currently, it works with the Valve Index. In the past, it was tested with a Luxonis 1090ffc, and in the future it should work fine with devices like the T265, Leap Motion Controller (w/ LeapUVC), or PS4/PS5 cam, should there be enough interest for any of those. +In `main` it only works with Valve Index, although we've used a lot of Luxonis cameras in development. In the future it should work fine with devices like the T265, or PS4/PS5 cam, should there be enough interest for any of those. Under good lighting, I would say it's around as good as Oculus Quest 2's hand tracking. Not that I'm trying to make any claims; that's just what I honestly would tell somebody if they are wondering if it's worth testing out. diff --git a/src/xrt/drivers/ht/ht_hand_math.cpp b/src/xrt/tracking/hand/old_rgb/rgb_hand_math.hpp similarity index 98% rename from src/xrt/drivers/ht/ht_hand_math.cpp rename to src/xrt/tracking/hand/old_rgb/rgb_hand_math.hpp index e7c200b3e..18486e77c 100644 --- a/src/xrt/drivers/ht/ht_hand_math.cpp +++ b/src/xrt/tracking/hand/old_rgb/rgb_hand_math.hpp @@ -1,3 +1,5 @@ +#pragma once + // Copyright 2021, Collabora, Ltd. // SPDX-License-Identifier: BSL-1.0 /*! @@ -11,8 +13,7 @@ #include "math/m_api.h" #include "math/m_vec3.h" -#include "ht_driver.hpp" -#include "ht_hand_math.hpp" +#include "rgb_sync.hpp" #include "util/u_time.h" #include "xrt/xrt_defines.h" @@ -299,7 +300,7 @@ exp_smooth(double alpha, double y, double prev_y) } void -handEuroFiltersRun(struct ht_device *htd, HandHistory3D *f, Hand3D *out_hand) +handEuroFiltersRun(struct HandTracking *htd, HandHistory3D *f, Hand3D *out_hand) { // Assume present hand is in element 0! #if 0 @@ -375,7 +376,7 @@ handEuroFiltersRun(struct ht_device *htd, HandHistory3D *f, Hand3D *out_hand) } bool -rejectTooFar(struct ht_device *htd, Hand3D *hand) +rejectTooFar(struct HandTracking *htd, Hand3D *hand) { static const float max_dist = 1.0f; // this sucks too - make it bigger if you can. const float max_dist_from_camera_sqrd = max_dist * max_dist; @@ -394,7 +395,7 @@ reject: } bool -rejectTooClose(struct ht_device *htd, Hand3D *hand) +rejectTooClose(struct HandTracking *htd, Hand3D *hand) { const float min_dist = 0.12f; // Be a bit aggressive here - it's nice to not let people see our tracking fail // when the hands are way too close @@ -418,7 +419,7 @@ reject: } bool -rejectTinyPalm(struct ht_device *htd, Hand3D *hand) +rejectTinyPalm(struct HandTracking *htd, Hand3D *hand) { // This one sucks, because some people really have tiny hands. If at some point you can stop using it, stop // using it. diff --git a/src/xrt/drivers/ht/ht_image_math.cpp b/src/xrt/tracking/hand/old_rgb/rgb_image_math.hpp similarity index 94% rename from src/xrt/drivers/ht/ht_image_math.cpp rename to src/xrt/tracking/hand/old_rgb/rgb_image_math.hpp index 53131e4d7..8ef8bfa2b 100644 --- a/src/xrt/drivers/ht/ht_image_math.cpp +++ b/src/xrt/tracking/hand/old_rgb/rgb_image_math.hpp @@ -6,16 +6,29 @@ * @author Moses Turner * @ingroup drv_ht */ +#pragma once #include "math/m_vec2.h" #include "math/m_vec3.h" -#include "ht_image_math.hpp" - #include #include #include +/*! + * This is a template so that we can use xrt_vec3 or xrt_vec2. + * Please don't use this for anything other than xrt_vec3 or xrt_vec2! + */ +template +T +transformVecBy2x3(T in, cv::Matx23f warp_back) +{ + T rr; + rr.x = (in.x * warp_back(0, 0)) + (in.y * warp_back(0, 1)) + warp_back(0, 2); + rr.y = (in.x * warp_back(1, 0)) + (in.y * warp_back(1, 1)) + warp_back(1, 2); + return rr; +} + cv::Scalar hsv2rgb(float fH, float fS, float fV) { diff --git a/src/xrt/tracking/hand/old_rgb/rgb_interface.h b/src/xrt/tracking/hand/old_rgb/rgb_interface.h new file mode 100644 index 000000000..cc6c17e08 --- /dev/null +++ b/src/xrt/tracking/hand/old_rgb/rgb_interface.h @@ -0,0 +1,29 @@ +// Copyright 2022, Collabora, Ltd. +// SPDX-License-Identifier: BSL-1.0 +/*! + * @file + * @brief Public interface of old rgb hand tracking. + * @author Jakob Bornecrantz + * @ingroup aux_tracking + */ + +#include "tracking/t_tracking.h" +#include "tracking/t_hand_tracking.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/*! + * Create a old style RGB hand tracking pipeline. + * + * @ingroup aux_tracking + */ +struct t_hand_tracking_sync * +t_hand_tracking_sync_old_rgb_create(struct t_stereo_camera_calibration * calib); + + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/src/xrt/drivers/ht/ht_model.cpp b/src/xrt/tracking/hand/old_rgb/rgb_model.hpp similarity index 96% rename from src/xrt/drivers/ht/ht_model.cpp rename to src/xrt/tracking/hand/old_rgb/rgb_model.hpp index 5acecca1e..630eeeb0e 100644 --- a/src/xrt/drivers/ht/ht_model.cpp +++ b/src/xrt/tracking/hand/old_rgb/rgb_model.hpp @@ -11,18 +11,66 @@ // Many C api things were stolen from here (MIT license): // https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/fns_candy_style_transfer/fns_candy_style_transfer.c +#pragma once -#include "ht_driver.hpp" -#include "ht_image_math.hpp" -#include "ht_model.hpp" -#include "ht_nms.hpp" +#include "rgb_sync.hpp" +#include "rgb_image_math.hpp" +#include "rgb_nms.hpp" #include +#include #include #undef HEAVY_SCRIBBLE +// forward-declare +struct OrtApi; +struct OrtEnv; +struct OrtMemoryInfo; +struct OrtSession; +struct OrtSessionOptions; +struct OrtValue; + +namespace xrt::tracking::ht::old_rgb { + + +// struct ht_device; + +class ht_model +{ + HandTracking *device = nullptr; + + const OrtApi *api = nullptr; + OrtEnv *env = nullptr; + + OrtMemoryInfo *palm_detection_meminfo = nullptr; + OrtSession *palm_detection_session = nullptr; + OrtValue *palm_detection_tensor = nullptr; + std::array palm_detection_data; + + std::mutex hand_landmark_lock; + OrtMemoryInfo *hand_landmark_meminfo = nullptr; + OrtSession *hand_landmark_session = nullptr; + OrtValue *hand_landmark_tensor = nullptr; + std::array hand_landmark_data; + + void + init_palm_detection(OrtSessionOptions *opts); + void + init_hand_landmark(OrtSessionOptions *opts); + +public: + ht_model(struct HandTracking *htd); + ~ht_model(); + + std::vector + palm_detection(ht_view *htv, const cv::Mat &input); + Hand2D + hand_landmark(const cv::Mat input); +}; + + /* * Anchors data taken from mediapipe's palm detection, used for single-shot detector model. * @@ -337,7 +385,7 @@ ht_model::init_hand_landmark(OrtSessionOptions *opts) assert(is_tensor); } -ht_model::ht_model(struct ht_device *htd) : device(htd), api(OrtGetApiBase()->GetApi(ORT_API_VERSION)) +ht_model::ht_model(struct HandTracking *htd) : device(htd), api(OrtGetApiBase()->GetApi(ORT_API_VERSION)) { ORT(CreateEnv(ORT_LOGGING_LEVEL_WARNING, "monado_ht", &this->env)); @@ -594,3 +642,5 @@ ht_model::hand_landmark(const cv::Mat input) return hand; } + +} // namespace xrt::tracking::ht::old_rgb diff --git a/src/xrt/drivers/ht/ht_nms.cpp b/src/xrt/tracking/hand/old_rgb/rgb_nms.hpp similarity index 95% rename from src/xrt/drivers/ht/ht_nms.cpp rename to src/xrt/tracking/hand/old_rgb/rgb_nms.hpp index 5c11e4202..03e96bef0 100644 --- a/src/xrt/drivers/ht/ht_nms.cpp +++ b/src/xrt/tracking/hand/old_rgb/rgb_nms.hpp @@ -8,10 +8,25 @@ * @ingroup drv_ht */ -#include "ht_nms.hpp" - +#include "rgb_sync.hpp" #include +struct Box +{ + float cx; + float cy; + float w; + float h; +}; + +struct NMSPalm +{ + Box bbox; + struct xrt_vec2 keypoints[7]; + float confidence; +}; + + static float overlap(float x1, float w1, float x2, float w2) { diff --git a/src/xrt/drivers/ht/ht_algorithm.cpp b/src/xrt/tracking/hand/old_rgb/rgb_sync.cpp similarity index 58% rename from src/xrt/drivers/ht/ht_algorithm.cpp rename to src/xrt/tracking/hand/old_rgb/rgb_sync.cpp index c40ffed40..6e926e7a5 100644 --- a/src/xrt/drivers/ht/ht_algorithm.cpp +++ b/src/xrt/tracking/hand/old_rgb/rgb_sync.cpp @@ -1,27 +1,98 @@ -// Copyright 2021, Collabora, Ltd. +// Copyright 2022, Collabora, Ltd. // SPDX-License-Identifier: BSL-1.0 /*! * @file - * @brief Camera based hand tracking mainloop algorithm. - * @author Moses Turner - * @ingroup drv_ht + * @brief Old RGB hand tracking main file. + * @author Jakob Bornecrantz + * @ingroup aux_tracking */ +#include "rgb_interface.h" +#include "rgb_sync.hpp" +#include "xrt/xrt_frame.h" + + +using namespace xrt::tracking::ht::old_rgb; + + + #include "xrt/xrt_defines.h" #include "math/m_vec2.h" #include "util/u_frame.h" #include "util/u_trace_marker.h" -#include "ht_algorithm.hpp" -#include "ht_driver.hpp" -#include "ht_hand_math.hpp" -#include "ht_image_math.hpp" -#include "ht_model.hpp" + #include "templates/NaivePermutationSort.hpp" #include + +// Copyright 2021, Collabora, Ltd. +// SPDX-License-Identifier: BSL-1.0 +/*! + * @file + * @brief Camera based hand tracking driver code. + * @author Moses Turner + * @author Jakob Bornecrantz + * @ingroup drv_ht + */ + +#include "gstreamer/gst_pipeline.h" +#include "gstreamer/gst_sink.h" + +#include "xrt/xrt_defines.h" +#include "xrt/xrt_frame.h" +#include "xrt/xrt_frameserver.h" + +#include "os/os_time.h" +#include "os/os_threading.h" + +#include "math/m_api.h" +#include "math/m_eigen_interop.hpp" + +#include "util/u_device.h" +#include "util/u_frame.h" +#include "util/u_sink.h" +#include "util/u_format.h" +#include "util/u_logging.h" +#include "util/u_time.h" +#include "util/u_trace_marker.h" +#include "util/u_time.h" +#include "util/u_json.h" +#include "util/u_config_json.h" + +#include "tracking/t_frame_cv_mat_wrapper.hpp" +#include "tracking/t_calibration_opencv.hpp" + +#include "rgb_hand_math.hpp" +#include "rgb_image_math.hpp" +#include "rgb_model.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + // Flags to tell state tracker that these are indeed valid joints static const enum xrt_space_relation_flags valid_flags_ht = (enum xrt_space_relation_flags)( XRT_SPACE_RELATION_ORIENTATION_VALID_BIT | XRT_SPACE_RELATION_ORIENTATION_TRACKED_BIT | @@ -29,7 +100,7 @@ static const enum xrt_space_relation_flags valid_flags_ht = (enum xrt_space_rela static void -htProcessJoint(struct ht_device *htd, +htProcessJoint(struct HandTracking *htd, struct xrt_vec3 model_out, struct xrt_hand_joint_set *hand, enum xrt_hand_joint idx) @@ -62,7 +133,7 @@ errHistory2D(const HandHistory2DBBox &past, const Palm7KP &present) static std::vector htImageToKeypoints(struct ht_view *htv) { - struct ht_device *htd = htv->htd; + struct HandTracking *htd = htv->htd; ht_model *htm = htv->htm; cv::Mat raw_input = htv->run_model_on_this; @@ -247,7 +318,7 @@ jsonAddJoint(cJSON *into_this, xrt_pose loc, const char *name) } void -jsonMaybeAddSomeHands(struct ht_device *htd, bool err) +jsonMaybeAddSomeHands(struct HandTracking *htd, bool err) { if (!htd->tracking_should_record_dataset) { return; @@ -311,37 +382,7 @@ jsonMaybeAddSomeHands(struct ht_device *htd, bool err) static void -htExitFrame(struct ht_device *htd, - bool err, - struct xrt_hand_joint_set final_hands_ordered_by_handedness[2], - uint64_t timestamp) -{ - - os_mutex_lock(&htd->openxr_hand_data_mediator); - if (err) { - htd->hands_for_openxr[0].is_active = false; - htd->hands_for_openxr[1].is_active = false; - } else { - memcpy(&htd->hands_for_openxr[0], &final_hands_ordered_by_handedness[0], - sizeof(struct xrt_hand_joint_set)); - memcpy(&htd->hands_for_openxr[1], &final_hands_ordered_by_handedness[1], - sizeof(struct xrt_hand_joint_set)); - htd->hands_for_openxr_timestamp = timestamp; - HT_DEBUG(htd, "Adding ts %zu", htd->hands_for_openxr_timestamp); - } - os_mutex_unlock(&htd->openxr_hand_data_mediator); -#ifdef EXPERIMENTAL_DATASET_RECORDING - if (htd->tracking_should_record_dataset) { - // Add nothing-entry to json file. - jsonMaybeAddSomeHands(htd, err); - htd->gst.current_index++; - } -#endif -} - - -static void -htJointDisparityMath(struct ht_device *htd, Hand2D *hand_in_left, Hand2D *hand_in_right, Hand3D *out_hand) +htJointDisparityMath(struct HandTracking *htd, Hand2D *hand_in_left, Hand2D *hand_in_right, Hand3D *out_hand) { for (int i = 0; i < 21; i++) { // Believe it or not, this is where the 3D stuff happens! @@ -361,29 +402,416 @@ htJointDisparityMath(struct ht_device *htd, Hand2D *hand_in_left, Hand2D *hand_i } int64_t last_frame, this_frame; +DEBUG_GET_ONCE_LOG_OPTION(ht_log, "HT_LOG", U_LOGGING_WARN) + +/*! + * Setup helper functions. + */ + +static bool +getCalibration(struct HandTracking *htd, t_stereo_camera_calibration *calibration) +{ + xrt::auxiliary::tracking::StereoCameraCalibrationWrapper wrap(calibration); + xrt_vec3 trans = {(float)wrap.camera_translation_mat(0, 0), (float)wrap.camera_translation_mat(1, 0), + (float)wrap.camera_translation_mat(2, 0)}; + htd->baseline = m_vec3_len(trans); + +#if 0 + std::cout << "\n\nTRANSLATION VECTOR IS\n" << wrap.camera_translation_mat; + std::cout << "\n\nROTATION FROM LEFT TO RIGHT IS\n" << wrap.camera_rotation_mat << "\n"; +#endif + + cv::Matx34d P1; + cv::Matx34d P2; + + cv::Matx44d Q; + + // The only reason we're calling stereoRectify is because we want R1 and R2 for the + cv::stereoRectify(wrap.view[0].intrinsics_mat, // cameraMatrix1 + wrap.view[0].distortion_mat, // distCoeffs1 + wrap.view[1].intrinsics_mat, // cameraMatrix2 + wrap.view[1].distortion_mat, // distCoeffs2 + wrap.view[0].image_size_pixels_cv, // imageSize* + wrap.camera_rotation_mat, // R + wrap.camera_translation_mat, // T + htd->views[0].rotate_camera_to_stereo_camera, // R1 + htd->views[1].rotate_camera_to_stereo_camera, // R2 + P1, // P1 + P2, // P2 + Q, // Q + 0, // flags + -1.0f, // alpha + cv::Size(), // newImageSize + NULL, // validPixROI1 + NULL); // validPixROI2 + + //* Good enough guess that view 0 and view 1 are the same size. + + for (int i = 0; i < 2; i++) { + htd->views[i].cameraMatrix = wrap.view[i].intrinsics_mat; + + htd->views[i].distortion = wrap.view[i].distortion_fisheye_mat; + } + + htd->one_view_size_px.w = wrap.view[0].image_size_pixels.w; + htd->one_view_size_px.h = wrap.view[0].image_size_pixels.h; + + U_LOG_E("%d %d %p %p", htd->one_view_size_px.w, htd->one_view_size_px.h, + (void *)&htd->one_view_size_px.w, (void *)&htd->one_view_size_px.h); + + + + cv::Matx33d rotate_stereo_camera_to_left_camera = htd->views[0].rotate_camera_to_stereo_camera.inv(); + + xrt_matrix_3x3 s; + s.v[0] = rotate_stereo_camera_to_left_camera(0, 0); + s.v[1] = rotate_stereo_camera_to_left_camera(0, 1); + s.v[2] = rotate_stereo_camera_to_left_camera(0, 2); + + s.v[3] = rotate_stereo_camera_to_left_camera(1, 0); + s.v[4] = rotate_stereo_camera_to_left_camera(1, 1); + s.v[5] = rotate_stereo_camera_to_left_camera(1, 2); + + s.v[6] = rotate_stereo_camera_to_left_camera(2, 0); + s.v[7] = rotate_stereo_camera_to_left_camera(2, 1); + s.v[8] = rotate_stereo_camera_to_left_camera(2, 2); + + xrt_quat tmp; + + math_quat_from_matrix_3x3(&s, &tmp); + + // Weird that I have to invert this quat, right? I think at some point - like probably just above this - I must + // have swapped row-major and col-major - remember, if you transpose a rotation matrix, you get its inverse. + // Doesn't matter that I don't understand - non-inverted looks definitely wrong, inverted looks definitely + // right. + math_quat_invert(&tmp, &htd->stereo_camera_to_left_camera); + +#if 0 + U_LOG_E("%f %f %f %f", htd->stereo_camera_to_left_camera.w, htd->stereo_camera_to_left_camera.x, + htd->stereo_camera_to_left_camera.y, htd->stereo_camera_to_left_camera.z); +#endif + + return true; +} + +#if 0 +static void +getStartupConfig(struct HandTracking *htd, const cJSON *startup_config) +{ + const cJSON *palm_detection_type = u_json_get(startup_config, "palm_detection_model"); + const cJSON *keypoint_estimation_type = u_json_get(startup_config, "keypoint_estimation_model"); + const cJSON *uvc_wire_format = u_json_get(startup_config, "uvc_wire_format"); + + // IsString does its own null-checking + if (cJSON_IsString(palm_detection_type)) { + bool is_collabora = (strcmp(cJSON_GetStringValue(palm_detection_type), "collabora") == 0); + bool is_mediapipe = (strcmp(cJSON_GetStringValue(palm_detection_type), "mediapipe") == 0); + if (!is_collabora && !is_mediapipe) { + HT_WARN(htd, "Unknown palm detection type %s - should be \"collabora\" or \"mediapipe\"", + cJSON_GetStringValue(palm_detection_type)); + } + htd->startup_config.palm_detection_use_mediapipe = is_mediapipe; + } + + if (cJSON_IsString(keypoint_estimation_type)) { + bool is_collabora = (strcmp(cJSON_GetStringValue(keypoint_estimation_type), "collabora") == 0); + bool is_mediapipe = (strcmp(cJSON_GetStringValue(keypoint_estimation_type), "mediapipe") == 0); + if (!is_collabora && !is_mediapipe) { + HT_WARN(htd, "Unknown keypoint estimation type %s - should be \"collabora\" or \"mediapipe\"", + cJSON_GetStringValue(keypoint_estimation_type)); + } + htd->startup_config.keypoint_estimation_use_mediapipe = is_mediapipe; + } + + if (cJSON_IsString(uvc_wire_format)) { + bool is_yuv = (strcmp(cJSON_GetStringValue(uvc_wire_format), "yuv") == 0); + bool is_mjpeg = (strcmp(cJSON_GetStringValue(uvc_wire_format), "mjpeg") == 0); + if (!is_yuv && !is_mjpeg) { + HT_WARN(htd, "Unknown wire format type %s - should be \"yuv\" or \"mjpeg\"", + cJSON_GetStringValue(uvc_wire_format)); + } + if (is_yuv) { + HT_DEBUG(htd, "Using YUYV422!"); + htd->startup_config.desired_format = XRT_FORMAT_YUYV422; + } else { + HT_DEBUG(htd, "Using MJPEG!"); + htd->startup_config.desired_format = XRT_FORMAT_MJPEG; + } + } +} + +static void +getUserConfig(struct HandTracking *htd) +{ + // The game here is to avoid bugs + be paranoid, not to be fast. If you see something that seems "slow" - don't + // fix it. Any of the tracking code is way stickier than this could ever be. + + struct u_config_json config_json = {}; + + u_config_json_open_or_create_main_file(&config_json); + if (!config_json.file_loaded) { + return; + } + + cJSON *ht_config_json = cJSON_GetObjectItemCaseSensitive(config_json.root, "config_ht"); + if (ht_config_json == NULL) { + return; + } + + // Don't get it twisted: initializing these to NULL is not cargo-culting. + // Uninitialized values on the stack aren't guaranteed to be 0, so these could end up pointing to what we + // *think* is a valid address but what is *not* one. + char *startup_config_string = NULL; + char *dynamic_config_string = NULL; + + { + const cJSON *startup_config_string_json = u_json_get(ht_config_json, "startup_config_index"); + if (cJSON_IsString(startup_config_string_json)) { + startup_config_string = cJSON_GetStringValue(startup_config_string_json); + } + + const cJSON *dynamic_config_string_json = u_json_get(ht_config_json, "dynamic_config_index"); + if (cJSON_IsString(dynamic_config_string_json)) { + dynamic_config_string = cJSON_GetStringValue(dynamic_config_string_json); + } + } + + if (startup_config_string != NULL) { + const cJSON *startup_config_obj = + u_json_get(u_json_get(ht_config_json, "startup_configs"), startup_config_string); + getStartupConfig(htd, startup_config_obj); + } + + if (dynamic_config_string != NULL) { + const cJSON *dynamic_config_obj = + u_json_get(u_json_get(ht_config_json, "dynamic_configs"), dynamic_config_string); + { + ht_dynamic_config *hdc = &htd->dynamic_config; + // Do the thing + u_json_get_string_into_array(u_json_get(dynamic_config_obj, "name"), hdc->name, 64); + + u_json_get_float(u_json_get(dynamic_config_obj, "hand_fc_min"), &hdc->hand_fc_min.val); + u_json_get_float(u_json_get(dynamic_config_obj, "hand_fc_min_d"), &hdc->hand_fc_min_d.val); + u_json_get_float(u_json_get(dynamic_config_obj, "hand_beta"), &hdc->hand_beta.val); + + u_json_get_float(u_json_get(dynamic_config_obj, "nms_iou"), &hdc->nms_iou.val); + u_json_get_float(u_json_get(dynamic_config_obj, "nms_threshold"), &hdc->nms_threshold.val); + + u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_nms_detections"), + &hdc->scribble_nms_detections); + u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_raw_detections"), + &hdc->scribble_raw_detections); + u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_2d_keypoints"), + &hdc->scribble_2d_keypoints); + u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_bounding_box"), + &hdc->scribble_bounding_box); + + char *dco_str = cJSON_Print(dynamic_config_obj); + U_LOG_D("Config %s %s", dynamic_config_string, dco_str); + free(dco_str); + } + } + + + + cJSON_Delete(config_json.root); + return; +} +#endif + +static void +userConfigSetDefaults(struct HandTracking *htd) +{ + // Admit defeat: for now, Mediapipe's are still better than ours. + htd->startup_config.palm_detection_use_mediapipe = true; + htd->startup_config.keypoint_estimation_use_mediapipe = true; + + // Make sure you build DebugOptimized! + htd->startup_config.desired_format = XRT_FORMAT_YUYV422; + + + ht_dynamic_config *hdc = &htd->dynamic_config; + + hdc->scribble_nms_detections = true; + hdc->scribble_raw_detections = false; + hdc->scribble_2d_keypoints = true; + hdc->scribble_bounding_box = false; + + hdc->hand_fc_min.min = 0.0f; + hdc->hand_fc_min.max = 50.0f; + hdc->hand_fc_min.step = 0.05f; + hdc->hand_fc_min.val = FCMIN_HAND; + + hdc->hand_fc_min_d.min = 0.0f; + hdc->hand_fc_min_d.max = 50.0f; + hdc->hand_fc_min_d.step = 0.05f; + hdc->hand_fc_min_d.val = FCMIN_D_HAND; + + + hdc->hand_beta.min = 0.0f; + hdc->hand_beta.max = 50.0f; + hdc->hand_beta.step = 0.05f; + hdc->hand_beta.val = BETA_HAND; + + hdc->max_vel.min = 0.0f; + hdc->max_vel.max = 50.0f; + hdc->max_vel.step = 0.05f; + hdc->max_vel.val = 30.0f; // 30 m/s; about 108 kph. If your hand is going this fast, our tracking failing is the + // least of your problems. + + hdc->max_acc.min = 0.0f; + hdc->max_acc.max = 100.0f; + hdc->max_acc.step = 0.1f; + hdc->max_acc.val = 100.0f; // 100 m/s^2; about 10 Gs. Ditto. + + hdc->nms_iou.min = 0.0f; + hdc->nms_iou.max = 1.0f; + hdc->nms_iou.step = 0.01f; + + + hdc->nms_threshold.min = 0.0f; + hdc->nms_threshold.max = 1.0f; + hdc->nms_threshold.step = 0.01f; + + hdc->new_detection_threshold.min = 0.0f; + hdc->new_detection_threshold.max = 1.0f; + hdc->new_detection_threshold.step = 0.01f; + + + hdc->nms_iou.val = 0.05f; + hdc->nms_threshold.val = 0.3f; + hdc->new_detection_threshold.val = 0.6f; +} + + +static void +getModelsFolder(struct HandTracking *htd) +{ +// Please bikeshed me on this! I don't know where is the best place to put this stuff! +#if 0 + char exec_location[1024] = {}; + readlink("/proc/self/exe", exec_location, 1024); + + HT_DEBUG(htd, "Exec at %s\n", exec_location); + + int end = 0; + while (exec_location[end] != '\0') { + HT_DEBUG(htd, "%d", end); + end++; + } + + while (exec_location[end] != '/' && end != 0) { + HT_DEBUG(htd, "%d %c", end, exec_location[end]); + exec_location[end] = '\0'; + end--; + } + + strcat(exec_location, "../share/monado/hand-tracking-models/"); + strcpy(htd->startup_config.model_slug, exec_location); +#else + const char *xdg_home = getenv("XDG_CONFIG_HOME"); + const char *home = getenv("HOME"); + if (xdg_home != NULL) { + strcpy(htd->startup_config.model_slug, xdg_home); + } else if (home != NULL) { + strcpy(htd->startup_config.model_slug, home); + } else { + assert(false); + } + strcat(htd->startup_config.model_slug, "/.local/share/monado/hand-tracking-models/"); +#endif +} + + + +static void +htExitFrame(struct HandTracking *htd, + bool err, + struct xrt_hand_joint_set final_hands_ordered_by_handedness[2], + uint64_t timestamp, + struct xrt_hand_joint_set *out_left, + struct xrt_hand_joint_set *out_right, + uint64_t *out_timestamp_ns) +{ + + os_mutex_lock(&htd->openxr_hand_data_mediator); + if (err) { + out_left->is_active = false; + out_right->is_active = false; + } else { + *out_left = final_hands_ordered_by_handedness[0]; + *out_right = final_hands_ordered_by_handedness[1]; + + + *out_timestamp_ns = timestamp; + HT_DEBUG(htd, "Adding ts %zu", htd->hands_for_openxr_timestamp); + } + os_mutex_unlock(&htd->openxr_hand_data_mediator); +#ifdef EXPERIMENTAL_DATASET_RECORDING + if (htd->tracking_should_record_dataset) { + // Add nothing-entry to json file. + jsonMaybeAddSomeHands(htd, err); + htd->gst.current_index++; + } +#endif +} + +/* + * + * Member functions. + * + */ + +HandTracking::HandTracking() +{ + this->base.process = &HandTracking::cCallbackProcess; + this->base.destroy = &HandTracking::cCallbackDestroy; +} + +HandTracking::~HandTracking() +{ + // +} + +//!@todo vVERY BAD +static void +combine_frames_r8g8b8_hack(struct xrt_frame *l, struct xrt_frame *r, struct xrt_frame *f) +{ + // SINK_TRACE_MARKER(); + + uint32_t height = l->height; + + for (uint32_t y = 0; y < height; y++) { + uint8_t *dst = f->data + f->stride * y; + uint8_t *src = l->data + l->stride * y; + + for (uint32_t x = 0; x < l->width * 3; x++) { + *dst++ = *src++; + } + + dst = f->data + f->stride * y + l->width * 3; + src = r->data + r->stride * y; + for (uint32_t x = 0; x < r->width * 3; x++) { + *dst++ = *src++; + } + } +} + void -htRunAlgorithm(struct ht_device *htd) +HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, + struct xrt_frame *left_frame, + struct xrt_frame *right_frame, + struct xrt_hand_joint_set *out_left_hand, + struct xrt_hand_joint_set *out_right_hand, + uint64_t *out_timestamp_ns) { XRT_TRACE_MARKER(); -#ifdef EXPERIMENTAL_DATASET_RECORDING + HandTracking *htd = (struct HandTracking *)ht_sync; - if (htd->tracking_should_record_dataset) { - U_LOG_E("PUSHING!"); - uint64_t start = os_monotonic_get_ns(); - xrt_sink_push_frame(htd->gst.sink, htd->frame_for_process); - uint64_t end = os_monotonic_get_ns(); + // U_LOG_E("htd is at %p", htd); - if ((end - start) > 0.1 * U_TIME_1MS_IN_NS) { - U_LOG_E("Encoder overloaded!"); - } - - htd->gst.offset_ns = gstreamer_sink_get_timestamp_offset(htd->gst.gs); - htd->gst.last_frame_ns = htd->frame_for_process->timestamp - htd->gst.offset_ns; - } -#endif - - htd->current_frame_timestamp = htd->frame_for_process->timestamp; + htd->current_frame_timestamp = left_frame->timestamp; int64_t start, end; start = os_monotonic_get_ns(); @@ -393,32 +821,39 @@ htRunAlgorithm(struct ht_device *htd) * Setup views. */ - const int full_width = htd->frame_for_process->width; - const int full_height = htd->frame_for_process->height; - const int view_width = htd->camera.one_view_size_px.w; - const int view_height = htd->camera.one_view_size_px.h; + assert(left_frame->width == right_frame->width); + assert(left_frame->height == right_frame->height); + + const int full_height = left_frame->height; + const int full_width = left_frame->width*2; + + const int view_width = htd->one_view_size_px.w; + const int view_height = htd->one_view_size_px.h; - // assert(full_width == view_width * 2); assert(full_height == view_height); const cv::Size full_size = cv::Size(full_width, full_height); const cv::Size view_size = cv::Size(view_width, view_height); const cv::Point view_offsets[2] = {cv::Point(0, 0), cv::Point(view_width, 0)}; - cv::Mat full_frame(full_size, CV_8UC3, htd->frame_for_process->data, htd->frame_for_process->stride); - htd->views[0].run_model_on_this = full_frame(cv::Rect(view_offsets[0], view_size)); - htd->views[1].run_model_on_this = full_frame(cv::Rect(view_offsets[1], view_size)); + // cv::Mat full_frame(full_size, CV_8UC3, htd->frame_for_process->data, htd->frame_for_process->stride); + htd->views[0].run_model_on_this = cv::Mat(view_size, CV_8UC3, left_frame->data, left_frame->stride); + htd->views[1].run_model_on_this = cv::Mat(view_size, CV_8UC3, right_frame->data, right_frame->stride); - htd->mat_for_process = &full_frame; - // Check this every frame. We really, really, really don't want it to ever suddenly be null. - htd->debug_scribble = htd->debug_sink.sink != nullptr; + // Convenience + uint64_t timestamp = left_frame->timestamp; + + htd->debug_scribble = u_sink_debug_is_active(&htd->debug_sink); cv::Mat debug_output = {}; - xrt_frame *debug_frame = nullptr; // only use if htd->debug_scribble + xrt_frame *debug_frame = nullptr; + if (htd->debug_scribble) { - u_frame_clone(htd->frame_for_process, &debug_frame); + u_frame_create_one_off(XRT_FORMAT_R8G8B8, full_width, full_height, &debug_frame); + combine_frames_r8g8b8_hack(left_frame, right_frame, debug_frame); + debug_output = cv::Mat(full_size, CV_8UC3, debug_frame->data, debug_frame->stride); htd->views[0].debug_out_to_this = debug_output(cv::Rect(view_offsets[0], view_size)); htd->views[1].debug_out_to_this = debug_output(cv::Rect(view_offsets[1], view_size)); @@ -462,8 +897,6 @@ htRunAlgorithm(struct ht_device *htd) } - // Convenience - uint64_t timestamp = htd->frame_for_process->timestamp; if (htd->debug_scribble) { u_sink_debug_push_frame(&htd->debug_sink, debug_frame); @@ -474,12 +907,10 @@ htRunAlgorithm(struct ht_device *htd) // In the long run, this'll be a silly thing - we shouldn't always take the detection model's word for it // especially when part of the pipeline is an arbitrary confidence threshold. if (hands_in_left_view.size() == 0 || hands_in_right_view.size() == 0) { - htExitFrame(htd, true, NULL, 0); + htExitFrame(htd, true, NULL, timestamp, out_left_hand, out_right_hand, out_timestamp_ns); return; } - - std::vector possible_3d_hands; // for every possible combination of hands in left view and hands in right view, @@ -597,7 +1028,7 @@ htRunAlgorithm(struct ht_device *htd) if (htd->histories_3d.size() == 0) { HT_DEBUG(htd, "Bailing"); - htExitFrame(htd, true, NULL, 0); + htExitFrame(htd, true, NULL, timestamp, out_left_hand, out_right_hand, out_timestamp_ns); return; } @@ -756,6 +1187,77 @@ htRunAlgorithm(struct ht_device *htd) applyJointWidths(put_in_set); applyJointOrientations(put_in_set, xr_indices[i]); } - - htExitFrame(htd, false, final_hands_ordered_by_handedness, filtered_hands[0].timestamp); + htExitFrame(htd, false, final_hands_ordered_by_handedness, filtered_hands[0].timestamp, out_left_hand, out_right_hand, out_timestamp_ns); +} + +void +HandTracking::cCallbackDestroy(t_hand_tracking_sync *ht_sync) +{ + auto ht_ptr = &HandTracking::fromC(ht_sync); + + delete ht_ptr->views[0].htm; + delete ht_ptr->views[1].htm; + delete ht_ptr; +} + + +/* + * + * 'Exported' functions. + * + */ + +extern "C" t_hand_tracking_sync * +t_hand_tracking_sync_old_rgb_create(struct t_stereo_camera_calibration *calib) +{ + XRT_TRACE_MARKER(); + + auto htd = new HandTracking(); + + U_LOG_E("htd is at %p", (void*)htd); + + // Setup logging first. We like logging. + htd->log_level = debug_get_log_option_ht_log(); + + /* + * Get configuration + */ + + assert(calib != NULL); + getCalibration(htd, calib); + // Set defaults - most people won't have a config json and it won't get past here. + userConfigSetDefaults(htd); + getModelsFolder(htd); + + + htd->views[0].htd = htd; + htd->views[1].htd = htd; // :) + + htd->views[0].htm = new ht_model(htd); + htd->views[1].htm = new ht_model(htd); + + htd->views[0].view = 0; + htd->views[1].view = 1; + + u_var_add_root(htd, "Camera-based Hand Tracker", true); + + u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_fc_min, "hand_fc_min"); + u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_fc_min_d, "hand_fc_min_d"); + u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_beta, "hand_beta"); + u_var_add_draggable_f32(htd, &htd->dynamic_config.nms_iou, "nms_iou"); + u_var_add_draggable_f32(htd, &htd->dynamic_config.nms_threshold, "nms_threshold"); + u_var_add_draggable_f32(htd, &htd->dynamic_config.new_detection_threshold, "new_detection_threshold"); + + u_var_add_bool(htd, &htd->dynamic_config.scribble_raw_detections, "Scribble raw detections"); + u_var_add_bool(htd, &htd->dynamic_config.scribble_nms_detections, "Scribble NMS detections"); + u_var_add_bool(htd, &htd->dynamic_config.scribble_2d_keypoints, "Scribble 2D keypoints"); + u_var_add_bool(htd, &htd->dynamic_config.scribble_bounding_box, "Scribble bounding box"); + + u_var_add_sink_debug(htd, &htd->debug_sink, "i"); + + + HT_DEBUG(htd, "Hand Tracker initialized!"); + + + return &htd->base; } diff --git a/src/xrt/drivers/ht/ht_driver.hpp b/src/xrt/tracking/hand/old_rgb/rgb_sync.hpp similarity index 57% rename from src/xrt/drivers/ht/ht_driver.hpp rename to src/xrt/tracking/hand/old_rgb/rgb_sync.hpp index 4f0b37b63..a887973a4 100644 --- a/src/xrt/drivers/ht/ht_driver.hpp +++ b/src/xrt/tracking/hand/old_rgb/rgb_sync.hpp @@ -1,15 +1,17 @@ -// Copyright 2021, Collabora, Ltd. +// Copyright 2022, Collabora, Ltd. // SPDX-License-Identifier: BSL-1.0 /*! * @file - * @brief Defines and common includes for camera-based hand tracker + * @brief Old RGB hand tracking header. + * @author Jakob Bornecrantz * @author Moses Turner - * @ingroup drv_ht + * @ingroup tracking */ #pragma once -#include "ht_interface.h" +#include "tracking/t_hand_tracking.h" + #include "os/os_threading.h" #include "xrt/xrt_device.h" @@ -29,44 +31,25 @@ #include "util/u_template_historybuf.hpp" -#ifdef XRT_HAVE_GST -#include "gstreamer/gst_pipeline.h" -#include "gstreamer/gst_sink.h" -#endif - #include #include +namespace xrt::tracking::ht::old_rgb { using namespace xrt::auxiliary::util; -#define HT_TRACE(htd, ...) U_LOG_XDEV_IFL_T(&htd->base, htd->log_level, __VA_ARGS__) -#define HT_DEBUG(htd, ...) U_LOG_XDEV_IFL_D(&htd->base, htd->log_level, __VA_ARGS__) -#define HT_INFO(htd, ...) U_LOG_XDEV_IFL_I(&htd->base, htd->log_level, __VA_ARGS__) -#define HT_WARN(htd, ...) U_LOG_XDEV_IFL_W(&htd->base, htd->log_level, __VA_ARGS__) -#define HT_ERROR(htd, ...) U_LOG_XDEV_IFL_E(&htd->base, htd->log_level, __VA_ARGS__) +#define HT_TRACE(htd, ...) U_LOG_IFL_T(htd->log_level, __VA_ARGS__) +#define HT_DEBUG(htd, ...) U_LOG_IFL_D(htd->log_level, __VA_ARGS__) +#define HT_INFO(htd, ...) U_LOG_IFL_I(htd->log_level, __VA_ARGS__) +#define HT_WARN(htd, ...) U_LOG_IFL_W(htd->log_level, __VA_ARGS__) +#define HT_ERROR(htd, ...) U_LOG_IFL_E(htd->log_level, __VA_ARGS__) -// #define ht_ - - -// To make clang-tidy happy -#define opencv_distortion_param_num 4 - -/* - * - * Compile-time defines to choose where to get camera frames from and what kind of output to give out - * - */ #undef EXPERIMENTAL_DATASET_RECORDING #define FCMIN_BBOX_ORIENTATION 3.0f #define FCMIN_D_BB0X_ORIENTATION 10.0f #define BETA_BB0X_ORIENTATION 0.0f -// #define FCMIN_BBOX_POSITION 15.0f -// #define FCMIN_D_BB0X_POSITION 12.0f -// #define BETA_BB0X_POSITION 0.3f - #define FCMIN_BBOX_POSITION 30.0f #define FCMIN_D_BB0X_POSITION 25.0f #define BETA_BB0X_POSITION 0.01f @@ -79,10 +62,6 @@ using namespace xrt::auxiliary::util; class ht_model; -#ifdef __cplusplus -extern "C" { -#endif - enum HandJoint7Keypoint { WRIST_7KP = 0, @@ -127,7 +106,7 @@ enum HandJoint21Keypoint struct Palm7KP { struct xrt_vec2 kps[7]; - float confidence; // BETWEEN 0 and 1. okay???? okay????!??? + float confidence; // between 0 and 1 }; struct DetectionModelOutput @@ -184,12 +163,6 @@ struct HandHistory3D struct HandHistory2DBBox { - // Ugh, I should definitely iterate these somehow... - // m_filter_euro_vec2 m_filter_wrist; - // m_filter_euro_vec2 m_filter_index; - // m_filter_euro_vec2 m_filter_middle; - // m_filter_euro_vec2 m_filter_pinky; - m_filter_euro_vec2 m_filter_center; m_filter_euro_vec2 m_filter_direction; @@ -201,16 +174,15 @@ struct HandHistory2DBBox }; // Forward declaration for ht_view -struct ht_device; +struct HandTracking; struct ht_view { - ht_device *htd; + HandTracking *htd; ht_model *htm; - int view; // :))) + int view; - // Loaded from config file - cv::Matx distortion; + cv::Matx distortion; cv::Matx cameraMatrix; cv::Matx33d rotate_camera_to_stereo_camera; // R1 or R2 @@ -220,13 +192,6 @@ struct ht_view std::vector bbox_histories; }; -enum ht_detection_scribble -{ - HT_DETECTION_SCRIBBLE_ALL, - HT_DETECTION_SCRIBBLE_SOME, - HT_DETECTION_SCRIBBLE_NONE -}; - struct ht_dynamic_config { char name[64]; @@ -252,100 +217,89 @@ struct ht_startup_config char model_slug[1024]; }; -// This is all ad-hoc! Review very welcome! -struct ht_device +/*! + * Main class of old style RGB hand tracking. + * + * @ingroup aux_tracking + */ +struct HandTracking { - struct xrt_device base; - - struct xrt_tracking_origin tracking_origin; // probably cargo-culted - - struct xrt_frame_sink sink; - struct xrt_frame_node node; - - struct u_sink_debug debug_sink; // this must be bad. - - - struct - { - struct xrt_frame_context xfctx; - - struct xrt_fs *xfs; - - struct xrt_fs_mode mode; - - struct xrt_prober *prober; - - struct xrt_size one_view_size_px; - } camera; +public: + // Base thing, has to be first. + t_hand_tracking_sync base = {}; + struct u_sink_debug debug_sink = {}; + struct xrt_size one_view_size_px = {}; #if defined(EXPERIMENTAL_DATASET_RECORDING) struct { - struct u_var_button start_json_record; - } gui; + struct u_var_button start_json_record = {}; + } gui = {}; + struct { - struct gstreamer_pipeline *gp; - struct gstreamer_sink *gs; - struct xrt_frame_sink *sink; - struct xrt_frame_context xfctx; - uint64_t offset_ns; - uint64_t last_frame_ns; - uint64_t current_index; + struct gstreamer_pipeline *gp = nullptr; + struct gstreamer_sink *gs = nullptr; + struct xrt_frame_sink *sink = nullptr; + struct xrt_frame_context xfctx = {}; + uint64_t offset_ns = {}; + uint64_t last_frame_ns = {}; + uint64_t current_index = {}; - cJSON *output_root; - cJSON *output_array; - } gst; + cJSON *output_root = nullptr; + cJSON *output_array = nullptr; + } gst = {}; #endif - struct xrt_frame *frame_for_process; - cv::Mat *mat_for_process; + struct ht_view views[2] = {}; - struct ht_view views[2]; + float baseline = {}; + struct xrt_quat stereo_camera_to_left_camera = {}; - float baseline; - struct xrt_quat stereo_camera_to_left_camera; + uint64_t current_frame_timestamp = {}; // SUPER dumb. - uint64_t current_frame_timestamp; // SUPER dumb. + std::vector histories_3d = {}; - std::vector histories_3d; - - struct os_mutex openxr_hand_data_mediator; - struct xrt_hand_joint_set hands_for_openxr[2]; - uint64_t hands_for_openxr_timestamp; + struct os_mutex openxr_hand_data_mediator = {}; + struct xrt_hand_joint_set hands_for_openxr[2] = {}; + uint64_t hands_for_openxr_timestamp = {}; // Only change these when you have unlocked_between_frames, ie. when the hand tracker is between frames. - bool tracking_should_die; - bool tracking_should_record_dataset; - struct os_mutex unlocked_between_frames; + bool tracking_should_die = {}; + bool tracking_should_record_dataset = {}; + struct os_mutex unlocked_between_frames = {}; // Change this whenever you want - bool debug_scribble = true; + volatile bool debug_scribble = true; - ht_run_type run_type; + struct ht_startup_config startup_config = {}; + struct ht_dynamic_config dynamic_config = {}; + enum u_logging_level log_level = U_LOGGING_INFO; +public: + explicit HandTracking(); + ~HandTracking(); - struct ht_startup_config startup_config; - struct ht_dynamic_config dynamic_config; + static inline HandTracking & + fromC(t_hand_tracking_sync *ht_sync) + { + return *reinterpret_cast(ht_sync); + } + static void + cCallbackProcess(struct t_hand_tracking_sync *ht_sync, + struct xrt_frame *left_frame, + struct xrt_frame *right_frame, + struct xrt_hand_joint_set *out_left_hand, + struct xrt_hand_joint_set *out_right_hand, + uint64_t *out_timestamp_ns); - int dynamic_config_to_use; - - - - enum u_logging_level log_level; + static void + cCallbackDestroy(t_hand_tracking_sync *ht_sync); }; -static inline struct ht_device * -ht_device(struct xrt_device *xdev) -{ - return (struct ht_device *)xdev; -} - -#ifdef __cplusplus -} -#endif +} // namespace xrt::tracking::ht::old_rgb diff --git a/src/xrt/drivers/ht/templates/NaivePermutationSort.hpp b/src/xrt/tracking/hand/old_rgb/templates/NaivePermutationSort.hpp similarity index 100% rename from src/xrt/drivers/ht/templates/NaivePermutationSort.hpp rename to src/xrt/tracking/hand/old_rgb/templates/NaivePermutationSort.hpp