From e9f79c45bfb3417a4691cb9a5eacd20377b9fcee Mon Sep 17 00:00:00 2001 From: Moses Turner Date: Tue, 31 Jan 2023 00:58:19 -0600 Subject: [PATCH] h/mercury: Update hand tracking with new half-artificial model --- src/xrt/tracking/hand/mercury/CMakeLists.txt | 21 +- .../hand/mercury/hg_image_distorter.cpp | 615 ++++++++++++++ .../tracking/hand/mercury/hg_image_math.inl | 40 +- src/xrt/tracking/hand/mercury/hg_model.cpp | 680 ++++++++++++---- .../hand/mercury/hg_numerics_checker.hpp | 62 ++ src/xrt/tracking/hand/mercury/hg_sync.cpp | 745 ++++++++++------- src/xrt/tracking/hand/mercury/hg_sync.hpp | 209 +++-- src/xrt/tracking/hand/mercury/kine_common.hpp | 31 +- .../hand/mercury/kine_lm/lm_defines.hpp | 114 ++- .../hand/mercury/kine_lm/lm_interface.hpp | 2 + .../tracking/hand/mercury/kine_lm/lm_main.cpp | 749 +++++++++++------- .../kine_lm/lm_optimizer_params_packer.inl | 108 ++- tests/tests_levenbergmarquardt.cpp | 20 +- 13 files changed, 2472 insertions(+), 924 deletions(-) create mode 100644 src/xrt/tracking/hand/mercury/hg_image_distorter.cpp create mode 100644 src/xrt/tracking/hand/mercury/hg_numerics_checker.hpp diff --git a/src/xrt/tracking/hand/mercury/CMakeLists.txt b/src/xrt/tracking/hand/mercury/CMakeLists.txt index 4290aa5ba..962ac8e1b 100644 --- a/src/xrt/tracking/hand/mercury/CMakeLists.txt +++ b/src/xrt/tracking/hand/mercury/CMakeLists.txt @@ -24,6 +24,25 @@ target_link_libraries( ONNXRuntime::ONNXRuntime ) +add_library(t_ht_mercury_distorter STATIC hg_image_distorter.cpp) + +target_link_libraries(t_ht_mercury_distorter PRIVATE aux_math aux_tracking aux_os aux_util) + +target_include_directories( + t_ht_mercury_distorter SYSTEM PRIVATE ${OpenCV_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR} + ) + +target_link_libraries( + t_ht_mercury_distorter + PRIVATE + aux_math + aux_tracking + aux_os + aux_util + ${OpenCV_LIBRARIES} + ONNXRuntime::ONNXRuntime # no, wrong + ) + add_library(t_ht_mercury STATIC hg_sync.cpp hg_sync.hpp hg_interface.h kine_common.hpp) target_link_libraries( @@ -37,7 +56,7 @@ target_link_libraries( ONNXRuntime::ONNXRuntime t_ht_mercury_kine_lm t_ht_mercury_model - # ncnn # Soon... + t_ht_mercury_distorter ${OpenCV_LIBRARIES} ) diff --git a/src/xrt/tracking/hand/mercury/hg_image_distorter.cpp b/src/xrt/tracking/hand/mercury/hg_image_distorter.cpp new file mode 100644 index 000000000..180c55d92 --- /dev/null +++ b/src/xrt/tracking/hand/mercury/hg_image_distorter.cpp @@ -0,0 +1,615 @@ +// Copyright 2021-2022, Collabora, Ltd. +// SPDX-License-Identifier: BSL-1.0 +/*! + * @file + * @brief Utility to do batch stereographic projections of images. + * @author Moses Turner + * @ingroup tracking + */ + +#include +#include +#include + +#include +#include +#include "math/m_vec3.h" +#include "math/m_vec2.h" + +#include "util/u_time.h" + +#include "xrt/xrt_defines.h" +#include "math/m_space.h" +#include +#include +#include "os/os_time.h" +#include "util/u_logging.h" +#include "tracking/t_tracking.h" + +#include "tracking/t_calibration_opencv.hpp" +#include +#include + +//!@todo This is the wrong way of doing it; we should probably be allocating some space up front for each +//! keypoint estimator to scratch into. Patches welcome! +#define EIGEN_STACK_ALLOCATION_LIMIT 128 * 128 * 4 * 3 + +#include "math/m_eigen_interop.hpp" +#include "hg_sync.hpp" + +namespace xrt::tracking::hand::mercury { + +constexpr int wsize = 128; + +template using OutputSizedArray = Eigen::Array; +using OutputSizedFloatArray = OutputSizedArray; + +struct projection_state +{ + cv::Mat &input; + Eigen::Map> distorted_image_eigen; + t_camera_model_params dist; + + const projection_instructions &instructions; + + projection_state(const projection_instructions &instructions, cv::Mat &input, cv::Mat &output) + : input(input), distorted_image_eigen(output.data, 128, 128), instructions(instructions){}; +}; + + +// A private, purpose-optimized version of the Kannalla-Brandt projection function. +static void +project_kb4(const t_camera_model_params &dist, // + const OutputSizedFloatArray &x, // + const OutputSizedFloatArray &y, // + const OutputSizedFloatArray &z, // + OutputSizedFloatArray &out_x, // + OutputSizedFloatArray &out_y) +{ + + const OutputSizedFloatArray r2 = x * x + y * y; + const OutputSizedFloatArray r = sqrt(r2); + +#if 0 + const I theta = atan2(r, z); +#else + // This works here but will not work in eg. a nonlinear optimizer, or for more general applications. + // Takes about 200us off the runtime. + // Basically: + // * We can be sure that z won't be negative only because previous hand tracking code checks this for + // us. + // * x,y,z is normalized so we don't have to worry about numerical stability. + // If neither of these were true we'd definitely need atan2. + // + // Grrr, we really need a good library for fast approximations of trigonometric functions. + + const OutputSizedFloatArray theta = atan(r / z); +#endif + + const OutputSizedFloatArray theta2 = theta * theta; + + +#if 0 + I r_theta = dist.fisheye.k4 * theta2; + r_theta += dist.fisheye.k3; + r_theta *= theta2; + r_theta += dist.fisheye.k2; + r_theta *= theta2; + r_theta += dist.fisheye.k1; + r_theta *= theta2; + r_theta += 1; + r_theta *= theta; +#else + // This version gives the compiler more options to do FMAs and avoid temporaries. Down to floating point + // precision this should give the same result as the above. + OutputSizedFloatArray r_theta = + (((((dist.fisheye.k4 * theta2) + dist.fisheye.k3) * theta2 + dist.fisheye.k2) * theta2 + dist.fisheye.k1) * + theta2 + + 1) * + theta; +#endif + + const OutputSizedFloatArray mx = x * r_theta / r; + const OutputSizedFloatArray my = y * r_theta / r; + + out_x = dist.fx * mx + dist.cx; + out_y = dist.fy * my + dist.cy; +} + + + +Eigen::Vector3f +stereographic_to_direction(float sg_x, float sg_y) +{ + float X = sg_x; + float Y = sg_y; + + float denom = (1 + X * X + Y * Y); + + float x = (2 * X) / denom; + float y = (2 * Y) / denom; + float z = (-1 + X * X + Y * Y) / denom; + + // forward is -z + return {x, y, z}; + // return {x / -z, y / -z}; +} + + + +template +T +map_ranges(T value, T from_low, T from_high, T to_low, T to_high) +{ + return (value - from_low) * (to_high - to_low) / (from_high - from_low) + to_low; +} + +void +naive_remap(OutputSizedArray &image_x, + OutputSizedArray &image_y, + cv::Mat &input, + Eigen::Map> &output) +{ + output = 0; + + for (int y = 0; y < wsize; y++) { + for (int x = 0; x < wsize; x++) { + if (image_y(y, x) < 0) { + continue; + } + if (image_y(y, x) >= input.rows) { + continue; + } + if (image_x(y, x) < 0) { + continue; + } + if (image_x(y, x) >= input.cols) { + continue; + } + output(y, x) = input.at(image_y(y, x), image_x(y, x)); + } + } +} + + + +void +StereographicDistort(projection_state &mi) +{ + XRT_TRACE_MARKER(); + + OutputSizedFloatArray sg_x; + OutputSizedFloatArray sg_y; + + // Please vectorize me? + if (mi.instructions.flip) { + for (int x = 0; x < wsize; ++x) { + sg_x.col(x).setConstant(map_ranges((float)x, 0.0f, (float)wsize, + (float)mi.instructions.stereographic_radius, + (float)-mi.instructions.stereographic_radius)); + } + } else { + for (int x = 0; x < wsize; ++x) { + sg_x.col(x).setConstant(map_ranges((float)x, 0.0f, (float)wsize, + (float)-mi.instructions.stereographic_radius, + (float)mi.instructions.stereographic_radius)); + } + } + // Ditto? + for (int y = 0; y < wsize; ++y) { + sg_y.row(y).setConstant(map_ranges((float)y, 0.0f, (float)wsize, + (float)mi.instructions.stereographic_radius, + (float)-mi.instructions.stereographic_radius)); + } + + + // STEREOGRAPHIC DIRECTION TO 3D DIRECTION + // Note: we do not normalize the direction, because we don't need to. :) + + OutputSizedFloatArray dir_x; + OutputSizedFloatArray dir_y; + OutputSizedFloatArray dir_z; + + +#if 0 + dir_x = sg_x * 2; + dir_y = sg_y * 2; +#else + // Adding something to itself is faster than multiplying itself by 2 + // and unless you have fast-math the compiler won't do it for you. =/ + dir_x = sg_x + sg_x; + dir_y = sg_y + sg_y; +#endif + + dir_z = (sg_x * sg_x) + (sg_y * sg_y) - 1; + // END STEREOGRAPHIC DIRECTION TO 3D DIRECTION + + // QUATERNION ROTATING VECTOR + Eigen::Matrix rot_dir; + + OutputSizedFloatArray uv0; + OutputSizedFloatArray uv1; + OutputSizedFloatArray uv2; + + const Eigen::Quaternionf &q = mi.instructions.rot_quat; + + uv0 = q.y() * dir_z - q.z() * dir_y; + uv1 = q.z() * dir_x - q.x() * dir_z; + uv2 = q.x() * dir_y - q.y() * dir_x; + + uv0 += uv0; + uv1 += uv1; + uv2 += uv2; + + rot_dir.x() = dir_x + q.w() * uv0; + rot_dir.y() = dir_y + q.w() * uv1; + rot_dir.z() = dir_z + q.w() * uv2; + + rot_dir.x() += q.y() * uv2 - q.z() * uv1; + rot_dir.y() += q.z() * uv0 - q.x() * uv2; + rot_dir.z() += q.x() * uv1 - q.y() * uv0; + // END QUATERNION ROTATING VECTOR + + + + OutputSizedArray image_x; + OutputSizedArray image_y; + + OutputSizedFloatArray image_x_f; + OutputSizedFloatArray image_y_f; + + + //!@todo optimize + rot_dir.y() *= -1; + rot_dir.z() *= -1; + + { + XRT_TRACE_IDENT(camera_projection); + + switch (mi.dist.model) { + case T_DISTORTION_FISHEYE_KB4: + // This takes 250us vs 500 because of the removed atan2. + project_kb4(mi.dist, rot_dir.x(), rot_dir.y(), rot_dir.z(), image_x_f, image_y_f); + break; + case T_DISTORTION_OPENCV_RADTAN_8: + // Regular C is plenty fast for radtan :) + for (int i = 0; i < image_x_f.rows(); i++) { + for (int j = 0; j < image_x_f.cols(); j++) { + t_camera_models_project(&mi.dist, rot_dir.x()(i, j), rot_dir.y()(i, j), + rot_dir.z()(i, j), &image_x_f(i, j), &image_y_f(i, j)); + } + } + break; + default: assert(false); + } + } + + image_x = image_x_f.cast(); + image_y = image_y_f.cast(); + + naive_remap(image_x, image_y, mi.input, mi.distorted_image_eigen); +} + + + +bool +slow(projection_state &mi, float x, float y, cv::Point2i &out) +{ + float sg_x = + map_ranges(x, 0, wsize, -mi.instructions.stereographic_radius, mi.instructions.stereographic_radius); + + float sg_y = + map_ranges(y, 0, wsize, mi.instructions.stereographic_radius, -mi.instructions.stereographic_radius); + + Eigen::Vector3f dir = stereographic_to_direction(sg_x, sg_y); + + dir = mi.instructions.rot_quat * dir; + + dir.y() *= -1; + dir.z() *= -1; + + float _x = {}; + float _y = {}; + + bool ret = t_camera_models_project(&mi.dist, dir.x(), dir.y(), dir.z(), &_x, &_y); + + out.x = _x; + out.y = _y; + + return ret; +} + +void +draw_and_clear(cv::Mat img, std::vector &line_vec, bool good, cv::Scalar color) +{ + if (!good) { + color[0] = 255 - color[0]; + color[1] = 255 - color[1]; + color[2] = 255 - color[2]; + } + cv::polylines(img, line_vec, false, color); + line_vec.clear(); +} + +void +add_or_draw_line(projection_state &mi, // + int x, // + int y, // + std::vector &line_vec, // + cv::Scalar color, // + bool &good_most_recent, // + bool &started, + cv::Mat &img) +{ + cv::Point2i e = {}; + bool retval = slow(mi, x, y, e); + + if (!started) { + started = true; + good_most_recent = retval; + line_vec.push_back(e); + return; + } + if (retval != good_most_recent) { + line_vec.push_back(e); + draw_and_clear(img, line_vec, good_most_recent, color); + } + good_most_recent = retval; + line_vec.push_back(e); +} + +void +draw_boundary(projection_state &mi, cv::Scalar color, cv::Mat img) +{ + std::vector line_vec = {}; + bool good_most_recent = true; + bool started = false; + + constexpr float step = 16; + + // x = 0, y = 0->128 + for (int y = 0; y <= wsize; y += step) { + int x = 0; + add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img); + } + + // x = 0->128, y = 128 + for (int x = step; x <= wsize; x += step) { + int y = wsize; + add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img); + } + + // x = 128, y = 128->0 + for (int y = wsize - step; y >= 0; y -= step) { + int x = wsize; + add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img); + } + + // x = 128->0, y = 0 + for (int x = wsize - step; x >= 0; x -= step) { + int y = 0; + add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img); + } + + draw_and_clear(img, line_vec, good_most_recent, color); +} + +void +project_21_points_unscaled(Eigen::Array &joints_local, Eigen::Quaternionf rot_quat, hand21_2d &out_joints) +{ + for (int i = 0; i < 21; i++) { + Eigen::Vector3f direction = joints_local.col(i); //{d.x, d.y, d.z}; + direction.normalize(); + + direction = rot_quat.conjugate() * direction; + float denom = 1 - direction.z(); + float sg_x = direction.x() / denom; + float sg_y = direction.y() / denom; + // sg_x *= mi.stereographic_radius; + // sg_y *= mi.stereographic_radius; + + out_joints[i].pos_2d.x = sg_x; + out_joints[i].pos_2d.y = sg_y; + } +} + +template +void +project_point_scaled(projection_state &mi, Eigen::Vector3f direction, V2 &out_img_pt) +{ + direction = mi.instructions.rot_quat.conjugate() * direction; + float denom = 1 - direction.z(); + float sg_x = direction.x() / denom; + float sg_y = direction.y() / denom; + + out_img_pt.pos_2d.x = map_ranges(sg_x, -mi.instructions.stereographic_radius, + mi.instructions.stereographic_radius, 0, wsize); + out_img_pt.pos_2d.y = map_ranges(sg_y, mi.instructions.stereographic_radius, + -mi.instructions.stereographic_radius, 0, wsize); +} + +void +project_21_points_scaled(projection_state &mi, Eigen::Array &joints_local, hand21_2d &out_joints_in_img) +{ + for (int i = 0; i < 21; i++) { + project_point_scaled(mi, Eigen::Vector3f(joints_local.col(i)), out_joints_in_img[i]); + } +} + + + +Eigen::Quaternionf +direction(Eigen::Vector3f dir, float twist) +{ + Eigen::Quaternionf one = Eigen::Quaternionf().setFromTwoVectors(-Eigen::Vector3f::UnitZ(), dir); + + Eigen::Quaternionf two; + two = Eigen::AngleAxisf(twist, -Eigen::Vector3f::UnitZ()); + return one * two; +} + +void +add_rel_depth(const Eigen::Array &joints, hand21_2d &out_joints_in_img) +{ + float hand_size = (Eigen::Vector3f(joints.col(0)) - Eigen::Vector3f(joints.col(9))).norm(); + + float midpxm_depth = Eigen::Vector3f(joints.col(9)).norm(); + for (int i = 0; i < 21; i++) { + float jd = Eigen::Vector3f(joints.col(i)).norm(); + out_joints_in_img[i].depth_relative_to_midpxm = (jd - midpxm_depth) / hand_size; + } +} + +static float +palm_length(hand21_2d &joints) +{ + vec2_5 wrist = joints[0]; + vec2_5 middle_proximal = joints[9]; + + vec2_5 index_proximal = joints[5]; + vec2_5 ring_proximal = joints[17]; + + float fwd = m_vec2_len(wrist.pos_2d - middle_proximal.pos_2d); + float side = m_vec2_len(index_proximal.pos_2d - ring_proximal.pos_2d); + + float length = fmaxf(fwd, side); + + return length; +} + + + +void +make_projection_instructions(t_camera_model_params &dist, + bool flip_after, + float expand_val, + float twist, + Eigen::Array &joints, + projection_instructions &out_instructions, + hand21_2d &out_hand) +{ + + out_instructions.flip = flip_after; + + Eigen::Vector3f dir = Eigen::Vector3f(joints.col(9)).normalized(); + + out_instructions.rot_quat = direction(dir, twist); + + Eigen::Vector3f old_direction = dir; + + + // Tested on Dec 7: This converges in 4 iterations max, usually 2. + for (int i = 0; i < 8; i++) { + project_21_points_unscaled(joints, out_instructions.rot_quat, out_hand); + + xrt_vec2 min = out_hand[0].pos_2d; + xrt_vec2 max = out_hand[0].pos_2d; + + for (int i = 0; i < 21; i++) { + xrt_vec2 pt = out_hand[i].pos_2d; + min.x = fmin(pt.x, min.x); + min.y = fmin(pt.y, min.y); + + max.x = fmax(pt.x, max.x); + max.y = fmax(pt.y, max.y); + } + + + xrt_vec2 center = m_vec2_mul_scalar(min + max, 0.5); + + float r = fmax(center.x - min.x, center.y - min.y); + out_instructions.stereographic_radius = r; + + Eigen::Vector3f new_direction = stereographic_to_direction(center.x, center.y); + + new_direction = out_instructions.rot_quat * new_direction; + + out_instructions.rot_quat = direction(new_direction, twist); + + + if ((old_direction - dir).norm() < 0.0001) { + // We converged + break; + } + old_direction = dir; + } + + // This can basically be removed (we will have converged very well in the above), but for correctness's + // sake, let's project one last time. + project_21_points_unscaled(joints, out_instructions.rot_quat, out_hand); + + // These are to ensure that the bounding box doesn't get too small around a closed fist. + float palm_l = palm_length(out_hand); + float radius_around_palm = palm_l * 0.5 * (2.2 / 1.65) * expand_val; + + out_instructions.stereographic_radius *= expand_val; + + out_instructions.stereographic_radius = fmaxf(out_instructions.stereographic_radius, radius_around_palm); + + // This is going straight into the (-1, 1)-normalized space + for (int i = 0; i < 21; i++) { + vec2_5 &v = out_hand[i]; + v.pos_2d.x = map_ranges(v.pos_2d.x, -out_instructions.stereographic_radius, + out_instructions.stereographic_radius, -1, 1); + //!@todo optimize + if (flip_after) { + v.pos_2d.x *= -1; + } + //!@todo this is probably wrong, should probably be negated + v.pos_2d.y = map_ranges(v.pos_2d.y, out_instructions.stereographic_radius, + -out_instructions.stereographic_radius, -1, 1); + } + add_rel_depth(joints, out_hand); +} + +void +make_projection_instructions_angular(xrt_vec3 direction_3d, + bool flip_after, + float angular_radius, + float expand_val, + float twist, + projection_instructions &out_instructions) +{ + + out_instructions.flip = flip_after; + + xrt_vec3 imaginary_direction = {0, sinf(angular_radius), -cosf(angular_radius)}; + + out_instructions.stereographic_radius = imaginary_direction.y / (1 - imaginary_direction.z); + + math_vec3_normalize(&direction_3d); + + Eigen::Vector3f dir = xrt::auxiliary::math::map_vec3(direction_3d); + + + out_instructions.rot_quat = direction(dir, twist); + + + out_instructions.stereographic_radius *= expand_val; +} + + +void +stereographic_project_image(const t_camera_model_params &dist, + const projection_instructions &instructions, + cv::Mat &input_image, + cv::Mat *debug_image, + const cv::Scalar boundary_color, + cv::Mat &out) + +{ + out = cv::Mat(cv::Size(wsize, wsize), CV_8U); + projection_state mi(instructions, input_image, out); + mi.dist = dist; + // Why am I doing it like this???? + // mi.stereographic_radius = instructions.stereographic_radius; + // mi.rot_quat = instructions.rot_quat; + // mi.flip = instructions.flip; + + StereographicDistort(mi); + + if (debug_image) { + draw_boundary(mi, boundary_color, *debug_image); + } +} +} // namespace xrt::tracking::hand::mercury diff --git a/src/xrt/tracking/hand/mercury/hg_image_math.inl b/src/xrt/tracking/hand/mercury/hg_image_math.inl index 1dc617a93..4ca913a73 100644 --- a/src/xrt/tracking/hand/mercury/hg_image_math.inl +++ b/src/xrt/tracking/hand/mercury/hg_image_math.inl @@ -72,38 +72,18 @@ hsv2rgb(float fH, float fS, float fV) return {fR * 255.0f, fG * 255.0f, fB * 255.0f}; } - -//! @todo Make it take an array of vec2's and give out an array of vec2's, then -// put it in its own target so we don't have to link to OpenCV. -// Oooorrrrr... actually add good undistortion stuff to Monado so that we don't have to depend on OpenCV at all. - -XRT_MAYBE_UNUSED static struct xrt_vec2 -raycoord(ht_view *htv, struct xrt_vec2 model_out) -{ - model_out.x *= htv->hgt->multiply_px_coord_for_undistort; - model_out.y *= htv->hgt->multiply_px_coord_for_undistort; - cv::Mat in_px_coords(1, 1, CV_32FC2); - float *write_in; - write_in = in_px_coords.ptr(0); - write_in[0] = model_out.x; - write_in[1] = model_out.y; - cv::Mat out_ray(1, 1, CV_32FC2); - - if (htv->hgt->use_fisheye) { - cv::fisheye::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion); - } else { - cv::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion); - } - - float n_x = out_ray.at(0, 0); - float n_y = out_ray.at(0, 1); - - return {n_x, n_y}; -} - -static void +inline void handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type) { cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, intensity, intensity), type); } + +inline void +handSquare(cv::Mat &debug_frame, xrt_vec2 center, float radius, cv::Scalar color) +{ + cv::Point2i pt((int)center.x, (int)center.y); + cv::rectangle(debug_frame, cv::Rect(pt - cv::Point2i(radius / 2, radius / 2), cv::Size(radius, radius)), color, + 1); +} + } // namespace xrt::tracking::hand::mercury diff --git a/src/xrt/tracking/hand/mercury/hg_model.cpp b/src/xrt/tracking/hand/mercury/hg_model.cpp index bd6ceab0c..c6efff2b5 100644 --- a/src/xrt/tracking/hand/mercury/hg_model.cpp +++ b/src/xrt/tracking/hand/mercury/hg_model.cpp @@ -11,6 +11,7 @@ // https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/fns_candy_style_transfer/fns_candy_style_transfer.c #include "hg_sync.hpp" #include "hg_image_math.inl" +#include "hg_numerics_checker.hpp" #include @@ -29,7 +30,6 @@ namespace xrt::tracking::hand::mercury { } \ } while (0) - static cv::Matx23f blackbar(const cv::Mat &in, enum t_camera_orientation rot, cv::Mat &out, xrt_size out_size) { @@ -134,9 +134,71 @@ argmax(const float *data, int size) return out_idx; } -static void -refine_center_of_distribution( - const float *data, int coarse_x, int coarse_y, int w, int h, float *out_refined_x, float *out_refined_y) +static bool +hand_depth_center_of_mass(struct HandTracking *hgt, float data[22], float *out_depth, float *out_confidence) +{ + float avg_location_px_coord = 0; + float sum = 0; + + for (int i = 0; i < 22; i++) { + data[i] = fmin(1.0, fmax(data[i], 0.0)); + sum += data[i]; + avg_location_px_coord += data[i] * (float)i; + } + + if (sum < 1e-5) { + HG_DEBUG(hgt, "All depth outputs were zero!"); + return false; + } + + avg_location_px_coord /= sum; + + // std::cout << avg_location_px_coord << std::endl; + + // bounds check + if (avg_location_px_coord < 0 || avg_location_px_coord > 21) { + HG_DEBUG(hgt, "Very bad! avg_location_px_coord was %f", avg_location_px_coord); + for (int i = 0; i < 22; i++) { + HG_DEBUG(hgt, "%f", data[i]); + } + + avg_location_px_coord = fmin(21.0, fmax(0.0, avg_location_px_coord)); + return false; + } + + // nan check + if (avg_location_px_coord != avg_location_px_coord) { + HG_DEBUG(hgt, "Very bad! avg_location_px_coord was not a number: %f", avg_location_px_coord); + for (int i = 0; i < 22; i++) { + HG_DEBUG(hgt, "%f", data[i]); + } + *out_depth = 0; + *out_confidence = 0; + return false; + } + // printf("%f %d\n", avg_location_px_coord, (int)avg_location_px_coord); + *out_confidence = data[(int)avg_location_px_coord]; + + + + float depth_value = avg_location_px_coord + 0.5; + depth_value /= 22; + depth_value -= 0.5; + depth_value *= 2 * 1.5; + + *out_depth = depth_value; + return true; +} + +static bool +refine_center_of_distribution(struct HandTracking *hgt, // + const float *data, // + int coarse_x, // + int coarse_y, // + int w, // + int h, // + float *out_refined_x, // + float *out_refined_y) { // Be VERY suspicious of this function, it's probably not centering correctly. float sum_of_values = 0; @@ -170,18 +232,16 @@ refine_center_of_distribution( // Edge case, will fix soon *out_refined_x = coarse_x; *out_refined_y = coarse_y; - U_LOG_E("Failed! %d %d %d %d %d", coarse_x, coarse_y, w, h, max_kern_width); - return; + HG_DEBUG(hgt, "Failed! %d %d %d %d %d", coarse_x, coarse_y, w, h, max_kern_width); + return false; + } else { + *out_refined_x = sum_of_values_times_locations_x / sum_of_values; + *out_refined_y = sum_of_values_times_locations_y / sum_of_values; + return true; } - - *out_refined_x = sum_of_values_times_locations_x / sum_of_values; - *out_refined_y = sum_of_values_times_locations_y / sum_of_values; - return; } - - -static void +static bool normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out) { data_in.convertTo(data_out, CV_32FC1, 1 / 255.0); @@ -192,7 +252,7 @@ normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out) if (stddev.at(0, 0) == 0) { U_LOG_W("Got image with zero standard deviation!"); - return; + return false; } data_out *= 0.25 / stddev.at(0, 0); @@ -201,6 +261,7 @@ normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out) //! @todo optimize cv::meanStdDev(data_out, mean, stddev); data_out += (0.5 - mean.at(0, 0)); + return true; } void @@ -228,18 +289,19 @@ setup_model_image_input(HandTracking *hgt, onnx_wrap *wrap, const char *name, in { model_input_wrap inputimg = {}; inputimg.name = name; - inputimg.dimensions.push_back(1); - inputimg.dimensions.push_back(1); - inputimg.dimensions.push_back(h); - inputimg.dimensions.push_back(w); + inputimg.dimensions[0] = 1; + inputimg.dimensions[1] = 1; + inputimg.dimensions[2] = h; + inputimg.dimensions[3] = w; + inputimg.num_dimensions = 4; size_t data_size = w * h * sizeof(float); inputimg.data = (float *)malloc(data_size); ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, // inputimg.data, // data_size, // - inputimg.dimensions.data(), // - inputimg.dimensions.size(), // + inputimg.dimensions, // + inputimg.num_dimensions, // ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, // &inputimg.tensor)); @@ -320,12 +382,12 @@ run_hand_detection(void *ptr) for (int hand_idx = 0; hand_idx < 2; hand_idx++) { - hand_bounding_box *output = info->outputs[hand_idx]; + hand_region_of_interest &output = info->outputs[hand_idx]; - output->found = hand_exists[hand_idx] > 0.3; + output.found = hand_exists[hand_idx] > 0.3; - if (output->found) { - output->confidence = hand_exists[hand_idx]; + if (output.found) { + output.hand_detection_confidence = hand_exists[hand_idx]; xrt_vec2 _pt = {}; _pt.x = math_map_ranges(cx[hand_idx], -1, 1, 0, kDetectionInputSize); @@ -344,14 +406,11 @@ run_hand_detection(void *ptr) _pt = transformVecBy2x3(_pt, go_back); - output->center = _pt; - output->size_px = size; + output.center_px = _pt; + output.size_px = size; if (hgt->debug_scribble) { - cv::Point2i pt((int)output->center.x, (int)output->center.y); - cv::rectangle(debug_frame, - cv::Rect(pt - cv::Point2i(size / 2, size / 2), cv::Size(size, size)), - PINK, 1); + handSquare(debug_frame, output.center_px, output.size_px, PINK); } } @@ -377,81 +436,151 @@ init_keypoint_estimation(HandTracking *hgt, onnx_wrap *wrap) std::filesystem::path path = hgt->models_folder; - path /= "grayscale_keypoint_new.onnx"; + path /= "grayscale_keypoint_jan18.onnx"; wrap->wraps.clear(); - setup_ort_api(hgt, wrap, path); - setup_model_image_input(hgt, wrap, "inputImg", kKeypointInputSize, kKeypointInputSize); -} + wrap->api = OrtGetApiBase()->GetApi(ORT_API_VERSION); -void -calc_src_tri(cv::Point2f center, - cv::Point2f go_right, - cv::Point2f go_down, - enum t_camera_orientation rot, - cv::Point2f out_src_tri[3]) -{ - cv::Point2f top_left = {center - go_down - go_right}; - cv::Point2f bottom_left = {center + go_down - go_right}; - cv::Point2f bottom_right = {center + go_down + go_right}; - cv::Point2f top_right = {center - go_down + go_right}; - switch (rot) { - case CAMERA_ORIENTATION_0: { + OrtSessionOptions *opts = nullptr; + ORT(CreateSessionOptions(&opts)); - // top left - out_src_tri[0] = top_left; // {center - go_down - go_right}; + // TODO review options, config for threads? + ORT(SetSessionGraphOptimizationLevel(opts, ORT_ENABLE_ALL)); + ORT(SetIntraOpNumThreads(opts, 1)); - // bottom left - out_src_tri[1] = bottom_left; //{center + go_down - go_right}; - // top right - out_src_tri[2] = top_right; //{center - go_down + go_right}; - } break; - case CAMERA_ORIENTATION_90: { - // Need to rotate the view back by -90° - // top left (becomes top right) - out_src_tri[0] = top_right; + ORT(CreateEnv(ORT_LOGGING_LEVEL_FATAL, "monado_ht", &wrap->env)); - // bottom left (becomes top left) - out_src_tri[1] = top_left; + ORT(CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &wrap->meminfo)); - // top right (becomes bottom right) - out_src_tri[2] = bottom_right; - } break; + // HG_DEBUG(this->device, "Loading hand detection model from file '%s'", path.c_str()); + ORT(CreateSession(wrap->env, path.c_str(), opts, &wrap->session)); + assert(wrap->session != NULL); - case CAMERA_ORIENTATION_180: { - // top left (becomes bottom right) - out_src_tri[0] = bottom_right; + // size_t input_size = wrap->input_shape[0] * wrap->input_shape[1] * wrap->input_shape[2] * + // wrap->input_shape[3]; - // bottom left (becomes top right) - out_src_tri[1] = top_right; + // wrap->data = (float *)malloc(input_size * sizeof(float)); + { + model_input_wrap inputimg = {}; + inputimg.name = "inputImg"; + inputimg.dimensions[0] = 1; + inputimg.dimensions[1] = 1; + inputimg.dimensions[2] = 128; + inputimg.dimensions[3] = 128; + inputimg.num_dimensions = 4; - // top right (becomes bottom left) - out_src_tri[2] = bottom_left; - } break; - case CAMERA_ORIENTATION_270: { - // Need to rotate the view clockwise 90° - // top left (becomes bottom left) - out_src_tri[0] = bottom_left; //{center + go_down - go_right}; + inputimg.data = (float *)malloc(128 * 128 * sizeof(float)); // SORRY IM BUSY - // bottom left (becomes bottom right) - out_src_tri[1] = bottom_right; //{center + go_down + go_right}; - // top right (becomes top left) - out_src_tri[2] = top_left; //{center - go_down - go_right}; - } break; - default: assert(false); + + ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, // + inputimg.data, // + 128 * 128 * sizeof(float), // + inputimg.dimensions, // + inputimg.num_dimensions, // + ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, // + &inputimg.tensor)); + + assert(inputimg.tensor); + int is_tensor; + ORT(IsTensor(inputimg.tensor, &is_tensor)); + assert(is_tensor); + + wrap->wraps.push_back(inputimg); } + + { + model_input_wrap inputimg = {}; + inputimg.name = "lastKeypoints"; + inputimg.dimensions[0] = 1; + inputimg.dimensions[1] = 42; + inputimg.num_dimensions = 2; + + inputimg.data = (float *)malloc(1 * 42 * sizeof(float)); // SORRY IM BUSY + + + + ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, // + inputimg.data, // + 1 * 42 * sizeof(float), // + inputimg.dimensions, // + inputimg.num_dimensions, // + ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, // + &inputimg.tensor)); + + assert(inputimg.tensor); + int is_tensor; + ORT(IsTensor(inputimg.tensor, &is_tensor)); + assert(is_tensor); + wrap->wraps.push_back(inputimg); + } + + { + model_input_wrap inputimg = {}; + inputimg.name = "useLastKeypoints"; + inputimg.dimensions[0] = 1; + inputimg.num_dimensions = 1; + + inputimg.data = (float *)malloc(1 * sizeof(float)); // SORRY IM BUSY + + + + ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, // + inputimg.data, // + 1 * sizeof(float), // + inputimg.dimensions, // + inputimg.num_dimensions, // + ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, // + &inputimg.tensor)); + + assert(inputimg.tensor); + int is_tensor; + ORT(IsTensor(inputimg.tensor, &is_tensor)); + assert(is_tensor); + wrap->wraps.push_back(inputimg); + } + + + wrap->api->ReleaseSessionOptions(opts); } +enum xrt_hand_joint joints_ml_to_xr[21]{ + XRT_HAND_JOINT_WRIST, // + XRT_HAND_JOINT_THUMB_METACARPAL, // + XRT_HAND_JOINT_THUMB_PROXIMAL, // + XRT_HAND_JOINT_THUMB_DISTAL, // + XRT_HAND_JOINT_THUMB_TIP, // + // + XRT_HAND_JOINT_INDEX_PROXIMAL, // + XRT_HAND_JOINT_INDEX_INTERMEDIATE, // + XRT_HAND_JOINT_INDEX_DISTAL, // + XRT_HAND_JOINT_INDEX_TIP, // + // + XRT_HAND_JOINT_MIDDLE_PROXIMAL, // + XRT_HAND_JOINT_MIDDLE_INTERMEDIATE, // + XRT_HAND_JOINT_MIDDLE_DISTAL, // + XRT_HAND_JOINT_MIDDLE_TIP, // + // + XRT_HAND_JOINT_RING_PROXIMAL, // + XRT_HAND_JOINT_RING_INTERMEDIATE, // + XRT_HAND_JOINT_RING_DISTAL, // + XRT_HAND_JOINT_RING_TIP, // + // + XRT_HAND_JOINT_LITTLE_PROXIMAL, // + XRT_HAND_JOINT_LITTLE_INTERMEDIATE, // + XRT_HAND_JOINT_LITTLE_DISTAL, // + XRT_HAND_JOINT_LITTLE_TIP, // +}; + void make_keypoint_heatmap_output(int camera_idx, int hand_idx, int grid_pt_x, int grid_pt_y, float *plane, cv::Mat &out) { - int root_x = kVisSpacerSize + ((1 + 2 * hand_idx) * (kKeypointInputSize + kVisSpacerSize)); - int root_y = kVisSpacerSize + (camera_idx * (kKeypointInputSize + kVisSpacerSize)); + int root_x = 8 + ((1 + 2 * hand_idx) * (128 + 8)); + int root_y = 8 + ((2 * camera_idx) * (128 + 8)); int org_x = (root_x) + (grid_pt_x * 25); int org_y = (root_y) + (grid_pt_y * 25); @@ -464,168 +593,383 @@ make_keypoint_heatmap_output(int camera_idx, int hand_idx, int grid_pt_x, int gr start.copyTo(out(p)); } +void +make_keypoint_depth_heatmap_output(int camera_idx, // + int hand_idx, // + int grid_pt_x, // + int grid_pt_y, + float *plane, // + cv::Mat &out) +{ + int root_x = 8 + ((1 + 2 * hand_idx) * (128 + 8)); + int root_y = 8 + ((1 + 2 * camera_idx) * (128 + 8)); + + int org_x = (root_x) + (grid_pt_x * 25); + int org_y = (root_y) + (grid_pt_y * 25); + + + cv::Rect p = cv::Rect(org_x, org_y, 22, 22); + + + cv::Mat start(cv::Size(22, 22), CV_32FC1); + float *ptr = (float *)start.data; // Cope cope cope cope cope + + + for (int i = 0; i < 22; i++) { + for (int j = 0; j < 22; j++) { + ptr[(i * 22) + j] = plane[i]; + } + } + + start *= 255.0; + + start.copyTo(out(p)); +} + +void +set_predicted_zero(float *data) +{ + for (int i = 0; i < 42; i++) { + data[i] = 0.0f; + } +} void run_keypoint_estimation(void *ptr) { XRT_TRACE_MARKER(); - keypoint_estimation_run_info *info = (keypoint_estimation_run_info *)ptr; + keypoint_estimation_run_info info = *(keypoint_estimation_run_info *)ptr; - onnx_wrap *wrap = &info->view->keypoint[info->hand_idx]; - struct HandTracking *hgt = info->view->hgt; + onnx_wrap *wrap = &info.view->keypoint[info.hand_idx]; + struct HandTracking *hgt = info.view->hgt; + int view_idx = info.view->view; + int hand_idx = info.hand_idx; + one_frame_one_view &this_output = hgt->keypoint_outputs[hand_idx].views[view_idx]; + MLOutput2D &px_coord = this_output.keypoints_in_scaled_stereographic; // Factor out starting here - cv::Mat &debug = info->view->debug_out_to_this; + hand_region_of_interest &output = info.view->regions_of_interest_this_frame[hand_idx]; - hand_bounding_box *output = &info->view->bboxes_this_frame[info->hand_idx]; + cv::Mat data_128x128_uint8; - cv::Point2f src_tri[3]; - cv::Point2f dst_tri[3]; - // top-left - cv::Point2f center = {output->center.x, output->center.y}; + projection_instructions instr(info.view->hgdist); + instr.rot_quat = Eigen::Quaternionf::Identity(); + instr.stereographic_radius = 0.4; - cv::Point2f go_right = {output->size_px / 2, 0}; - cv::Point2f go_down = {0, output->size_px / 2}; - calc_src_tri(center, go_right, go_down, info->view->camera_info.camera_orientation, src_tri); - /* For the right hand, flip the result horizontally since - * the model is trained on left hands. - * Top left, bottom left, top right */ - if (info->hand_idx == 1) { - dst_tri[0] = {kKeypointInputSize, 0}; - dst_tri[1] = {kKeypointInputSize, kKeypointInputSize}; - dst_tri[2] = {0, 0}; + t_camera_model_params dist = info.view->hgdist; + + + float twist = 0; + + if (output.provenance == ROIProvenance::HAND_DETECTION) { + + xrt_vec3 center; + + xrt_vec3 edges[4]; + + + t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, output.center_px.x, output.center_px.y, + ¢er.x, ¢er.y, ¢er.z); + + xrt_vec2 r = {output.size_px / 2, 0}; + xrt_vec2 d = {0, output.size_px / 2}; + xrt_vec2 v; + + // note! we do not need to rotate this! it's *already* in camera space. + int acc_idx = 0; + v = output.center_px + r + d; + t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x, + &edges[acc_idx].y, &edges[acc_idx].z); + acc_idx++; + + v = output.center_px - r + d; + t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x, + &edges[acc_idx].y, &edges[acc_idx].z); + acc_idx++; + + v = output.center_px - r - d; + t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x, + &edges[acc_idx].y, &edges[acc_idx].z); + acc_idx++; + + v = output.center_px + r - d; + t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x, + &edges[acc_idx].y, &edges[acc_idx].z); + + + float angle = 0; + + for (int i = 0; i < 4; i++) { + angle = fmaxf(angle, m_vec3_angle(center, edges[i])); + } + + + make_projection_instructions_angular(center, hand_idx, angle, + hgt->tuneable_values.after_detection_fac.val, twist, instr); + + wrap->wraps[2].data[0] = 0.0f; + set_predicted_zero(wrap->wraps[1].data); } else { - dst_tri[0] = {0, 0}; - dst_tri[1] = {0, kKeypointInputSize}; - dst_tri[2] = {kKeypointInputSize, 0}; + Eigen::Array keypoints_in_camera; + + + if (view_idx == 0) { + keypoints_in_camera = hgt->pose_predicted_keypoints[hand_idx]; + } else { + for (int i = 0; i < 21; i++) { + + Eigen::Quaternionf ori = map_quat(hgt->left_in_right.orientation); + Eigen::Vector3f tmp = hgt->pose_predicted_keypoints[hand_idx].col(i); + + tmp = ori * tmp; + tmp += map_vec3(hgt->left_in_right.position); + keypoints_in_camera.col(i) = tmp; + } + } + + hand21_2d bleh; + + make_projection_instructions(dist, hand_idx, hgt->tuneable_values.dyn_radii_fac.val, twist, + keypoints_in_camera, instr, bleh); + + if (hgt->tuneable_values.enable_pose_predicted_input) { + for (int ml_joint_idx = 0; ml_joint_idx < 21; ml_joint_idx++) { + float *data = wrap->wraps[1].data; + data[(ml_joint_idx * 2) + 0] = bleh[ml_joint_idx].pos_2d.x; + data[(ml_joint_idx * 2) + 1] = bleh[ml_joint_idx].pos_2d.y; + // data[(ml_joint_idx * 2) + 2] = bleh[ml_joint_idx].depth_relative_to_midpxm; + } + + + wrap->wraps[2].data[0] = 1.0f; + } else { + wrap->wraps[2].data[0] = 0.0f; + set_predicted_zero(wrap->wraps[1].data); + } } - cv::Matx23f go_there = getAffineTransform(src_tri, dst_tri); - cv::Matx23f go_back = getAffineTransform(dst_tri, src_tri); // NOLINT + stereographic_project_image(dist, instr, hgt->views[view_idx].run_model_on_this, + &hgt->views[view_idx].debug_out_to_this, info.hand_idx ? RED : YELLOW, + data_128x128_uint8); - cv::Mat cropped_image_uint8; + + xrt::auxiliary::math::map_quat(this_output.look_dir) = instr.rot_quat; + this_output.stereographic_radius = instr.stereographic_radius; + + bool is_hand = true; { - XRT_TRACE_IDENT(transforms); + XRT_TRACE_IDENT(convert_format); - cv::warpAffine(info->view->run_model_on_this, cropped_image_uint8, go_there, - cv::Size(kKeypointInputSize, kKeypointInputSize), cv::INTER_LINEAR); + // here! + cv::Mat data_128x128_float(cv::Size(128, 128), CV_32FC1, wrap->wraps[0].data, 128 * sizeof(float)); - cv::Mat cropped_image_float_wrapper(cv::Size(kKeypointInputSize, kKeypointInputSize), // - CV_32FC1, // - wrap->wraps[0].data, // - kKeypointInputSize * sizeof(float)); - - normalizeGrayscaleImage(cropped_image_uint8, cropped_image_float_wrapper); + is_hand = is_hand && normalizeGrayscaleImage(data_128x128_uint8, data_128x128_float); } + // Ending here - const OrtValue *inputs[] = {wrap->wraps[0].tensor}; - const char *input_names[] = {wrap->wraps[0].name}; - OrtValue *output_tensors[] = {nullptr}; + const OrtValue *inputs[] = {wrap->wraps[0].tensor, wrap->wraps[1].tensor, wrap->wraps[2].tensor}; + const char *input_names[] = {wrap->wraps[0].name, wrap->wraps[1].name, wrap->wraps[2].name}; - const char *output_names[] = {"heatmap"}; - - - // OrtValue *output_tensor = nullptr; + OrtValue *output_tensors[] = {nullptr, nullptr, nullptr, nullptr}; + const char *output_names[] = {"heatmap_xy", "heatmap_depth", "scalar_extras", "curls"}; { XRT_TRACE_IDENT(model); - static_assert(ARRAY_SIZE(input_names) == ARRAY_SIZE(inputs)); - static_assert(ARRAY_SIZE(output_names) == ARRAY_SIZE(output_tensors)); + assert(ARRAY_SIZE(input_names) == ARRAY_SIZE(inputs)); + assert(ARRAY_SIZE(output_names) == ARRAY_SIZE(output_tensors)); ORT(Run(wrap->session, nullptr, input_names, inputs, ARRAY_SIZE(input_names), output_names, ARRAY_SIZE(output_names), output_tensors)); } // To here + // Interpret model outputs! + + float *out_data = nullptr; ORT(GetTensorMutableData(output_tensors[0], (void **)&out_data)); - Hand2D &px_coord = info->view->keypoint_outputs[info->hand_idx].hand_px_coord; - Hand2D &tan_space = info->view->keypoint_outputs[info->hand_idx].hand_tan_space; - float *confidences = info->view->keypoint_outputs[info->hand_idx].hand_tan_space.confidences; - xrt_vec2 *keypoints_global = px_coord.kps; + // I don't know why this was added + // float *confidences = info.view->keypoint_outputs.views[hand_idx].confidences; - size_t plane_size = kKeypointOutputHeatmapSize * kKeypointOutputHeatmapSize; + // This was added for debug scribbling, and should be added back at some point. + // xrt_vec2 *keypoints_global = px_coord.kps; + + size_t plane_size = 22 * 22; for (int i = 0; i < 21; i++) { float *data = &out_data[i * plane_size]; - int out_idx = argmax(data, kKeypointOutputHeatmapSize * kKeypointOutputHeatmapSize); - int row = out_idx / kKeypointOutputHeatmapSize; - int col = out_idx % kKeypointOutputHeatmapSize; - - xrt_vec2 loc; - refine_center_of_distribution(data, col, row, kKeypointOutputHeatmapSize, kKeypointOutputHeatmapSize, - &loc.x, &loc.y); + // This will be optimized out if nan checking is disabled in hg_numerics_checker + for (size_t x = 0; x < plane_size; x++) { + CHECK_NOT_NAN(data[i]); + } - // 128.0/22.0f - loc.x *= float(kKeypointInputSize) / float(kKeypointOutputHeatmapSize); - loc.y *= float(kKeypointInputSize) / float(kKeypointOutputHeatmapSize); - loc = transformVecBy2x3(loc, go_back); + int out_idx = argmax(data, 22 * 22); + int row = out_idx / 22; + int col = out_idx % 22; - confidences[i] = data[out_idx]; - px_coord.kps[i] = loc; + xrt_vec2 loc = {}; - tan_space.kps[i] = raycoord(info->view, loc); + // This is a good start but rethink it. Maybe fail if less than 18/21 joints failed? + // is_hand = is_hand && + refine_center_of_distribution(hgt, data, col, row, 22, 22, &loc.x, &loc.y); + + if (hand_idx == 0) { + px_coord[i].pos_2d.x = math_map_ranges(loc.x, 0, 22, -1, 1); + } else { + px_coord[i].pos_2d.x = math_map_ranges(loc.x, 0, 22, 1, -1); + } + + //!@todo when you change this to have +Z-forward + // note note note the flip!!! + px_coord[i].pos_2d.y = math_map_ranges(loc.y, 0, 22, 1, -1); + + px_coord[i].confidence_xy = data[out_idx]; } + + float *out_data_depth = nullptr; + ORT(GetTensorMutableData(output_tensors[1], (void **)&out_data_depth)); + + for (int joint_idx = 0; joint_idx < 21; joint_idx++) { + float *p_ptr = &out_data_depth[(joint_idx * 22)]; + + + float depth = 0; + float confidence = 0; + + // This function can fail + if (hand_depth_center_of_mass(hgt, p_ptr, &depth, &confidence)) { + + px_coord[joint_idx].depth_relative_to_midpxm = depth; + px_coord[joint_idx].confidence_depth = confidence; + } else { + px_coord[joint_idx].depth_relative_to_midpxm = 0; + px_coord[joint_idx].confidence_depth = 0; + } + } + + float *out_data_extras = nullptr; + ORT(GetTensorMutableData(output_tensors[2], (void **)&out_data_extras)); + + float is_hand_explicit = out_data_extras[0]; + + is_hand_explicit = (1.0) / (1.0 + powf(2.71828182845904523536, -is_hand_explicit)); + + // When the model is sure, it's _really_ sure. + // Index was fine with 0.99. + // North Star seemed to need 0.97. + if (is_hand_explicit < 0.97) { + U_LOG_D("Not hand! %f", is_hand_explicit); + is_hand = false; + } + + this_output.active = is_hand; + + + float *out_data_curls = nullptr; + ORT(GetTensorMutableData(output_tensors[3], (void **)&out_data_curls)); + + for (int i = 0; i < 5; i++) { + float curl = out_data_curls[i]; + float variance = out_data_curls[5 + i]; + + // Next two lines directly correspond to py_training/settings.py + // We don't want it to be negative + variance = fabsf(variance); + // We don't want it to be possible to be zero + variance += 0.01; + + this_output.curls[i].value = curl; + this_output.curls[i].variance = curl; + } + + + if (hgt->debug_scribble) { int data_acc_idx = 0; - int root_x = kVisSpacerSize + ((2 * info->hand_idx) * (kKeypointInputSize + kVisSpacerSize)); - int root_y = kVisSpacerSize + (info->view->view * (kKeypointInputSize + kVisSpacerSize)); + int root_x = 8 + ((2 * hand_idx) * (128 + 8)); + int root_y = 8 + (2 * info.view->view * (128 + 8)); - cv::Rect p = cv::Rect(root_x, root_y, kKeypointInputSize, kKeypointInputSize); + cv::Rect p = cv::Rect(root_x, root_y, 128, 128); - cropped_image_uint8.copyTo(hgt->visualizers.mat(p)); + data_128x128_uint8.copyTo(hgt->visualizers.mat(p)); + + make_keypoint_heatmap_output(info.view->view, hand_idx, 0, 0, out_data + (data_acc_idx * plane_size), + hgt->visualizers.mat); + make_keypoint_depth_heatmap_output(info.view->view, hand_idx, 0, 0, + out_data_depth + (data_acc_idx * 22), hgt->visualizers.mat); - make_keypoint_heatmap_output(info->view->view, info->hand_idx, 0, 0, - out_data + (data_acc_idx * plane_size), hgt->visualizers.mat); data_acc_idx++; for (int finger = 0; finger < 5; finger++) { for (int joint = 0; joint < 4; joint++) { - make_keypoint_heatmap_output(info->view->view, info->hand_idx, 1 + joint, finger, + make_keypoint_heatmap_output(info.view->view, hand_idx, 1 + joint, finger, out_data + (data_acc_idx * plane_size), hgt->visualizers.mat); + make_keypoint_depth_heatmap_output(info.view->view, hand_idx, 1 + joint, finger, + out_data_depth + (data_acc_idx * 22), + hgt->visualizers.mat); data_acc_idx++; } } + // Hand existence + char amt[5]; + + snprintf(amt, ARRAY_SIZE(amt), "%.2f", is_hand_explicit); + + cv::Point2i text_origin; + + text_origin.x = root_x + 128 + 2; + text_origin.y = root_y + 60; + + // Clear out what was there before + cv::rectangle(hgt->visualizers.mat, cv::Rect(text_origin - cv::Point2i{0, 25}, cv::Size{30, 30}), {255}, + cv::FILLED); - if (hgt->tuneable_values.scribble_keypoint_model_outputs) { - for (int finger = 0; finger < 5; finger++) { - cv::Point last = {(int)keypoints_global[0].x, (int)keypoints_global[0].y}; - for (int joint = 0; joint < 4; joint++) { - cv::Point the_new = {(int)keypoints_global[1 + finger * 4 + joint].x, - (int)keypoints_global[1 + finger * 4 + joint].y}; + cv::putText(hgt->visualizers.mat, amt, text_origin, cv::FONT_HERSHEY_SIMPLEX, 0.3, {0, 0, 0}); - cv::line(debug, last, the_new, colors[info->hand_idx]); - last = the_new; - } - } + // Curls + cv::rectangle(hgt->visualizers.mat, cv::Rect(cv::Point2i{root_x, root_y + 128 + 22}, cv::Size{128, 60}), + {255}, cv::FILLED); + for (int i = 0; i < 5; i++) { + int r = 15; - for (int i = 0; i < 21; i++) { - xrt_vec2 loc = keypoints_global[i]; - handDot(debug, loc, 2, (float)(i) / 21.0, 1, 2); - } + cv::Point2i center = {root_x + r + (20 * i), root_y + 128 + 60}; + + cv::circle(hgt->visualizers.mat, center, 1, {0}, 1); + + float c = this_output.curls[i].value * 0.3; + int x = cos(c) * r; + // Remember, OpenCV has (0,0) at top left + int y = -sin(c) * r; + + cv::Point2i pt2 = {x, y}; + pt2 += center; + cv::circle(hgt->visualizers.mat, pt2, 1, {0}, 1); + cv::line(hgt->visualizers.mat, center, pt2, {0}, 1); } } - wrap->api->ReleaseValue(output_tensors[0]); + for (size_t i = 0; i < ARRAY_SIZE(output_tensors); i++) { + wrap->api->ReleaseValue(output_tensors[i]); + } } void @@ -640,4 +984,4 @@ release_onnx_wrap(onnx_wrap *wrap) wrap->api->ReleaseEnv(wrap->env); } -} // namespace xrt::tracking::hand::mercury +} // namespace xrt::tracking::hand::mercury \ No newline at end of file diff --git a/src/xrt/tracking/hand/mercury/hg_numerics_checker.hpp b/src/xrt/tracking/hand/mercury/hg_numerics_checker.hpp new file mode 100644 index 000000000..f57b92d53 --- /dev/null +++ b/src/xrt/tracking/hand/mercury/hg_numerics_checker.hpp @@ -0,0 +1,62 @@ +// Copyright 2021-2022, Collabora, Ltd. +// SPDX-License-Identifier: BSL-1.0 +/*! + * @file + * @brief Simple util for setting floating-point exceptions and checking for NaNs. + * @author Moses Turner + * @ingroup drv_ht + */ + +#undef PEDANTIC_NAN_CHECKS +#undef NAN_EXCEPTIONS + +#ifdef NAN_EXCEPTIONS +#include +#endif + +namespace xrt::tracking::hand::mercury::numerics_checker { + +#ifdef PEDANTIC_NAN_CHECKS +#define CHECK_NOT_NAN(val) \ + do { \ + if (val != val) { \ + U_LOG_E(" was NAN at %d", __LINE__); \ + assert(false); \ + } \ + \ + } while (0) + +#else +#define CHECK_NOT_NAN(val) (void)val; +#endif + +#ifdef NAN_EXCEPTIONS +static int ex = FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID; + + +static inline void +set_floating_exceptions() +{ + // NO: FE_UNDERFLOW, FE_INEXACT + // https://stackoverflow.com/questions/60731382/c-setting-floating-point-exception-environment + feenableexcept(ex); // Uncomment this for version 2 +} + +static inline void +remove_floating_exceptions() +{ + fedisableexcept(ex); +} + +#else +static inline void +set_floating_exceptions() +{} + +static inline void +remove_floating_exceptions() +{} +#endif + + +} // namespace xrt::tracking::hand::mercury::numerics_checker \ No newline at end of file diff --git a/src/xrt/tracking/hand/mercury/hg_sync.cpp b/src/xrt/tracking/hand/mercury/hg_sync.cpp index 4bcf7b735..9b4fe6313 100644 --- a/src/xrt/tracking/hand/mercury/hg_sync.cpp +++ b/src/xrt/tracking/hand/mercury/hg_sync.cpp @@ -25,6 +25,7 @@ namespace xrt::tracking::hand::mercury { #define DEG_TO_RAD(DEG) (DEG * M_PI / 180.) DEBUG_GET_ONCE_LOG_OPTION(mercury_log, "MERCURY_LOG", U_LOGGING_WARN) +DEBUG_GET_ONCE_BOOL_OPTION(mercury_optimize_hand_size, "MERCURY_optimize_hand_size", true) // Flags to tell state tracker that these are indeed valid joints static const enum xrt_space_relation_flags valid_flags_ht = (enum xrt_space_relation_flags)( @@ -38,25 +39,15 @@ static const enum xrt_space_relation_flags valid_flags_ht = (enum xrt_space_rela static bool getCalibration(struct HandTracking *hgt, t_stereo_camera_calibration &calibration) { - xrt::auxiliary::tracking::StereoCameraCalibrationWrapper wrap(&calibration); xrt_vec3 trans = { (float)calibration.camera_translation[0], (float)calibration.camera_translation[1], (float)calibration.camera_translation[2], }; + hgt->baseline = m_vec3_len(trans); HG_DEBUG(hgt, "I think the baseline is %f meters!", hgt->baseline); - // Note, this assumes camera 0 is the left camera and camera 1 is the right camera. - // If you find one with the opposite arrangement, you'll need to invert hgt->baseline, and look at - // hgJointDisparityMath - hgt->use_fisheye = wrap.view[0].distortion_model == T_DISTORTION_FISHEYE_KB4; - - if (hgt->use_fisheye) { - HG_DEBUG(hgt, "I think the cameras are fisheye!"); - } else { - HG_DEBUG(hgt, "I think the cameras are not fisheye!"); - } { // Officially, I have no idea if this is row-major or col-major. Empirically it seems to work, and that // is all I will say. @@ -65,9 +56,6 @@ getCalibration(struct HandTracking *hgt, t_stereo_camera_calibration &calibratio // right" or "right in left" //!@todo xrt_matrix_3x3 s; - s.v[0] = wrap.camera_rotation_mat(0, 0); - s.v[1] = wrap.camera_rotation_mat(1, 0); - s.v[2] = wrap.camera_rotation_mat(2, 0); s.v[0] = (float)calibration.camera_rotation[0][0]; s.v[1] = (float)calibration.camera_rotation[1][0]; @@ -83,8 +71,6 @@ getCalibration(struct HandTracking *hgt, t_stereo_camera_calibration &calibratio xrt_pose left_in_right; left_in_right.position = trans; - left_in_right.position.y = wrap.camera_translation_mat(1); - left_in_right.position.z = wrap.camera_translation_mat(2); math_quat_from_matrix_3x3(&s, &left_in_right.orientation); @@ -103,32 +89,22 @@ getCalibration(struct HandTracking *hgt, t_stereo_camera_calibration &calibratio left_in_right.orientation.w); } + for (int view_idx = 0; view_idx < 2; view_idx++) { + ht_view &view = hgt->views[view_idx]; + t_camera_model_params_from_t_camera_calibration(&calibration.view[view_idx], &view.hgdist_orig); - - //* Good enough guess that view 0 and view 1 are the same size. - for (int i = 0; i < 2; i++) { - hgt->views[i].cameraMatrix = wrap.view[i].intrinsics_mat; - hgt->views[i].distortion = wrap.view[i].distortion_mat; - - if (hgt->log_level <= U_LOGGING_DEBUG) { - HG_DEBUG(hgt, "K%d ->", i); - std::cout << hgt->views[i].cameraMatrix << std::endl; - - HG_DEBUG(hgt, "D%d ->", i); - std::cout << hgt->views[i].distortion << std::endl; - } + view.hgdist = view.hgdist_orig; } - hgt->calibration_one_view_size_px.w = wrap.view[0].image_size_pixels.w; - hgt->calibration_one_view_size_px.h = wrap.view[0].image_size_pixels.h; + //!@todo Really? We can totally support cameras with varying resolutions. + // For a later MR. + hgt->calibration_one_view_size_px.w = calibration.view[0].image_size_pixels.w; + hgt->calibration_one_view_size_px.h = calibration.view[0].image_size_pixels.h; hgt->last_frame_one_view_size_px = hgt->calibration_one_view_size_px; hgt->multiply_px_coord_for_undistort = 1.0f; hgt->hand_pose_camera_offset = XRT_POSE_IDENTITY; - - - return true; } @@ -171,14 +147,13 @@ getModelsFolder(struct HandTracking *hgt) #endif } -template static inline bool -check_outside_view(struct HandTracking *hgt, struct t_camera_extra_info_one_view boundary, Vec &keypoint) +check_outside_view(struct HandTracking *hgt, struct t_camera_extra_info_one_view boundary, xrt_vec2 &keypoint) { // Regular case - the keypoint is literally outside the image - if (keypoint.y > hgt->calibration_one_view_size_px.h || // - keypoint.y < 0 || // - keypoint.x > hgt->calibration_one_view_size_px.w || // + if (keypoint.y > hgt->last_frame_one_view_size_px.h || // + keypoint.y < 0 || // + keypoint.x > hgt->last_frame_one_view_size_px.w || // keypoint.x < 0) { return true; } @@ -189,10 +164,10 @@ check_outside_view(struct HandTracking *hgt, struct t_camera_extra_info_one_view case HT_IMAGE_BOUNDARY_CIRCLE: { //!@todo Optimize: Most of this can be calculated once at startup xrt_vec2 center_px = { - boundary.boundary.circle.normalized_center.x * (float)hgt->calibration_one_view_size_px.w, // - boundary.boundary.circle.normalized_center.y * (float)hgt->calibration_one_view_size_px.h}; + boundary.boundary.circle.normalized_center.x * (float)hgt->last_frame_one_view_size_px.w, // + boundary.boundary.circle.normalized_center.y * (float)hgt->last_frame_one_view_size_px.h}; float radius_px = - boundary.boundary.circle.normalized_radius * (float)hgt->calibration_one_view_size_px.w; + boundary.boundary.circle.normalized_radius * (float)hgt->last_frame_one_view_size_px.w; xrt_vec2 keypoint_xrt = {float(keypoint.x), float(keypoint.y)}; @@ -207,16 +182,16 @@ check_outside_view(struct HandTracking *hgt, struct t_camera_extra_info_one_view } static void -back_project(struct HandTracking *hgt, - xrt_hand_joint_set *in, - bool also_debug_output, - xrt_vec2 centers[2], - float radii[2], +back_project(struct HandTracking *hgt, // + Eigen::Array &pts, // + int hand_idx, // + bool also_debug_output, // int num_outside[2]) { for (int view_idx = 0; view_idx < 2; view_idx++) { - xrt_pose move_amount; + cv::Mat debug = hgt->views[view_idx].debug_out_to_this; + xrt_pose move_amount = {}; if (view_idx == 0) { // left camera. @@ -224,92 +199,123 @@ back_project(struct HandTracking *hgt, } else { move_amount = hgt->left_in_right; } - std::vector pts_relative_to_camera(26); - // Bandaid solution, doesn't quite fix things on WMR. - bool any_joint_behind_camera = false; + Eigen::Vector3f p = map_vec3(move_amount.position); + Eigen::Quaternionf q = map_quat(move_amount.orientation); - for (int i = 0; i < 26; i++) { - xrt_vec3 tmp; - math_quat_rotate_vec3(&move_amount.orientation, - &in->values.hand_joint_set_default[i].relation.pose.position, &tmp); - pts_relative_to_camera[i].x = tmp.x + move_amount.position.x; - pts_relative_to_camera[i].y = tmp.y + move_amount.position.y; - pts_relative_to_camera[i].z = tmp.z + move_amount.position.z; + Eigen::Array pts_relative_to_camera = {}; - pts_relative_to_camera[i].y *= -1; - pts_relative_to_camera[i].z *= -1; + bool invalid[21] = {}; - if (pts_relative_to_camera[i].z < 0) { - any_joint_behind_camera = true; + for (int i = 0; i < 21; i++) { + pts_relative_to_camera.col(i) = (q * pts.col(i)) + p; + + if (pts_relative_to_camera.col(i).z() > 0) { + invalid[i] = true; } } + xrt_vec2 keypoints_global[21]; - std::vector out(26); - //!@opencv_camera The OpenCV, it hurts - if (hgt->use_fisheye) { - cv::Affine3f aff = cv::Affine3f::Identity(); - cv::fisheye::projectPoints(pts_relative_to_camera, out, aff, hgt->views[view_idx].cameraMatrix, - hgt->views[view_idx].distortion); - } else { - cv::Affine3d aff = cv::Affine3d::Identity(); - // cv::Matx33d rvec = cv::Matx33d:: - cv::Matx33d rotation = aff.rotation(); - cv::projectPoints(pts_relative_to_camera, rotation, aff.translation(), - hgt->views[view_idx].cameraMatrix, hgt->views[view_idx].distortion, out); + for (int i = 0; i < 21; i++) { + invalid[i] = + invalid[i] || !t_camera_models_flip_and_project(&hgt->views[view_idx].hgdist, // + pts_relative_to_camera.col(i).x(), // + pts_relative_to_camera.col(i).y(), // + pts_relative_to_camera.col(i).z(), // + &keypoints_global[i].x, // + &keypoints_global[i].y // + ); } - xrt_vec2 keypoints_global[26]; - bool outside_view[26] = {}; - for (int i = 0; i < 26; i++) { - if (check_outside_view(hgt, hgt->views[view_idx].camera_info, out[i]) || - any_joint_behind_camera) { - outside_view[i] = true; - if (num_outside != NULL) { + + for (int i = 0; i < 21; i++) { + invalid[i] = invalid[i] || + check_outside_view(hgt, hgt->views[view_idx].camera_info, keypoints_global[i]); + } + + if (num_outside != NULL) { + num_outside[view_idx] = 0; + for (int i = 0; i < 21; i++) { + if (invalid[i]) { num_outside[view_idx]++; } } - keypoints_global[i].x = out[i].x / hgt->multiply_px_coord_for_undistort; - keypoints_global[i].y = out[i].y / hgt->multiply_px_coord_for_undistort; - } - if (centers != NULL) { - centers[view_idx] = keypoints_global[XRT_HAND_JOINT_MIDDLE_PROXIMAL]; - } + xrt_vec2 min = keypoints_global[0]; + xrt_vec2 max = keypoints_global[0]; - if (radii != NULL) { - for (int i = 0; i < 26; i++) { - radii[view_idx] = - std::max(radii[view_idx], m_vec2_len(centers[view_idx] - keypoints_global[i])); + for (int i = 0; i < 21; i++) { + xrt_vec2 &pt = keypoints_global[i]; + min.x = fmin(pt.x, min.x); + min.y = fmin(pt.y, min.y); + + max.x = fmax(pt.x, max.x); + max.y = fmax(pt.y, max.y); + } + xrt_vec2 center = m_vec2_mul_scalar(min + max, 0.5); + + float r = fmax(center.x - min.x, center.y - min.y); + + float size = r * 2; + + + hgt->views[view_idx].regions_of_interest_this_frame[hand_idx].center_px = center; + hgt->views[view_idx].regions_of_interest_this_frame[hand_idx].size_px = size; + if (also_debug_output) { + handSquare(debug, center, size, GREEN); } - radii[view_idx] *= hgt->tuneable_values.dyn_radii_fac.val; } - cv::Mat debug = hgt->views[view_idx].debug_out_to_this; if (also_debug_output) { - // for (int finger = 0; finger < 5; finger++) { - // cv::Point last = {(int)keypoints_global[0].x, (int)keypoints_global[0].y}; - // for (int joint = 0; joint < 4; joint++) { - // cv::Point the_new = {(int)keypoints_global[1 + finger * 4 + joint].x, - // (int)keypoints_global[1 + finger * 4 + joint].y}; - - // cv::line(debug, last, the_new, colors[0]); - // last = the_new; - // } - // } - - for (int i = 0; i < 26; i++) { + for (int i = 0; i < 21; i++) { xrt_vec2 loc; loc.x = keypoints_global[i].x; loc.y = keypoints_global[i].y; - handDot(debug, loc, 2, outside_view[i] ? 0.0 : (float)(i) / 26.0, 1, 2); + handDot(debug, loc, 2, invalid[i] ? 0.0 : (float)(i) / 26.0, 1, 2); } } } // for view_idx } +static void +back_project_keypoint_output(struct HandTracking *hgt, // + int hand_idx, // + int view_idx) +{ + + cv::Mat debug = hgt->views[view_idx].debug_out_to_this; + one_frame_one_view &view = hgt->keypoint_outputs[hand_idx].views[view_idx]; + + for (int i = 0; i < 21; i++) { + + //!@todo We're trivially rewriting the stereographic projection for like the 2nd or 3rd time here. We + //! should add an Eigen template for this instead. + + xrt_vec2 dir_sg = + m_vec2_mul_scalar(view.keypoints_in_scaled_stereographic[i].pos_2d, view.stereographic_radius); + + float denom = (1 + dir_sg.x * dir_sg.x + dir_sg.y * dir_sg.y); + xrt_vec3 dir = {}; + dir.x = 2 * dir_sg.x / denom; + dir.y = 2 * dir_sg.y / denom; + dir.z = (-1 + (dir_sg.x * dir_sg.x) + (dir_sg.y * dir_sg.y)) / denom; + + math_quat_rotate_vec3(&view.look_dir, &dir, &dir); + + xrt_vec2 loc = {}; + t_camera_models_flip_and_project(&hgt->views[view_idx].hgdist, // + dir.x, // + dir.y, // + dir.z, // + &loc.x, // + &loc.y // + ); + + handDot(debug, loc, 2, (float)(i) / 26.0, 1, 2); + } +} static bool handle_changed_image_size(HandTracking *hgt, xrt_size &new_one_view_size) @@ -331,8 +337,21 @@ handle_changed_image_size(HandTracking *hgt, xrt_size &new_one_view_size) return false; } + //!@todo optimize: can't we just scale camera matrix/etc correctly? hgt->multiply_px_coord_for_undistort = (float)hgt->calibration_one_view_size_px.h / (float)new_one_view_size.h; hgt->last_frame_one_view_size_px = new_one_view_size; + + for (int view_idx = 0; view_idx < 2; view_idx++) { + hgt->views[view_idx].hgdist.fx = + hgt->views[view_idx].hgdist_orig.fx / hgt->multiply_px_coord_for_undistort; + hgt->views[view_idx].hgdist.fy = + hgt->views[view_idx].hgdist_orig.fy / hgt->multiply_px_coord_for_undistort; + + hgt->views[view_idx].hgdist.cx = + hgt->views[view_idx].hgdist_orig.cx / hgt->multiply_px_coord_for_undistort; + hgt->views[view_idx].hgdist.cy = + hgt->views[view_idx].hgdist_orig.cy / hgt->multiply_px_coord_for_undistort; + } return true; } @@ -342,11 +361,12 @@ hand_confidence_value(float reprojection_error, one_frame_input &input) float out_confidence = 0.0f; for (int view_idx = 0; view_idx < 2; view_idx++) { for (int i = 0; i < 21; i++) { - out_confidence += input.views[view_idx].confidences[i]; + // whatever + out_confidence += input.views[view_idx].keypoints_in_scaled_stereographic[i].confidence_xy; } } out_confidence /= 42.0f; // number of hand joints - float reproj_err_mul = 1.0f / (reprojection_error + 1.0f); + float reproj_err_mul = 1.0f / ((reprojection_error * 10) + 1.0f); return out_confidence * reproj_err_mul; } @@ -366,46 +386,56 @@ check_new_user_event(struct HandTracking *hgt) hgt->hand_seen_before[0] = false; hgt->hand_seen_before[1] = false; hgt->refinement.hand_size_refinement_schedule_x = 0; + hgt->refinement.optimizing = true; + hgt->target_hand_size = STANDARD_HAND_SIZE; } } + +static float +hand_bounding_boxes_iou(const hand_region_of_interest &one, const hand_region_of_interest &two) +{ + if (!one.found || !two.found) { + return -1; + } + box_iou::Box this_box(one.center_px, one.size_px); + box_iou::Box other_box(two.center_px, two.size_px); + + return boxIOU(this_box, other_box); +} + void dispatch_and_process_hand_detections(struct HandTracking *hgt) { if (hgt->tuneable_values.always_run_detection_model) { // Pretend like nothing was detected last frame. for (int hand_idx = 0; hand_idx < 2; hand_idx++) { - // hgt->last_frame_hand_detected[hand_idx] = false; hgt->this_frame_hand_detected[hand_idx] = false; - hgt->histories[hand_idx].hands.clear(); - hgt->histories[hand_idx].timestamps.clear(); + hgt->history_hands[hand_idx].clear(); } } - // view, hand - hand_bounding_box states[2][2] = {}; - - // paranoia - states[0]->found = false; - states[1]->found = false; - - states[0]->confidence = 0; - states[1]->confidence = 0; - - hand_detection_run_info infos[2] = {}; + // Mega paranoia, should get optimized out. + for (int view_idx = 0; view_idx < 2; view_idx++) { + for (int hand_idx = 0; hand_idx < 2; hand_idx++) { + infos[view_idx].outputs[hand_idx].found = false; + infos[view_idx].outputs[hand_idx].hand_detection_confidence = 0; + infos[view_idx].outputs[hand_idx].provenance = ROIProvenance::HAND_DETECTION; + } + } + + + infos[0].view = &hgt->views[0]; infos[1].view = &hgt->views[1]; - infos[0].outputs[0] = &states[0][0]; - infos[0].outputs[1] = &states[0][1]; - infos[1].outputs[0] = &states[1][0]; - infos[1].outputs[1] = &states[1][1]; + bool no_hands_detected_last_frame = !(hgt->this_frame_hand_detected[0] || hgt->this_frame_hand_detected[1]); size_t active_camera = hgt->detection_counter++ % 2; @@ -423,57 +453,90 @@ dispatch_and_process_hand_detections(struct HandTracking *hgt) for (int hand_idx = 0; hand_idx < 2; hand_idx++) { - // run_hand_detection(&infos[active_camera]); - - - - // float confidence_sum = states[active_camera][hand_idx].confidence; - float confidence_sum = - (states[0][hand_idx].confidence + states[1][hand_idx].confidence) / float(num_views); + float confidence_sum = (infos[0].outputs[hand_idx].hand_detection_confidence + + infos[1].outputs[hand_idx].hand_detection_confidence) / + float(num_views); if (confidence_sum < 0.9) { continue; } + if (hgt->tuneable_values.always_run_detection_model || !hgt->last_frame_hand_detected[hand_idx]) { - // hgt->views[active_camera].bboxes_this_frame[hand_idx] = - // states[active_camera][hand_idx]; + + bool good_to_go = true; - for (int view_idx = 0; view_idx < 2; view_idx++) { - hand_bounding_box this_state = states[view_idx][hand_idx]; - hand_bounding_box other_state = states[view_idx][!hand_idx]; - if (!this_state.found || !other_state.found) { - continue; + + if (no_hands_detected_last_frame) { + // Stop overlapping _double_ hand detections - detecting both hands in the same place. + // This happens a lot if you put your hands together (we can't track intertwining hands + // yet) + for (int view_idx = 0; view_idx < 2; view_idx++) { + float iou = hand_bounding_boxes_iou(infos[view_idx].outputs[hand_idx], + infos[view_idx].outputs[!hand_idx]); + if (iou > hgt->tuneable_values.mpiou_double_detection.val) { + HG_DEBUG(hgt, + "Rejected double detection because the iou for hand idx %d, " + "view idx " + "%d was %f", + hand_idx, view_idx, iou); + good_to_go = false; + break; + } } + } else { + // Stop overlapping _single_ hand detections - detecting one hand where another hand is + // already tracked. This happens a lot if you trick the hand detector into thinking your + // left hand is a right hand. + for (int view_idx = 0; view_idx < 2; view_idx++) { + hand_region_of_interest &this_state = infos[view_idx].outputs[hand_idx]; - xrt::auxiliary::util::box_iou::Box this_box(states[view_idx][hand_idx].center, - states[view_idx][hand_idx].size_px); - xrt::auxiliary::util::box_iou::Box other_box(states[view_idx][!hand_idx].center, - states[view_idx][!hand_idx].size_px); - - float iou = xrt::auxiliary::util::box_iou::boxIOU(this_box, other_box); - if (iou > hgt->tuneable_values.max_permissible_iou.val) { - HG_WARN( - hgt, - "Rejected detection because the iou for hand idx %d, view idx %d was %f", - hand_idx, view_idx, iou); - good_to_go = false; - break; + // Note that this is not just the other state + hand_region_of_interest &other_state = + hgt->views[view_idx].regions_of_interest_this_frame[!hand_idx]; + float iou = hand_bounding_boxes_iou(this_state, other_state); + if (iou > hgt->tuneable_values.mpiou_single_detection.val) { + HG_DEBUG(hgt, + "Rejected single detection because the iou for hand idx %d, " + "view idx " + "%d was %f", + hand_idx, view_idx, iou); + good_to_go = false; + break; + } } } + + if (good_to_go) { - hgt->views[0].bboxes_this_frame[hand_idx] = states[0][hand_idx]; - hgt->views[1].bboxes_this_frame[hand_idx] = states[1][hand_idx]; - // if (hgt->views[!active_camera].bboxes_this_frame[h]) - hgt->this_frame_hand_detected[hand_idx] = true; + // Note we already initialized the previous-keypoints-input to nonexistent above this. + hgt->views[0].regions_of_interest_this_frame[hand_idx] = infos[0].outputs[hand_idx]; + hgt->views[1].regions_of_interest_this_frame[hand_idx] = infos[1].outputs[hand_idx]; } } - } - // Most of the time, this codepath runs - we predict where the hand should be based on the last - // two frames. + + hgt->this_frame_hand_detected[hand_idx] = true; + } } +void +hand_joint_set_to_eigen_21(const xrt_hand_joint_set &set, Eigen::Array &out) +{ + int acc_idx = 0; + + out.col(acc_idx++) = map_vec3(set.values.hand_joint_set_default[XRT_HAND_JOINT_WRIST].relation.pose.position); + for (int finger = 0; finger < 5; finger++) { + for (int joint = 1; joint < 5; joint++) { + xrt_hand_joint j = joints_5x5_to_26[finger][joint]; + const xrt_vec3 &jp = set.values.hand_joint_set_default[j].relation.pose.position; + out.col(acc_idx++) = map_vec3(jp); + } + } +} + +// Most of the time, this codepath runs - we predict where the hand should be based on the last +// two frames. void predict_new_regions_of_interest(struct HandTracking *hgt) { @@ -483,8 +546,11 @@ predict_new_regions_of_interest(struct HandTracking *hgt) // If we only have *one* frame, we just reuse the same bounding box and hope the hand // hasn't moved too much. @todo - if (hgt->histories[hand_idx].timestamps.size() < 2) { - HG_TRACE(hgt, "continuing, size is %zu", hgt->histories[hand_idx].timestamps.size()); + HistoryBuffer, 5> &hh = hgt->history_hands[hand_idx]; + + + if (hh.size() < 2) { + HG_TRACE(hgt, "continuing, size is %zu", hgt->history_hands[hand_idx].size()); continue; } @@ -492,13 +558,11 @@ predict_new_regions_of_interest(struct HandTracking *hgt) // model. hgt->this_frame_hand_detected[hand_idx] = hgt->last_frame_hand_detected[hand_idx]; - hand_history &history = hgt->histories[hand_idx]; - uint64_t time_two_frames_ago = *history.timestamps.get_at_age(1); - uint64_t time_one_frame_ago = *history.timestamps.get_at_age(0); + uint64_t time_two_frames_ago = *hgt->history_timestamps.get_at_age(1); + uint64_t time_one_frame_ago = *hgt->history_timestamps.get_at_age(0); uint64_t time_now = hgt->current_frame_timestamp; - xrt_hand_joint_set *set_two_frames_ago = history.hands.get_at_age(1); - xrt_hand_joint_set *set_one_frame_ago = history.hands.get_at_age(0); + // double dt_past = (double)() / (double)U_TIME_1S_IN_NS; double dt_past = time_ns_to_s(time_one_frame_ago - time_two_frames_ago); @@ -506,68 +570,32 @@ predict_new_regions_of_interest(struct HandTracking *hgt) double dt_now = time_ns_to_s(time_now - time_one_frame_ago); - xrt_vec3 vels[26]; - - for (int i = 0; i < 26; i++) { + Eigen::Array &n_minus_two = *hh.get_at_age(1); + Eigen::Array &n_minus_one = *hh.get_at_age(0); - xrt_vec3 from_to = set_one_frame_ago->values.hand_joint_set_default[i].relation.pose.position - - set_two_frames_ago->values.hand_joint_set_default[i].relation.pose.position; - vels[i] = m_vec3_mul_scalar(from_to, 1.0 / dt_past); + Eigen::Array add; - // U_LOG_E("%f %f %f", vels[i].x, vels[i].y, vels[i].z); - } - xrt_vec3 positions_last_frame[26]; - // xrt_vec3 predicted_positions_this_frame[26]; - xrt_hand_joint_set predicted_positions_this_frame; + add = n_minus_one - n_minus_two; - for (int i = 0; i < 26; i++) { - positions_last_frame[i] = - set_one_frame_ago->values.hand_joint_set_default[i].relation.pose.position; + add *= (dt_now * hgt->tuneable_values.amount_to_lerp_prediction.val) / dt_past; - //!@todo I dunno if this is right. - // Number of times this has been changed without rigorously testing: 1 - float lerp_between_last_frame_and_predicted = - hgt->tuneable_values.amount_to_lerp_prediction.val; - predicted_positions_this_frame.values.hand_joint_set_default[i].relation.pose.position = - positions_last_frame[i] + (vels[i] * dt_now * lerp_between_last_frame_and_predicted); - } - xrt_vec2 centers[2] = {}; - float radii[2] = {}; - int num_outside[2] = {0, 0}; + hgt->pose_predicted_keypoints[hand_idx] = n_minus_one + add; - back_project( // - hgt, // - &predicted_positions_this_frame, // - hgt->tuneable_values.scribble_predictions_into_this_frame && hgt->debug_scribble, // - centers, // - radii, // - num_outside); + + int num_outside[2]; + back_project(hgt, hgt->pose_predicted_keypoints[hand_idx], hand_idx, + hgt->tuneable_values.scribble_predictions_into_next_frame && hgt->debug_scribble, + num_outside); for (int view_idx = 0; view_idx < 2; view_idx++) { if (num_outside[view_idx] < hgt->tuneable_values.max_num_outside_view) { - hgt->views[view_idx].bboxes_this_frame[hand_idx].center = centers[view_idx]; - hgt->views[view_idx].bboxes_this_frame[hand_idx].size_px = radii[view_idx]; - hgt->views[view_idx].bboxes_this_frame[hand_idx].found = true; + hgt->views[view_idx].regions_of_interest_this_frame[hand_idx].provenance = + ROIProvenance::POSE_PREDICTION; + hgt->views[view_idx].regions_of_interest_this_frame[hand_idx].found = true; + } else { - hgt->views[view_idx].bboxes_this_frame[hand_idx].found = false; - } - } - } - - if (hgt->debug_scribble) { - - for (int view_idx = 0; view_idx < 2; view_idx++) { - for (int hand_idx = 0; hand_idx < 2; hand_idx++) { - if (!hgt->views[view_idx].bboxes_this_frame[hand_idx].found) { - continue; - } - xrt_vec2 &_pt = hgt->views[view_idx].bboxes_this_frame[hand_idx].center; - float &size = hgt->views[view_idx].bboxes_this_frame[hand_idx].size_px; - cv::Point2i pt(_pt.x, _pt.y); - cv::rectangle(hgt->views[view_idx].debug_out_to_this, - cv::Rect(pt - cv::Point2i(size / 2, size / 2), cv::Size(size, size)), - colors[hand_idx], 1); + hgt->views[view_idx].regions_of_interest_this_frame[hand_idx].found = false; } } } @@ -580,8 +608,8 @@ predict_new_regions_of_interest(struct HandTracking *hgt) // around. // // If we were only concerned about the first one, we'd do some simple depth testing to figure out which one is -// closer to the hand and only discard the further-away hand. But the second one is such a common (and bad) failure mode -// that we really just need to stop tracking all hands if they start overlapping. +// closer to the hand and only discard the further-away hand. But the second one is such a common (and bad) +// failure mode that we really just need to stop tracking all hands if they start overlapping. //!@todo I really want to try making a discrete optimizer that looks at recent info and decides whether to drop tracking //! for a hand, switch its handedness or switch to some forthcoming overlapping-hands model. This would likely work by @@ -591,15 +619,15 @@ stop_everything_if_hands_are_overlapping(struct HandTracking *hgt) { bool ok = true; for (int view_idx = 0; view_idx < 2; view_idx++) { - hand_bounding_box left_box = hgt->views[view_idx].bboxes_this_frame[0]; - hand_bounding_box right_box = hgt->views[view_idx].bboxes_this_frame[1]; + hand_region_of_interest left_box = hgt->views[view_idx].regions_of_interest_this_frame[0]; + hand_region_of_interest right_box = hgt->views[view_idx].regions_of_interest_this_frame[1]; if (!left_box.found || !right_box.found) { continue; } - box_iou::Box this_nbox(left_box.center, right_box.size_px); - box_iou::Box other_nbox(right_box.center, right_box.size_px); + box_iou::Box this_nbox(left_box.center_px, right_box.size_px); + box_iou::Box other_nbox(right_box.center_px, right_box.size_px); float iou = box_iou::boxIOU(this_nbox, other_nbox); - if (iou > hgt->tuneable_values.max_permissible_iou.val) { + if (iou > hgt->tuneable_values.mpiou_any.val) { HG_DEBUG(hgt, "Stopped tracking because iou was %f in view %d", iou, view_idx); ok = false; break; @@ -608,12 +636,19 @@ stop_everything_if_hands_are_overlapping(struct HandTracking *hgt) if (!ok) { for (int view_idx = 0; view_idx < 2; view_idx++) { for (int hand_idx = 0; hand_idx < 2; hand_idx++) { - hgt->views[view_idx].bboxes_this_frame[hand_idx].found = false; + hgt->views[view_idx].regions_of_interest_this_frame[hand_idx].found = false; } } } } +bool +hand_too_far(struct HandTracking *hgt, xrt_hand_joint_set &set) +{ + xrt_vec3 dp = set.values.hand_joint_set_default[XRT_HAND_JOINT_PALM].relation.pose.position; + return (m_vec3_len(dp) > hgt->tuneable_values.max_hand_dist.val); +} + void scribble_image_boundary(struct HandTracking *hgt) { @@ -674,8 +709,6 @@ HandTracking::~HandTracking() u_frame_times_widget_teardown(&this->ft_widget); } - -// THIS FUNCTION MUST NEVER EXPLICITLY RETURN, BECAUSE OF tick_up. void HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, struct xrt_frame *left_frame, @@ -751,14 +784,12 @@ HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, scribble_image_boundary(hgt); // Let's check that the collage size is actually as big as we think it is - static_assert(720 == (kVisSpacerSize + ((kKeypointInputSize + kVisSpacerSize) * 4) + - ((kDetectionInputSize + 8)))); + static_assert(1064 == (8 + ((128 + 8) * 4) + ((320 + 8)) + ((80 + 8) * 2) + 8)); + static_assert(504 == (240 + 240 + 8 + 8 + 8)); + static_assert(552 == (8 + (128 + 8) * 4)); - static_assert(344 == (kDetectionInputSize + kDetectionInputSize + // - kVisSpacerSize + kVisSpacerSize + kVisSpacerSize)); - - const int w = 720; - const int h = 344; + const int w = 1064; + const int h = 552; u_frame_create_one_off(XRT_FORMAT_L8, w, h, &hgt->visualizers.xrtframe); hgt->visualizers.xrtframe->timestamp = hgt->current_frame_timestamp; @@ -781,25 +812,22 @@ HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, check_new_user_event(hgt); + + // Every now and then if we're not already tracking both hands, try to detect new hands. bool saw_both_hands_last_frame = hgt->last_frame_hand_detected[0] && hgt->last_frame_hand_detected[1]; if (!saw_both_hands_last_frame) { dispatch_and_process_hand_detections(hgt); } - // For already-tracked hands, predict where we think they should be in image space based on the past two - // frames. Note that this always happens We want to pose-predict already tracked hands but not mess with - // just-detected hands - if (!hgt->tuneable_values.always_run_detection_model) { - predict_new_regions_of_interest(hgt); - } stop_everything_if_hands_are_overlapping(hgt); //!@todo does this go here? - // If no hand regions of interest were found anywhere, there's no hand - register that in the state tracker + // If no hand regions of interest were found anywhere, there's no hand - register that in the state + // tracker for (int hand_idx = 0; hand_idx < 2; hand_idx++) { - if (!(hgt->views[0].bboxes_this_frame[hand_idx].found || - hgt->views[1].bboxes_this_frame[hand_idx].found)) { + if (!(hgt->views[0].regions_of_interest_this_frame[hand_idx].found || + hgt->views[1].regions_of_interest_this_frame[hand_idx].found)) { hgt->this_frame_hand_detected[hand_idx] = false; } } @@ -808,8 +836,7 @@ HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, // Dispatch keypoint estimator neural nets for (int hand_idx = 0; hand_idx < 2; hand_idx++) { for (int view_idx = 0; view_idx < 2; view_idx++) { - - if (!hgt->views[view_idx].bboxes_this_frame[hand_idx].found) { + if (!hgt->views[view_idx].regions_of_interest_this_frame[hand_idx].found) { continue; } @@ -822,15 +849,14 @@ HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, } u_worker_group_wait_all(hgt->group); - // Spaghetti logic for optimizing hand size bool any_hands_are_only_visible_in_one_view = false; for (int hand_idx = 0; hand_idx < 2; hand_idx++) { - any_hands_are_only_visible_in_one_view = // - any_hands_are_only_visible_in_one_view || // - (hgt->views[0].bboxes_this_frame[hand_idx].found != // - hgt->views[1].bboxes_this_frame[hand_idx].found); + any_hands_are_only_visible_in_one_view = // + any_hands_are_only_visible_in_one_view || // + (hgt->views[0].regions_of_interest_this_frame[hand_idx].found != // + hgt->views[1].regions_of_interest_this_frame[hand_idx].found); } constexpr float mul_max = 1.0; @@ -840,12 +866,12 @@ HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, if ((hgt->refinement.hand_size_refinement_schedule_x > frame_max)) { hgt->refinement.hand_size_refinement_schedule_y = mul_max; optimize_hand_size = false; - hgt->refinement.optimizing = false; } else { hgt->refinement.hand_size_refinement_schedule_y = powf((hgt->refinement.hand_size_refinement_schedule_x / frame_max), 2) * mul_max; optimize_hand_size = true; + hgt->refinement.optimizing = true; } if (any_hands_are_only_visible_in_one_view) { @@ -861,81 +887,132 @@ HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, std::min(hgt->refinement.hand_size_refinement_schedule_x, frame_max / 2); } + optimize_hand_size = optimize_hand_size && hgt->tuneable_values.optimize_hand_size; + int num_hands = 0; float avg_hand_size = 0; // Dispatch the optimizers! for (int hand_idx = 0; hand_idx < 2; hand_idx++) { + + + for (int view_idx = 0; view_idx < 2; view_idx++) { + if (!hgt->views[view_idx].regions_of_interest_this_frame[hand_idx].found) { + // to the next view + continue; + } + + if (!hgt->keypoint_outputs[hand_idx].views[view_idx].active) { + HG_DEBUG(hgt, "Removing hand %d because keypoint estimator said to!", hand_idx); + hgt->this_frame_hand_detected[hand_idx] = false; + } + } + if (!hgt->this_frame_hand_detected[hand_idx]) { continue; } - one_frame_input input; for (int view = 0; view < 2; view++) { - keypoint_output *from_model = &hgt->views[view].keypoint_outputs[hand_idx]; - input.views[view].active = hgt->views[view].bboxes_this_frame[hand_idx].found; - if (!input.views[view].active) { - continue; - } - for (int i = 0; i < 21; i++) { - input.views[view].confidences[i] = from_model->hand_tan_space.confidences[i]; - // std::cout << input.views[view].confidences[i] << std::endl; - input.views[view].rays[i] = correct_direction(from_model->hand_tan_space.kps[i]); + hand_region_of_interest &from_model = hgt->views[view].regions_of_interest_this_frame[hand_idx]; + if (!from_model.found) { + hgt->keypoint_outputs[hand_idx].views[view].active = false; } } + if (hgt->tuneable_values.scribble_keypoint_model_outputs && hgt->debug_scribble) { + for (int view_idx = 0; view_idx < 2; view_idx++) { + if (!hgt->keypoint_outputs[hand_idx].views[view_idx].active) { + continue; + } + + back_project_keypoint_output(hgt, hand_idx, view_idx); + } + } struct xrt_hand_joint_set *put_in_set = out_xrt_hands[hand_idx]; lm::KinematicHandLM *hand = hgt->kinematic_hands[hand_idx]; - //!@todo - // ABOUT TWO MINUTES OF THOUGHT WERE PUT INTO THIS VALUE - float reprojection_error_threshold = 0.35f; + float reprojection_error_threshold = hgt->tuneable_values.max_reprojection_error.val; + float smoothing_factor = hgt->tuneable_values.opt_smooth_factor.val; + + if (hgt->last_frame_hand_detected[hand_idx]) { + if (hgt->tuneable_values.enable_framerate_based_smoothing) { + int64_t one_before = *hgt->history_timestamps.get_at_age(0); + int64_t now = hgt->current_frame_timestamp; + + uint64_t diff = now - one_before; + double diff_d = time_ns_to_s(diff); + smoothing_factor = hgt->tuneable_values.opt_smooth_factor.val * (1 / 60.0f) / diff_d; + } + } else { + reprojection_error_threshold = hgt->tuneable_values.max_reprojection_error.val; + } + + float out_hand_size; - //!@todo Optimize: We can have one of these on each thread + //!@todo optimize: We can have one of these on each thread float reprojection_error; - lm::optimizer_run(hand, // - input, // - !hgt->last_frame_hand_detected[hand_idx], // + lm::optimizer_run(hand, // + hgt->keypoint_outputs[hand_idx], // + !hgt->last_frame_hand_detected[hand_idx], // + smoothing_factor, optimize_hand_size, // hgt->target_hand_size, // hgt->refinement.hand_size_refinement_schedule_y, // - *put_in_set, // - out_hand_size, // + hgt->tuneable_values.amt_use_depth.val, + *put_in_set, // + out_hand_size, // reprojection_error); - avg_hand_size += out_hand_size; - num_hands++; + if (reprojection_error > reprojection_error_threshold) { HG_DEBUG(hgt, "Reprojection error above threshold!"); hgt->this_frame_hand_detected[hand_idx] = false; - continue; } + + if (hand_too_far(hgt, *put_in_set)) { + HG_DEBUG(hgt, "Hand too far away"); + hgt->this_frame_hand_detected[hand_idx] = false; + continue; + } + + + avg_hand_size += out_hand_size; + num_hands++; + if (!any_hands_are_only_visible_in_one_view) { hgt->refinement.hand_size_refinement_schedule_x += - hand_confidence_value(reprojection_error, input); + hand_confidence_value(reprojection_error, hgt->keypoint_outputs[hand_idx]); } u_hand_joints_apply_joint_width(put_in_set); - // Just debug scribbling - remove this in hard production environment - if (hgt->tuneable_values.scribble_optimizer_outputs && hgt->debug_scribble) { - back_project(hgt, put_in_set, true, NULL, NULL, NULL); - } + put_in_set->hand_pose.pose = hgt->hand_pose_camera_offset; put_in_set->hand_pose.relation_flags = valid_flags_ht; + Eigen::Array asf = {}; - hgt->histories[hand_idx].hands.push_back(*put_in_set); - hgt->histories[hand_idx].timestamps.push_back(hgt->current_frame_timestamp); + + + hand_joint_set_to_eigen_21(*put_in_set, asf); + + back_project(hgt, // + asf, // + hand_idx, // + hgt->tuneable_values.scribble_optimizer_outputs && hgt->debug_scribble, // + NULL // + ); + + hgt->history_hands[hand_idx].push_back(asf); } // More hand-size-optimization spaghetti @@ -952,15 +1029,32 @@ HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, hgt->hand_seen_before[hand_idx] || hgt->this_frame_hand_detected[hand_idx]; if (!hgt->last_frame_hand_detected[hand_idx]) { - hgt->views[0].bboxes_this_frame[hand_idx].found = false; - hgt->views[1].bboxes_this_frame[hand_idx].found = false; - hgt->histories[hand_idx].hands.clear(); - hgt->histories[hand_idx].timestamps.clear(); + hgt->views[0].regions_of_interest_this_frame[hand_idx].found = false; + hgt->views[1].regions_of_interest_this_frame[hand_idx].found = false; + hgt->history_hands[hand_idx].clear(); } } + // estimators next frame. Also, if next frame's hand will be outside of the camera's field of view, mark it as + // inactive this frame. This stops issues where our hand detector detects hands that are slightly too close to + // the edge, causing flickery hands. + if (!hgt->tuneable_values.always_run_detection_model) { + predict_new_regions_of_interest(hgt); + bool still_found[2] = {hgt->last_frame_hand_detected[0], hgt->last_frame_hand_detected[1]}; + still_found[0] = hgt->views[0].regions_of_interest_this_frame[0].found || + hgt->views[1].regions_of_interest_this_frame[0].found; + still_found[1] = hgt->views[0].regions_of_interest_this_frame[1].found || + hgt->views[1].regions_of_interest_this_frame[1].found; + + for (int hand_idx = 0; hand_idx < 2; hand_idx++) { + out_xrt_hands[hand_idx]->is_active = still_found[hand_idx]; + } + } + + // If the debug UI is active, push to the frame-timing widget u_frame_times_widget_push_sample(&hgt->ft_widget, hgt->current_frame_timestamp); + hgt->history_timestamps.push_back(hgt->current_frame_timestamp); // If the debug UI is active, push our debug frame if (hgt->debug_scribble) { @@ -1002,7 +1096,7 @@ t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib, { XRT_TRACE_MARKER(); - auto hgt = new xrt::tracking::hand::mercury::HandTracking(); + xrt::tracking::hand::mercury::HandTracking *hgt = new xrt::tracking::hand::mercury::HandTracking(); // Setup logging first. We like logging. hgt->log_level = xrt::tracking::hand::mercury::debug_get_log_option_mercury_log(); @@ -1053,17 +1147,26 @@ t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib, u_var_add_ro_f32(hgt, &hgt->ft_widget.fps, "FPS!"); u_var_add_f32_timing(hgt, hgt->ft_widget.debug_var, "Frame timing!"); - u_var_add_ro_f32(hgt, &hgt->target_hand_size, "Hand size (Meters between wrist and middle-proximal joint)"); + u_var_add_f32(hgt, &hgt->target_hand_size, "Hand size (Meters between wrist and middle-proximal joint)"); u_var_add_ro_f32(hgt, &hgt->refinement.hand_size_refinement_schedule_x, "Schedule (X value)"); u_var_add_ro_f32(hgt, &hgt->refinement.hand_size_refinement_schedule_y, "Schedule (Y value)"); u_var_add_bool(hgt, &hgt->tuneable_values.new_user_event, "Trigger new-user event!"); + hgt->tuneable_values.optimize_hand_size = debug_get_bool_option_mercury_optimize_hand_size(); + hgt->tuneable_values.dyn_radii_fac.max = 4.0f; hgt->tuneable_values.dyn_radii_fac.min = 0.3f; hgt->tuneable_values.dyn_radii_fac.step = 0.02f; - hgt->tuneable_values.dyn_radii_fac.val = 3.0f; + hgt->tuneable_values.dyn_radii_fac.val = 1.7f; + + + hgt->tuneable_values.after_detection_fac.max = 1.0f; + hgt->tuneable_values.after_detection_fac.min = 0.1f; + hgt->tuneable_values.after_detection_fac.step = 0.01f; + // note that sqrt2/2 is what should make sense, but I tuned it down to this. Detection model needs work. + hgt->tuneable_values.after_detection_fac.val = 0.65f; // but 0.5 is closer to what we actually want hgt->tuneable_values.dyn_joint_y_angle_error.max = 40.0f; hgt->tuneable_values.dyn_joint_y_angle_error.min = 0.0f; @@ -1075,20 +1178,72 @@ t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib, hgt->tuneable_values.amount_to_lerp_prediction.min = -1.5f; hgt->tuneable_values.amount_to_lerp_prediction.step = 0.01f; hgt->tuneable_values.amount_to_lerp_prediction.val = 0.4f; - hgt->tuneable_values.max_permissible_iou.max = 1.0f; - hgt->tuneable_values.max_permissible_iou.min = 0.0f; - hgt->tuneable_values.max_permissible_iou.step = 0.01f; - hgt->tuneable_values.max_permissible_iou.val = 0.8f; + + hgt->tuneable_values.amt_use_depth.max = 1.0f; + hgt->tuneable_values.amt_use_depth.min = 0.0f; + hgt->tuneable_values.amt_use_depth.step = 0.01f; + hgt->tuneable_values.amt_use_depth.val = 0.01f; + + hgt->tuneable_values.mpiou_any.max = 1.0f; + hgt->tuneable_values.mpiou_any.min = 0.0f; + hgt->tuneable_values.mpiou_any.step = 0.01f; + hgt->tuneable_values.mpiou_any.val = 0.7f; + + hgt->tuneable_values.mpiou_double_detection.max = 1.0f; + hgt->tuneable_values.mpiou_double_detection.min = 0.0f; + hgt->tuneable_values.mpiou_double_detection.step = 0.01f; + hgt->tuneable_values.mpiou_double_detection.val = 0.4f; + + hgt->tuneable_values.mpiou_single_detection.max = 1.0f; + hgt->tuneable_values.mpiou_single_detection.min = 0.0f; + hgt->tuneable_values.mpiou_single_detection.step = 0.01f; + hgt->tuneable_values.mpiou_single_detection.val = 0.2f; + + hgt->tuneable_values.max_reprojection_error.max = 600.0f; + hgt->tuneable_values.max_reprojection_error.min = 0.0f; + hgt->tuneable_values.max_reprojection_error.step = 0.001f; + hgt->tuneable_values.max_reprojection_error.val = 15.0f; + + hgt->tuneable_values.opt_smooth_factor.max = 30.0f; + hgt->tuneable_values.opt_smooth_factor.min = 0.0f; + hgt->tuneable_values.opt_smooth_factor.step = 0.01f; + hgt->tuneable_values.opt_smooth_factor.val = 2.0f; + + hgt->tuneable_values.max_hand_dist.max = 1000000.0f; + hgt->tuneable_values.max_hand_dist.min = 0.0f; + hgt->tuneable_values.max_hand_dist.step = 0.05f; + hgt->tuneable_values.max_hand_dist.val = 1.7f; + + u_var_add_draggable_f32(hgt, &hgt->tuneable_values.amt_use_depth, "Amount to use depth prediction"); + u_var_add_draggable_f32(hgt, &hgt->tuneable_values.dyn_radii_fac, "radius factor (predict)"); + u_var_add_draggable_f32(hgt, &hgt->tuneable_values.after_detection_fac, "radius factor (after hand detection)"); u_var_add_draggable_f32(hgt, &hgt->tuneable_values.dyn_joint_y_angle_error, "max error hand joint"); u_var_add_draggable_f32(hgt, &hgt->tuneable_values.amount_to_lerp_prediction, "Amount to lerp pose-prediction"); - u_var_add_draggable_f32(hgt, &hgt->tuneable_values.max_permissible_iou, "Max permissible IOU"); + u_var_add_draggable_f32(hgt, &hgt->tuneable_values.mpiou_any, "Max permissible IOU (Any)"); + u_var_add_draggable_f32(hgt, &hgt->tuneable_values.mpiou_double_detection, + "Max permissible IOU (For suppressing double detections)"); + u_var_add_draggable_f32(hgt, &hgt->tuneable_values.mpiou_single_detection, + "Max permissible IOU (For suppressing single detections)"); + u_var_add_draggable_f32(hgt, &hgt->tuneable_values.max_reprojection_error, "Max reprojection error"); + u_var_add_draggable_f32(hgt, &hgt->tuneable_values.opt_smooth_factor, "Optimizer smoothing factor"); + u_var_add_draggable_f32(hgt, &hgt->tuneable_values.max_hand_dist, "Max hand distance"); - u_var_add_bool(hgt, &hgt->tuneable_values.scribble_predictions_into_this_frame, "Scribble pose-predictions"); + u_var_add_i32(hgt, &hgt->tuneable_values.max_num_outside_view, + "max allowed number of hand joints outside view"); + + u_var_add_bool(hgt, &hgt->tuneable_values.scribble_predictions_into_next_frame, + "Scribble pose-predictions into next frame"); u_var_add_bool(hgt, &hgt->tuneable_values.scribble_keypoint_model_outputs, "Scribble keypoint model output"); u_var_add_bool(hgt, &hgt->tuneable_values.scribble_optimizer_outputs, "Scribble kinematic optimizer output"); u_var_add_bool(hgt, &hgt->tuneable_values.always_run_detection_model, "Always run detection model"); + u_var_add_bool(hgt, &hgt->tuneable_values.optimize_hand_size, "Optimize hand size"); + u_var_add_bool(hgt, &hgt->tuneable_values.enable_pose_predicted_input, + "Enable pose-predicted input to keypoint model"); + u_var_add_bool(hgt, &hgt->tuneable_values.enable_framerate_based_smoothing, + "Enable framerate-based smoothing (Don't use; surprisingly seems to make things worse)"); + u_var_add_sink_debug(hgt, &hgt->debug_sink_ann, "Annotated camera feeds"); diff --git a/src/xrt/tracking/hand/mercury/hg_sync.hpp b/src/xrt/tracking/hand/mercury/hg_sync.hpp index fbe4caf3d..5c1c75464 100644 --- a/src/xrt/tracking/hand/mercury/hg_sync.hpp +++ b/src/xrt/tracking/hand/mercury/hg_sync.hpp @@ -12,9 +12,10 @@ #include "hg_interface.h" -#include "tracking/t_calibration_opencv.hpp" #include "tracking/t_hand_tracking.h" +#include "tracking/t_camera_models.h" + #include "xrt/xrt_defines.h" #include "xrt/xrt_frame.h" @@ -22,6 +23,7 @@ #include "math/m_vec2.h" #include "math/m_vec3.h" #include "math/m_mathinclude.h" +#include "math/m_eigen_interop.hpp" #include "util/u_frame_times_widget.h" #include "util/u_logging.h" @@ -48,10 +50,10 @@ #include "kine_lm/lm_interface.hpp" - namespace xrt::tracking::hand::mercury { using namespace xrt::auxiliary::util; +using namespace xrt::auxiliary::math; #define HG_TRACE(hgt, ...) U_LOG_IFL_T(hgt->log_level, __VA_ARGS__) #define HG_DEBUG(hgt, ...) U_LOG_IFL_D(hgt->log_level, __VA_ARGS__) @@ -68,30 +70,85 @@ static constexpr uint16_t kVisSpacerSize = 8; static const cv::Scalar RED(255, 30, 30); static const cv::Scalar YELLOW(255, 255, 0); static const cv::Scalar PINK(255, 0, 255); +static const cv::Scalar GREEN(0, 255, 0); static const cv::Scalar colors[2] = {YELLOW, RED}; +constexpr enum xrt_hand_joint joints_5x5_to_26[5][5] = { + { + XRT_HAND_JOINT_WRIST, + XRT_HAND_JOINT_THUMB_METACARPAL, + XRT_HAND_JOINT_THUMB_PROXIMAL, + XRT_HAND_JOINT_THUMB_DISTAL, + XRT_HAND_JOINT_THUMB_TIP, + }, + { + XRT_HAND_JOINT_INDEX_METACARPAL, + XRT_HAND_JOINT_INDEX_PROXIMAL, + XRT_HAND_JOINT_INDEX_INTERMEDIATE, + XRT_HAND_JOINT_INDEX_DISTAL, + XRT_HAND_JOINT_INDEX_TIP, + }, + { + XRT_HAND_JOINT_MIDDLE_METACARPAL, + XRT_HAND_JOINT_MIDDLE_PROXIMAL, + XRT_HAND_JOINT_MIDDLE_INTERMEDIATE, + XRT_HAND_JOINT_MIDDLE_DISTAL, + XRT_HAND_JOINT_MIDDLE_TIP, + }, + { + XRT_HAND_JOINT_RING_METACARPAL, + XRT_HAND_JOINT_RING_PROXIMAL, + XRT_HAND_JOINT_RING_INTERMEDIATE, + XRT_HAND_JOINT_RING_DISTAL, + XRT_HAND_JOINT_RING_TIP, + }, + { + XRT_HAND_JOINT_LITTLE_METACARPAL, + XRT_HAND_JOINT_LITTLE_PROXIMAL, + XRT_HAND_JOINT_LITTLE_INTERMEDIATE, + XRT_HAND_JOINT_LITTLE_DISTAL, + XRT_HAND_JOINT_LITTLE_TIP, + }, +}; + +namespace ROIProvenance { + enum ROIProvenance + { + HAND_DETECTION, + POSE_PREDICTION + }; +} + + // Forward declaration for ht_view struct HandTracking; struct ht_view; -// Using the compiler to stop me from getting 2D space mixed up with 3D space. -struct Hand2D -{ - struct xrt_vec2 kps[21]; - float confidences[21]; -}; struct Hand3D { struct xrt_vec3 kps[21]; }; +using hand21_2d = std::array; + +struct projection_instructions +{ + Eigen::Quaternionf rot_quat = Eigen::Quaternionf::Identity(); + float stereographic_radius = 0; + bool flip = false; + const t_camera_model_params &dist; + + projection_instructions(const t_camera_model_params &dist) : dist(dist) {} +}; + struct model_input_wrap { float *data = nullptr; - // int64_t isn't a bug; that's what onnxruntime wants. - std::vector dimensions = {}; + int64_t dimensions[4]; + size_t num_dimensions = 0; + OrtValue *tensor = nullptr; const char *name; }; @@ -104,31 +161,37 @@ struct onnx_wrap OrtMemoryInfo *meminfo = nullptr; OrtSession *session = nullptr; - std::vector wraps; + std::vector wraps = {}; }; -struct hand_bounding_box +// Multipurpose. +// * Hand detector writes into center_px, size_px, found and hand_detection_confidence +// * Keypoint estimator operates on this to a direction/radius for the stereographic projection, and for the associated +// keypoints. +struct hand_region_of_interest { - xrt_vec2 center; + ROIProvenance::ROIProvenance provenance; + + // Either set by the detection model or by predict_new_regions_of_interest/back_project + xrt_vec2 center_px; float size_px; + bool found; - float confidence; + bool hand_detection_confidence; }; + + struct hand_detection_run_info { ht_view *view; - hand_bounding_box *outputs[2]; + // These are not duplicates of ht_view's regions_of_interest_this_frame! + // If some hands are already tracked, we have logic that only copies new ROIs to this frame's regions of + // interest. + hand_region_of_interest outputs[2]; }; -struct keypoint_output -{ - Hand2D hand_px_coord; - Hand2D hand_tan_space; - float confidences[21]; -}; - struct keypoint_estimation_run_info { ht_view *view; @@ -144,33 +207,19 @@ struct ht_view struct t_camera_extra_info_one_view camera_info; - cv::Mat distortion; - cv::Matx cameraMatrix; - cv::Matx33d rotate_camera_to_stereo_camera; // R1 or R2 + t_camera_model_params hgdist_orig; + // With fx, fy, cx, cy scaled to the current camera resolution as appropriate. + t_camera_model_params hgdist; + cv::Mat run_model_on_this; cv::Mat debug_out_to_this; - struct hand_bounding_box bboxes_this_frame[2]; // left, right + struct hand_region_of_interest regions_of_interest_this_frame[2]; // left, right struct keypoint_estimation_run_info run_info[2]; - - struct keypoint_output keypoint_outputs[2]; }; -struct hand_history -{ - HistoryBuffer hands; - HistoryBuffer timestamps; -}; - -struct output_struct_one_frame -{ - xrt_vec2 left[21]; - float confidences_left[21]; - xrt_vec2 right[21]; - float confidences_right[21]; -}; struct hand_size_refinement { @@ -209,7 +258,6 @@ public: float multiply_px_coord_for_undistort; - bool use_fisheye; struct t_stereo_camera_calibration *calib; @@ -240,22 +288,44 @@ public: lm::KinematicHandLM *kinematic_hands[2]; - // struct hand_detection_state_tracker st_det[2] = {}; + // These are produced by the keypoint estimator and consumed by the nonlinear optimizer + // left hand, right hand THEN left view, right view + struct one_frame_input keypoint_outputs[2]; + + // Used to track whether this hand has *ever* been seen during this user's session, so that we can spend some + // extra time optimizing their hand size if one of their hands isn't visible for the first bit. bool hand_seen_before[2] = {false, false}; + + // Used to: + // * see if a hand is currently being tracked. + // * If so, don't replace the bounding box with that from a hand detection. + // * Also, if both hands are being tracked, we just don't run the hand detector. bool last_frame_hand_detected[2] = {false, false}; + + // Used to decide whether to run the keypoint estimator/nonlinear optimizer. bool this_frame_hand_detected[2] = {false, false}; - struct hand_history histories[2]; + // Used to determine pose-predicted regions of interest. Contains the last five hand keypoint positions, or less + // if the hand has just started being tracked. + HistoryBuffer, 5> history_hands[2] = {}; + + // Contains the last 5 timestamps, or less if hand tracking has just started. + HistoryBuffer history_timestamps = {}; + + + // left hand, right hand + Eigen::Array pose_predicted_keypoints[2]; int detection_counter = 0; struct hand_size_refinement refinement = {}; - // Moses hand size is ~0.095; they has big-ish hands so let's do 0.09 - float target_hand_size = 0.09; + float target_hand_size = STANDARD_HAND_SIZE; xrt_frame *debug_frame; + + // This should be removed. void (*keypoint_estimation_run_func)(void *); @@ -267,15 +337,25 @@ public: struct { bool new_user_event = false; + struct u_var_draggable_f32 after_detection_fac; struct u_var_draggable_f32 dyn_radii_fac; struct u_var_draggable_f32 dyn_joint_y_angle_error; struct u_var_draggable_f32 amount_to_lerp_prediction; - struct u_var_draggable_f32 max_permissible_iou; - bool scribble_predictions_into_this_frame = false; + struct u_var_draggable_f32 amt_use_depth; + struct u_var_draggable_f32 mpiou_any; + struct u_var_draggable_f32 mpiou_single_detection; + struct u_var_draggable_f32 mpiou_double_detection; + struct u_var_draggable_f32 max_reprojection_error; + struct u_var_draggable_f32 opt_smooth_factor; + struct u_var_draggable_f32 max_hand_dist; + bool scribble_predictions_into_next_frame = false; bool scribble_keypoint_model_outputs = false; bool scribble_optimizer_outputs = true; - bool always_run_detection_model = false; - int max_num_outside_view = 3; + bool always_run_detection_model = false; // true + bool optimize_hand_size = true; + int max_num_outside_view = 6; + bool enable_pose_predicted_input = true; + bool enable_framerate_based_smoothing = false; } tuneable_values; @@ -317,4 +397,33 @@ run_keypoint_estimation(void *ptr); void release_onnx_wrap(onnx_wrap *wrap); + +void +make_projection_instructions(t_camera_model_params &dist, + bool flip_after, + float expand_val, + float twist, + Eigen::Array &joints, + projection_instructions &out_instructions, + hand21_2d &out_hand); + + +void +make_projection_instructions_angular(xrt_vec3 direction_3d, + bool flip_after, + float angular_radius, + float expand_val, + float twist, + projection_instructions &out_instructions); + +void +stereographic_project_image(const t_camera_model_params &dist, + const projection_instructions &instructions, + cv::Mat &input_image, + cv::Mat *debug_image, + const cv::Scalar boundary_color, + cv::Mat &out); + + + } // namespace xrt::tracking::hand::mercury diff --git a/src/xrt/tracking/hand/mercury/kine_common.hpp b/src/xrt/tracking/hand/mercury/kine_common.hpp index 4f4d807e3..b341052b8 100644 --- a/src/xrt/tracking/hand/mercury/kine_common.hpp +++ b/src/xrt/tracking/hand/mercury/kine_common.hpp @@ -8,6 +8,7 @@ */ #pragma once +#include #include "xrt/xrt_defines.h" namespace xrt::tracking::hand::mercury { @@ -17,14 +18,35 @@ namespace xrt::tracking::hand::mercury { // Different from `Scalar` in lm_* templates - those can be `Ceres::Jet`s too. typedef float HandScalar; +// Used for "2.5D" joint locations, with a 2D ray direction in stereographic-space and a 1D depth relative to the +// middle-proximal joint. +struct vec2_5 +{ + xrt_vec2 pos_2d; + float depth_relative_to_midpxm; + + float confidence_xy; + float confidence_depth; +}; + +// Using the compiler to stop me from getting 2D space mixed up with 3D space. +using MLOutput2D = std::array; + +struct one_curl +{ + float value; + float variance; +}; // Inputs to kinematic optimizers //!@todo Ask Ryan if adding `= {}` only does something if we do one_frame_one_view bla = {}. struct one_frame_one_view { bool active = true; - xrt_vec3 rays[21] = {}; - HandScalar confidences[21] = {}; + xrt_quat look_dir; + float stereographic_radius; + MLOutput2D keypoints_in_scaled_stereographic; + one_curl curls[5]; }; struct one_frame_input @@ -64,4 +86,9 @@ namespace Joint21 { }; } +//!@todo These are not backed up by real anthropometry data; they are just guesstimates. Patches welcome! +constexpr HandScalar MIN_HAND_SIZE = 0.06; +constexpr HandScalar STANDARD_HAND_SIZE = 0.09; +constexpr HandScalar MAX_HAND_SIZE = 0.12; + } // namespace xrt::tracking::hand::mercury diff --git a/src/xrt/tracking/hand/mercury/kine_lm/lm_defines.hpp b/src/xrt/tracking/hand/mercury/kine_lm/lm_defines.hpp index eb4541705..8368d6d00 100644 --- a/src/xrt/tracking/hand/mercury/kine_lm/lm_defines.hpp +++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_defines.hpp @@ -8,11 +8,8 @@ */ #pragma once -// #include -// #include #include "math/m_mathinclude.h" #include "../kine_common.hpp" -#include namespace xrt::tracking::hand::mercury::lm { @@ -48,17 +45,21 @@ static constexpr size_t kNumOrientationsInFinger = 4; // Not tested/tuned well enough; might make tracking slow. #undef USE_HAND_PLAUSIBILITY +// Should work, but our neural nets aren't good enough yet. +#undef USE_HAND_CURLS + +#undef RESIDUALS_HACKING static constexpr size_t kMetacarpalBoneDim = 3; static constexpr size_t kProximalBoneDim = 2; -static constexpr size_t kFingerDim = kProximalBoneDim + kMetacarpalBoneDim + 2; +static constexpr size_t kFingerDim = kProximalBoneDim + 2; static constexpr size_t kThumbDim = kMetacarpalBoneDim + 2; static constexpr size_t kHandSizeDim = 1; static constexpr size_t kHandTranslationDim = 3; static constexpr size_t kHandOrientationDim = 3; - +// HRTC = Hand Residual Temporal Consistency static constexpr size_t kHRTC_HandSize = 1; static constexpr size_t kHRTC_RootBoneTranslation = 3; static constexpr size_t kHRTC_RootBoneOrientation = 3; // Direct difference between the two angle-axis rotations. This @@ -69,12 +70,21 @@ static constexpr size_t kHRTC_ThumbCurls = 2; static constexpr size_t kHRTC_ProximalSimilarity = 2; -static constexpr size_t kHRTC_FingerMCPSwingTwist = 3; +static constexpr size_t kHRTC_FingerMCPSwingTwist = 0; static constexpr size_t kHRTC_FingerPXMSwing = 2; static constexpr size_t kHRTC_FingerCurls = 2; static constexpr size_t kHRTC_CurlSimilarity = 1; -static constexpr size_t kHandResidualOneSideSize = 21 * 2; + +static constexpr size_t kHandResidualOneSideXY = (kNumNNJoints * 2); +static constexpr size_t kHandResidualOneSideDepth = 20; // one less because midxpm joint isn't used +#ifdef USE_HAND_CURLS +static constexpr size_t kHandResidualOneSideMatchCurls = 4; +#else +static constexpr size_t kHandResidualOneSideMatchCurls = 0; +#endif +static constexpr size_t kHandResidualOneSideSize = + kHandResidualOneSideXY + kHandResidualOneSideDepth + kHandResidualOneSideMatchCurls; static constexpr size_t kHandResidualTemporalConsistencyOneFingerSize = // kHRTC_FingerMCPSwingTwist + // @@ -97,27 +107,49 @@ static constexpr size_t kHandResidualTemporalConsistencySize = // 0; -// Factors to multiply different values by to get a smooth hand trajectory without introducing too much latency +class HandStability +{ +public: + HandScalar stabilityRoot; + HandScalar stabilityCurlRoot; + HandScalar stabilityOtherRoot; -// 1.0 is good, a little jittery. -// Anything above 3.0 generally breaks. -static constexpr HandScalar kStabilityRoot = 1.0; -static constexpr HandScalar kStabilityCurlRoot = kStabilityRoot * 0.03f; -static constexpr HandScalar kStabilityOtherRoot = kStabilityRoot * 0.03f; + HandScalar stabilityThumbMCPSwing; + HandScalar stabilityThumbMCPTwist; -static constexpr HandScalar kStabilityThumbMCPSwing = kStabilityCurlRoot * 1.5f; -static constexpr HandScalar kStabilityThumbMCPTwist = kStabilityCurlRoot * 1.5f; + HandScalar stabilityFingerMCPSwing; + HandScalar stabilityFingerMCPTwist; -static constexpr HandScalar kStabilityFingerMCPSwing = kStabilityCurlRoot * 3.0f; -static constexpr HandScalar kStabilityFingerMCPTwist = kStabilityCurlRoot * 3.0f; + HandScalar stabilityFingerPXMSwingX; + HandScalar stabilityFingerPXMSwingY; -static constexpr HandScalar kStabilityFingerPXMSwingX = kStabilityCurlRoot * 1.0f; -static constexpr HandScalar kStabilityFingerPXMSwingY = kStabilityCurlRoot * 1.6f; + HandScalar stabilityRootPosition; + HandScalar stabilityHandSize; -static constexpr HandScalar kStabilityRootPosition = kStabilityOtherRoot * 30; -static constexpr HandScalar kStabilityHandSize = kStabilityOtherRoot * 1000; + HandScalar stabilityHandOrientationZ; + HandScalar stabilityHandOrientationXY; + HandStability(float root = 15.0f) + { + this->stabilityRoot = root; + this->stabilityCurlRoot = this->stabilityRoot * 0.03f; + this->stabilityOtherRoot = this->stabilityRoot * 0.03f; -static constexpr HandScalar kStabilityHandOrientation = kStabilityOtherRoot * 3; + this->stabilityThumbMCPSwing = this->stabilityCurlRoot * 1.5f; + this->stabilityThumbMCPTwist = this->stabilityCurlRoot * 1.5f; + + this->stabilityFingerMCPSwing = this->stabilityCurlRoot * 3.0f; + this->stabilityFingerMCPTwist = this->stabilityCurlRoot * 3.0f; + + this->stabilityFingerPXMSwingX = this->stabilityCurlRoot * 0.6f; + this->stabilityFingerPXMSwingY = this->stabilityCurlRoot * 1.6f; + + this->stabilityRootPosition = this->stabilityOtherRoot * 25; + this->stabilityHandSize = this->stabilityOtherRoot * 1000; + + this->stabilityHandOrientationZ = this->stabilityOtherRoot * 0.5; + this->stabilityHandOrientationXY = this->stabilityOtherRoot * 0.8; + } +}; static constexpr HandScalar kPlausibilityRoot = 1.0; @@ -154,7 +186,6 @@ calc_input_size(bool optimize_hand_size) return out; } - constexpr size_t calc_residual_size(bool stability, bool optimize_hand_size, int num_views) { @@ -179,10 +210,10 @@ calc_residual_size(bool stability, bool optimize_hand_size, int num_views) template struct Quat { - Scalar x; - Scalar y; - Scalar z; - Scalar w; + Scalar x = {}; + Scalar y = {}; + Scalar z = {}; + Scalar w = {}; /// Default constructor - DOES NOT INITIALIZE VALUES constexpr Quat() {} @@ -221,9 +252,9 @@ template struct Vec3 { // Note that these are not initialized, for performance reasons. // If you want them initialized, use Zero() or something else - Scalar x; - Scalar y; - Scalar z; + Scalar x = {}; + Scalar y = {}; + Scalar z = {}; /// Default constructor - DOES NOT INITIALIZE VALUES constexpr Vec3() {} @@ -254,12 +285,26 @@ template struct Vec3 { return Vec3(0.f, 0.f, 0.f); } + + // Norm, vector length, whatever. + Scalar + norm() + { + Scalar len = (Scalar)(0); + + len += this->x * this->x; + len += this->y * this->y; + len += this->z * this->z; + + len = sqrt(len); + return len; + } }; template struct Vec2 { - Scalar x; - Scalar y; + Scalar x = {}; + Scalar y = {}; /// Default constructor - DOES NOT INITIALIZE VALUES constexpr Vec2() noexcept {} @@ -298,7 +343,7 @@ template struct Vec2 template struct ResidualHelper { - T *out_residual; + T *out_residual = nullptr; size_t out_residual_idx = 0; ResidualHelper(T *residual) : out_residual(residual) @@ -312,7 +357,4 @@ template struct ResidualHelper this->out_residual[out_residual_idx++] = value; } }; - - - } // namespace xrt::tracking::hand::mercury::lm diff --git a/src/xrt/tracking/hand/mercury/kine_lm/lm_interface.hpp b/src/xrt/tracking/hand/mercury/kine_lm/lm_interface.hpp index d61a9b8bf..4420f2301 100644 --- a/src/xrt/tracking/hand/mercury/kine_lm/lm_interface.hpp +++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_interface.hpp @@ -48,9 +48,11 @@ void optimizer_run(KinematicHandLM *hand, one_frame_input &observation, bool hand_was_untracked_last_frame, + float smoothing_factor, //!<- Unused if this is the first frame bool optimize_hand_size, float target_hand_size, float hand_size_err_mul, + float amt_use_depth, xrt_hand_joint_set &out_hand, float &out_hand_size, float &out_reprojection_error); diff --git a/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp b/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp index 645320015..449798ba3 100644 --- a/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp +++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp @@ -26,6 +26,8 @@ #include "lm_optimizer_params_packer.inl" #include "lm_defines.hpp" +#include "../hg_numerics_checker.hpp" + /* Some notes: @@ -37,49 +39,70 @@ namespace xrt::tracking::hand::mercury::lm { template struct StereographicObservation { - // T obs[kNumNNJoints][2]; Vec2 obs[kNumNNJoints]; }; + +template struct DepthObservation +{ + T depth_value[kNumNNJoints]; +}; + +template struct ResidualTracker +{ + T *out_residual = nullptr; + size_t out_residual_idx = {}; + + ResidualTracker(T *residual) : out_residual(residual) {} + + void + AddValue(T const &value) + { + this->out_residual[out_residual_idx++] = value; + } +}; + + struct KinematicHandLM { bool first_frame = true; bool use_stability = false; bool optimize_hand_size = true; bool is_right = false; + float smoothing_factor; int num_observation_views = 0; - one_frame_input *observation; + one_frame_input *observation = nullptr; - HandScalar target_hand_size; - HandScalar hand_size_err_mul; - u_logging_level log_level; + HandScalar target_hand_size = {}; + HandScalar hand_size_err_mul = {}; + HandScalar depth_err_mul = {}; - StereographicObservation sgo[2]; + u_logging_level log_level = U_LOGGING_INFO; - Quat last_frame_pre_rotation; - OptimizerHand last_frame; + Quat last_frame_pre_rotation = {}; + OptimizerHand last_frame = {}; // The pose that will take you from the right camera's space to the left camera's space. - xrt_pose left_in_right; + xrt_pose left_in_right = {}; // The translation part of the same pose, just easier for Ceres to consume - Vec3 left_in_right_translation; + Vec3 left_in_right_translation = {}; // The orientation part of the same pose, just easier for Ceres to consume - Quat left_in_right_orientation; + Quat left_in_right_orientation = {}; - Eigen::Matrix TinyOptimizerInput; + Eigen::Matrix TinyOptimizerInput = {}; }; template struct Translations55 { - Vec3 t[kNumFingers][kNumJointsInFinger]; + Vec3 t[kNumFingers][kNumJointsInFinger] = {}; }; template struct Orientations54 { - Quat q[kNumFingers][kNumJointsInFinger]; + Quat q[kNumFingers][kNumJointsInFinger] = {}; }; template struct CostFunctor @@ -217,6 +240,7 @@ eval_hand_set_rel_orientations(const OptimizerHand &opt, Orientations54 &r // Finger orientations for (int i = 0; i < 4; i++) { + //!@todo: In this version of our tracking, this is always constant. SwingTwistToQuaternion(opt.finger[i].metacarpal.swing, // opt.finger[i].metacarpal.twist, // rel_orientations.q[i + 1][0]); @@ -234,7 +258,7 @@ eval_hand_set_rel_orientations(const OptimizerHand &opt, Orientations54 &r template void eval_hand_with_orientation(const OptimizerHand &opt, - bool is_right, + const bool is_right, Translations55 &translations_absolute, Orientations54 &orientations_absolute) @@ -242,16 +266,16 @@ eval_hand_with_orientation(const OptimizerHand &opt, XRT_TRACE_MARKER(); - Translations55 rel_translations; //[kNumFingers][kNumJointsInFinger]; - Orientations54 rel_orientations; //[kNumFingers][kNumOrientationsInFinger]; + Translations55 rel_translations = {}; + Orientations54 rel_orientations = {}; eval_hand_set_rel_orientations(opt, rel_orientations); eval_hand_set_rel_translations(opt, rel_translations); - Quat orientation_root; + Quat orientation_root = {}; - Quat post_orientation_quat; + Quat post_orientation_quat = {}; AngleAxisToQuaternion(opt.wrist_post_orientation_aax, post_orientation_quat); @@ -305,27 +329,65 @@ eval_hand_with_orientation(const OptimizerHand &opt, } } +float +get_avg_curl_value(const one_frame_input &obs, const int finger) +{ + float avg = 0; + float sum = 0; + for (int view = 0; view < 2; view++) { + const one_frame_one_view &inp = obs.views[view]; + if (!inp.active) { + continue; + } + float conf = 1 / inp.curls[finger].variance; // Can't divide by 0, variance is always >0 + float val = inp.curls[finger].value; + avg += val * conf; + sum += conf; + } + return avg / sum; +} + +HandScalar +calc_stability_curl_multiplier(const OptimizerFinger &finger_last, HandScalar obs_curl) +{ + HandScalar last_curl_sum = HandScalar(finger_last.proximal_swing.x + finger_last.rots[0] + finger_last.rots[1]); + //!@todo Use the neural net's output variance somehow + HandScalar curl_disagreement = abs(obs_curl - last_curl_sum); + + HandScalar curl_sub_mul = 1.0f - curl_disagreement; + curl_sub_mul += 0.2; + + + curl_sub_mul = std::max(curl_sub_mul, 0); + curl_sub_mul = std::min(curl_sub_mul, 1.0); + + return curl_sub_mul; +} + template void -computeResidualStability_Finger(const OptimizerFinger &finger, - const OptimizerFinger &finger_last, +computeResidualStability_Finger(const one_frame_input &observation, + const HandStability &stab, + const OptimizerHand &hand, + const OptimizerHand &last_hand, + int finger_idx, ResidualHelper &helper) { - helper.AddValue((finger.metacarpal.swing.x - finger_last.metacarpal.swing.x) * kStabilityFingerMCPSwing); - - helper.AddValue((finger.metacarpal.swing.y - finger_last.metacarpal.swing.y) * kStabilityFingerMCPSwing); + const OptimizerFinger &finger = hand.finger[finger_idx]; + const OptimizerFinger &finger_last = last_hand.finger[finger_idx]; - - helper.AddValue((finger.metacarpal.twist - finger_last.metacarpal.twist) * kStabilityFingerMCPTwist); + HandScalar obs_curl = HandScalar(get_avg_curl_value(observation, finger_idx + 1)); + HandScalar curl_sub_mul = calc_stability_curl_multiplier(finger_last, obs_curl); + helper.AddValue((finger.proximal_swing.x - finger_last.proximal_swing.x) * // + stab.stabilityFingerPXMSwingX * curl_sub_mul); + helper.AddValue((finger.proximal_swing.y - finger_last.proximal_swing.y) * // + stab.stabilityFingerPXMSwingY); - helper.AddValue((finger.proximal_swing.x - finger_last.proximal_swing.x) * kStabilityFingerPXMSwingX); - helper.AddValue((finger.proximal_swing.y - finger_last.proximal_swing.y) * kStabilityFingerPXMSwingY); - - helper.AddValue((finger.rots[0] - finger_last.rots[0]) * kStabilityCurlRoot); - helper.AddValue((finger.rots[1] - finger_last.rots[1]) * kStabilityCurlRoot); + helper.AddValue((finger.rots[0] - finger_last.rots[0]) * stab.stabilityCurlRoot * curl_sub_mul); + helper.AddValue((finger.rots[1] - finger_last.rots[1]) * stab.stabilityCurlRoot * curl_sub_mul); #ifdef USE_HAND_PLAUSIBILITY if (finger.rots[0] < finger.rots[1]) { @@ -340,35 +402,40 @@ template void computeResidualStability(const OptimizerHand &hand, const OptimizerHand &last_hand, - KinematicHandLM &state, + const KinematicHandLM &state, ResidualHelper &helper) { + HandStability stab(state.smoothing_factor); + if constexpr (optimize_hand_size) { helper.AddValue( // - (hand.hand_size - state.target_hand_size) * (T)(kStabilityHandSize * state.hand_size_err_mul)); + (hand.hand_size - state.target_hand_size) * (T)(stab.stabilityHandSize * state.hand_size_err_mul)); } + if (state.first_frame) { return; } - helper.AddValue((last_hand.wrist_location.x - hand.wrist_location.x) * kStabilityRootPosition); - helper.AddValue((last_hand.wrist_location.y - hand.wrist_location.y) * kStabilityRootPosition); - helper.AddValue((last_hand.wrist_location.z - hand.wrist_location.z) * kStabilityRootPosition); + helper.AddValue((last_hand.wrist_location.x - hand.wrist_location.x) * stab.stabilityRootPosition); + helper.AddValue((last_hand.wrist_location.y - hand.wrist_location.y) * stab.stabilityRootPosition); + helper.AddValue((last_hand.wrist_location.z - hand.wrist_location.z) * stab.stabilityRootPosition); - helper.AddValue((hand.wrist_post_orientation_aax.x) * (T)(kStabilityHandOrientation)); - helper.AddValue((hand.wrist_post_orientation_aax.y) * (T)(kStabilityHandOrientation)); - helper.AddValue((hand.wrist_post_orientation_aax.z) * (T)(kStabilityHandOrientation)); + helper.AddValue((hand.wrist_post_orientation_aax.x) * (T)(stab.stabilityHandOrientationXY)); + helper.AddValue((hand.wrist_post_orientation_aax.y) * (T)(stab.stabilityHandOrientationXY)); + helper.AddValue((hand.wrist_post_orientation_aax.z) * (T)(stab.stabilityHandOrientationZ)); - helper.AddValue((hand.thumb.metacarpal.swing.x - last_hand.thumb.metacarpal.swing.x) * kStabilityThumbMCPSwing); - helper.AddValue((hand.thumb.metacarpal.swing.y - last_hand.thumb.metacarpal.swing.y) * kStabilityThumbMCPSwing); - helper.AddValue((hand.thumb.metacarpal.twist - last_hand.thumb.metacarpal.twist) * kStabilityThumbMCPTwist); + helper.AddValue((hand.thumb.metacarpal.swing.x - last_hand.thumb.metacarpal.swing.x) * + stab.stabilityThumbMCPSwing); + helper.AddValue((hand.thumb.metacarpal.swing.y - last_hand.thumb.metacarpal.swing.y) * + stab.stabilityThumbMCPSwing); + helper.AddValue((hand.thumb.metacarpal.twist - last_hand.thumb.metacarpal.twist) * stab.stabilityThumbMCPTwist); - helper.AddValue((hand.thumb.rots[0] - last_hand.thumb.rots[0]) * kStabilityCurlRoot); - helper.AddValue((hand.thumb.rots[1] - last_hand.thumb.rots[1]) * kStabilityCurlRoot); + helper.AddValue((hand.thumb.rots[0] - last_hand.thumb.rots[0]) * stab.stabilityCurlRoot); + helper.AddValue((hand.thumb.rots[1] - last_hand.thumb.rots[1]) * stab.stabilityCurlRoot); #ifdef USE_HAND_PLAUSIBILITY helper.AddValue((hand.finger[1].proximal_swing.x - hand.finger[2].proximal_swing.x) * kPlausibilityProximalSimilarity); @@ -378,11 +445,7 @@ computeResidualStability(const OptimizerHand &hand, for (int finger_idx = 0; finger_idx < 4; finger_idx++) { - const OptimizerFinger &finger_last = last_hand.finger[finger_idx]; - - const OptimizerFinger &finger = hand.finger[finger_idx]; - - computeResidualStability_Finger(finger, finger_last, helper); + computeResidualStability_Finger(*state.observation, stab, hand, last_hand, finger_idx, helper); } } @@ -424,10 +487,11 @@ template static inline void unit_xrt_vec3_stereographic_projection(const xrt_vec3 in, Vec2 &out) { - Vec3 vec; - vec.x = (T)(in.x); - vec.y = (T)(in.y); - vec.z = (T)(in.z); + Vec3 vec = { + (T)(in.x), + (T)(in.y), + (T)(in.z), + }; normalize_vector_inplace(vec); @@ -436,101 +500,256 @@ unit_xrt_vec3_stereographic_projection(const xrt_vec3 in, Vec2 &out) template static void -diff(const Vec3 &model_joint_pos, - const Vec3 &move_joint_translation, - const Quat &move_joint_orientation, - const StereographicObservation &observation, - const HandScalar *confidences, - const HandScalar amount_we_care, - int &hand_joint_idx, - ResidualHelper &helper) +calc_joint_rel_camera(const Vec3 &model_joint_pos, + const Vec3 &move_joint_translation, + const Quat &move_joint_orientation, + const Quat &after_orientation, + Vec3 &out_position) { + // Should be uninitialized until here. + out_position = Vec3::Zero(); + UnitQuaternionRotatePoint(move_joint_orientation, model_joint_pos, out_position); + out_position.x += move_joint_translation.x; + out_position.y += move_joint_translation.y; + out_position.z += move_joint_translation.z; - Vec3 model_joint_dir_rel_camera; - UnitQuaternionRotatePoint(move_joint_orientation, model_joint_pos, model_joint_dir_rel_camera); - - model_joint_dir_rel_camera.x = model_joint_dir_rel_camera.x + move_joint_translation.x; - model_joint_dir_rel_camera.y = model_joint_dir_rel_camera.y + move_joint_translation.y; - model_joint_dir_rel_camera.z = model_joint_dir_rel_camera.z + move_joint_translation.z; - - normalize_vector_inplace(model_joint_dir_rel_camera); + UnitQuaternionRotatePoint(after_orientation, out_position, out_position); +} +template +static void +diff_stereographic(const Vec3 &model_joint_pos_rel_camera_, + const vec2_5 &observed_ray_sg, + const HandScalar confidence_xy, + const HandScalar stereographic_radius, + ResidualHelper &helper) +{ + Vec3 model_joint_pos_rel_camera = model_joint_pos_rel_camera_; + normalize_vector_inplace(model_joint_pos_rel_camera); Vec2 stereographic_model_dir; - unit_vector_stereographic_projection(model_joint_dir_rel_camera, stereographic_model_dir); + unit_vector_stereographic_projection(model_joint_pos_rel_camera, stereographic_model_dir); - - const HandScalar confidence = confidences[hand_joint_idx] * amount_we_care; - const Vec2 &observed_ray_sg = observation.obs[hand_joint_idx]; - - helper.AddValue((stereographic_model_dir.x - (T)(observed_ray_sg.x)) * confidence); - helper.AddValue((stereographic_model_dir.y - (T)(observed_ray_sg.y)) * confidence); - - hand_joint_idx++; + helper.AddValue((stereographic_model_dir.x - (T)(observed_ray_sg.pos_2d.x * stereographic_radius)) * + confidence_xy); + helper.AddValue((stereographic_model_dir.y - (T)(observed_ray_sg.pos_2d.y * stereographic_radius)) * + confidence_xy); } template void -CostFunctor_PositionsPart(OptimizerHand &hand, KinematicHandLM &state, ResidualHelper &helper) +cjrc(const KinematicHandLM &state, // + const OptimizerHand &hand, // + const Translations55 &translations_absolute, // + const int view, // + Vec3 out_model_joints_rel_camera[21]) +{ + Vec3 move_direction; + Quat move_orientation; + + Quat after_orientation; + + if (view == 0) { + move_direction = Vec3::Zero(); + move_orientation = Quat::Identity(); + } else { + move_direction.x = T(state.left_in_right_translation.x); + move_direction.y = T(state.left_in_right_translation.y); + move_direction.z = T(state.left_in_right_translation.z); + + move_orientation.w = T(state.left_in_right_orientation.w); + move_orientation.x = T(state.left_in_right_orientation.x); + move_orientation.y = T(state.left_in_right_orientation.y); + move_orientation.z = T(state.left_in_right_orientation.z); + } + + + + xrt_quat extra_rot = state.observation->views[view].look_dir; + + +#if 1 + math_quat_invert(&extra_rot, &extra_rot); +#endif + + + after_orientation.w = T(extra_rot.w); + after_orientation.x = T(extra_rot.x); + after_orientation.y = T(extra_rot.y); + after_orientation.z = T(extra_rot.z); + + + int joint_acc_idx = 0; + + calc_joint_rel_camera(hand.wrist_location, move_direction, move_orientation, after_orientation, + out_model_joints_rel_camera[joint_acc_idx++]); + + for (int finger_idx = 0; finger_idx < 5; finger_idx++) { + for (int joint_idx = 0; joint_idx < 4; joint_idx++) { + calc_joint_rel_camera(translations_absolute.t[finger_idx][joint_idx + 1], move_direction, + move_orientation, after_orientation, + out_model_joints_rel_camera[joint_acc_idx++]); + } + } +} + + +template +void +CostFunctor_PositionsPart(const OptimizerHand &hand, + const Translations55 &translations_absolute, + // Orientations54 &orientations_absolute, + const KinematicHandLM &state, + ResidualHelper &helper) +{ + for (int view = 0; view < 2; view++) { + if (!state.observation->views[view].active) { + continue; + } + HandScalar stereographic_radius = state.observation->views[view].stereographic_radius; + Vec3 model_joints_rel_camera[21] = {}; + + cjrc(state, hand, translations_absolute, view, model_joints_rel_camera); + MLOutput2D &out = state.observation->views[view].keypoints_in_scaled_stereographic; + + T middlepxmdepth = model_joints_rel_camera[Joint21::INDX_PXM].norm(); + + int djai = 0; + + for (int i = 0; i < 21; i++) { + + diff_stereographic(model_joints_rel_camera[i], // + state.observation->views[view].keypoints_in_scaled_stereographic[i], // + out[i].confidence_xy, // + stereographic_radius, // + helper); + + + if (i == Joint21::MIDL_PXM) { + continue; + } + // depth part! + T rel_depth = model_joints_rel_camera[i].norm() - middlepxmdepth; + rel_depth /= hand.hand_size; + + + + T obs_depth = T(out[i].depth_relative_to_midpxm); + + T relative_diff = rel_depth - obs_depth; + + if (state.first_frame) { + // Depth on the first frame was causing local minima. We need simulated annealing. + helper.AddValue(T(0)); + } else { + helper.AddValue(relative_diff * T(pow(out[i].confidence_depth, 3)) * T(1.0f) * + state.depth_err_mul); + } + + djai++; + } + } +} + +template +static void +diff_stereographic_reprojection_error(const Vec3 &model_joint_pos_rel_camera_, + const vec2_5 &observed_ray_sg, + const HandScalar confidence_xy, + const HandScalar stereographic_radius, + ResidualHelper &helper) +{ + Vec3 model_joint_pos_rel_camera = model_joint_pos_rel_camera_; + normalize_vector_inplace(model_joint_pos_rel_camera); + Vec2 stereographic_model_dir; + unit_vector_stereographic_projection(model_joint_pos_rel_camera, stereographic_model_dir); + + stereographic_model_dir.x /= stereographic_radius; + stereographic_model_dir.y /= stereographic_radius; + + //!@todo This works well but we can get a way more "rooted in math" way of increasing repro error with + //! low-confidence measurements than this. + HandScalar mul = 1 / (0.2 + confidence_xy); + + helper.AddValue((stereographic_model_dir.x - (T)(observed_ray_sg.pos_2d.x)) * mul); + helper.AddValue((stereographic_model_dir.y - (T)(observed_ray_sg.pos_2d.y)) * mul); +} + +// A much simpler reprojection error function for evaluating the final "goodness" so we can drop badly optimized hands. +template +void +simple_reprojection_error(const OptimizerHand &hand, + const Translations55 &translations_absolute, + const Orientations54 &orientations_absolute, + const KinematicHandLM &state, + + ResidualHelper &helper) { - Translations55 translations_absolute; - Orientations54 orientations_absolute; - - HandScalar we_care_joint[] = {1.3, 0.9, 0.9, 1.3}; - HandScalar we_care_finger[] = {1.0, 1.0, 0.8, 0.8, 0.8}; - - eval_hand_with_orientation(hand, state.is_right, translations_absolute, orientations_absolute); for (int view = 0; view < 2; view++) { if (!state.observation->views[view].active) { continue; } - Vec3 move_direction; - Quat move_orientation; - if (view == 0) { - move_direction = Vec3::Zero(); - move_orientation = Quat::Identity(); - } else { - move_direction.x = T(state.left_in_right_translation.x); - move_direction.y = T(state.left_in_right_translation.y); - move_direction.z = T(state.left_in_right_translation.z); + HandScalar stereographic_radius = state.observation->views[view].stereographic_radius; + Vec3 model_joints_rel_camera[21] = {}; + cjrc(state, hand, translations_absolute, view, model_joints_rel_camera); - move_orientation.w = T(state.left_in_right_orientation.w); - move_orientation.x = T(state.left_in_right_orientation.x); - move_orientation.y = T(state.left_in_right_orientation.y); - move_orientation.z = T(state.left_in_right_orientation.z); - } - int joint_acc_idx = 0; - - HandScalar *confidences = state.observation->views[view].confidences; - - diff(hand.wrist_location, move_direction, move_orientation, state.sgo[view], confidences, 1.5, - joint_acc_idx, helper); - - for (int finger_idx = 0; finger_idx < 5; finger_idx++) { - for (int joint_idx = 0; joint_idx < 4; joint_idx++) { - diff(translations_absolute.t[finger_idx][joint_idx + 1], move_direction, - move_orientation, state.sgo[view], confidences, - we_care_finger[finger_idx] * we_care_joint[joint_idx], joint_acc_idx, helper); - } + for (int i = 0; i < 21; i++) { + diff_stereographic_reprojection_error( + model_joints_rel_camera[i], // + state.observation->views[view].keypoints_in_scaled_stereographic[i], // + 1.0f, // + stereographic_radius, // + helper); } } } -// template -// void CostFunctor_HandSizePart(OptimizerHand &hand, KinematicHandLM &state, T *residual, size_t &out_residual_idx) -// { +#ifdef USE_HAND_CURLS +template +void +CostFunctor_MatchCurls(OptimizerHand &hand, KinematicHandLM &state, ResidualHelper &helper) +{ + for (int view = 0; view < 2; view++) { + one_frame_one_view &inp = state.observation->views[view]; + if (!inp.active) { + continue; + } -// } + for (int finger = 0; finger < 4; finger++) { + OptimizerFinger fing = hand.finger[finger]; + + T sum = fing.proximal_swing.x + fing.rots[0] + fing.rots[1]; + + T target = T(inp.curls[finger + 1].value); + + T diff = (sum - target) * T(1 / inp.curls[finger + 1].variance); + + helper.AddValue(diff); + } + } +} +#endif + + +template +void +print_residual_part(T *residual, int residual_size) +{ + for (int i = 0; i < residual_size; i++) { + std::cout << residual[i] << std::endl; + } +} template template bool CostFunctor::operator()(const T *const x, T *residual) const { + struct KinematicHandLM &state = this->parent; OptimizerHand hand = {}; // ??? should I do the below? probably. @@ -543,23 +762,37 @@ CostFunctor::operator()(const T *const x, T *residual) const // When you're hacking, you want to set the residuals to always-0 so that any of them you forget to touch keep their 0 // gradient. -// But then later this just becomes a waste. -#if 0 +#ifdef RESIDUALS_HACKING for (size_t i = 0; i < residual_size; i++) { residual[i] = (T)(0); } #endif + ResidualHelper helper(residual); - CostFunctor_PositionsPart(hand, state, helper); + + Translations55 translations_absolute = {}; + Orientations54 orientations_absolute = {}; + eval_hand_with_orientation(hand, state.is_right, translations_absolute, orientations_absolute); + + + CostFunctor_PositionsPart(hand, translations_absolute, state, helper); computeResidualStability(hand, state.last_frame, state, helper); - // Bounds checking - we should have written exactly to the end. - // U_LOG_E("%zu %zu", helper.out_residual_idx, residual_size); +#ifdef USE_HAND_CURLS + CostFunctor_MatchCurls(hand, state, helper); +#endif + +// Bounds checking - we should have written exactly to the end. +// If you're hacking on the optimizer, just increase the residual size a lot and don't worry. +#ifndef RESIDUALS_HACKING + if (helper.out_residual_idx != residual_size) { + LM_ERROR(state, "Residual size was wrong! Residual size was %zu, but out_residual_idx was %zu", + residual_size, helper.out_residual_idx); + } assert(helper.out_residual_idx == residual_size); - // If you're hacking, feel free to turn this off; just remember to not write off the end, and to initialize - // everything somewhere (maybe change the above to an #if 1? ) +#endif return true; } @@ -633,35 +866,24 @@ zldtt(Vec3 &trans, Quat &orientation, bool is_right, xrt_space_relation &o } static void -eval_to_viz_hand(KinematicHandLM &state, xrt_hand_joint_set &out_viz_hand) +eval_to_viz_hand(KinematicHandLM &state, + OptimizerHand &opt, + Translations55 translations_absolute, + Orientations54 orientations_absolute, + xrt_hand_joint_set &out_viz_hand) { XRT_TRACE_MARKER(); - //!@todo It's _probably_ fine to have the bigger size? - Eigen::Matrix pose = state.TinyOptimizerInput.cast(); - - OptimizerHand opt = {}; - OptimizerHandInit(opt, state.last_frame_pre_rotation); - OptimizerHandUnpackFromVector(pose.data(), state.optimize_hand_size, state.target_hand_size, opt); - - Translations55 translations_absolute; - Orientations54 orientations_absolute; - // Vec3 translations_absolute[kNumFingers][kNumJointsInFinger]; - // Quat orientations_absolute[kNumFingers][kNumOrientationsInFinger]; - eval_hand_with_orientation(opt, state.is_right, translations_absolute, orientations_absolute); - Quat post_wrist_orientation; + Quat post_wrist_orientation = {}; AngleAxisToQuaternion(opt.wrist_post_orientation_aax, post_wrist_orientation); Quat pre_wrist_orientation = state.last_frame_pre_rotation; - // for (int i = 0; i < 4; i++) { - // pre_wrist_orientation[i] = state.last_frame_pre_rotation[i]; - // } - Quat final_wrist_orientation; + Quat final_wrist_orientation = {}; QuaternionProduct(pre_wrist_orientation, post_wrist_orientation, final_wrist_orientation); @@ -689,7 +911,7 @@ eval_to_viz_hand(KinematicHandLM &state, xrt_hand_joint_set &out_viz_hand) if (finger == 0 && joint == 0) { continue; } - Quat *orientation; + Quat *orientation = nullptr; if (joint != 4) { orientation = &orientations_absolute.q[finger][joint]; } else { @@ -702,111 +924,6 @@ eval_to_viz_hand(KinematicHandLM &state, xrt_hand_joint_set &out_viz_hand) out_viz_hand.is_active = true; } -//!@todo -// This reprojection error thing is probably the wrong way of doing it. -// I think that the right way is to hack (?) TinySolver such that we get the last set of residuals back, and use those -// somehow. Maybe scale them simply by the spread of the input hand rays? What about handling stereographic projections? -// worth it? The main bit is that we should just use the last set of residuals somehow, calculating all this is a waste -// of cycles when we have something that already should work. - -static void -repro_error_make_joint_ray(const xrt_hand_joint_value &joint, const xrt_pose &cam_pose, xrt_vec3 &out_ray) -{ - // by VALUE - xrt_vec3 position = joint.relation.pose.position; - - math_quat_rotate_vec3(&cam_pose.orientation, &position, &position); - position = position + cam_pose.position; - - out_ray = m_vec3_normalize(position); -} - -static enum xrt_hand_joint -joint_from_21(int finger, int joint) -{ - if (finger > 0) { - return xrt_hand_joint(2 + (finger * 5) + joint); - } else { - return xrt_hand_joint(joint + 3); - } -} - -static inline float -simple_vec3_difference(const xrt_vec3 one, const xrt_vec3 two) -{ - return (1.0 - m_vec3_dot(one, two)); -} - -float -reprojection_error_2d(KinematicHandLM &state, const one_frame_input &observation, const xrt_hand_joint_set &set) -{ - float final_err = 0; - int views_looked_at = 0; - for (int view = 0; view < 2; view++) { - if (!observation.views[view].active) { - continue; - } - views_looked_at++; - xrt_pose move_amount; - - if (view == 0) { - // left camera. - move_amount = XRT_POSE_IDENTITY; - } else { - move_amount = state.left_in_right; - } - - xrt_vec3 lm_rays[21]; - const xrt_vec3 *obs_rays = observation.views[view].rays; - - - int acc_idx = 0; - - repro_error_make_joint_ray(set.values.hand_joint_set_default[XRT_HAND_JOINT_WRIST], move_amount, - lm_rays[acc_idx++]); - - for (int finger = 0; finger < 5; finger++) { - for (int joint = 0; joint < 4; joint++) { - repro_error_make_joint_ray( - set.values.hand_joint_set_default[joint_from_21(finger, joint)], move_amount, - lm_rays[acc_idx++]); - } - } - - xrt_vec3 obs_center = {}; - xrt_vec3 lm_center = {}; - - float err = 0; - float obs_spread = 0; - float lm_spread = 0; - - for (int i = 0; i < 21; i++) { - obs_center += obs_rays[i]; - lm_center += lm_rays[i]; - err += simple_vec3_difference(lm_rays[i], obs_rays[i]); - } - - math_vec3_scalar_mul(1.0f / 21.0f, &obs_center); - math_vec3_scalar_mul(1.0f / 21.0f, &lm_center); - - for (int i = 0; i < 21; i++) { - obs_spread += simple_vec3_difference(obs_center, obs_rays[i]); - } - for (int i = 0; i < 21; i++) { - lm_spread += simple_vec3_difference(lm_center, lm_rays[i]); - } - - - - // std::cout << err << std::endl; - // std::cout << err / (obs_spread + lm_spread) << std::endl; - - // return ; - final_err += err / (obs_spread + lm_spread); - } - return final_err / (float)views_looked_at; -} - template inline float opt_run(KinematicHandLM &state, one_frame_input &observation, xrt_hand_joint_set &out_viz_hand) @@ -825,14 +942,17 @@ opt_run(KinematicHandLM &state, one_frame_input &observation, xrt_hand_joint_set AutoDiffCostFunctor f(cf); - - // Okay I have no idea if this should be {}-initialized or not. Previous me seems to have thought no, but it - // works either way. ceres::TinySolver solver = {}; - solver.options.max_num_iterations = 50; - // We need to do a parameter sweep for the trust region and see what's fastest. - // solver.options.initial_trust_region_radius = 1e3; - solver.options.function_tolerance = 1e-6; + solver.options.max_num_iterations = 30; + + //!@todo We don't yet know what "good" termination conditions are. + // Instead of trying to guess without good offline datasets, just disable _all_ termination conditions and have + // it run for 30 iterations no matter what. + solver.options.gradient_tolerance = 0; + solver.options.function_tolerance = 0; + solver.options.parameter_tolerance = 0; + + //!@todo We need to do a parameter sweep on initial_trust_region_radius. Eigen::Matrix inp = state.TinyOptimizerInput.head(); @@ -848,7 +968,7 @@ opt_run(KinematicHandLM &state, one_frame_input &observation, xrt_hand_joint_set uint64_t diff = end - start; double time_taken = (double)diff / (double)U_TIME_1MS_IN_NS; - const char *status = "UNDEFINED"; + const char *status = nullptr; switch (summary.status) { case 0: { @@ -866,6 +986,7 @@ opt_run(KinematicHandLM &state, one_frame_input &observation, xrt_hand_joint_set case 4: { status = "COST_CHANGE_TOO_SMALL"; } break; + default: assert(false); } LM_DEBUG(state, "Status: %s, num_iterations %d, max_norm %E, gtol %E", status, summary.iterations, @@ -878,15 +999,60 @@ opt_run(KinematicHandLM &state, one_frame_input &observation, xrt_hand_joint_set return 0; } +void +optimizer_finish(KinematicHandLM &state, xrt_hand_joint_set &out_viz_hand, float &out_reprojection_error) +{ + + // Postfix - unpack, + + OptimizerHand &final_hand = state.last_frame; + + Translations55 translations_absolute; + Orientations54 orientations_absolute; + eval_hand_with_orientation(final_hand, state.is_right, translations_absolute, + orientations_absolute); + + eval_to_viz_hand(state, final_hand, translations_absolute, orientations_absolute, out_viz_hand); + + + // CALCULATE REPROJECTION ERROR + + // Let's make space calc_residual_sizefor two views, even though we may only use one. + Eigen::Matrix mat; //= mat.Zero(); + mat.setConstant(0); + + ResidualHelper helper(mat.data()); + + + + simple_reprojection_error(final_hand, translations_absolute, orientations_absolute, state, helper); + + HandScalar sum = mat.squaredNorm(); + + sum /= state.num_observation_views; + + out_reprojection_error = sum; +} + void hand_was_untracked(KinematicHandLM *hand) { hand->first_frame = true; +#if 0 hand->last_frame_pre_rotation.w = 1.0; hand->last_frame_pre_rotation.x = 0.0; hand->last_frame_pre_rotation.y = 0.0; hand->last_frame_pre_rotation.z = 0.0; +#else + // Rotated 90 degrees so that the palm is facing the user and the fingers are up. + // The _idea_ is that having the palm be "in" the camera's plane will reduce initial local minima due to flat + // things having multiple possible unprojections. + hand->last_frame_pre_rotation.w = sqrt(2) / 2; + hand->last_frame_pre_rotation.x = -sqrt(2) / 2; + hand->last_frame_pre_rotation.y = 0.0; + hand->last_frame_pre_rotation.z = 0.0; +#endif OptimizerHandInit(hand->last_frame, hand->last_frame_pre_rotation); OptimizerHandPackIntoVector(hand->last_frame, hand->optimize_hand_size, hand->TinyOptimizerInput.data()); @@ -896,14 +1062,19 @@ void optimizer_run(KinematicHandLM *hand, one_frame_input &observation, bool hand_was_untracked_last_frame, + float smoothing_factor, //!<- Unused if this is the first frame bool optimize_hand_size, float target_hand_size, float hand_size_err_mul, + float amt_use_depth, xrt_hand_joint_set &out_viz_hand, float &out_hand_size, float &out_reprojection_error) // NOLINT(bugprone-easily-swappable-parameters) { + numerics_checker::set_floating_exceptions(); + KinematicHandLM &state = *hand; + state.smoothing_factor = smoothing_factor; if (hand_was_untracked_last_frame) { hand_was_untracked(hand); @@ -919,24 +1090,58 @@ optimizer_run(KinematicHandLM *hand, state.optimize_hand_size = optimize_hand_size; state.target_hand_size = target_hand_size; state.hand_size_err_mul = hand_size_err_mul; + state.depth_err_mul = amt_use_depth; state.use_stability = !state.first_frame; state.observation = &observation; - for (int i = 0; i < 21; i++) { - for (int view = 0; view < 2; view++) { - unit_xrt_vec3_stereographic_projection(observation.views[view].rays[i], state.sgo[view].obs[i]); - } + + + // This code is disabled because I can't convince myself that it helps (I will be able to once we have good + // validation datasets.) + + // What it does: Update each finger's "initial" curl value to match what the neural net thought the curl was, + // so that the optimizer hopefully starts in the valley that contains the true global minimum. +#if 0 + + OptimizerHand blah = {}; + OptimizerHandUnpackFromVector(state.TinyOptimizerInput.data(), state.optimize_hand_size, state.target_hand_size, + blah); + + for (int finger_idx = 0; finger_idx < 4; finger_idx++) { + // int finger_idx = 0; + int curl_idx = finger_idx + 1; + + + OptimizerFinger fing = blah.finger[finger_idx]; + HandScalar sum = fing.proximal_swing.x + fing.rots[0] + fing.rots[1]; + + HandScalar target = get_avg_curl_value(observation, curl_idx); + + + HandScalar move = target - sum; + + fing.proximal_swing.x += move / 3; + fing.rots[0] += move / 3; + fing.rots[1] += move / 3; + } + OptimizerHandPackIntoVector(blah, state.optimize_hand_size, state.TinyOptimizerInput.data()); - // For now, we have to statically instantiate different versions of the optimizer depending on how many input - // parameters there are. For now, there are only two cases - either we are optimizing the hand size or we are - // not optimizing it. - // !@todo Can we make a magic template that automatically instantiates the right one, and also make it so we can - // decide to either make the residual size dynamic or static? Currently, it's dynamic, which is easier for us - // and makes compile times a lot lower, but it probably makes things some amount slower at runtime. + + +#endif + + + // For now, we have to statically instantiate different versions of the optimizer depending on + // how many input parameters there are. For now, there are only two cases - either we are + // optimizing the hand size or we are not optimizing it. + // !@todo Can we make a magic template that automatically instantiates the right one, and also + // make it so we can decide to either make the residual size dynamic or static? Currently, it's + // dynamic, which is easier for us and makes compile times a lot lower, but it probably makes + // things some amount slower at runtime. if (optimize_hand_size) { opt_run(state, observation, out_viz_hand); @@ -955,18 +1160,16 @@ optimizer_run(KinematicHandLM *hand, // Squash the orientations OptimizerHandSquashRotations(state.last_frame, state.last_frame_pre_rotation); - // Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're using sin/asin. + // Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're + // using sin/asin. OptimizerHandPackIntoVector(state.last_frame, hand->optimize_hand_size, state.TinyOptimizerInput.data()); - - - eval_to_viz_hand(state, out_viz_hand); + optimizer_finish(state, out_viz_hand, out_reprojection_error); state.first_frame = false; out_hand_size = state.last_frame.hand_size; - - out_reprojection_error = reprojection_error_2d(state, observation, out_viz_hand); + numerics_checker::remove_floating_exceptions(); } @@ -974,7 +1177,7 @@ optimizer_run(KinematicHandLM *hand, void optimizer_create(xrt_pose left_in_right, bool is_right, u_logging_level log_level, KinematicHandLM **out_kinematic_hand) { - KinematicHandLM *hand = new KinematicHandLM; + KinematicHandLM *hand = new KinematicHandLM(); hand->is_right = is_right; hand->left_in_right = left_in_right; diff --git a/src/xrt/tracking/hand/mercury/kine_lm/lm_optimizer_params_packer.inl b/src/xrt/tracking/hand/mercury/kine_lm/lm_optimizer_params_packer.inl index a3b74a6c3..d3dfcc18f 100644 --- a/src/xrt/tracking/hand/mercury/kine_lm/lm_optimizer_params_packer.inl +++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_optimizer_params_packer.inl @@ -8,48 +8,46 @@ * @ingroup tracking */ +// #include +// #include #include "util/u_logging.h" #include "math/m_api.h" #include "lm_interface.hpp" #include "lm_defines.hpp" -#include -#include - -// #include "lm_rotations.hpp" namespace xrt::tracking::hand::mercury::lm { template struct OptimizerMetacarpalBone { - Vec2 swing; - T twist; + Vec2 swing = {}; + T twist = {}; }; template struct OptimizerFinger { - OptimizerMetacarpalBone metacarpal; - Vec2 proximal_swing; + OptimizerMetacarpalBone metacarpal = {}; + Vec2 proximal_swing = {}; // Not Vec2. - T rots[2]; + T rots[2] = {}; }; template struct OptimizerThumb { - OptimizerMetacarpalBone metacarpal; + OptimizerMetacarpalBone metacarpal = {}; // Again not Vec2. - T rots[2]; + T rots[2] = {}; }; template struct OptimizerHand { T hand_size; - Vec3 wrist_location; + Vec3 wrist_location = {}; // This is constant, a ceres::Rotation.h quat,, taken from last frame. - Quat wrist_pre_orientation_quat; + Quat wrist_pre_orientation_quat = {}; // This is optimized - angle-axis rotation vector. Starts at 0, loss goes up the higher it goes because it // indicates more of a rotation. - Vec3 wrist_post_orientation_aax; + Vec3 wrist_post_orientation_aax = {}; OptimizerThumb thumb = {}; OptimizerFinger finger[4] = {}; }; @@ -77,16 +75,18 @@ public: class HandLimit { public: - minmax hand_size; + minmax hand_size = {}; - minmax thumb_mcp_swing_x, thumb_mcp_swing_y, thumb_mcp_twist; - minmax thumb_curls[2]; + minmax thumb_mcp_swing_x = {}; + minmax thumb_mcp_swing_y = {}; + minmax thumb_mcp_twist = {}; + minmax thumb_curls[2] = {}; - FingerLimit fingers[4]; + FingerLimit fingers[4] = {}; HandLimit() { - hand_size = {0.095 - 0.03, 0.095 + 0.03}; + hand_size = {MIN_HAND_SIZE, MAX_HAND_SIZE}; thumb_mcp_swing_x = {rad(-60), rad(60)}; thumb_mcp_swing_y = {rad(-60), rad(60)}; @@ -97,25 +97,31 @@ public: } - constexpr double margin = 0.09; + HandScalar margin = 0.0001; - fingers[0].mcp_swing_y = {-0.19 - margin, -0.19 + margin}; - fingers[1].mcp_swing_y = {0.00 - margin, 0.00 + margin}; - fingers[2].mcp_swing_y = {0.19 - margin, 0.19 + margin}; - fingers[3].mcp_swing_y = {0.38 - margin, 0.38 + margin}; + fingers[0].mcp_swing_y = {HandScalar(-0.19) - margin, HandScalar(-0.19) + margin}; + fingers[1].mcp_swing_y = {HandScalar(0.00) - margin, HandScalar(0.00) + margin}; + fingers[2].mcp_swing_y = {HandScalar(0.19) - margin, HandScalar(0.19) + margin}; + fingers[3].mcp_swing_y = {HandScalar(0.38) - margin, HandScalar(0.38) + margin}; + + + fingers[0].mcp_swing_x = {HandScalar(-0.02) - margin, HandScalar(-0.02) + margin}; + fingers[1].mcp_swing_x = {HandScalar(0.00) - margin, HandScalar(0.00) + margin}; + fingers[2].mcp_swing_x = {HandScalar(0.02) - margin, HandScalar(0.02) + margin}; + fingers[3].mcp_swing_x = {HandScalar(0.04) - margin, HandScalar(0.04) + margin}; for (int finger_idx = 0; finger_idx < 4; finger_idx++) { FingerLimit &finger = fingers[finger_idx]; - finger.mcp_swing_x = {rad(-10), rad(10)}; + // finger.mcp_swing_x = {rad(-0.0001), rad(0.0001)}; finger.mcp_twist = {rad(-4), rad(4)}; finger.pxm_swing_x = {rad(-100), rad(20)}; // ??? why is it reversed finger.pxm_swing_y = {rad(-20), rad(20)}; for (int i = 0; i < 2; i++) { - finger.curls[i] = {rad(-90), rad(10)}; + finger.curls[i] = {rad(-90), rad(0)}; } } } @@ -124,9 +130,6 @@ public: static const class HandLimit the_limit = {}; -constexpr HandScalar hand_size_min = 0.095 - 0.03; -constexpr HandScalar hand_size_max = 0.095 + 0.03; - template inline T LMToModel(T lm, minmax mm) @@ -169,17 +172,7 @@ OptimizerHandUnpackFromVector(const T *in, bool use_hand_size, T hand_size, Opti out.thumb.rots[1] = LMToModel(in[acc_idx++], the_limit.thumb_curls[1]); for (int finger_idx = 0; finger_idx < 4; finger_idx++) { - - out.finger[finger_idx].metacarpal.swing.x = - LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].mcp_swing_x); - - out.finger[finger_idx].metacarpal.swing.y = - LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].mcp_swing_y); - - out.finger[finger_idx].metacarpal.twist = - LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].mcp_twist); - - + // Note that we are not unpacking the metacarpal swing/twist as it is constant. out.finger[finger_idx].proximal_swing.x = LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].pxm_swing_x); out.finger[finger_idx].proximal_swing.y = @@ -224,13 +217,7 @@ OptimizerHandPackIntoVector(OptimizerHand &in, bool use_hand_size, T *out) out[acc_idx++] = ModelToLM(in.thumb.rots[1], the_limit.thumb_curls[1]); for (int finger_idx = 0; finger_idx < 4; finger_idx++) { - out[acc_idx++] = - ModelToLM(in.finger[finger_idx].metacarpal.swing.x, the_limit.fingers[finger_idx].mcp_swing_x); - out[acc_idx++] = - ModelToLM(in.finger[finger_idx].metacarpal.swing.y, the_limit.fingers[finger_idx].mcp_swing_y); - out[acc_idx++] = - ModelToLM(in.finger[finger_idx].metacarpal.twist, the_limit.fingers[finger_idx].mcp_twist); - + // Note that we are not packing the metacarpal swing/twist as it is constant. out[acc_idx++] = ModelToLM(in.finger[finger_idx].proximal_swing.x, the_limit.fingers[finger_idx].pxm_swing_x); out[acc_idx++] = @@ -252,18 +239,14 @@ template void OptimizerHandInit(OptimizerHand &opt, Quat &pre_rotation) { - opt.hand_size = (T)(0.095); + opt.hand_size = (T)STANDARD_HAND_SIZE; opt.wrist_post_orientation_aax.x = (T)(0); opt.wrist_post_orientation_aax.y = (T)(0); opt.wrist_post_orientation_aax.z = (T)(0); - // opt.store_wrist_pre_orientation_quat = pre_rotation; - opt.wrist_pre_orientation_quat.w = (T)pre_rotation.w; - opt.wrist_pre_orientation_quat.x = (T)pre_rotation.x; - opt.wrist_pre_orientation_quat.y = (T)pre_rotation.y; - opt.wrist_pre_orientation_quat.z = (T)pre_rotation.z; + opt.wrist_pre_orientation_quat = pre_rotation; opt.wrist_location.x = (T)(0); opt.wrist_location.y = (T)(0); @@ -275,23 +258,28 @@ OptimizerHandInit(OptimizerHand &opt, Quat &pre_rotation) opt.finger[i].metacarpal.swing.x = T(0); opt.finger[i].metacarpal.twist = T(0); - opt.finger[i].proximal_swing.x = rad((T)(15)); - opt.finger[i].rots[0] = rad((T)(-5)); - opt.finger[i].rots[1] = rad((T)(-5)); + opt.finger[i].proximal_swing.x = T(rad(15.0f)); + opt.finger[i].rots[0] = T(rad(-5)); + opt.finger[i].rots[1] = T(rad(-5)); } opt.thumb.metacarpal.swing.x = (T)(0); opt.thumb.metacarpal.swing.y = (T)(0); opt.thumb.metacarpal.twist = (T)(0); - opt.thumb.rots[0] = rad((T)(-5)); - opt.thumb.rots[1] = rad((T)(-59)); + opt.thumb.rots[0] = T(rad(-5)); + opt.thumb.rots[1] = T(rad(-59)); opt.finger[0].metacarpal.swing.y = (T)(-0.19); opt.finger[1].metacarpal.swing.y = (T)(0); opt.finger[2].metacarpal.swing.y = (T)(0.19); opt.finger[3].metacarpal.swing.y = (T)(0.38); + opt.finger[0].metacarpal.swing.x = (T)(-0.02); + opt.finger[1].metacarpal.swing.x = (T)(0); + opt.finger[2].metacarpal.swing.x = (T)(0.02); + opt.finger[3].metacarpal.swing.x = (T)(0.04); + opt.finger[0].proximal_swing.y = (T)(-0.01); opt.finger[1].proximal_swing.y = (T)(0); opt.finger[2].proximal_swing.y = (T)(0.01); @@ -312,11 +300,11 @@ OptimizerHandSquashRotations(OptimizerHand &opt, Quat &out_orientation) Quat &pre_rotation = opt.wrist_pre_orientation_quat; - Quat post_rotation; + Quat post_rotation = {}; AngleAxisToQuaternion(opt.wrist_post_orientation_aax, post_rotation); - Quat tmp_new_pre_rotation; + Quat tmp_new_pre_rotation = {}; QuaternionProduct(pre_rotation, post_rotation, tmp_new_pre_rotation); diff --git a/tests/tests_levenbergmarquardt.cpp b/tests/tests_levenbergmarquardt.cpp index f9b0a9076..e473804b9 100644 --- a/tests/tests_levenbergmarquardt.cpp +++ b/tests/tests_levenbergmarquardt.cpp @@ -34,14 +34,6 @@ TEST_CASE("LevenbergMarquardt") struct one_frame_input input = {}; - for (int i = 0; i < 21; i++) { - - input.views[0].rays[i] = m_vec3_normalize({0, (float)i, -1}); //{0,(float)i,-1}; - input.views[1].rays[i] = m_vec3_normalize({(float)i, 0, -1}); - input.views[0].confidences[i] = 1; - input.views[1].confidences[i] = 1; - } - lm::KinematicHandLM *hand; xrt_pose left_in_right = XRT_POSE_IDENTITY; @@ -53,7 +45,17 @@ TEST_CASE("LevenbergMarquardt") xrt_hand_joint_set out; float out_hand_size; float out_reprojection_error; - lm::optimizer_run(hand, input, true, true, 0.09, 0.5, out, out_hand_size, out_reprojection_error); + lm::optimizer_run(hand, // + input, // + true, // + 2.0f, // + true, // + 0.09, // + 0.5, // + 0.5f, // + out, // + out_hand_size, // + out_reprojection_error); CHECK(std::isfinite(out_reprojection_error)); CHECK(std::isfinite(out_hand_size));