From 03546e0b99d67f47b319092c4139574b5dde12cd Mon Sep 17 00:00:00 2001 From: Moshi Turner Date: Mon, 6 Mar 2023 23:53:50 -0600 Subject: [PATCH] h/mercury: Add wrist pose initialization guesser --- .../hand/mercury/kine_lm/CMakeLists.txt | 5 +- .../mercury/kine_lm/lm_hand_init_guesser.cpp | 181 ++++++++++++++++++ .../mercury/kine_lm/lm_hand_init_guesser.hpp | 22 +++ .../tracking/hand/mercury/kine_lm/lm_main.cpp | 66 +++---- 4 files changed, 237 insertions(+), 37 deletions(-) create mode 100644 src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.cpp create mode 100644 src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.hpp diff --git a/src/xrt/tracking/hand/mercury/kine_lm/CMakeLists.txt b/src/xrt/tracking/hand/mercury/kine_lm/CMakeLists.txt index 1c2a0d5f4..2b6e37010 100644 --- a/src/xrt/tracking/hand/mercury/kine_lm/CMakeLists.txt +++ b/src/xrt/tracking/hand/mercury/kine_lm/CMakeLists.txt @@ -1,7 +1,10 @@ # Copyright 2022, Collabora, Ltd. # SPDX-License-Identifier: BSL-1.0 -add_library(t_ht_mercury_kine_lm STATIC lm_interface.hpp lm_main.cpp) +add_library( + t_ht_mercury_kine_lm STATIC lm_interface.hpp lm_main.cpp lm_hand_init_guesser.hpp + lm_hand_init_guesser.cpp + ) target_link_libraries( t_ht_mercury_kine_lm diff --git a/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.cpp b/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.cpp new file mode 100644 index 000000000..27e2f3633 --- /dev/null +++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.cpp @@ -0,0 +1,181 @@ +// Copyright 2022, Collabora, Ltd. +// SPDX-License-Identifier: BSL-1.0 +/*! + * @file + * @brief Levenberg-Marquardt kinematic optimizer + * @author Moses Turner + * @ingroup tracking + */ + +#include "math/m_api.h" +#include "math/m_vec3.h" +#include "math/m_eigen_interop.hpp" +#include "os/os_time.h" +#include "util/u_logging.h" +#include "util/u_misc.h" +#include "util/u_trace_marker.h" + +#include "tinyceres/tiny_solver.hpp" +#include "tinyceres/tiny_solver_autodiff_function.hpp" +#include "lm_rotations.inl" + +#include +#include +#include +#include "lm_interface.hpp" +#include "lm_optimizer_params_packer.inl" +#include "lm_defines.hpp" + +#include "../hg_numerics_checker.hpp" +#include "../hg_stereographic_unprojection.hpp" + +namespace xrt::tracking::hand::mercury::lm { +using namespace xrt::auxiliary::math; + +// Generated from mercury_train's hand_depth_guess.ipynb. +float +sympy_guess_distance(float angle, float wrist_extra_distance_meters, float hand_size) +{ + float d1 = (1.0 / 2.0) * + (-wrist_extra_distance_meters * (angle - 1) + + sqrt((angle - 1) * (angle * pow(wrist_extra_distance_meters, 2) - 2 * pow(hand_size, 2) + + pow(wrist_extra_distance_meters, 2)))) / + (angle - 1); + float d2 = -(wrist_extra_distance_meters * (angle - 1) + + sqrt((angle - 1) * (angle * pow(wrist_extra_distance_meters, 2) - 2 * pow(hand_size, 2) + + pow(wrist_extra_distance_meters, 2)))) / + (2 * angle - 2); + + if (d1 > d2) { + return d1; + } + return d2; +} + +bool +hand_init_guess(one_frame_input &observation, const float hand_size, xrt_pose left_in_right, xrt_pose &out_wrist_guess) +{ + int num_observation_views = 0; + + + // We're actually going "forwards" in our transformations for once! + // For the right camera, instead of moving an estimation *into* right-camera-space and doing math in + // camera-space, we move everything into *left-camera-space*. + xrt_pose right_in_left; + math_pose_invert(&left_in_right, &right_in_left); + + xrt_vec3 wrist_global_sum = {}; + xrt_vec3 midpxm_global_sum = {}; + xrt_vec3 indpxm_global_sum = {}; + xrt_vec3 litpxm_global_sum = {}; + + for (int view = 0; view < 2; view++) { + one_frame_one_view &v = observation.views[view]; + if (!v.active) { + continue; + } + num_observation_views++; + + Eigen::Quaternionf rotate = map_quat(v.look_dir); + Eigen::Vector3f directions_rel_camera[21]; + + for (int i = 0; i < 21; i++) { + float sg_x = v.keypoints_in_scaled_stereographic[i].pos_2d.x * v.stereographic_radius; + float sg_y = v.keypoints_in_scaled_stereographic[i].pos_2d.y * v.stereographic_radius; + directions_rel_camera[i] = stereographic_unprojection(sg_x, sg_y); + directions_rel_camera[i] = rotate * directions_rel_camera[i]; + } + + xrt_vec3 midpxm_dir; + xrt_vec3 wrist_dir; + map_vec3(midpxm_dir) = directions_rel_camera[Joint21::MIDL_PXM]; + map_vec3(wrist_dir) = directions_rel_camera[Joint21::WRIST]; + + float angle = cos(m_vec3_angle(midpxm_dir, wrist_dir)); + float wrist_extra_distance_meters = + v.keypoints_in_scaled_stereographic[0].depth_relative_to_midpxm * hand_size; + + float distance = sympy_guess_distance(angle, wrist_extra_distance_meters, hand_size); + + if (distance != distance) { + // Nan check. + // This happens if the angle between midpxm_dir and wrist_dir is 0, generally when + // refine_center_of_distribution fails hard enough. Generally not worth tracking hands when this + // happens. + return false; + } + +// I wish I could make this an inline unit test. Useful for debugging so I'll leave it disabled. +#if 0 + Eigen::Vector3f wrist_rel_camera = directions_rel_camera[0] * (distance+wrist_extra_distance_meters); + Eigen::Vector3f midpxm_rel_camera = directions_rel_camera[9] * distance; + + float norm = (wrist_rel_camera-midpxm_rel_camera).norm(); + U_LOG_E("Recovered: %f", norm); +#endif + + xrt_vec3 wrist_rel_camera = {}; + xrt_vec3 midpxm_rel_camera = {}; + xrt_vec3 indpxm_rel_camera = {}; + xrt_vec3 litpxm_rel_camera = {}; + + map_vec3(wrist_rel_camera) = + directions_rel_camera[Joint21::WRIST] * (distance + wrist_extra_distance_meters); + map_vec3(midpxm_rel_camera) = directions_rel_camera[Joint21::MIDL_PXM] * (distance); + map_vec3(indpxm_rel_camera) = + directions_rel_camera[Joint21::INDX_PXM] * + (distance + + (v.keypoints_in_scaled_stereographic[Joint21::INDX_PXM].depth_relative_to_midpxm * hand_size)); + map_vec3(litpxm_rel_camera) = + directions_rel_camera[Joint21::LITL_PXM] * + (distance + + (v.keypoints_in_scaled_stereographic[Joint21::LITL_PXM].depth_relative_to_midpxm * hand_size)); + + if (view == 1) { + math_pose_transform_point(&right_in_left, &wrist_rel_camera, &wrist_rel_camera); + math_pose_transform_point(&right_in_left, &midpxm_rel_camera, &midpxm_rel_camera); + math_pose_transform_point(&right_in_left, &indpxm_rel_camera, &indpxm_rel_camera); + math_pose_transform_point(&right_in_left, &litpxm_rel_camera, &litpxm_rel_camera); + } + + wrist_global_sum += wrist_rel_camera; + midpxm_global_sum += midpxm_rel_camera; + indpxm_global_sum += indpxm_rel_camera; + litpxm_global_sum += litpxm_rel_camera; + } + + wrist_global_sum = m_vec3_mul_scalar(wrist_global_sum, 1.0f / (float)num_observation_views); + midpxm_global_sum = m_vec3_mul_scalar(midpxm_global_sum, 1.0f / (float)num_observation_views); + indpxm_global_sum = m_vec3_mul_scalar(indpxm_global_sum, 1.0f / (float)num_observation_views); + litpxm_global_sum = m_vec3_mul_scalar(wrist_global_sum, 1.0f / (float)num_observation_views); + + out_wrist_guess.position = wrist_global_sum; + +#if 0 + // Original + xrt_vec3 plus_z = m_vec3_normalize(wrist_global_sum - midpxm_global_sum); + xrt_vec3 plus_x = m_vec3_normalize(indpxm_global_sum - litpxm_global_sum); + +#elif 0 + // Negated + xrt_vec3 plus_z = m_vec3_normalize(midpxm_global_sum - wrist_global_sum); + // Not negated + xrt_vec3 plus_x = m_vec3_normalize(indpxm_global_sum - litpxm_global_sum); +#elif 0 + // Both negated + xrt_vec3 plus_z = m_vec3_normalize(midpxm_global_sum - wrist_global_sum); + xrt_vec3 plus_x = m_vec3_normalize(litpxm_global_sum - indpxm_global_sum); + +#else + // Not negated + xrt_vec3 plus_z = m_vec3_normalize(wrist_global_sum - midpxm_global_sum); + // Negated + xrt_vec3 plus_x = m_vec3_normalize(litpxm_global_sum - indpxm_global_sum); +#endif + + plus_x = m_vec3_orthonormalize(plus_z, plus_x); + math_quat_from_plus_x_z(&plus_x, &plus_z, &out_wrist_guess.orientation); + + return true; +} +} // namespace xrt::tracking::hand::mercury::lm diff --git a/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.hpp b/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.hpp new file mode 100644 index 000000000..241cccfdd --- /dev/null +++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.hpp @@ -0,0 +1,22 @@ +// Copyright 2022, Collabora, Ltd. +// SPDX-License-Identifier: BSL-1.0 +/*! + * @file + * @brief Levenberg-Marquardt kinematic optimizer + * @author Moses Turner + * @ingroup tracking + */ + +#pragma once +#include "math/m_api.h" +#include "math/m_vec3.h" + +#include "lm_defines.hpp" + + +namespace xrt::tracking::hand::mercury::lm { + +bool +hand_init_guess(one_frame_input &observation, const float hand_size, xrt_pose left_in_right, xrt_pose &out_wrist_guess); + +} // namespace xrt::tracking::hand::mercury::lm diff --git a/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp b/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp index 5ce36e47f..ca32666fc 100644 --- a/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp +++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp @@ -26,6 +26,7 @@ #include "lm_defines.hpp" #include "../hg_numerics_checker.hpp" +#include "lm_hand_init_guesser.hpp" /* @@ -953,33 +954,6 @@ optimizer_finish(KinematicHandLM &state, xrt_hand_joint_set &out_viz_hand, float out_reprojection_error = sum; } -void -hand_was_untracked(KinematicHandLM &state) -{ - state.first_frame = true; -#if 0 - state.last_frame_pre_rotation.w = 1.0; - state.last_frame_pre_rotation.x = 0.0; - state.last_frame_pre_rotation.y = 0.0; - state.last_frame_pre_rotation.z = 0.0; -#else - // Rotated 90 degrees so that the palm is facing the user and the fingers are up. - // The _idea_ is that having the palm be "in" the camera's plane will reduce initial local minima due to flat - // things having multiple possible unprojections. - state.this_frame_pre_rotation.w = sqrt(2) / 2; - state.this_frame_pre_rotation.x = -sqrt(2) / 2; - state.this_frame_pre_rotation.y = 0.0; - state.this_frame_pre_rotation.z = 0.0; -#endif - - state.this_frame_pre_position.x = 0.0f; - state.this_frame_pre_position.y = 0.0f; - state.this_frame_pre_position.z = -0.3f; - - OptimizerHandInit(state.last_frame, state.this_frame_pre_rotation); - OptimizerHandPackIntoVector(state.last_frame, state.optimize_hand_size, state.TinyOptimizerInput.data()); -} - void optimizer_run(KinematicHandLM *hand, one_frame_input &observation, @@ -998,8 +972,31 @@ optimizer_run(KinematicHandLM *hand, KinematicHandLM &state = *hand; state.smoothing_factor = smoothing_factor; + xrt_pose blah = XRT_POSE_IDENTITY; + hand_init_guess(observation, target_hand_size, state.left_in_right, blah); + if (hand_was_untracked_last_frame) { - hand_was_untracked(state); + OptimizerHandInit(state.last_frame, state.this_frame_pre_rotation); + OptimizerHandPackIntoVector(state.last_frame, state.optimize_hand_size, + state.TinyOptimizerInput.data()); + if (blah.position.z > 0.05) { + LM_WARN(state, "Initial position guess was too close to camera! Z axis was %f m", + blah.position.z); + state.this_frame_pre_position.x = 0.0f; + state.this_frame_pre_position.y = 0.0f; + state.this_frame_pre_position.z = -0.3f; + } else { + state.this_frame_pre_position.x = blah.position.x; + state.this_frame_pre_position.y = blah.position.y; + state.this_frame_pre_position.z = blah.position.z; + } + + + + state.this_frame_pre_rotation.x = blah.orientation.x; + state.this_frame_pre_rotation.y = blah.orientation.y; + state.this_frame_pre_rotation.z = blah.orientation.z; + state.this_frame_pre_rotation.w = blah.orientation.w; } state.num_observation_views = 0; @@ -1072,17 +1069,16 @@ optimizer_run(KinematicHandLM *hand, - // Postfix - unpack, + // Postfix - unpack our optimization result into state.last_frame. OptimizerHandUnpackFromVector(state.TinyOptimizerInput.data(), state, state.last_frame); - // Squash the orientations + // Have the final pose from this frame be the next frame's initial pose state.this_frame_pre_rotation = state.last_frame.wrist_final_orientation; state.this_frame_pre_position = state.last_frame.wrist_final_location; - // Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're - // using sin/asin. + // Reset this frame's post-transform to identity state.last_frame.wrist_post_location.x = 0.0f; state.last_frame.wrist_post_location.y = 0.0f; state.last_frame.wrist_post_location.z = 0.0f; @@ -1091,7 +1087,8 @@ optimizer_run(KinematicHandLM *hand, state.last_frame.wrist_post_orientation_aax.y = 0.0f; state.last_frame.wrist_post_orientation_aax.z = 0.0f; - + // Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're + // using sin/asin. OptimizerHandPackIntoVector(state.last_frame, hand->optimize_hand_size, state.TinyOptimizerInput.data()); optimizer_finish(state, out_viz_hand, out_reprojection_error); @@ -1122,9 +1119,6 @@ optimizer_create(xrt_pose left_in_right, bool is_right, u_logging_level log_leve hand->left_in_right_orientation.y = left_in_right.orientation.y; hand->left_in_right_orientation.z = left_in_right.orientation.z; - // Probably unnecessary. - hand_was_untracked(*hand); - *out_kinematic_hand = hand; }