h/mercury: Add wrist pose initialization guesser

2025-01-01 12:46:12 +00:00 · 2023-03-06 23:53:50 -06:00 · 2023-03-06 23:53:50 -06:00 · 03546e0b99
parent 3f2b71a9d5
commit 03546e0b99
4 changed files with 237 additions and 37 deletions
--- a/src/xrt/tracking/hand/mercury/kine_lm/CMakeLists.txt
+++ b/src/xrt/tracking/hand/mercury/kine_lm/CMakeLists.txt
@ -1,7 +1,10 @@
 # Copyright 2022, Collabora, Ltd.
 # SPDX-License-Identifier: BSL-1.0
-add_library(t_ht_mercury_kine_lm STATIC lm_interface.hpp lm_main.cpp)
+add_library(
 	t_ht_mercury_kine_lm STATIC lm_interface.hpp lm_main.cpp lm_hand_init_guesser.hpp
 				    lm_hand_init_guesser.cpp
 	)
 target_link_libraries(
 	t_ht_mercury_kine_lm
--- a/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.cpp
+++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.cpp
@ -0,0 +1,181 @@
 // Copyright 2022, Collabora, Ltd.
 // SPDX-License-Identifier: BSL-1.0
 /*!
 * @file
 * @brief Levenberg-Marquardt kinematic optimizer
 * @author Moses Turner <moses@collabora.com>
 * @ingroup tracking
 */
 #include "math/m_api.h"
 #include "math/m_vec3.h"
 #include "math/m_eigen_interop.hpp"
 #include "os/os_time.h"
 #include "util/u_logging.h"
 #include "util/u_misc.h"
 #include "util/u_trace_marker.h"
 #include "tinyceres/tiny_solver.hpp"
 #include "tinyceres/tiny_solver_autodiff_function.hpp"
 #include "lm_rotations.inl"
 #include <iostream>
 #include <cmath>
 #include <random>
 #include "lm_interface.hpp"
 #include "lm_optimizer_params_packer.inl"
 #include "lm_defines.hpp"
 #include "../hg_numerics_checker.hpp"
 #include "../hg_stereographic_unprojection.hpp"
 namespace xrt::tracking::hand::mercury::lm {
 using namespace xrt::auxiliary::math;
 // Generated from mercury_train's hand_depth_guess.ipynb.
 float
 sympy_guess_distance(float angle, float wrist_extra_distance_meters, float hand_size)
 {
 	float d1 = (1.0 / 2.0) *
 	           (-wrist_extra_distance_meters * (angle - 1) +
 	            sqrt((angle - 1) * (angle * pow(wrist_extra_distance_meters, 2) - 2 * pow(hand_size, 2) +
 	                                pow(wrist_extra_distance_meters, 2)))) /
 	           (angle - 1);
 	float d2 = -(wrist_extra_distance_meters * (angle - 1) +
 	             sqrt((angle - 1) * (angle * pow(wrist_extra_distance_meters, 2) - 2 * pow(hand_size, 2) +
 	                                 pow(wrist_extra_distance_meters, 2)))) /
 	           (2 * angle - 2);
 	if (d1 > d2) {
 		return d1;
 	}
 	return d2;
 }
 bool
 hand_init_guess(one_frame_input &observation, const float hand_size, xrt_pose left_in_right, xrt_pose &out_wrist_guess)
 {
 	int num_observation_views = 0;
 	// We're actually going "forwards" in our transformations for once!
 	// For the right camera, instead of moving an estimation *into* right-camera-space and doing math in
 	// camera-space, we move everything into *left-camera-space*.
 	xrt_pose right_in_left;
 	math_pose_invert(&left_in_right, &right_in_left);
 	xrt_vec3 wrist_global_sum = {};
 	xrt_vec3 midpxm_global_sum = {};
 	xrt_vec3 indpxm_global_sum = {};
 	xrt_vec3 litpxm_global_sum = {};
 	for (int view = 0; view < 2; view++) {
 		one_frame_one_view &v = observation.views[view];
 		if (!v.active) {
 			continue;
 		}
 		num_observation_views++;
 		Eigen::Quaternionf rotate = map_quat(v.look_dir);
 		Eigen::Vector3f directions_rel_camera[21];
 		for (int i = 0; i < 21; i++) {
 			float sg_x = v.keypoints_in_scaled_stereographic[i].pos_2d.x * v.stereographic_radius;
 			float sg_y = v.keypoints_in_scaled_stereographic[i].pos_2d.y * v.stereographic_radius;
 			directions_rel_camera[i] = stereographic_unprojection(sg_x, sg_y);
 			directions_rel_camera[i] = rotate * directions_rel_camera[i];
 		}
 		xrt_vec3 midpxm_dir;
 		xrt_vec3 wrist_dir;
 		map_vec3(midpxm_dir) = directions_rel_camera[Joint21::MIDL_PXM];
 		map_vec3(wrist_dir) = directions_rel_camera[Joint21::WRIST];
 		float angle = cos(m_vec3_angle(midpxm_dir, wrist_dir));
 		float wrist_extra_distance_meters =
 		    v.keypoints_in_scaled_stereographic[0].depth_relative_to_midpxm * hand_size;
 		float distance = sympy_guess_distance(angle, wrist_extra_distance_meters, hand_size);
 		if (distance != distance) {
 			// Nan check.
 			// This happens if the angle between midpxm_dir and wrist_dir is 0, generally when
 			// refine_center_of_distribution fails hard enough. Generally not worth tracking hands when this
 			// happens.
 			return false;
 		}
 // I wish I could make this an inline unit test. Useful for debugging so I'll leave it disabled.
 #if 0
    Eigen::Vector3f wrist_rel_camera = directions_rel_camera[0] * (distance+wrist_extra_distance_meters);
    Eigen::Vector3f midpxm_rel_camera = directions_rel_camera[9] * distance;
    float norm = (wrist_rel_camera-midpxm_rel_camera).norm();
    U_LOG_E("Recovered: %f", norm);
 #endif
 		xrt_vec3 wrist_rel_camera = {};
 		xrt_vec3 midpxm_rel_camera = {};
 		xrt_vec3 indpxm_rel_camera = {};
 		xrt_vec3 litpxm_rel_camera = {};
 		map_vec3(wrist_rel_camera) =
 		    directions_rel_camera[Joint21::WRIST] * (distance + wrist_extra_distance_meters);
 		map_vec3(midpxm_rel_camera) = directions_rel_camera[Joint21::MIDL_PXM] * (distance);
 		map_vec3(indpxm_rel_camera) =
 		    directions_rel_camera[Joint21::INDX_PXM] *
 		    (distance +
 		     (v.keypoints_in_scaled_stereographic[Joint21::INDX_PXM].depth_relative_to_midpxm * hand_size));
 		map_vec3(litpxm_rel_camera) =
 		    directions_rel_camera[Joint21::LITL_PXM] *
 		    (distance +
 		     (v.keypoints_in_scaled_stereographic[Joint21::LITL_PXM].depth_relative_to_midpxm * hand_size));
 		if (view == 1) {
 			math_pose_transform_point(&right_in_left, &wrist_rel_camera, &wrist_rel_camera);
 			math_pose_transform_point(&right_in_left, &midpxm_rel_camera, &midpxm_rel_camera);
 			math_pose_transform_point(&right_in_left, &indpxm_rel_camera, &indpxm_rel_camera);
 			math_pose_transform_point(&right_in_left, &litpxm_rel_camera, &litpxm_rel_camera);
 		}
 		wrist_global_sum += wrist_rel_camera;
 		midpxm_global_sum += midpxm_rel_camera;
 		indpxm_global_sum += indpxm_rel_camera;
 		litpxm_global_sum += litpxm_rel_camera;
 	}
 	wrist_global_sum = m_vec3_mul_scalar(wrist_global_sum, 1.0f / (float)num_observation_views);
 	midpxm_global_sum = m_vec3_mul_scalar(midpxm_global_sum, 1.0f / (float)num_observation_views);
 	indpxm_global_sum = m_vec3_mul_scalar(indpxm_global_sum, 1.0f / (float)num_observation_views);
 	litpxm_global_sum = m_vec3_mul_scalar(wrist_global_sum, 1.0f / (float)num_observation_views);
 	out_wrist_guess.position = wrist_global_sum;
 #if 0
 	// Original
 	xrt_vec3 plus_z = m_vec3_normalize(wrist_global_sum - midpxm_global_sum);
 	xrt_vec3 plus_x = m_vec3_normalize(indpxm_global_sum - litpxm_global_sum);
 #elif 0
 	// Negated
 	xrt_vec3 plus_z = m_vec3_normalize(midpxm_global_sum - wrist_global_sum);
 	// Not negated
 	xrt_vec3 plus_x = m_vec3_normalize(indpxm_global_sum - litpxm_global_sum);
 #elif 0
 	// Both negated
 	xrt_vec3 plus_z = m_vec3_normalize(midpxm_global_sum - wrist_global_sum);
 	xrt_vec3 plus_x = m_vec3_normalize(litpxm_global_sum - indpxm_global_sum);
 #else
 	// Not negated
 	xrt_vec3 plus_z = m_vec3_normalize(wrist_global_sum - midpxm_global_sum);
 	// Negated
 	xrt_vec3 plus_x = m_vec3_normalize(litpxm_global_sum - indpxm_global_sum);
 #endif
 	plus_x = m_vec3_orthonormalize(plus_z, plus_x);
 	math_quat_from_plus_x_z(&plus_x, &plus_z, &out_wrist_guess.orientation);
 	return true;
 }
 } // namespace xrt::tracking::hand::mercury::lm
--- a/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.hpp
+++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.hpp
@ -0,0 +1,22 @@
 // Copyright 2022, Collabora, Ltd.
 // SPDX-License-Identifier: BSL-1.0
 /*!
 * @file
 * @brief Levenberg-Marquardt kinematic optimizer
 * @author Moses Turner <moses@collabora.com>
 * @ingroup tracking
 */
 #pragma once
 #include "math/m_api.h"
 #include "math/m_vec3.h"
 #include "lm_defines.hpp"
 namespace xrt::tracking::hand::mercury::lm {
 bool
 hand_init_guess(one_frame_input &observation, const float hand_size, xrt_pose left_in_right, xrt_pose &out_wrist_guess);
 } // namespace xrt::tracking::hand::mercury::lm
--- a/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp
+++ b/src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp
@ -26,6 +26,7 @@
 #include "lm_defines.hpp"
 #include "../hg_numerics_checker.hpp"
 #include "lm_hand_init_guesser.hpp"
 /*
@ -953,33 +954,6 @@ optimizer_finish(KinematicHandLM &state, xrt_hand_joint_set &out_viz_hand, float
 	out_reprojection_error = sum;
 }
 void
 hand_was_untracked(KinematicHandLM &state)
 {
 	state.first_frame = true;
 #if 0
 	state.last_frame_pre_rotation.w = 1.0;
 	state.last_frame_pre_rotation.x = 0.0;
 	state.last_frame_pre_rotation.y = 0.0;
 	state.last_frame_pre_rotation.z = 0.0;
 #else
 	// Rotated 90 degrees so that the palm is facing the user and the fingers are up.
 	// The _idea_ is that having the palm be "in" the camera's plane will reduce initial local minima due to flat
 	// things having multiple possible unprojections.
 	state.this_frame_pre_rotation.w = sqrt(2) / 2;
 	state.this_frame_pre_rotation.x = -sqrt(2) / 2;
 	state.this_frame_pre_rotation.y = 0.0;
 	state.this_frame_pre_rotation.z = 0.0;
 #endif
 	state.this_frame_pre_position.x = 0.0f;
 	state.this_frame_pre_position.y = 0.0f;
 	state.this_frame_pre_position.z = -0.3f;
 	OptimizerHandInit(state.last_frame, state.this_frame_pre_rotation);
 	OptimizerHandPackIntoVector(state.last_frame, state.optimize_hand_size, state.TinyOptimizerInput.data());
 }
 void
 optimizer_run(KinematicHandLM *hand,
              one_frame_input &observation,
@ -998,8 +972,31 @@ optimizer_run(KinematicHandLM *hand,
 	KinematicHandLM &state = *hand;
 	state.smoothing_factor = smoothing_factor;
 	xrt_pose blah = XRT_POSE_IDENTITY;
 	hand_init_guess(observation, target_hand_size, state.left_in_right, blah);
 	if (hand_was_untracked_last_frame) {
-		hand_was_untracked(state);
+		OptimizerHandInit(state.last_frame, state.this_frame_pre_rotation);
 		OptimizerHandPackIntoVector(state.last_frame, state.optimize_hand_size,
 		                            state.TinyOptimizerInput.data());
 		if (blah.position.z > 0.05) {
 			LM_WARN(state, "Initial position guess was too close to camera! Z axis was %f m",
 			        blah.position.z);
 			state.this_frame_pre_position.x = 0.0f;
 			state.this_frame_pre_position.y = 0.0f;
 			state.this_frame_pre_position.z = -0.3f;
 		} else {
 			state.this_frame_pre_position.x = blah.position.x;
 			state.this_frame_pre_position.y = blah.position.y;
 			state.this_frame_pre_position.z = blah.position.z;
 		}
 		state.this_frame_pre_rotation.x = blah.orientation.x;
 		state.this_frame_pre_rotation.y = blah.orientation.y;
 		state.this_frame_pre_rotation.z = blah.orientation.z;
 		state.this_frame_pre_rotation.w = blah.orientation.w;
 	}
 	state.num_observation_views = 0;
@ -1072,17 +1069,16 @@ optimizer_run(KinematicHandLM *hand,
-	// Postfix - unpack,
+	// Postfix - unpack our optimization result into state.last_frame.
 	OptimizerHandUnpackFromVector(state.TinyOptimizerInput.data(), state, state.last_frame);
-	// Squash the orientations
+	// Have the final pose from this frame be the next frame's initial pose
 	state.this_frame_pre_rotation = state.last_frame.wrist_final_orientation;
 	state.this_frame_pre_position = state.last_frame.wrist_final_location;
 	// Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're
 	// using sin/asin.
 	// Reset this frame's post-transform to identity
 	state.last_frame.wrist_post_location.x = 0.0f;
 	state.last_frame.wrist_post_location.y = 0.0f;
 	state.last_frame.wrist_post_location.z = 0.0f;
@ -1091,7 +1087,8 @@ optimizer_run(KinematicHandLM *hand,
 	state.last_frame.wrist_post_orientation_aax.y = 0.0f;
 	state.last_frame.wrist_post_orientation_aax.z = 0.0f;
-
+	// Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're
 	// using sin/asin.
 	OptimizerHandPackIntoVector(state.last_frame, hand->optimize_hand_size, state.TinyOptimizerInput.data());
 	optimizer_finish(state, out_viz_hand, out_reprojection_error);
@ -1122,9 +1119,6 @@ optimizer_create(xrt_pose left_in_right, bool is_right, u_logging_level log_leve
 	hand->left_in_right_orientation.y = left_in_right.orientation.y;
 	hand->left_in_right_orientation.z = left_in_right.orientation.z;
 	// Probably unnecessary.
 	hand_was_untracked(*hand);
 	*out_kinematic_hand = hand;
 }