h/mercury: Add wrist pose initialization guesser

Moshi Turner 2023-03-06 23:53:50 -06:00 committed by Moses Turner
parent 3f2b71a9d5
commit 03546e0b99
4 changed files with 237 additions and 37 deletions

CMakeLists.txt

@@ -1,7 +1,10 @@
# Copyright 2022, Collabora, Ltd.
# SPDX-License-Identifier: BSL-1.0
add_library(t_ht_mercury_kine_lm STATIC lm_interface.hpp lm_main.cpp)
add_library(
t_ht_mercury_kine_lm STATIC lm_interface.hpp lm_main.cpp lm_hand_init_guesser.hpp
lm_hand_init_guesser.cpp
)
target_link_libraries(
t_ht_mercury_kine_lm

lm_hand_init_guesser.cpp (new file)

@@ -0,0 +1,181 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Wrist pose initialization guesser for the Levenberg-Marquardt kinematic optimizer
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#include "math/m_api.h"
#include "math/m_vec3.h"
#include "math/m_eigen_interop.hpp"
#include "os/os_time.h"
#include "util/u_logging.h"
#include "util/u_misc.h"
#include "util/u_trace_marker.h"
#include "tinyceres/tiny_solver.hpp"
#include "tinyceres/tiny_solver_autodiff_function.hpp"
#include "lm_rotations.inl"
#include <iostream>
#include <cmath>
#include <random>
#include "lm_interface.hpp"
#include "lm_optimizer_params_packer.inl"
#include "lm_defines.hpp"
#include "../hg_numerics_checker.hpp"
#include "../hg_stereographic_unprojection.hpp"
namespace xrt::tracking::hand::mercury::lm {
using namespace xrt::auxiliary::math;
// Generated from mercury_train's hand_depth_guess.ipynb.
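// A sketch of the geometry (assuming the notebook solves this same triangle): the middle-proximal joint
// lies at unknown distance d along its ray, the wrist at d + wrist_extra_distance_meters along its ray,
// and `angle` is the cosine of the angle between the two rays. The law of cosines with the known
// wrist-to-middle-proximal length (hand_size) then gives a quadratic in d:
//   hand_size^2 = d^2 + (d + w)^2 - 2 * d * (d + w) * angle,   where w = wrist_extra_distance_meters.
// d1 and d2 below are its two roots; the larger one is taken as the physically meaningful distance.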
float
sympy_guess_distance(float angle, float wrist_extra_distance_meters, float hand_size)
{
float d1 = (1.0 / 2.0) *
(-wrist_extra_distance_meters * (angle - 1) +
sqrt((angle - 1) * (angle * pow(wrist_extra_distance_meters, 2) - 2 * pow(hand_size, 2) +
pow(wrist_extra_distance_meters, 2)))) /
(angle - 1);
float d2 = -(wrist_extra_distance_meters * (angle - 1) +
sqrt((angle - 1) * (angle * pow(wrist_extra_distance_meters, 2) - 2 * pow(hand_size, 2) +
pow(wrist_extra_distance_meters, 2)))) /
(2 * angle - 2);
if (d1 > d2) {
return d1;
}
return d2;
}
bool
hand_init_guess(one_frame_input &observation, const float hand_size, xrt_pose left_in_right, xrt_pose &out_wrist_guess)
{
int num_observation_views = 0;
// We're actually going "forwards" in our transformations for once!
// For the right camera, instead of moving an estimation *into* right-camera-space and doing math in
// camera-space, we move everything into *left-camera-space*.
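// That way both views' estimates land in the same (left-camera) space and can simply be averaged.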
xrt_pose right_in_left;
math_pose_invert(&left_in_right, &right_in_left);
xrt_vec3 wrist_global_sum = {};
xrt_vec3 midpxm_global_sum = {};
xrt_vec3 indpxm_global_sum = {};
xrt_vec3 litpxm_global_sum = {};
for (int view = 0; view < 2; view++) {
one_frame_one_view &v = observation.views[view];
if (!v.active) {
continue;
}
num_observation_views++;
Eigen::Quaternionf rotate = map_quat(v.look_dir);
Eigen::Vector3f directions_rel_camera[21];
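// Turn each predicted keypoint into a direction ray in camera space: scale its normalized 2D position by
// the view's stereographic radius, unproject it, and rotate the result by the model's look direction.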
for (int i = 0; i < 21; i++) {
float sg_x = v.keypoints_in_scaled_stereographic[i].pos_2d.x * v.stereographic_radius;
float sg_y = v.keypoints_in_scaled_stereographic[i].pos_2d.y * v.stereographic_radius;
directions_rel_camera[i] = stereographic_unprojection(sg_x, sg_y);
directions_rel_camera[i] = rotate * directions_rel_camera[i];
}
xrt_vec3 midpxm_dir;
xrt_vec3 wrist_dir;
map_vec3(midpxm_dir) = directions_rel_camera[Joint21::MIDL_PXM];
map_vec3(wrist_dir) = directions_rel_camera[Joint21::WRIST];
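// Note: m_vec3_angle() returns the angle between the rays, so `angle` actually holds its cosine, which is
// what the sympy-generated formula expects.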
float angle = cos(m_vec3_angle(midpxm_dir, wrist_dir));
float wrist_extra_distance_meters =
v.keypoints_in_scaled_stereographic[0].depth_relative_to_midpxm * hand_size;
float distance = sympy_guess_distance(angle, wrist_extra_distance_meters, hand_size);
if (distance != distance) {
// Nan check.
// This happens if the angle between midpxm_dir and wrist_dir is 0, generally when
// refine_center_of_distribution fails hard enough. Generally not worth tracking hands when this
// happens.
return false;
}
// I wish I could make this an inline unit test. Useful for debugging so I'll leave it disabled.
#if 0
Eigen::Vector3f wrist_rel_camera = directions_rel_camera[0] * (distance+wrist_extra_distance_meters);
Eigen::Vector3f midpxm_rel_camera = directions_rel_camera[9] * distance;
float norm = (wrist_rel_camera-midpxm_rel_camera).norm();
U_LOG_E("Recovered: %f", norm);
#endif
xrt_vec3 wrist_rel_camera = {};
xrt_vec3 midpxm_rel_camera = {};
xrt_vec3 indpxm_rel_camera = {};
xrt_vec3 litpxm_rel_camera = {};
map_vec3(wrist_rel_camera) =
directions_rel_camera[Joint21::WRIST] * (distance + wrist_extra_distance_meters);
map_vec3(midpxm_rel_camera) = directions_rel_camera[Joint21::MIDL_PXM] * (distance);
map_vec3(indpxm_rel_camera) =
directions_rel_camera[Joint21::INDX_PXM] *
(distance +
(v.keypoints_in_scaled_stereographic[Joint21::INDX_PXM].depth_relative_to_midpxm * hand_size));
map_vec3(litpxm_rel_camera) =
directions_rel_camera[Joint21::LITL_PXM] *
(distance +
(v.keypoints_in_scaled_stereographic[Joint21::LITL_PXM].depth_relative_to_midpxm * hand_size));
if (view == 1) {
math_pose_transform_point(&right_in_left, &wrist_rel_camera, &wrist_rel_camera);
math_pose_transform_point(&right_in_left, &midpxm_rel_camera, &midpxm_rel_camera);
math_pose_transform_point(&right_in_left, &indpxm_rel_camera, &indpxm_rel_camera);
math_pose_transform_point(&right_in_left, &litpxm_rel_camera, &litpxm_rel_camera);
}
wrist_global_sum += wrist_rel_camera;
midpxm_global_sum += midpxm_rel_camera;
indpxm_global_sum += indpxm_rel_camera;
litpxm_global_sum += litpxm_rel_camera;
}
if (num_observation_views == 0) {
// Neither view had a usable observation; nothing to average.
return false;
}
wrist_global_sum = m_vec3_mul_scalar(wrist_global_sum, 1.0f / (float)num_observation_views);
midpxm_global_sum = m_vec3_mul_scalar(midpxm_global_sum, 1.0f / (float)num_observation_views);
indpxm_global_sum = m_vec3_mul_scalar(indpxm_global_sum, 1.0f / (float)num_observation_views);
litpxm_global_sum = m_vec3_mul_scalar(litpxm_global_sum, 1.0f / (float)num_observation_views);
out_wrist_guess.position = wrist_global_sum;
#if 0
// Original
xrt_vec3 plus_z = m_vec3_normalize(wrist_global_sum - midpxm_global_sum);
xrt_vec3 plus_x = m_vec3_normalize(indpxm_global_sum - litpxm_global_sum);
#elif 0
// Negated
xrt_vec3 plus_z = m_vec3_normalize(midpxm_global_sum - wrist_global_sum);
// Not negated
xrt_vec3 plus_x = m_vec3_normalize(indpxm_global_sum - litpxm_global_sum);
#elif 0
// Both negated
xrt_vec3 plus_z = m_vec3_normalize(midpxm_global_sum - wrist_global_sum);
xrt_vec3 plus_x = m_vec3_normalize(litpxm_global_sum - indpxm_global_sum);
#else
// Not negated
xrt_vec3 plus_z = m_vec3_normalize(wrist_global_sum - midpxm_global_sum);
// Negated
xrt_vec3 plus_x = m_vec3_normalize(litpxm_global_sum - indpxm_global_sum);
#endif
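// Gram-Schmidt step: make plus_x orthogonal to plus_z so the two form a proper basis, then build the
// wrist orientation quaternion from the +X and +Z axes.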
plus_x = m_vec3_orthonormalize(plus_z, plus_x);
math_quat_from_plus_x_z(&plus_x, &plus_z, &out_wrist_guess.orientation);
return true;
}
} // namespace xrt::tracking::hand::mercury::lm
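As a sanity check on the depth-guess geometry above, here is a small standalone program (hypothetical, not part of this commit; guess_distance simply re-implements sympy_guess_distance with short parameter names and made-up test values) that builds a triangle with a known middle-proximal distance and verifies that the closed form recovers it:

// Hypothetical standalone check of the depth-guess geometry; not part of the commit.
#include <cmath>
#include <cstdio>

// Same closed form as sympy_guess_distance(): c is the cosine of the angle between the wrist ray and the
// middle-proximal ray, w is the extra distance of the wrist along its ray, h is the hand size
// (wrist-to-middle-proximal distance).
static float
guess_distance(float c, float w, float h)
{
	float root = std::sqrt((c - 1) * (c * w * w - 2 * h * h + w * w));
	float d1 = 0.5f * (-w * (c - 1) + root) / (c - 1);
	float d2 = -(w * (c - 1) + root) / (2 * c - 2);
	return d1 > d2 ? d1 : d2;
}

int
main()
{
	// Put the middle-proximal joint at distance d and the wrist at d + w along rays whose separation is
	// chosen (via the law of cosines) so that the two joints end up exactly h apart.
	float d = 0.45f, w = 0.03f, h = 0.09f;
	float c = (d * d + (d + w) * (d + w) - h * h) / (2 * d * (d + w));
	std::printf("true distance: %f, recovered: %f\n", d, guess_distance(c, w, h));
	return 0;
}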

lm_hand_init_guesser.hpp (new file)

@@ -0,0 +1,22 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Wrist pose initialization guesser for the Levenberg-Marquardt kinematic optimizer
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#pragma once
#include "math/m_api.h"
#include "math/m_vec3.h"
#include "lm_defines.hpp"
namespace xrt::tracking::hand::mercury::lm {
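// Guess an initial wrist pose, in left-camera space, from one frame's keypoint observations and the
// estimated hand size. Returns false if the observation does not allow a usable guess.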
bool
hand_init_guess(one_frame_input &observation, const float hand_size, xrt_pose left_in_right, xrt_pose &out_wrist_guess);
} // namespace xrt::tracking::hand::mercury::lm

lm_main.cpp

@@ -26,6 +26,7 @@
#include "lm_defines.hpp"
#include "../hg_numerics_checker.hpp"
#include "lm_hand_init_guesser.hpp"
/*
@@ -953,33 +954,6 @@ optimizer_finish(KinematicHandLM &state, xrt_hand_joint_set &out_viz_hand, float
out_reprojection_error = sum;
}
void
hand_was_untracked(KinematicHandLM &state)
{
state.first_frame = true;
#if 0
state.last_frame_pre_rotation.w = 1.0;
state.last_frame_pre_rotation.x = 0.0;
state.last_frame_pre_rotation.y = 0.0;
state.last_frame_pre_rotation.z = 0.0;
#else
// Rotated 90 degrees so that the palm is facing the user and the fingers are up.
// The _idea_ is that having the palm be "in" the camera's plane will reduce initial local minima due to flat
// things having multiple possible unprojections.
state.this_frame_pre_rotation.w = sqrt(2) / 2;
state.this_frame_pre_rotation.x = -sqrt(2) / 2;
state.this_frame_pre_rotation.y = 0.0;
state.this_frame_pre_rotation.z = 0.0;
#endif
state.this_frame_pre_position.x = 0.0f;
state.this_frame_pre_position.y = 0.0f;
state.this_frame_pre_position.z = -0.3f;
OptimizerHandInit(state.last_frame, state.this_frame_pre_rotation);
OptimizerHandPackIntoVector(state.last_frame, state.optimize_hand_size, state.TinyOptimizerInput.data());
}
void
optimizer_run(KinematicHandLM *hand,
one_frame_input &observation,
@@ -998,8 +972,31 @@ optimizer_run(KinematicHandLM *hand,
KinematicHandLM &state = *hand;
state.smoothing_factor = smoothing_factor;
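// Guess an initial wrist pose for this frame from the keypoint observation, in left-camera space.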
xrt_pose blah = XRT_POSE_IDENTITY;
hand_init_guess(observation, target_hand_size, state.left_in_right, blah);
if (hand_was_untracked_last_frame) {
hand_was_untracked(state);
OptimizerHandInit(state.last_frame, state.this_frame_pre_rotation);
OptimizerHandPackIntoVector(state.last_frame, state.optimize_hand_size,
state.TinyOptimizerInput.data());
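// If the guessed position is too close to (or behind) the camera it is not usable as an initial pose;
// fall back to a fixed position 30 cm in front of the camera.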
if (blah.position.z > 0.05) {
LM_WARN(state, "Initial position guess was too close to camera! Z axis was %f m",
blah.position.z);
state.this_frame_pre_position.x = 0.0f;
state.this_frame_pre_position.y = 0.0f;
state.this_frame_pre_position.z = -0.3f;
} else {
state.this_frame_pre_position.x = blah.position.x;
state.this_frame_pre_position.y = blah.position.y;
state.this_frame_pre_position.z = blah.position.z;
}
state.this_frame_pre_rotation.x = blah.orientation.x;
state.this_frame_pre_rotation.y = blah.orientation.y;
state.this_frame_pre_rotation.z = blah.orientation.z;
state.this_frame_pre_rotation.w = blah.orientation.w;
}
state.num_observation_views = 0;
@@ -1072,17 +1069,16 @@ optimizer_run(KinematicHandLM *hand,
// Postfix - unpack,
// Postfix - unpack our optimization result into state.last_frame.
OptimizerHandUnpackFromVector(state.TinyOptimizerInput.data(), state, state.last_frame);
// Squash the orientations
// Have the final pose from this frame be the next frame's initial pose
state.this_frame_pre_rotation = state.last_frame.wrist_final_orientation;
state.this_frame_pre_position = state.last_frame.wrist_final_location;
// Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're
// using sin/asin.
// Reset this frame's post-transform to identity
state.last_frame.wrist_post_location.x = 0.0f;
state.last_frame.wrist_post_location.y = 0.0f;
state.last_frame.wrist_post_location.z = 0.0f;
@@ -1091,7 +1087,8 @@ optimizer_run(KinematicHandLM *hand,
state.last_frame.wrist_post_orientation_aax.y = 0.0f;
state.last_frame.wrist_post_orientation_aax.z = 0.0f;
// Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're
// using sin/asin.
OptimizerHandPackIntoVector(state.last_frame, hand->optimize_hand_size, state.TinyOptimizerInput.data());
optimizer_finish(state, out_viz_hand, out_reprojection_error);
@@ -1122,9 +1119,6 @@ optimizer_create(xrt_pose left_in_right, bool is_right, u_logging_level log_leve
hand->left_in_right_orientation.y = left_in_right.orientation.y;
hand->left_in_right_orientation.z = left_in_right.orientation.z;
// Probably unnecessary.
hand_was_untracked(*hand);
*out_kinematic_hand = hand;
}