mirror of
https://gitlab.freedesktop.org/monado/monado.git
synced 2024-12-29 11:06:18 +00:00
h/mercury: Add wrist pose initialization guesser
This commit is contained in:
parent
3f2b71a9d5
commit
03546e0b99
|
@ -1,7 +1,10 @@
|
|||
# Copyright 2022, Collabora, Ltd.
|
||||
# SPDX-License-Identifier: BSL-1.0
|
||||
|
||||
add_library(t_ht_mercury_kine_lm STATIC lm_interface.hpp lm_main.cpp)
|
||||
add_library(
|
||||
t_ht_mercury_kine_lm STATIC lm_interface.hpp lm_main.cpp lm_hand_init_guesser.hpp
|
||||
lm_hand_init_guesser.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
t_ht_mercury_kine_lm
|
||||
|
|
181
src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.cpp
Normal file
181
src/xrt/tracking/hand/mercury/kine_lm/lm_hand_init_guesser.cpp
Normal file
|
@ -0,0 +1,181 @@
|
|||
// Copyright 2022, Collabora, Ltd.
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
/*!
|
||||
* @file
|
||||
* @brief Levenberg-Marquardt kinematic optimizer
|
||||
* @author Moses Turner <moses@collabora.com>
|
||||
* @ingroup tracking
|
||||
*/
|
||||
|
||||
#include "math/m_api.h"
|
||||
#include "math/m_vec3.h"
|
||||
#include "math/m_eigen_interop.hpp"
|
||||
#include "os/os_time.h"
|
||||
#include "util/u_logging.h"
|
||||
#include "util/u_misc.h"
|
||||
#include "util/u_trace_marker.h"
|
||||
|
||||
#include "tinyceres/tiny_solver.hpp"
|
||||
#include "tinyceres/tiny_solver_autodiff_function.hpp"
|
||||
#include "lm_rotations.inl"
|
||||
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <random>
|
||||
#include "lm_interface.hpp"
|
||||
#include "lm_optimizer_params_packer.inl"
|
||||
#include "lm_defines.hpp"
|
||||
|
||||
#include "../hg_numerics_checker.hpp"
|
||||
#include "../hg_stereographic_unprojection.hpp"
|
||||
|
||||
namespace xrt::tracking::hand::mercury::lm {
|
||||
using namespace xrt::auxiliary::math;
|
||||
|
||||
// Generated from mercury_train's hand_depth_guess.ipynb.
|
||||
float
|
||||
sympy_guess_distance(float angle, float wrist_extra_distance_meters, float hand_size)
|
||||
{
|
||||
float d1 = (1.0 / 2.0) *
|
||||
(-wrist_extra_distance_meters * (angle - 1) +
|
||||
sqrt((angle - 1) * (angle * pow(wrist_extra_distance_meters, 2) - 2 * pow(hand_size, 2) +
|
||||
pow(wrist_extra_distance_meters, 2)))) /
|
||||
(angle - 1);
|
||||
float d2 = -(wrist_extra_distance_meters * (angle - 1) +
|
||||
sqrt((angle - 1) * (angle * pow(wrist_extra_distance_meters, 2) - 2 * pow(hand_size, 2) +
|
||||
pow(wrist_extra_distance_meters, 2)))) /
|
||||
(2 * angle - 2);
|
||||
|
||||
if (d1 > d2) {
|
||||
return d1;
|
||||
}
|
||||
return d2;
|
||||
}
|
||||
|
||||
bool
|
||||
hand_init_guess(one_frame_input &observation, const float hand_size, xrt_pose left_in_right, xrt_pose &out_wrist_guess)
|
||||
{
|
||||
int num_observation_views = 0;
|
||||
|
||||
|
||||
// We're actually going "forwards" in our transformations for once!
|
||||
// For the right camera, instead of moving an estimation *into* right-camera-space and doing math in
|
||||
// camera-space, we move everything into *left-camera-space*.
|
||||
xrt_pose right_in_left;
|
||||
math_pose_invert(&left_in_right, &right_in_left);
|
||||
|
||||
xrt_vec3 wrist_global_sum = {};
|
||||
xrt_vec3 midpxm_global_sum = {};
|
||||
xrt_vec3 indpxm_global_sum = {};
|
||||
xrt_vec3 litpxm_global_sum = {};
|
||||
|
||||
for (int view = 0; view < 2; view++) {
|
||||
one_frame_one_view &v = observation.views[view];
|
||||
if (!v.active) {
|
||||
continue;
|
||||
}
|
||||
num_observation_views++;
|
||||
|
||||
Eigen::Quaternionf rotate = map_quat(v.look_dir);
|
||||
Eigen::Vector3f directions_rel_camera[21];
|
||||
|
||||
for (int i = 0; i < 21; i++) {
|
||||
float sg_x = v.keypoints_in_scaled_stereographic[i].pos_2d.x * v.stereographic_radius;
|
||||
float sg_y = v.keypoints_in_scaled_stereographic[i].pos_2d.y * v.stereographic_radius;
|
||||
directions_rel_camera[i] = stereographic_unprojection(sg_x, sg_y);
|
||||
directions_rel_camera[i] = rotate * directions_rel_camera[i];
|
||||
}
|
||||
|
||||
xrt_vec3 midpxm_dir;
|
||||
xrt_vec3 wrist_dir;
|
||||
map_vec3(midpxm_dir) = directions_rel_camera[Joint21::MIDL_PXM];
|
||||
map_vec3(wrist_dir) = directions_rel_camera[Joint21::WRIST];
|
||||
|
||||
float angle = cos(m_vec3_angle(midpxm_dir, wrist_dir));
|
||||
float wrist_extra_distance_meters =
|
||||
v.keypoints_in_scaled_stereographic[0].depth_relative_to_midpxm * hand_size;
|
||||
|
||||
float distance = sympy_guess_distance(angle, wrist_extra_distance_meters, hand_size);
|
||||
|
||||
if (distance != distance) {
|
||||
// Nan check.
|
||||
// This happens if the angle between midpxm_dir and wrist_dir is 0, generally when
|
||||
// refine_center_of_distribution fails hard enough. Generally not worth tracking hands when this
|
||||
// happens.
|
||||
return false;
|
||||
}
|
||||
|
||||
// I wish I could make this an inline unit test. Useful for debugging so I'll leave it disabled.
|
||||
#if 0
|
||||
Eigen::Vector3f wrist_rel_camera = directions_rel_camera[0] * (distance+wrist_extra_distance_meters);
|
||||
Eigen::Vector3f midpxm_rel_camera = directions_rel_camera[9] * distance;
|
||||
|
||||
float norm = (wrist_rel_camera-midpxm_rel_camera).norm();
|
||||
U_LOG_E("Recovered: %f", norm);
|
||||
#endif
|
||||
|
||||
xrt_vec3 wrist_rel_camera = {};
|
||||
xrt_vec3 midpxm_rel_camera = {};
|
||||
xrt_vec3 indpxm_rel_camera = {};
|
||||
xrt_vec3 litpxm_rel_camera = {};
|
||||
|
||||
map_vec3(wrist_rel_camera) =
|
||||
directions_rel_camera[Joint21::WRIST] * (distance + wrist_extra_distance_meters);
|
||||
map_vec3(midpxm_rel_camera) = directions_rel_camera[Joint21::MIDL_PXM] * (distance);
|
||||
map_vec3(indpxm_rel_camera) =
|
||||
directions_rel_camera[Joint21::INDX_PXM] *
|
||||
(distance +
|
||||
(v.keypoints_in_scaled_stereographic[Joint21::INDX_PXM].depth_relative_to_midpxm * hand_size));
|
||||
map_vec3(litpxm_rel_camera) =
|
||||
directions_rel_camera[Joint21::LITL_PXM] *
|
||||
(distance +
|
||||
(v.keypoints_in_scaled_stereographic[Joint21::LITL_PXM].depth_relative_to_midpxm * hand_size));
|
||||
|
||||
if (view == 1) {
|
||||
math_pose_transform_point(&right_in_left, &wrist_rel_camera, &wrist_rel_camera);
|
||||
math_pose_transform_point(&right_in_left, &midpxm_rel_camera, &midpxm_rel_camera);
|
||||
math_pose_transform_point(&right_in_left, &indpxm_rel_camera, &indpxm_rel_camera);
|
||||
math_pose_transform_point(&right_in_left, &litpxm_rel_camera, &litpxm_rel_camera);
|
||||
}
|
||||
|
||||
wrist_global_sum += wrist_rel_camera;
|
||||
midpxm_global_sum += midpxm_rel_camera;
|
||||
indpxm_global_sum += indpxm_rel_camera;
|
||||
litpxm_global_sum += litpxm_rel_camera;
|
||||
}
|
||||
|
||||
wrist_global_sum = m_vec3_mul_scalar(wrist_global_sum, 1.0f / (float)num_observation_views);
|
||||
midpxm_global_sum = m_vec3_mul_scalar(midpxm_global_sum, 1.0f / (float)num_observation_views);
|
||||
indpxm_global_sum = m_vec3_mul_scalar(indpxm_global_sum, 1.0f / (float)num_observation_views);
|
||||
litpxm_global_sum = m_vec3_mul_scalar(wrist_global_sum, 1.0f / (float)num_observation_views);
|
||||
|
||||
out_wrist_guess.position = wrist_global_sum;
|
||||
|
||||
#if 0
|
||||
// Original
|
||||
xrt_vec3 plus_z = m_vec3_normalize(wrist_global_sum - midpxm_global_sum);
|
||||
xrt_vec3 plus_x = m_vec3_normalize(indpxm_global_sum - litpxm_global_sum);
|
||||
|
||||
#elif 0
|
||||
// Negated
|
||||
xrt_vec3 plus_z = m_vec3_normalize(midpxm_global_sum - wrist_global_sum);
|
||||
// Not negated
|
||||
xrt_vec3 plus_x = m_vec3_normalize(indpxm_global_sum - litpxm_global_sum);
|
||||
#elif 0
|
||||
// Both negated
|
||||
xrt_vec3 plus_z = m_vec3_normalize(midpxm_global_sum - wrist_global_sum);
|
||||
xrt_vec3 plus_x = m_vec3_normalize(litpxm_global_sum - indpxm_global_sum);
|
||||
|
||||
#else
|
||||
// Not negated
|
||||
xrt_vec3 plus_z = m_vec3_normalize(wrist_global_sum - midpxm_global_sum);
|
||||
// Negated
|
||||
xrt_vec3 plus_x = m_vec3_normalize(litpxm_global_sum - indpxm_global_sum);
|
||||
#endif
|
||||
|
||||
plus_x = m_vec3_orthonormalize(plus_z, plus_x);
|
||||
math_quat_from_plus_x_z(&plus_x, &plus_z, &out_wrist_guess.orientation);
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace xrt::tracking::hand::mercury::lm
|
|
@ -0,0 +1,22 @@
|
|||
// Copyright 2022, Collabora, Ltd.
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
/*!
|
||||
* @file
|
||||
* @brief Levenberg-Marquardt kinematic optimizer
|
||||
* @author Moses Turner <moses@collabora.com>
|
||||
* @ingroup tracking
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "math/m_api.h"
|
||||
#include "math/m_vec3.h"
|
||||
|
||||
#include "lm_defines.hpp"
|
||||
|
||||
|
||||
namespace xrt::tracking::hand::mercury::lm {
|
||||
|
||||
bool
|
||||
hand_init_guess(one_frame_input &observation, const float hand_size, xrt_pose left_in_right, xrt_pose &out_wrist_guess);
|
||||
|
||||
} // namespace xrt::tracking::hand::mercury::lm
|
|
@ -26,6 +26,7 @@
|
|||
#include "lm_defines.hpp"
|
||||
|
||||
#include "../hg_numerics_checker.hpp"
|
||||
#include "lm_hand_init_guesser.hpp"
|
||||
|
||||
/*
|
||||
|
||||
|
@ -953,33 +954,6 @@ optimizer_finish(KinematicHandLM &state, xrt_hand_joint_set &out_viz_hand, float
|
|||
out_reprojection_error = sum;
|
||||
}
|
||||
|
||||
void
|
||||
hand_was_untracked(KinematicHandLM &state)
|
||||
{
|
||||
state.first_frame = true;
|
||||
#if 0
|
||||
state.last_frame_pre_rotation.w = 1.0;
|
||||
state.last_frame_pre_rotation.x = 0.0;
|
||||
state.last_frame_pre_rotation.y = 0.0;
|
||||
state.last_frame_pre_rotation.z = 0.0;
|
||||
#else
|
||||
// Rotated 90 degrees so that the palm is facing the user and the fingers are up.
|
||||
// The _idea_ is that having the palm be "in" the camera's plane will reduce initial local minima due to flat
|
||||
// things having multiple possible unprojections.
|
||||
state.this_frame_pre_rotation.w = sqrt(2) / 2;
|
||||
state.this_frame_pre_rotation.x = -sqrt(2) / 2;
|
||||
state.this_frame_pre_rotation.y = 0.0;
|
||||
state.this_frame_pre_rotation.z = 0.0;
|
||||
#endif
|
||||
|
||||
state.this_frame_pre_position.x = 0.0f;
|
||||
state.this_frame_pre_position.y = 0.0f;
|
||||
state.this_frame_pre_position.z = -0.3f;
|
||||
|
||||
OptimizerHandInit(state.last_frame, state.this_frame_pre_rotation);
|
||||
OptimizerHandPackIntoVector(state.last_frame, state.optimize_hand_size, state.TinyOptimizerInput.data());
|
||||
}
|
||||
|
||||
void
|
||||
optimizer_run(KinematicHandLM *hand,
|
||||
one_frame_input &observation,
|
||||
|
@ -998,8 +972,31 @@ optimizer_run(KinematicHandLM *hand,
|
|||
KinematicHandLM &state = *hand;
|
||||
state.smoothing_factor = smoothing_factor;
|
||||
|
||||
xrt_pose blah = XRT_POSE_IDENTITY;
|
||||
hand_init_guess(observation, target_hand_size, state.left_in_right, blah);
|
||||
|
||||
if (hand_was_untracked_last_frame) {
|
||||
hand_was_untracked(state);
|
||||
OptimizerHandInit(state.last_frame, state.this_frame_pre_rotation);
|
||||
OptimizerHandPackIntoVector(state.last_frame, state.optimize_hand_size,
|
||||
state.TinyOptimizerInput.data());
|
||||
if (blah.position.z > 0.05) {
|
||||
LM_WARN(state, "Initial position guess was too close to camera! Z axis was %f m",
|
||||
blah.position.z);
|
||||
state.this_frame_pre_position.x = 0.0f;
|
||||
state.this_frame_pre_position.y = 0.0f;
|
||||
state.this_frame_pre_position.z = -0.3f;
|
||||
} else {
|
||||
state.this_frame_pre_position.x = blah.position.x;
|
||||
state.this_frame_pre_position.y = blah.position.y;
|
||||
state.this_frame_pre_position.z = blah.position.z;
|
||||
}
|
||||
|
||||
|
||||
|
||||
state.this_frame_pre_rotation.x = blah.orientation.x;
|
||||
state.this_frame_pre_rotation.y = blah.orientation.y;
|
||||
state.this_frame_pre_rotation.z = blah.orientation.z;
|
||||
state.this_frame_pre_rotation.w = blah.orientation.w;
|
||||
}
|
||||
|
||||
state.num_observation_views = 0;
|
||||
|
@ -1072,17 +1069,16 @@ optimizer_run(KinematicHandLM *hand,
|
|||
|
||||
|
||||
|
||||
// Postfix - unpack,
|
||||
// Postfix - unpack our optimization result into state.last_frame.
|
||||
OptimizerHandUnpackFromVector(state.TinyOptimizerInput.data(), state, state.last_frame);
|
||||
|
||||
|
||||
|
||||
// Squash the orientations
|
||||
// Have the final pose from this frame be the next frame's initial pose
|
||||
state.this_frame_pre_rotation = state.last_frame.wrist_final_orientation;
|
||||
state.this_frame_pre_position = state.last_frame.wrist_final_location;
|
||||
// Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're
|
||||
// using sin/asin.
|
||||
|
||||
// Reset this frame's post-transform to identity
|
||||
state.last_frame.wrist_post_location.x = 0.0f;
|
||||
state.last_frame.wrist_post_location.y = 0.0f;
|
||||
state.last_frame.wrist_post_location.z = 0.0f;
|
||||
|
@ -1091,7 +1087,8 @@ optimizer_run(KinematicHandLM *hand,
|
|||
state.last_frame.wrist_post_orientation_aax.y = 0.0f;
|
||||
state.last_frame.wrist_post_orientation_aax.z = 0.0f;
|
||||
|
||||
|
||||
// Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're
|
||||
// using sin/asin.
|
||||
OptimizerHandPackIntoVector(state.last_frame, hand->optimize_hand_size, state.TinyOptimizerInput.data());
|
||||
|
||||
optimizer_finish(state, out_viz_hand, out_reprojection_error);
|
||||
|
@ -1122,9 +1119,6 @@ optimizer_create(xrt_pose left_in_right, bool is_right, u_logging_level log_leve
|
|||
hand->left_in_right_orientation.y = left_in_right.orientation.y;
|
||||
hand->left_in_right_orientation.z = left_in_right.orientation.z;
|
||||
|
||||
// Probably unnecessary.
|
||||
hand_was_untracked(*hand);
|
||||
|
||||
*out_kinematic_hand = hand;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue