mirror of
https://gitlab.freedesktop.org/monado/monado.git
synced 2025-01-27 09:01:46 +00:00
h/mercury: Update hand tracking with new half-artificial model
This commit is contained in:
parent
bfdeaa7d8f
commit
e9f79c45bf
|
@ -24,6 +24,25 @@ target_link_libraries(
|
|||
ONNXRuntime::ONNXRuntime
|
||||
)
|
||||
|
||||
add_library(t_ht_mercury_distorter STATIC hg_image_distorter.cpp)
|
||||
|
||||
target_link_libraries(t_ht_mercury_distorter PRIVATE aux_math aux_tracking aux_os aux_util)
|
||||
|
||||
target_include_directories(
|
||||
t_ht_mercury_distorter SYSTEM PRIVATE ${OpenCV_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR}
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
t_ht_mercury_distorter
|
||||
PRIVATE
|
||||
aux_math
|
||||
aux_tracking
|
||||
aux_os
|
||||
aux_util
|
||||
${OpenCV_LIBRARIES}
|
||||
ONNXRuntime::ONNXRuntime # no, wrong
|
||||
)
|
||||
|
||||
add_library(t_ht_mercury STATIC hg_sync.cpp hg_sync.hpp hg_interface.h kine_common.hpp)
|
||||
|
||||
target_link_libraries(
|
||||
|
@ -37,7 +56,7 @@ target_link_libraries(
|
|||
ONNXRuntime::ONNXRuntime
|
||||
t_ht_mercury_kine_lm
|
||||
t_ht_mercury_model
|
||||
# ncnn # Soon...
|
||||
t_ht_mercury_distorter
|
||||
${OpenCV_LIBRARIES}
|
||||
)
|
||||
|
||||
|
|
615
src/xrt/tracking/hand/mercury/hg_image_distorter.cpp
Normal file
615
src/xrt/tracking/hand/mercury/hg_image_distorter.cpp
Normal file
|
@ -0,0 +1,615 @@
|
|||
// Copyright 2021-2022, Collabora, Ltd.
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
/*!
|
||||
* @file
|
||||
* @brief Utility to do batch stereographic projections of images.
|
||||
* @author Moses Turner <moses@collabora.com>
|
||||
* @ingroup tracking
|
||||
*/
|
||||
|
||||
#include <cmath>
|
||||
#include <opencv2/core.hpp>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include "math/m_vec3.h"
|
||||
#include "math/m_vec2.h"
|
||||
|
||||
#include "util/u_time.h"
|
||||
|
||||
#include "xrt/xrt_defines.h"
|
||||
#include "math/m_space.h"
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include "os/os_time.h"
|
||||
#include "util/u_logging.h"
|
||||
#include "tracking/t_tracking.h"
|
||||
|
||||
#include "tracking/t_calibration_opencv.hpp"
|
||||
#include <iostream>
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
//!@todo This is the wrong way of doing it; we should probably be allocating some space up front for each
|
||||
//! keypoint estimator to scratch into. Patches welcome!
|
||||
#define EIGEN_STACK_ALLOCATION_LIMIT 128 * 128 * 4 * 3
|
||||
|
||||
#include "math/m_eigen_interop.hpp"
|
||||
#include "hg_sync.hpp"
|
||||
|
||||
namespace xrt::tracking::hand::mercury {
|
||||
|
||||
constexpr int wsize = 128;
|
||||
|
||||
template <typename T> using OutputSizedArray = Eigen::Array<T, wsize, wsize, Eigen::RowMajor>;
|
||||
using OutputSizedFloatArray = OutputSizedArray<float>;
|
||||
|
||||
struct projection_state
|
||||
{
|
||||
cv::Mat &input;
|
||||
Eigen::Map<OutputSizedArray<uint8_t>> distorted_image_eigen;
|
||||
t_camera_model_params dist;
|
||||
|
||||
const projection_instructions &instructions;
|
||||
|
||||
projection_state(const projection_instructions &instructions, cv::Mat &input, cv::Mat &output)
|
||||
: input(input), distorted_image_eigen(output.data, 128, 128), instructions(instructions){};
|
||||
};
|
||||
|
||||
|
||||
// A private, purpose-optimized version of the Kannalla-Brandt projection function.
|
||||
static void
|
||||
project_kb4(const t_camera_model_params &dist, //
|
||||
const OutputSizedFloatArray &x, //
|
||||
const OutputSizedFloatArray &y, //
|
||||
const OutputSizedFloatArray &z, //
|
||||
OutputSizedFloatArray &out_x, //
|
||||
OutputSizedFloatArray &out_y)
|
||||
{
|
||||
|
||||
const OutputSizedFloatArray r2 = x * x + y * y;
|
||||
const OutputSizedFloatArray r = sqrt(r2);
|
||||
|
||||
#if 0
|
||||
const I theta = atan2(r, z);
|
||||
#else
|
||||
// This works here but will not work in eg. a nonlinear optimizer, or for more general applications.
|
||||
// Takes about 200us off the runtime.
|
||||
// Basically:
|
||||
// * We can be sure that z won't be negative only because previous hand tracking code checks this for
|
||||
// us.
|
||||
// * x,y,z is normalized so we don't have to worry about numerical stability.
|
||||
// If neither of these were true we'd definitely need atan2.
|
||||
//
|
||||
// Grrr, we really need a good library for fast approximations of trigonometric functions.
|
||||
|
||||
const OutputSizedFloatArray theta = atan(r / z);
|
||||
#endif
|
||||
|
||||
const OutputSizedFloatArray theta2 = theta * theta;
|
||||
|
||||
|
||||
#if 0
|
||||
I r_theta = dist.fisheye.k4 * theta2;
|
||||
r_theta += dist.fisheye.k3;
|
||||
r_theta *= theta2;
|
||||
r_theta += dist.fisheye.k2;
|
||||
r_theta *= theta2;
|
||||
r_theta += dist.fisheye.k1;
|
||||
r_theta *= theta2;
|
||||
r_theta += 1;
|
||||
r_theta *= theta;
|
||||
#else
|
||||
// This version gives the compiler more options to do FMAs and avoid temporaries. Down to floating point
|
||||
// precision this should give the same result as the above.
|
||||
OutputSizedFloatArray r_theta =
|
||||
(((((dist.fisheye.k4 * theta2) + dist.fisheye.k3) * theta2 + dist.fisheye.k2) * theta2 + dist.fisheye.k1) *
|
||||
theta2 +
|
||||
1) *
|
||||
theta;
|
||||
#endif
|
||||
|
||||
const OutputSizedFloatArray mx = x * r_theta / r;
|
||||
const OutputSizedFloatArray my = y * r_theta / r;
|
||||
|
||||
out_x = dist.fx * mx + dist.cx;
|
||||
out_y = dist.fy * my + dist.cy;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Eigen::Vector3f
|
||||
stereographic_to_direction(float sg_x, float sg_y)
|
||||
{
|
||||
float X = sg_x;
|
||||
float Y = sg_y;
|
||||
|
||||
float denom = (1 + X * X + Y * Y);
|
||||
|
||||
float x = (2 * X) / denom;
|
||||
float y = (2 * Y) / denom;
|
||||
float z = (-1 + X * X + Y * Y) / denom;
|
||||
|
||||
// forward is -z
|
||||
return {x, y, z};
|
||||
// return {x / -z, y / -z};
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
map_ranges(T value, T from_low, T from_high, T to_low, T to_high)
|
||||
{
|
||||
return (value - from_low) * (to_high - to_low) / (from_high - from_low) + to_low;
|
||||
}
|
||||
|
||||
void
|
||||
naive_remap(OutputSizedArray<int16_t> &image_x,
|
||||
OutputSizedArray<int16_t> &image_y,
|
||||
cv::Mat &input,
|
||||
Eigen::Map<OutputSizedArray<uint8_t>> &output)
|
||||
{
|
||||
output = 0;
|
||||
|
||||
for (int y = 0; y < wsize; y++) {
|
||||
for (int x = 0; x < wsize; x++) {
|
||||
if (image_y(y, x) < 0) {
|
||||
continue;
|
||||
}
|
||||
if (image_y(y, x) >= input.rows) {
|
||||
continue;
|
||||
}
|
||||
if (image_x(y, x) < 0) {
|
||||
continue;
|
||||
}
|
||||
if (image_x(y, x) >= input.cols) {
|
||||
continue;
|
||||
}
|
||||
output(y, x) = input.at<uint8_t>(image_y(y, x), image_x(y, x));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
StereographicDistort(projection_state &mi)
|
||||
{
|
||||
XRT_TRACE_MARKER();
|
||||
|
||||
OutputSizedFloatArray sg_x;
|
||||
OutputSizedFloatArray sg_y;
|
||||
|
||||
// Please vectorize me?
|
||||
if (mi.instructions.flip) {
|
||||
for (int x = 0; x < wsize; ++x) {
|
||||
sg_x.col(x).setConstant(map_ranges<float>((float)x, 0.0f, (float)wsize,
|
||||
(float)mi.instructions.stereographic_radius,
|
||||
(float)-mi.instructions.stereographic_radius));
|
||||
}
|
||||
} else {
|
||||
for (int x = 0; x < wsize; ++x) {
|
||||
sg_x.col(x).setConstant(map_ranges<float>((float)x, 0.0f, (float)wsize,
|
||||
(float)-mi.instructions.stereographic_radius,
|
||||
(float)mi.instructions.stereographic_radius));
|
||||
}
|
||||
}
|
||||
// Ditto?
|
||||
for (int y = 0; y < wsize; ++y) {
|
||||
sg_y.row(y).setConstant(map_ranges<float>((float)y, 0.0f, (float)wsize,
|
||||
(float)mi.instructions.stereographic_radius,
|
||||
(float)-mi.instructions.stereographic_radius));
|
||||
}
|
||||
|
||||
|
||||
// STEREOGRAPHIC DIRECTION TO 3D DIRECTION
|
||||
// Note: we do not normalize the direction, because we don't need to. :)
|
||||
|
||||
OutputSizedFloatArray dir_x;
|
||||
OutputSizedFloatArray dir_y;
|
||||
OutputSizedFloatArray dir_z;
|
||||
|
||||
|
||||
#if 0
|
||||
dir_x = sg_x * 2;
|
||||
dir_y = sg_y * 2;
|
||||
#else
|
||||
// Adding something to itself is faster than multiplying itself by 2
|
||||
// and unless you have fast-math the compiler won't do it for you. =/
|
||||
dir_x = sg_x + sg_x;
|
||||
dir_y = sg_y + sg_y;
|
||||
#endif
|
||||
|
||||
dir_z = (sg_x * sg_x) + (sg_y * sg_y) - 1;
|
||||
// END STEREOGRAPHIC DIRECTION TO 3D DIRECTION
|
||||
|
||||
// QUATERNION ROTATING VECTOR
|
||||
Eigen::Matrix<OutputSizedFloatArray, 3, 1> rot_dir;
|
||||
|
||||
OutputSizedFloatArray uv0;
|
||||
OutputSizedFloatArray uv1;
|
||||
OutputSizedFloatArray uv2;
|
||||
|
||||
const Eigen::Quaternionf &q = mi.instructions.rot_quat;
|
||||
|
||||
uv0 = q.y() * dir_z - q.z() * dir_y;
|
||||
uv1 = q.z() * dir_x - q.x() * dir_z;
|
||||
uv2 = q.x() * dir_y - q.y() * dir_x;
|
||||
|
||||
uv0 += uv0;
|
||||
uv1 += uv1;
|
||||
uv2 += uv2;
|
||||
|
||||
rot_dir.x() = dir_x + q.w() * uv0;
|
||||
rot_dir.y() = dir_y + q.w() * uv1;
|
||||
rot_dir.z() = dir_z + q.w() * uv2;
|
||||
|
||||
rot_dir.x() += q.y() * uv2 - q.z() * uv1;
|
||||
rot_dir.y() += q.z() * uv0 - q.x() * uv2;
|
||||
rot_dir.z() += q.x() * uv1 - q.y() * uv0;
|
||||
// END QUATERNION ROTATING VECTOR
|
||||
|
||||
|
||||
|
||||
OutputSizedArray<int16_t> image_x;
|
||||
OutputSizedArray<int16_t> image_y;
|
||||
|
||||
OutputSizedFloatArray image_x_f;
|
||||
OutputSizedFloatArray image_y_f;
|
||||
|
||||
|
||||
//!@todo optimize
|
||||
rot_dir.y() *= -1;
|
||||
rot_dir.z() *= -1;
|
||||
|
||||
{
|
||||
XRT_TRACE_IDENT(camera_projection);
|
||||
|
||||
switch (mi.dist.model) {
|
||||
case T_DISTORTION_FISHEYE_KB4:
|
||||
// This takes 250us vs 500 because of the removed atan2.
|
||||
project_kb4(mi.dist, rot_dir.x(), rot_dir.y(), rot_dir.z(), image_x_f, image_y_f);
|
||||
break;
|
||||
case T_DISTORTION_OPENCV_RADTAN_8:
|
||||
// Regular C is plenty fast for radtan :)
|
||||
for (int i = 0; i < image_x_f.rows(); i++) {
|
||||
for (int j = 0; j < image_x_f.cols(); j++) {
|
||||
t_camera_models_project(&mi.dist, rot_dir.x()(i, j), rot_dir.y()(i, j),
|
||||
rot_dir.z()(i, j), &image_x_f(i, j), &image_y_f(i, j));
|
||||
}
|
||||
}
|
||||
break;
|
||||
default: assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
image_x = image_x_f.cast<int16_t>();
|
||||
image_y = image_y_f.cast<int16_t>();
|
||||
|
||||
naive_remap(image_x, image_y, mi.input, mi.distorted_image_eigen);
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool
|
||||
slow(projection_state &mi, float x, float y, cv::Point2i &out)
|
||||
{
|
||||
float sg_x =
|
||||
map_ranges<float>(x, 0, wsize, -mi.instructions.stereographic_radius, mi.instructions.stereographic_radius);
|
||||
|
||||
float sg_y =
|
||||
map_ranges<float>(y, 0, wsize, mi.instructions.stereographic_radius, -mi.instructions.stereographic_radius);
|
||||
|
||||
Eigen::Vector3f dir = stereographic_to_direction(sg_x, sg_y);
|
||||
|
||||
dir = mi.instructions.rot_quat * dir;
|
||||
|
||||
dir.y() *= -1;
|
||||
dir.z() *= -1;
|
||||
|
||||
float _x = {};
|
||||
float _y = {};
|
||||
|
||||
bool ret = t_camera_models_project(&mi.dist, dir.x(), dir.y(), dir.z(), &_x, &_y);
|
||||
|
||||
out.x = _x;
|
||||
out.y = _y;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
draw_and_clear(cv::Mat img, std::vector<cv::Point> &line_vec, bool good, cv::Scalar color)
|
||||
{
|
||||
if (!good) {
|
||||
color[0] = 255 - color[0];
|
||||
color[1] = 255 - color[1];
|
||||
color[2] = 255 - color[2];
|
||||
}
|
||||
cv::polylines(img, line_vec, false, color);
|
||||
line_vec.clear();
|
||||
}
|
||||
|
||||
void
|
||||
add_or_draw_line(projection_state &mi, //
|
||||
int x, //
|
||||
int y, //
|
||||
std::vector<cv::Point> &line_vec, //
|
||||
cv::Scalar color, //
|
||||
bool &good_most_recent, //
|
||||
bool &started,
|
||||
cv::Mat &img)
|
||||
{
|
||||
cv::Point2i e = {};
|
||||
bool retval = slow(mi, x, y, e);
|
||||
|
||||
if (!started) {
|
||||
started = true;
|
||||
good_most_recent = retval;
|
||||
line_vec.push_back(e);
|
||||
return;
|
||||
}
|
||||
if (retval != good_most_recent) {
|
||||
line_vec.push_back(e);
|
||||
draw_and_clear(img, line_vec, good_most_recent, color);
|
||||
}
|
||||
good_most_recent = retval;
|
||||
line_vec.push_back(e);
|
||||
}
|
||||
|
||||
void
|
||||
draw_boundary(projection_state &mi, cv::Scalar color, cv::Mat img)
|
||||
{
|
||||
std::vector<cv::Point> line_vec = {};
|
||||
bool good_most_recent = true;
|
||||
bool started = false;
|
||||
|
||||
constexpr float step = 16;
|
||||
|
||||
// x = 0, y = 0->128
|
||||
for (int y = 0; y <= wsize; y += step) {
|
||||
int x = 0;
|
||||
add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img);
|
||||
}
|
||||
|
||||
// x = 0->128, y = 128
|
||||
for (int x = step; x <= wsize; x += step) {
|
||||
int y = wsize;
|
||||
add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img);
|
||||
}
|
||||
|
||||
// x = 128, y = 128->0
|
||||
for (int y = wsize - step; y >= 0; y -= step) {
|
||||
int x = wsize;
|
||||
add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img);
|
||||
}
|
||||
|
||||
// x = 128->0, y = 0
|
||||
for (int x = wsize - step; x >= 0; x -= step) {
|
||||
int y = 0;
|
||||
add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img);
|
||||
}
|
||||
|
||||
draw_and_clear(img, line_vec, good_most_recent, color);
|
||||
}
|
||||
|
||||
void
|
||||
project_21_points_unscaled(Eigen::Array<float, 3, 21> &joints_local, Eigen::Quaternionf rot_quat, hand21_2d &out_joints)
|
||||
{
|
||||
for (int i = 0; i < 21; i++) {
|
||||
Eigen::Vector3f direction = joints_local.col(i); //{d.x, d.y, d.z};
|
||||
direction.normalize();
|
||||
|
||||
direction = rot_quat.conjugate() * direction;
|
||||
float denom = 1 - direction.z();
|
||||
float sg_x = direction.x() / denom;
|
||||
float sg_y = direction.y() / denom;
|
||||
// sg_x *= mi.stereographic_radius;
|
||||
// sg_y *= mi.stereographic_radius;
|
||||
|
||||
out_joints[i].pos_2d.x = sg_x;
|
||||
out_joints[i].pos_2d.y = sg_y;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename V2>
|
||||
void
|
||||
project_point_scaled(projection_state &mi, Eigen::Vector3f direction, V2 &out_img_pt)
|
||||
{
|
||||
direction = mi.instructions.rot_quat.conjugate() * direction;
|
||||
float denom = 1 - direction.z();
|
||||
float sg_x = direction.x() / denom;
|
||||
float sg_y = direction.y() / denom;
|
||||
|
||||
out_img_pt.pos_2d.x = map_ranges<float>(sg_x, -mi.instructions.stereographic_radius,
|
||||
mi.instructions.stereographic_radius, 0, wsize);
|
||||
out_img_pt.pos_2d.y = map_ranges<float>(sg_y, mi.instructions.stereographic_radius,
|
||||
-mi.instructions.stereographic_radius, 0, wsize);
|
||||
}
|
||||
|
||||
void
|
||||
project_21_points_scaled(projection_state &mi, Eigen::Array<float, 3, 21> &joints_local, hand21_2d &out_joints_in_img)
|
||||
{
|
||||
for (int i = 0; i < 21; i++) {
|
||||
project_point_scaled(mi, Eigen::Vector3f(joints_local.col(i)), out_joints_in_img[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
Eigen::Quaternionf
|
||||
direction(Eigen::Vector3f dir, float twist)
|
||||
{
|
||||
Eigen::Quaternionf one = Eigen::Quaternionf().setFromTwoVectors(-Eigen::Vector3f::UnitZ(), dir);
|
||||
|
||||
Eigen::Quaternionf two;
|
||||
two = Eigen::AngleAxisf(twist, -Eigen::Vector3f::UnitZ());
|
||||
return one * two;
|
||||
}
|
||||
|
||||
void
|
||||
add_rel_depth(const Eigen::Array<float, 3, 21> &joints, hand21_2d &out_joints_in_img)
|
||||
{
|
||||
float hand_size = (Eigen::Vector3f(joints.col(0)) - Eigen::Vector3f(joints.col(9))).norm();
|
||||
|
||||
float midpxm_depth = Eigen::Vector3f(joints.col(9)).norm();
|
||||
for (int i = 0; i < 21; i++) {
|
||||
float jd = Eigen::Vector3f(joints.col(i)).norm();
|
||||
out_joints_in_img[i].depth_relative_to_midpxm = (jd - midpxm_depth) / hand_size;
|
||||
}
|
||||
}
|
||||
|
||||
static float
|
||||
palm_length(hand21_2d &joints)
|
||||
{
|
||||
vec2_5 wrist = joints[0];
|
||||
vec2_5 middle_proximal = joints[9];
|
||||
|
||||
vec2_5 index_proximal = joints[5];
|
||||
vec2_5 ring_proximal = joints[17];
|
||||
|
||||
float fwd = m_vec2_len(wrist.pos_2d - middle_proximal.pos_2d);
|
||||
float side = m_vec2_len(index_proximal.pos_2d - ring_proximal.pos_2d);
|
||||
|
||||
float length = fmaxf(fwd, side);
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
make_projection_instructions(t_camera_model_params &dist,
|
||||
bool flip_after,
|
||||
float expand_val,
|
||||
float twist,
|
||||
Eigen::Array<float, 3, 21> &joints,
|
||||
projection_instructions &out_instructions,
|
||||
hand21_2d &out_hand)
|
||||
{
|
||||
|
||||
out_instructions.flip = flip_after;
|
||||
|
||||
Eigen::Vector3f dir = Eigen::Vector3f(joints.col(9)).normalized();
|
||||
|
||||
out_instructions.rot_quat = direction(dir, twist);
|
||||
|
||||
Eigen::Vector3f old_direction = dir;
|
||||
|
||||
|
||||
// Tested on Dec 7: This converges in 4 iterations max, usually 2.
|
||||
for (int i = 0; i < 8; i++) {
|
||||
project_21_points_unscaled(joints, out_instructions.rot_quat, out_hand);
|
||||
|
||||
xrt_vec2 min = out_hand[0].pos_2d;
|
||||
xrt_vec2 max = out_hand[0].pos_2d;
|
||||
|
||||
for (int i = 0; i < 21; i++) {
|
||||
xrt_vec2 pt = out_hand[i].pos_2d;
|
||||
min.x = fmin(pt.x, min.x);
|
||||
min.y = fmin(pt.y, min.y);
|
||||
|
||||
max.x = fmax(pt.x, max.x);
|
||||
max.y = fmax(pt.y, max.y);
|
||||
}
|
||||
|
||||
|
||||
xrt_vec2 center = m_vec2_mul_scalar(min + max, 0.5);
|
||||
|
||||
float r = fmax(center.x - min.x, center.y - min.y);
|
||||
out_instructions.stereographic_radius = r;
|
||||
|
||||
Eigen::Vector3f new_direction = stereographic_to_direction(center.x, center.y);
|
||||
|
||||
new_direction = out_instructions.rot_quat * new_direction;
|
||||
|
||||
out_instructions.rot_quat = direction(new_direction, twist);
|
||||
|
||||
|
||||
if ((old_direction - dir).norm() < 0.0001) {
|
||||
// We converged
|
||||
break;
|
||||
}
|
||||
old_direction = dir;
|
||||
}
|
||||
|
||||
// This can basically be removed (we will have converged very well in the above), but for correctness's
|
||||
// sake, let's project one last time.
|
||||
project_21_points_unscaled(joints, out_instructions.rot_quat, out_hand);
|
||||
|
||||
// These are to ensure that the bounding box doesn't get too small around a closed fist.
|
||||
float palm_l = palm_length(out_hand);
|
||||
float radius_around_palm = palm_l * 0.5 * (2.2 / 1.65) * expand_val;
|
||||
|
||||
out_instructions.stereographic_radius *= expand_val;
|
||||
|
||||
out_instructions.stereographic_radius = fmaxf(out_instructions.stereographic_radius, radius_around_palm);
|
||||
|
||||
// This is going straight into the (-1, 1)-normalized space
|
||||
for (int i = 0; i < 21; i++) {
|
||||
vec2_5 &v = out_hand[i];
|
||||
v.pos_2d.x = map_ranges<float>(v.pos_2d.x, -out_instructions.stereographic_radius,
|
||||
out_instructions.stereographic_radius, -1, 1);
|
||||
//!@todo optimize
|
||||
if (flip_after) {
|
||||
v.pos_2d.x *= -1;
|
||||
}
|
||||
//!@todo this is probably wrong, should probably be negated
|
||||
v.pos_2d.y = map_ranges<float>(v.pos_2d.y, out_instructions.stereographic_radius,
|
||||
-out_instructions.stereographic_radius, -1, 1);
|
||||
}
|
||||
add_rel_depth(joints, out_hand);
|
||||
}
|
||||
|
||||
void
|
||||
make_projection_instructions_angular(xrt_vec3 direction_3d,
|
||||
bool flip_after,
|
||||
float angular_radius,
|
||||
float expand_val,
|
||||
float twist,
|
||||
projection_instructions &out_instructions)
|
||||
{
|
||||
|
||||
out_instructions.flip = flip_after;
|
||||
|
||||
xrt_vec3 imaginary_direction = {0, sinf(angular_radius), -cosf(angular_radius)};
|
||||
|
||||
out_instructions.stereographic_radius = imaginary_direction.y / (1 - imaginary_direction.z);
|
||||
|
||||
math_vec3_normalize(&direction_3d);
|
||||
|
||||
Eigen::Vector3f dir = xrt::auxiliary::math::map_vec3(direction_3d);
|
||||
|
||||
|
||||
out_instructions.rot_quat = direction(dir, twist);
|
||||
|
||||
|
||||
out_instructions.stereographic_radius *= expand_val;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
stereographic_project_image(const t_camera_model_params &dist,
|
||||
const projection_instructions &instructions,
|
||||
cv::Mat &input_image,
|
||||
cv::Mat *debug_image,
|
||||
const cv::Scalar boundary_color,
|
||||
cv::Mat &out)
|
||||
|
||||
{
|
||||
out = cv::Mat(cv::Size(wsize, wsize), CV_8U);
|
||||
projection_state mi(instructions, input_image, out);
|
||||
mi.dist = dist;
|
||||
// Why am I doing it like this????
|
||||
// mi.stereographic_radius = instructions.stereographic_radius;
|
||||
// mi.rot_quat = instructions.rot_quat;
|
||||
// mi.flip = instructions.flip;
|
||||
|
||||
StereographicDistort(mi);
|
||||
|
||||
if (debug_image) {
|
||||
draw_boundary(mi, boundary_color, *debug_image);
|
||||
}
|
||||
}
|
||||
} // namespace xrt::tracking::hand::mercury
|
|
@ -72,38 +72,18 @@ hsv2rgb(float fH, float fS, float fV)
|
|||
return {fR * 255.0f, fG * 255.0f, fB * 255.0f};
|
||||
}
|
||||
|
||||
|
||||
//! @todo Make it take an array of vec2's and give out an array of vec2's, then
|
||||
// put it in its own target so we don't have to link to OpenCV.
|
||||
// Oooorrrrr... actually add good undistortion stuff to Monado so that we don't have to depend on OpenCV at all.
|
||||
|
||||
XRT_MAYBE_UNUSED static struct xrt_vec2
|
||||
raycoord(ht_view *htv, struct xrt_vec2 model_out)
|
||||
{
|
||||
model_out.x *= htv->hgt->multiply_px_coord_for_undistort;
|
||||
model_out.y *= htv->hgt->multiply_px_coord_for_undistort;
|
||||
cv::Mat in_px_coords(1, 1, CV_32FC2);
|
||||
float *write_in;
|
||||
write_in = in_px_coords.ptr<float>(0);
|
||||
write_in[0] = model_out.x;
|
||||
write_in[1] = model_out.y;
|
||||
cv::Mat out_ray(1, 1, CV_32FC2);
|
||||
|
||||
if (htv->hgt->use_fisheye) {
|
||||
cv::fisheye::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion);
|
||||
} else {
|
||||
cv::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion);
|
||||
}
|
||||
|
||||
float n_x = out_ray.at<float>(0, 0);
|
||||
float n_y = out_ray.at<float>(0, 1);
|
||||
|
||||
return {n_x, n_y};
|
||||
}
|
||||
|
||||
static void
|
||||
inline void
|
||||
handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type)
|
||||
{
|
||||
cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, intensity, intensity), type);
|
||||
}
|
||||
|
||||
inline void
|
||||
handSquare(cv::Mat &debug_frame, xrt_vec2 center, float radius, cv::Scalar color)
|
||||
{
|
||||
cv::Point2i pt((int)center.x, (int)center.y);
|
||||
cv::rectangle(debug_frame, cv::Rect(pt - cv::Point2i(radius / 2, radius / 2), cv::Size(radius, radius)), color,
|
||||
1);
|
||||
}
|
||||
|
||||
} // namespace xrt::tracking::hand::mercury
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
// https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/fns_candy_style_transfer/fns_candy_style_transfer.c
|
||||
#include "hg_sync.hpp"
|
||||
#include "hg_image_math.inl"
|
||||
#include "hg_numerics_checker.hpp"
|
||||
|
||||
|
||||
#include <filesystem>
|
||||
|
@ -29,7 +30,6 @@ namespace xrt::tracking::hand::mercury {
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
static cv::Matx23f
|
||||
blackbar(const cv::Mat &in, enum t_camera_orientation rot, cv::Mat &out, xrt_size out_size)
|
||||
{
|
||||
|
@ -134,9 +134,71 @@ argmax(const float *data, int size)
|
|||
return out_idx;
|
||||
}
|
||||
|
||||
static void
|
||||
refine_center_of_distribution(
|
||||
const float *data, int coarse_x, int coarse_y, int w, int h, float *out_refined_x, float *out_refined_y)
|
||||
static bool
|
||||
hand_depth_center_of_mass(struct HandTracking *hgt, float data[22], float *out_depth, float *out_confidence)
|
||||
{
|
||||
float avg_location_px_coord = 0;
|
||||
float sum = 0;
|
||||
|
||||
for (int i = 0; i < 22; i++) {
|
||||
data[i] = fmin(1.0, fmax(data[i], 0.0));
|
||||
sum += data[i];
|
||||
avg_location_px_coord += data[i] * (float)i;
|
||||
}
|
||||
|
||||
if (sum < 1e-5) {
|
||||
HG_DEBUG(hgt, "All depth outputs were zero!");
|
||||
return false;
|
||||
}
|
||||
|
||||
avg_location_px_coord /= sum;
|
||||
|
||||
// std::cout << avg_location_px_coord << std::endl;
|
||||
|
||||
// bounds check
|
||||
if (avg_location_px_coord < 0 || avg_location_px_coord > 21) {
|
||||
HG_DEBUG(hgt, "Very bad! avg_location_px_coord was %f", avg_location_px_coord);
|
||||
for (int i = 0; i < 22; i++) {
|
||||
HG_DEBUG(hgt, "%f", data[i]);
|
||||
}
|
||||
|
||||
avg_location_px_coord = fmin(21.0, fmax(0.0, avg_location_px_coord));
|
||||
return false;
|
||||
}
|
||||
|
||||
// nan check
|
||||
if (avg_location_px_coord != avg_location_px_coord) {
|
||||
HG_DEBUG(hgt, "Very bad! avg_location_px_coord was not a number: %f", avg_location_px_coord);
|
||||
for (int i = 0; i < 22; i++) {
|
||||
HG_DEBUG(hgt, "%f", data[i]);
|
||||
}
|
||||
*out_depth = 0;
|
||||
*out_confidence = 0;
|
||||
return false;
|
||||
}
|
||||
// printf("%f %d\n", avg_location_px_coord, (int)avg_location_px_coord);
|
||||
*out_confidence = data[(int)avg_location_px_coord];
|
||||
|
||||
|
||||
|
||||
float depth_value = avg_location_px_coord + 0.5;
|
||||
depth_value /= 22;
|
||||
depth_value -= 0.5;
|
||||
depth_value *= 2 * 1.5;
|
||||
|
||||
*out_depth = depth_value;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
refine_center_of_distribution(struct HandTracking *hgt, //
|
||||
const float *data, //
|
||||
int coarse_x, //
|
||||
int coarse_y, //
|
||||
int w, //
|
||||
int h, //
|
||||
float *out_refined_x, //
|
||||
float *out_refined_y)
|
||||
{
|
||||
// Be VERY suspicious of this function, it's probably not centering correctly.
|
||||
float sum_of_values = 0;
|
||||
|
@ -170,18 +232,16 @@ refine_center_of_distribution(
|
|||
// Edge case, will fix soon
|
||||
*out_refined_x = coarse_x;
|
||||
*out_refined_y = coarse_y;
|
||||
U_LOG_E("Failed! %d %d %d %d %d", coarse_x, coarse_y, w, h, max_kern_width);
|
||||
return;
|
||||
HG_DEBUG(hgt, "Failed! %d %d %d %d %d", coarse_x, coarse_y, w, h, max_kern_width);
|
||||
return false;
|
||||
} else {
|
||||
*out_refined_x = sum_of_values_times_locations_x / sum_of_values;
|
||||
*out_refined_y = sum_of_values_times_locations_y / sum_of_values;
|
||||
return true;
|
||||
}
|
||||
|
||||
*out_refined_x = sum_of_values_times_locations_x / sum_of_values;
|
||||
*out_refined_y = sum_of_values_times_locations_y / sum_of_values;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
static bool
|
||||
normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out)
|
||||
{
|
||||
data_in.convertTo(data_out, CV_32FC1, 1 / 255.0);
|
||||
|
@ -192,7 +252,7 @@ normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out)
|
|||
|
||||
if (stddev.at<double>(0, 0) == 0) {
|
||||
U_LOG_W("Got image with zero standard deviation!");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
data_out *= 0.25 / stddev.at<double>(0, 0);
|
||||
|
@ -201,6 +261,7 @@ normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out)
|
|||
//! @todo optimize
|
||||
cv::meanStdDev(data_out, mean, stddev);
|
||||
data_out += (0.5 - mean.at<double>(0, 0));
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -228,18 +289,19 @@ setup_model_image_input(HandTracking *hgt, onnx_wrap *wrap, const char *name, in
|
|||
{
|
||||
model_input_wrap inputimg = {};
|
||||
inputimg.name = name;
|
||||
inputimg.dimensions.push_back(1);
|
||||
inputimg.dimensions.push_back(1);
|
||||
inputimg.dimensions.push_back(h);
|
||||
inputimg.dimensions.push_back(w);
|
||||
inputimg.dimensions[0] = 1;
|
||||
inputimg.dimensions[1] = 1;
|
||||
inputimg.dimensions[2] = h;
|
||||
inputimg.dimensions[3] = w;
|
||||
inputimg.num_dimensions = 4;
|
||||
size_t data_size = w * h * sizeof(float);
|
||||
inputimg.data = (float *)malloc(data_size);
|
||||
|
||||
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
|
||||
inputimg.data, //
|
||||
data_size, //
|
||||
inputimg.dimensions.data(), //
|
||||
inputimg.dimensions.size(), //
|
||||
inputimg.dimensions, //
|
||||
inputimg.num_dimensions, //
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
|
||||
&inputimg.tensor));
|
||||
|
||||
|
@ -320,12 +382,12 @@ run_hand_detection(void *ptr)
|
|||
|
||||
|
||||
for (int hand_idx = 0; hand_idx < 2; hand_idx++) {
|
||||
hand_bounding_box *output = info->outputs[hand_idx];
|
||||
hand_region_of_interest &output = info->outputs[hand_idx];
|
||||
|
||||
output->found = hand_exists[hand_idx] > 0.3;
|
||||
output.found = hand_exists[hand_idx] > 0.3;
|
||||
|
||||
if (output->found) {
|
||||
output->confidence = hand_exists[hand_idx];
|
||||
if (output.found) {
|
||||
output.hand_detection_confidence = hand_exists[hand_idx];
|
||||
|
||||
xrt_vec2 _pt = {};
|
||||
_pt.x = math_map_ranges(cx[hand_idx], -1, 1, 0, kDetectionInputSize);
|
||||
|
@ -344,14 +406,11 @@ run_hand_detection(void *ptr)
|
|||
|
||||
_pt = transformVecBy2x3(_pt, go_back);
|
||||
|
||||
output->center = _pt;
|
||||
output->size_px = size;
|
||||
output.center_px = _pt;
|
||||
output.size_px = size;
|
||||
|
||||
if (hgt->debug_scribble) {
|
||||
cv::Point2i pt((int)output->center.x, (int)output->center.y);
|
||||
cv::rectangle(debug_frame,
|
||||
cv::Rect(pt - cv::Point2i(size / 2, size / 2), cv::Size(size, size)),
|
||||
PINK, 1);
|
||||
handSquare(debug_frame, output.center_px, output.size_px, PINK);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -377,81 +436,151 @@ init_keypoint_estimation(HandTracking *hgt, onnx_wrap *wrap)
|
|||
|
||||
std::filesystem::path path = hgt->models_folder;
|
||||
|
||||
path /= "grayscale_keypoint_new.onnx";
|
||||
path /= "grayscale_keypoint_jan18.onnx";
|
||||
|
||||
wrap->wraps.clear();
|
||||
|
||||
setup_ort_api(hgt, wrap, path);
|
||||
|
||||
setup_model_image_input(hgt, wrap, "inputImg", kKeypointInputSize, kKeypointInputSize);
|
||||
}
|
||||
wrap->api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
|
||||
|
||||
void
|
||||
calc_src_tri(cv::Point2f center,
|
||||
cv::Point2f go_right,
|
||||
cv::Point2f go_down,
|
||||
enum t_camera_orientation rot,
|
||||
cv::Point2f out_src_tri[3])
|
||||
{
|
||||
cv::Point2f top_left = {center - go_down - go_right};
|
||||
cv::Point2f bottom_left = {center + go_down - go_right};
|
||||
cv::Point2f bottom_right = {center + go_down + go_right};
|
||||
cv::Point2f top_right = {center - go_down + go_right};
|
||||
|
||||
switch (rot) {
|
||||
case CAMERA_ORIENTATION_0: {
|
||||
OrtSessionOptions *opts = nullptr;
|
||||
ORT(CreateSessionOptions(&opts));
|
||||
|
||||
// top left
|
||||
out_src_tri[0] = top_left; // {center - go_down - go_right};
|
||||
// TODO review options, config for threads?
|
||||
ORT(SetSessionGraphOptimizationLevel(opts, ORT_ENABLE_ALL));
|
||||
ORT(SetIntraOpNumThreads(opts, 1));
|
||||
|
||||
// bottom left
|
||||
out_src_tri[1] = bottom_left; //{center + go_down - go_right};
|
||||
|
||||
// top right
|
||||
out_src_tri[2] = top_right; //{center - go_down + go_right};
|
||||
} break;
|
||||
case CAMERA_ORIENTATION_90: {
|
||||
// Need to rotate the view back by -90°
|
||||
// top left (becomes top right)
|
||||
out_src_tri[0] = top_right;
|
||||
ORT(CreateEnv(ORT_LOGGING_LEVEL_FATAL, "monado_ht", &wrap->env));
|
||||
|
||||
// bottom left (becomes top left)
|
||||
out_src_tri[1] = top_left;
|
||||
ORT(CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &wrap->meminfo));
|
||||
|
||||
// top right (becomes bottom right)
|
||||
out_src_tri[2] = bottom_right;
|
||||
} break;
|
||||
// HG_DEBUG(this->device, "Loading hand detection model from file '%s'", path.c_str());
|
||||
ORT(CreateSession(wrap->env, path.c_str(), opts, &wrap->session));
|
||||
assert(wrap->session != NULL);
|
||||
|
||||
case CAMERA_ORIENTATION_180: {
|
||||
// top left (becomes bottom right)
|
||||
out_src_tri[0] = bottom_right;
|
||||
// size_t input_size = wrap->input_shape[0] * wrap->input_shape[1] * wrap->input_shape[2] *
|
||||
// wrap->input_shape[3];
|
||||
|
||||
// bottom left (becomes top right)
|
||||
out_src_tri[1] = top_right;
|
||||
// wrap->data = (float *)malloc(input_size * sizeof(float));
|
||||
{
|
||||
model_input_wrap inputimg = {};
|
||||
inputimg.name = "inputImg";
|
||||
inputimg.dimensions[0] = 1;
|
||||
inputimg.dimensions[1] = 1;
|
||||
inputimg.dimensions[2] = 128;
|
||||
inputimg.dimensions[3] = 128;
|
||||
inputimg.num_dimensions = 4;
|
||||
|
||||
// top right (becomes bottom left)
|
||||
out_src_tri[2] = bottom_left;
|
||||
} break;
|
||||
case CAMERA_ORIENTATION_270: {
|
||||
// Need to rotate the view clockwise 90°
|
||||
// top left (becomes bottom left)
|
||||
out_src_tri[0] = bottom_left; //{center + go_down - go_right};
|
||||
inputimg.data = (float *)malloc(128 * 128 * sizeof(float)); // SORRY IM BUSY
|
||||
|
||||
// bottom left (becomes bottom right)
|
||||
out_src_tri[1] = bottom_right; //{center + go_down + go_right};
|
||||
|
||||
// top right (becomes top left)
|
||||
out_src_tri[2] = top_left; //{center - go_down - go_right};
|
||||
} break;
|
||||
default: assert(false);
|
||||
|
||||
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
|
||||
inputimg.data, //
|
||||
128 * 128 * sizeof(float), //
|
||||
inputimg.dimensions, //
|
||||
inputimg.num_dimensions, //
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
|
||||
&inputimg.tensor));
|
||||
|
||||
assert(inputimg.tensor);
|
||||
int is_tensor;
|
||||
ORT(IsTensor(inputimg.tensor, &is_tensor));
|
||||
assert(is_tensor);
|
||||
|
||||
wrap->wraps.push_back(inputimg);
|
||||
}
|
||||
|
||||
{
|
||||
model_input_wrap inputimg = {};
|
||||
inputimg.name = "lastKeypoints";
|
||||
inputimg.dimensions[0] = 1;
|
||||
inputimg.dimensions[1] = 42;
|
||||
inputimg.num_dimensions = 2;
|
||||
|
||||
inputimg.data = (float *)malloc(1 * 42 * sizeof(float)); // SORRY IM BUSY
|
||||
|
||||
|
||||
|
||||
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
|
||||
inputimg.data, //
|
||||
1 * 42 * sizeof(float), //
|
||||
inputimg.dimensions, //
|
||||
inputimg.num_dimensions, //
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
|
||||
&inputimg.tensor));
|
||||
|
||||
assert(inputimg.tensor);
|
||||
int is_tensor;
|
||||
ORT(IsTensor(inputimg.tensor, &is_tensor));
|
||||
assert(is_tensor);
|
||||
wrap->wraps.push_back(inputimg);
|
||||
}
|
||||
|
||||
{
|
||||
model_input_wrap inputimg = {};
|
||||
inputimg.name = "useLastKeypoints";
|
||||
inputimg.dimensions[0] = 1;
|
||||
inputimg.num_dimensions = 1;
|
||||
|
||||
inputimg.data = (float *)malloc(1 * sizeof(float)); // SORRY IM BUSY
|
||||
|
||||
|
||||
|
||||
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
|
||||
inputimg.data, //
|
||||
1 * sizeof(float), //
|
||||
inputimg.dimensions, //
|
||||
inputimg.num_dimensions, //
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
|
||||
&inputimg.tensor));
|
||||
|
||||
assert(inputimg.tensor);
|
||||
int is_tensor;
|
||||
ORT(IsTensor(inputimg.tensor, &is_tensor));
|
||||
assert(is_tensor);
|
||||
wrap->wraps.push_back(inputimg);
|
||||
}
|
||||
|
||||
|
||||
wrap->api->ReleaseSessionOptions(opts);
|
||||
}
|
||||
|
||||
enum xrt_hand_joint joints_ml_to_xr[21]{
|
||||
XRT_HAND_JOINT_WRIST, //
|
||||
XRT_HAND_JOINT_THUMB_METACARPAL, //
|
||||
XRT_HAND_JOINT_THUMB_PROXIMAL, //
|
||||
XRT_HAND_JOINT_THUMB_DISTAL, //
|
||||
XRT_HAND_JOINT_THUMB_TIP, //
|
||||
//
|
||||
XRT_HAND_JOINT_INDEX_PROXIMAL, //
|
||||
XRT_HAND_JOINT_INDEX_INTERMEDIATE, //
|
||||
XRT_HAND_JOINT_INDEX_DISTAL, //
|
||||
XRT_HAND_JOINT_INDEX_TIP, //
|
||||
//
|
||||
XRT_HAND_JOINT_MIDDLE_PROXIMAL, //
|
||||
XRT_HAND_JOINT_MIDDLE_INTERMEDIATE, //
|
||||
XRT_HAND_JOINT_MIDDLE_DISTAL, //
|
||||
XRT_HAND_JOINT_MIDDLE_TIP, //
|
||||
//
|
||||
XRT_HAND_JOINT_RING_PROXIMAL, //
|
||||
XRT_HAND_JOINT_RING_INTERMEDIATE, //
|
||||
XRT_HAND_JOINT_RING_DISTAL, //
|
||||
XRT_HAND_JOINT_RING_TIP, //
|
||||
//
|
||||
XRT_HAND_JOINT_LITTLE_PROXIMAL, //
|
||||
XRT_HAND_JOINT_LITTLE_INTERMEDIATE, //
|
||||
XRT_HAND_JOINT_LITTLE_DISTAL, //
|
||||
XRT_HAND_JOINT_LITTLE_TIP, //
|
||||
};
|
||||
|
||||
void
|
||||
make_keypoint_heatmap_output(int camera_idx, int hand_idx, int grid_pt_x, int grid_pt_y, float *plane, cv::Mat &out)
|
||||
{
|
||||
int root_x = kVisSpacerSize + ((1 + 2 * hand_idx) * (kKeypointInputSize + kVisSpacerSize));
|
||||
int root_y = kVisSpacerSize + (camera_idx * (kKeypointInputSize + kVisSpacerSize));
|
||||
int root_x = 8 + ((1 + 2 * hand_idx) * (128 + 8));
|
||||
int root_y = 8 + ((2 * camera_idx) * (128 + 8));
|
||||
|
||||
int org_x = (root_x) + (grid_pt_x * 25);
|
||||
int org_y = (root_y) + (grid_pt_y * 25);
|
||||
|
@ -464,168 +593,383 @@ make_keypoint_heatmap_output(int camera_idx, int hand_idx, int grid_pt_x, int gr
|
|||
start.copyTo(out(p));
|
||||
}
|
||||
|
||||
void
|
||||
make_keypoint_depth_heatmap_output(int camera_idx, //
|
||||
int hand_idx, //
|
||||
int grid_pt_x, //
|
||||
int grid_pt_y,
|
||||
float *plane, //
|
||||
cv::Mat &out)
|
||||
{
|
||||
int root_x = 8 + ((1 + 2 * hand_idx) * (128 + 8));
|
||||
int root_y = 8 + ((1 + 2 * camera_idx) * (128 + 8));
|
||||
|
||||
int org_x = (root_x) + (grid_pt_x * 25);
|
||||
int org_y = (root_y) + (grid_pt_y * 25);
|
||||
|
||||
|
||||
cv::Rect p = cv::Rect(org_x, org_y, 22, 22);
|
||||
|
||||
|
||||
cv::Mat start(cv::Size(22, 22), CV_32FC1);
|
||||
float *ptr = (float *)start.data; // Cope cope cope cope cope
|
||||
|
||||
|
||||
for (int i = 0; i < 22; i++) {
|
||||
for (int j = 0; j < 22; j++) {
|
||||
ptr[(i * 22) + j] = plane[i];
|
||||
}
|
||||
}
|
||||
|
||||
start *= 255.0;
|
||||
|
||||
start.copyTo(out(p));
|
||||
}
|
||||
|
||||
void
|
||||
set_predicted_zero(float *data)
|
||||
{
|
||||
for (int i = 0; i < 42; i++) {
|
||||
data[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
run_keypoint_estimation(void *ptr)
|
||||
{
|
||||
XRT_TRACE_MARKER();
|
||||
keypoint_estimation_run_info *info = (keypoint_estimation_run_info *)ptr;
|
||||
keypoint_estimation_run_info info = *(keypoint_estimation_run_info *)ptr;
|
||||
|
||||
onnx_wrap *wrap = &info->view->keypoint[info->hand_idx];
|
||||
struct HandTracking *hgt = info->view->hgt;
|
||||
onnx_wrap *wrap = &info.view->keypoint[info.hand_idx];
|
||||
struct HandTracking *hgt = info.view->hgt;
|
||||
|
||||
int view_idx = info.view->view;
|
||||
int hand_idx = info.hand_idx;
|
||||
one_frame_one_view &this_output = hgt->keypoint_outputs[hand_idx].views[view_idx];
|
||||
MLOutput2D &px_coord = this_output.keypoints_in_scaled_stereographic;
|
||||
// Factor out starting here
|
||||
|
||||
cv::Mat &debug = info->view->debug_out_to_this;
|
||||
hand_region_of_interest &output = info.view->regions_of_interest_this_frame[hand_idx];
|
||||
|
||||
hand_bounding_box *output = &info->view->bboxes_this_frame[info->hand_idx];
|
||||
cv::Mat data_128x128_uint8;
|
||||
|
||||
cv::Point2f src_tri[3];
|
||||
cv::Point2f dst_tri[3];
|
||||
// top-left
|
||||
cv::Point2f center = {output->center.x, output->center.y};
|
||||
projection_instructions instr(info.view->hgdist);
|
||||
instr.rot_quat = Eigen::Quaternionf::Identity();
|
||||
instr.stereographic_radius = 0.4;
|
||||
|
||||
cv::Point2f go_right = {output->size_px / 2, 0};
|
||||
cv::Point2f go_down = {0, output->size_px / 2};
|
||||
|
||||
calc_src_tri(center, go_right, go_down, info->view->camera_info.camera_orientation, src_tri);
|
||||
|
||||
/* For the right hand, flip the result horizontally since
|
||||
* the model is trained on left hands.
|
||||
* Top left, bottom left, top right */
|
||||
if (info->hand_idx == 1) {
|
||||
dst_tri[0] = {kKeypointInputSize, 0};
|
||||
dst_tri[1] = {kKeypointInputSize, kKeypointInputSize};
|
||||
dst_tri[2] = {0, 0};
|
||||
t_camera_model_params dist = info.view->hgdist;
|
||||
|
||||
|
||||
float twist = 0;
|
||||
|
||||
if (output.provenance == ROIProvenance::HAND_DETECTION) {
|
||||
|
||||
xrt_vec3 center;
|
||||
|
||||
xrt_vec3 edges[4];
|
||||
|
||||
|
||||
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, output.center_px.x, output.center_px.y,
|
||||
¢er.x, ¢er.y, ¢er.z);
|
||||
|
||||
xrt_vec2 r = {output.size_px / 2, 0};
|
||||
xrt_vec2 d = {0, output.size_px / 2};
|
||||
xrt_vec2 v;
|
||||
|
||||
// note! we do not need to rotate this! it's *already* in camera space.
|
||||
int acc_idx = 0;
|
||||
v = output.center_px + r + d;
|
||||
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x,
|
||||
&edges[acc_idx].y, &edges[acc_idx].z);
|
||||
acc_idx++;
|
||||
|
||||
v = output.center_px - r + d;
|
||||
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x,
|
||||
&edges[acc_idx].y, &edges[acc_idx].z);
|
||||
acc_idx++;
|
||||
|
||||
v = output.center_px - r - d;
|
||||
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x,
|
||||
&edges[acc_idx].y, &edges[acc_idx].z);
|
||||
acc_idx++;
|
||||
|
||||
v = output.center_px + r - d;
|
||||
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x,
|
||||
&edges[acc_idx].y, &edges[acc_idx].z);
|
||||
|
||||
|
||||
float angle = 0;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
angle = fmaxf(angle, m_vec3_angle(center, edges[i]));
|
||||
}
|
||||
|
||||
|
||||
make_projection_instructions_angular(center, hand_idx, angle,
|
||||
hgt->tuneable_values.after_detection_fac.val, twist, instr);
|
||||
|
||||
wrap->wraps[2].data[0] = 0.0f;
|
||||
set_predicted_zero(wrap->wraps[1].data);
|
||||
} else {
|
||||
dst_tri[0] = {0, 0};
|
||||
dst_tri[1] = {0, kKeypointInputSize};
|
||||
dst_tri[2] = {kKeypointInputSize, 0};
|
||||
Eigen::Array<float, 3, 21> keypoints_in_camera;
|
||||
|
||||
|
||||
if (view_idx == 0) {
|
||||
keypoints_in_camera = hgt->pose_predicted_keypoints[hand_idx];
|
||||
} else {
|
||||
for (int i = 0; i < 21; i++) {
|
||||
|
||||
Eigen::Quaternionf ori = map_quat(hgt->left_in_right.orientation);
|
||||
Eigen::Vector3f tmp = hgt->pose_predicted_keypoints[hand_idx].col(i);
|
||||
|
||||
tmp = ori * tmp;
|
||||
tmp += map_vec3(hgt->left_in_right.position);
|
||||
keypoints_in_camera.col(i) = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
hand21_2d bleh;
|
||||
|
||||
make_projection_instructions(dist, hand_idx, hgt->tuneable_values.dyn_radii_fac.val, twist,
|
||||
keypoints_in_camera, instr, bleh);
|
||||
|
||||
if (hgt->tuneable_values.enable_pose_predicted_input) {
|
||||
for (int ml_joint_idx = 0; ml_joint_idx < 21; ml_joint_idx++) {
|
||||
float *data = wrap->wraps[1].data;
|
||||
data[(ml_joint_idx * 2) + 0] = bleh[ml_joint_idx].pos_2d.x;
|
||||
data[(ml_joint_idx * 2) + 1] = bleh[ml_joint_idx].pos_2d.y;
|
||||
// data[(ml_joint_idx * 2) + 2] = bleh[ml_joint_idx].depth_relative_to_midpxm;
|
||||
}
|
||||
|
||||
|
||||
wrap->wraps[2].data[0] = 1.0f;
|
||||
} else {
|
||||
wrap->wraps[2].data[0] = 0.0f;
|
||||
set_predicted_zero(wrap->wraps[1].data);
|
||||
}
|
||||
}
|
||||
|
||||
cv::Matx23f go_there = getAffineTransform(src_tri, dst_tri);
|
||||
cv::Matx23f go_back = getAffineTransform(dst_tri, src_tri); // NOLINT
|
||||
stereographic_project_image(dist, instr, hgt->views[view_idx].run_model_on_this,
|
||||
&hgt->views[view_idx].debug_out_to_this, info.hand_idx ? RED : YELLOW,
|
||||
data_128x128_uint8);
|
||||
|
||||
cv::Mat cropped_image_uint8;
|
||||
|
||||
xrt::auxiliary::math::map_quat(this_output.look_dir) = instr.rot_quat;
|
||||
this_output.stereographic_radius = instr.stereographic_radius;
|
||||
|
||||
bool is_hand = true;
|
||||
|
||||
{
|
||||
XRT_TRACE_IDENT(transforms);
|
||||
XRT_TRACE_IDENT(convert_format);
|
||||
|
||||
cv::warpAffine(info->view->run_model_on_this, cropped_image_uint8, go_there,
|
||||
cv::Size(kKeypointInputSize, kKeypointInputSize), cv::INTER_LINEAR);
|
||||
// here!
|
||||
cv::Mat data_128x128_float(cv::Size(128, 128), CV_32FC1, wrap->wraps[0].data, 128 * sizeof(float));
|
||||
|
||||
cv::Mat cropped_image_float_wrapper(cv::Size(kKeypointInputSize, kKeypointInputSize), //
|
||||
CV_32FC1, //
|
||||
wrap->wraps[0].data, //
|
||||
kKeypointInputSize * sizeof(float));
|
||||
|
||||
normalizeGrayscaleImage(cropped_image_uint8, cropped_image_float_wrapper);
|
||||
is_hand = is_hand && normalizeGrayscaleImage(data_128x128_uint8, data_128x128_float);
|
||||
}
|
||||
|
||||
|
||||
// Ending here
|
||||
|
||||
const OrtValue *inputs[] = {wrap->wraps[0].tensor};
|
||||
const char *input_names[] = {wrap->wraps[0].name};
|
||||
|
||||
OrtValue *output_tensors[] = {nullptr};
|
||||
const OrtValue *inputs[] = {wrap->wraps[0].tensor, wrap->wraps[1].tensor, wrap->wraps[2].tensor};
|
||||
const char *input_names[] = {wrap->wraps[0].name, wrap->wraps[1].name, wrap->wraps[2].name};
|
||||
|
||||
const char *output_names[] = {"heatmap"};
|
||||
|
||||
|
||||
// OrtValue *output_tensor = nullptr;
|
||||
OrtValue *output_tensors[] = {nullptr, nullptr, nullptr, nullptr};
|
||||
const char *output_names[] = {"heatmap_xy", "heatmap_depth", "scalar_extras", "curls"};
|
||||
|
||||
{
|
||||
XRT_TRACE_IDENT(model);
|
||||
static_assert(ARRAY_SIZE(input_names) == ARRAY_SIZE(inputs));
|
||||
static_assert(ARRAY_SIZE(output_names) == ARRAY_SIZE(output_tensors));
|
||||
assert(ARRAY_SIZE(input_names) == ARRAY_SIZE(inputs));
|
||||
assert(ARRAY_SIZE(output_names) == ARRAY_SIZE(output_tensors));
|
||||
ORT(Run(wrap->session, nullptr, input_names, inputs, ARRAY_SIZE(input_names), output_names,
|
||||
ARRAY_SIZE(output_names), output_tensors));
|
||||
}
|
||||
|
||||
// To here
|
||||
|
||||
// Interpret model outputs!
|
||||
|
||||
|
||||
float *out_data = nullptr;
|
||||
|
||||
|
||||
ORT(GetTensorMutableData(output_tensors[0], (void **)&out_data));
|
||||
|
||||
Hand2D &px_coord = info->view->keypoint_outputs[info->hand_idx].hand_px_coord;
|
||||
Hand2D &tan_space = info->view->keypoint_outputs[info->hand_idx].hand_tan_space;
|
||||
float *confidences = info->view->keypoint_outputs[info->hand_idx].hand_tan_space.confidences;
|
||||
xrt_vec2 *keypoints_global = px_coord.kps;
|
||||
// I don't know why this was added
|
||||
// float *confidences = info.view->keypoint_outputs.views[hand_idx].confidences;
|
||||
|
||||
size_t plane_size = kKeypointOutputHeatmapSize * kKeypointOutputHeatmapSize;
|
||||
// This was added for debug scribbling, and should be added back at some point.
|
||||
// xrt_vec2 *keypoints_global = px_coord.kps;
|
||||
|
||||
size_t plane_size = 22 * 22;
|
||||
|
||||
for (int i = 0; i < 21; i++) {
|
||||
float *data = &out_data[i * plane_size];
|
||||
int out_idx = argmax(data, kKeypointOutputHeatmapSize * kKeypointOutputHeatmapSize);
|
||||
int row = out_idx / kKeypointOutputHeatmapSize;
|
||||
int col = out_idx % kKeypointOutputHeatmapSize;
|
||||
|
||||
xrt_vec2 loc;
|
||||
|
||||
|
||||
refine_center_of_distribution(data, col, row, kKeypointOutputHeatmapSize, kKeypointOutputHeatmapSize,
|
||||
&loc.x, &loc.y);
|
||||
// This will be optimized out if nan checking is disabled in hg_numerics_checker
|
||||
for (size_t x = 0; x < plane_size; x++) {
|
||||
CHECK_NOT_NAN(data[i]);
|
||||
}
|
||||
|
||||
// 128.0/22.0f
|
||||
loc.x *= float(kKeypointInputSize) / float(kKeypointOutputHeatmapSize);
|
||||
loc.y *= float(kKeypointInputSize) / float(kKeypointOutputHeatmapSize);
|
||||
|
||||
loc = transformVecBy2x3(loc, go_back);
|
||||
int out_idx = argmax(data, 22 * 22);
|
||||
int row = out_idx / 22;
|
||||
int col = out_idx % 22;
|
||||
|
||||
confidences[i] = data[out_idx];
|
||||
px_coord.kps[i] = loc;
|
||||
xrt_vec2 loc = {};
|
||||
|
||||
tan_space.kps[i] = raycoord(info->view, loc);
|
||||
// This is a good start but rethink it. Maybe fail if less than 18/21 joints failed?
|
||||
// is_hand = is_hand &&
|
||||
refine_center_of_distribution(hgt, data, col, row, 22, 22, &loc.x, &loc.y);
|
||||
|
||||
if (hand_idx == 0) {
|
||||
px_coord[i].pos_2d.x = math_map_ranges(loc.x, 0, 22, -1, 1);
|
||||
} else {
|
||||
px_coord[i].pos_2d.x = math_map_ranges(loc.x, 0, 22, 1, -1);
|
||||
}
|
||||
|
||||
//!@todo when you change this to have +Z-forward
|
||||
// note note note the flip!!!
|
||||
px_coord[i].pos_2d.y = math_map_ranges(loc.y, 0, 22, 1, -1);
|
||||
|
||||
px_coord[i].confidence_xy = data[out_idx];
|
||||
}
|
||||
|
||||
|
||||
float *out_data_depth = nullptr;
|
||||
ORT(GetTensorMutableData(output_tensors[1], (void **)&out_data_depth));
|
||||
|
||||
for (int joint_idx = 0; joint_idx < 21; joint_idx++) {
|
||||
float *p_ptr = &out_data_depth[(joint_idx * 22)];
|
||||
|
||||
|
||||
float depth = 0;
|
||||
float confidence = 0;
|
||||
|
||||
// This function can fail
|
||||
if (hand_depth_center_of_mass(hgt, p_ptr, &depth, &confidence)) {
|
||||
|
||||
px_coord[joint_idx].depth_relative_to_midpxm = depth;
|
||||
px_coord[joint_idx].confidence_depth = confidence;
|
||||
} else {
|
||||
px_coord[joint_idx].depth_relative_to_midpxm = 0;
|
||||
px_coord[joint_idx].confidence_depth = 0;
|
||||
}
|
||||
}
|
||||
|
||||
float *out_data_extras = nullptr;
|
||||
ORT(GetTensorMutableData(output_tensors[2], (void **)&out_data_extras));
|
||||
|
||||
float is_hand_explicit = out_data_extras[0];
|
||||
|
||||
is_hand_explicit = (1.0) / (1.0 + powf(2.71828182845904523536, -is_hand_explicit));
|
||||
|
||||
// When the model is sure, it's _really_ sure.
|
||||
// Index was fine with 0.99.
|
||||
// North Star seemed to need 0.97.
|
||||
if (is_hand_explicit < 0.97) {
|
||||
U_LOG_D("Not hand! %f", is_hand_explicit);
|
||||
is_hand = false;
|
||||
}
|
||||
|
||||
this_output.active = is_hand;
|
||||
|
||||
|
||||
float *out_data_curls = nullptr;
|
||||
ORT(GetTensorMutableData(output_tensors[3], (void **)&out_data_curls));
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
float curl = out_data_curls[i];
|
||||
float variance = out_data_curls[5 + i];
|
||||
|
||||
// Next two lines directly correspond to py_training/settings.py
|
||||
// We don't want it to be negative
|
||||
variance = fabsf(variance);
|
||||
// We don't want it to be possible to be zero
|
||||
variance += 0.01;
|
||||
|
||||
this_output.curls[i].value = curl;
|
||||
this_output.curls[i].variance = curl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (hgt->debug_scribble) {
|
||||
int data_acc_idx = 0;
|
||||
|
||||
int root_x = kVisSpacerSize + ((2 * info->hand_idx) * (kKeypointInputSize + kVisSpacerSize));
|
||||
int root_y = kVisSpacerSize + (info->view->view * (kKeypointInputSize + kVisSpacerSize));
|
||||
int root_x = 8 + ((2 * hand_idx) * (128 + 8));
|
||||
int root_y = 8 + (2 * info.view->view * (128 + 8));
|
||||
|
||||
cv::Rect p = cv::Rect(root_x, root_y, kKeypointInputSize, kKeypointInputSize);
|
||||
cv::Rect p = cv::Rect(root_x, root_y, 128, 128);
|
||||
|
||||
cropped_image_uint8.copyTo(hgt->visualizers.mat(p));
|
||||
data_128x128_uint8.copyTo(hgt->visualizers.mat(p));
|
||||
|
||||
make_keypoint_heatmap_output(info.view->view, hand_idx, 0, 0, out_data + (data_acc_idx * plane_size),
|
||||
hgt->visualizers.mat);
|
||||
make_keypoint_depth_heatmap_output(info.view->view, hand_idx, 0, 0,
|
||||
out_data_depth + (data_acc_idx * 22), hgt->visualizers.mat);
|
||||
|
||||
make_keypoint_heatmap_output(info->view->view, info->hand_idx, 0, 0,
|
||||
out_data + (data_acc_idx * plane_size), hgt->visualizers.mat);
|
||||
data_acc_idx++;
|
||||
|
||||
for (int finger = 0; finger < 5; finger++) {
|
||||
for (int joint = 0; joint < 4; joint++) {
|
||||
|
||||
make_keypoint_heatmap_output(info->view->view, info->hand_idx, 1 + joint, finger,
|
||||
make_keypoint_heatmap_output(info.view->view, hand_idx, 1 + joint, finger,
|
||||
out_data + (data_acc_idx * plane_size),
|
||||
hgt->visualizers.mat);
|
||||
make_keypoint_depth_heatmap_output(info.view->view, hand_idx, 1 + joint, finger,
|
||||
out_data_depth + (data_acc_idx * 22),
|
||||
hgt->visualizers.mat);
|
||||
data_acc_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
// Hand existence
|
||||
char amt[5];
|
||||
|
||||
snprintf(amt, ARRAY_SIZE(amt), "%.2f", is_hand_explicit);
|
||||
|
||||
cv::Point2i text_origin;
|
||||
|
||||
text_origin.x = root_x + 128 + 2;
|
||||
text_origin.y = root_y + 60;
|
||||
|
||||
// Clear out what was there before
|
||||
cv::rectangle(hgt->visualizers.mat, cv::Rect(text_origin - cv::Point2i{0, 25}, cv::Size{30, 30}), {255},
|
||||
cv::FILLED);
|
||||
|
||||
|
||||
if (hgt->tuneable_values.scribble_keypoint_model_outputs) {
|
||||
for (int finger = 0; finger < 5; finger++) {
|
||||
cv::Point last = {(int)keypoints_global[0].x, (int)keypoints_global[0].y};
|
||||
for (int joint = 0; joint < 4; joint++) {
|
||||
cv::Point the_new = {(int)keypoints_global[1 + finger * 4 + joint].x,
|
||||
(int)keypoints_global[1 + finger * 4 + joint].y};
|
||||
cv::putText(hgt->visualizers.mat, amt, text_origin, cv::FONT_HERSHEY_SIMPLEX, 0.3, {0, 0, 0});
|
||||
|
||||
cv::line(debug, last, the_new, colors[info->hand_idx]);
|
||||
last = the_new;
|
||||
}
|
||||
}
|
||||
// Curls
|
||||
cv::rectangle(hgt->visualizers.mat, cv::Rect(cv::Point2i{root_x, root_y + 128 + 22}, cv::Size{128, 60}),
|
||||
{255}, cv::FILLED);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
int r = 15;
|
||||
|
||||
for (int i = 0; i < 21; i++) {
|
||||
xrt_vec2 loc = keypoints_global[i];
|
||||
handDot(debug, loc, 2, (float)(i) / 21.0, 1, 2);
|
||||
}
|
||||
cv::Point2i center = {root_x + r + (20 * i), root_y + 128 + 60};
|
||||
|
||||
cv::circle(hgt->visualizers.mat, center, 1, {0}, 1);
|
||||
|
||||
float c = this_output.curls[i].value * 0.3;
|
||||
int x = cos(c) * r;
|
||||
// Remember, OpenCV has (0,0) at top left
|
||||
int y = -sin(c) * r;
|
||||
|
||||
cv::Point2i pt2 = {x, y};
|
||||
pt2 += center;
|
||||
cv::circle(hgt->visualizers.mat, pt2, 1, {0}, 1);
|
||||
cv::line(hgt->visualizers.mat, center, pt2, {0}, 1);
|
||||
}
|
||||
}
|
||||
|
||||
wrap->api->ReleaseValue(output_tensors[0]);
|
||||
for (size_t i = 0; i < ARRAY_SIZE(output_tensors); i++) {
|
||||
wrap->api->ReleaseValue(output_tensors[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -640,4 +984,4 @@ release_onnx_wrap(onnx_wrap *wrap)
|
|||
wrap->api->ReleaseEnv(wrap->env);
|
||||
}
|
||||
|
||||
} // namespace xrt::tracking::hand::mercury
|
||||
} // namespace xrt::tracking::hand::mercury
|
62
src/xrt/tracking/hand/mercury/hg_numerics_checker.hpp
Normal file
62
src/xrt/tracking/hand/mercury/hg_numerics_checker.hpp
Normal file
|
@ -0,0 +1,62 @@
|
|||
// Copyright 2021-2022, Collabora, Ltd.
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
/*!
|
||||
* @file
|
||||
* @brief Simple util for setting floating-point exceptions and checking for NaNs.
|
||||
* @author Moses Turner <moses@collabora.com>
|
||||
* @ingroup drv_ht
|
||||
*/
|
||||
|
||||
#undef PEDANTIC_NAN_CHECKS
|
||||
#undef NAN_EXCEPTIONS
|
||||
|
||||
#ifdef NAN_EXCEPTIONS
|
||||
#include <cfenv>
|
||||
#endif
|
||||
|
||||
namespace xrt::tracking::hand::mercury::numerics_checker {
|
||||
|
||||
#ifdef PEDANTIC_NAN_CHECKS
|
||||
#define CHECK_NOT_NAN(val) \
|
||||
do { \
|
||||
if (val != val) { \
|
||||
U_LOG_E(" was NAN at %d", __LINE__); \
|
||||
assert(false); \
|
||||
} \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
#define CHECK_NOT_NAN(val) (void)val;
|
||||
#endif
|
||||
|
||||
#ifdef NAN_EXCEPTIONS
|
||||
static int ex = FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID;
|
||||
|
||||
|
||||
static inline void
|
||||
set_floating_exceptions()
|
||||
{
|
||||
// NO: FE_UNDERFLOW, FE_INEXACT
|
||||
// https://stackoverflow.com/questions/60731382/c-setting-floating-point-exception-environment
|
||||
feenableexcept(ex); // Uncomment this for version 2
|
||||
}
|
||||
|
||||
static inline void
|
||||
remove_floating_exceptions()
|
||||
{
|
||||
fedisableexcept(ex);
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void
|
||||
set_floating_exceptions()
|
||||
{}
|
||||
|
||||
static inline void
|
||||
remove_floating_exceptions()
|
||||
{}
|
||||
#endif
|
||||
|
||||
|
||||
} // namespace xrt::tracking::hand::mercury::numerics_checker
|
File diff suppressed because it is too large
Load diff
|
@ -12,9 +12,10 @@
|
|||
|
||||
#include "hg_interface.h"
|
||||
|
||||
#include "tracking/t_calibration_opencv.hpp"
|
||||
|
||||
#include "tracking/t_hand_tracking.h"
|
||||
#include "tracking/t_camera_models.h"
|
||||
|
||||
#include "xrt/xrt_defines.h"
|
||||
#include "xrt/xrt_frame.h"
|
||||
|
||||
|
@ -22,6 +23,7 @@
|
|||
#include "math/m_vec2.h"
|
||||
#include "math/m_vec3.h"
|
||||
#include "math/m_mathinclude.h"
|
||||
#include "math/m_eigen_interop.hpp"
|
||||
|
||||
#include "util/u_frame_times_widget.h"
|
||||
#include "util/u_logging.h"
|
||||
|
@ -48,10 +50,10 @@
|
|||
#include "kine_lm/lm_interface.hpp"
|
||||
|
||||
|
||||
|
||||
namespace xrt::tracking::hand::mercury {
|
||||
|
||||
using namespace xrt::auxiliary::util;
|
||||
using namespace xrt::auxiliary::math;
|
||||
|
||||
#define HG_TRACE(hgt, ...) U_LOG_IFL_T(hgt->log_level, __VA_ARGS__)
|
||||
#define HG_DEBUG(hgt, ...) U_LOG_IFL_D(hgt->log_level, __VA_ARGS__)
|
||||
|
@ -68,30 +70,85 @@ static constexpr uint16_t kVisSpacerSize = 8;
|
|||
static const cv::Scalar RED(255, 30, 30);
|
||||
static const cv::Scalar YELLOW(255, 255, 0);
|
||||
static const cv::Scalar PINK(255, 0, 255);
|
||||
static const cv::Scalar GREEN(0, 255, 0);
|
||||
|
||||
static const cv::Scalar colors[2] = {YELLOW, RED};
|
||||
|
||||
constexpr enum xrt_hand_joint joints_5x5_to_26[5][5] = {
|
||||
{
|
||||
XRT_HAND_JOINT_WRIST,
|
||||
XRT_HAND_JOINT_THUMB_METACARPAL,
|
||||
XRT_HAND_JOINT_THUMB_PROXIMAL,
|
||||
XRT_HAND_JOINT_THUMB_DISTAL,
|
||||
XRT_HAND_JOINT_THUMB_TIP,
|
||||
},
|
||||
{
|
||||
XRT_HAND_JOINT_INDEX_METACARPAL,
|
||||
XRT_HAND_JOINT_INDEX_PROXIMAL,
|
||||
XRT_HAND_JOINT_INDEX_INTERMEDIATE,
|
||||
XRT_HAND_JOINT_INDEX_DISTAL,
|
||||
XRT_HAND_JOINT_INDEX_TIP,
|
||||
},
|
||||
{
|
||||
XRT_HAND_JOINT_MIDDLE_METACARPAL,
|
||||
XRT_HAND_JOINT_MIDDLE_PROXIMAL,
|
||||
XRT_HAND_JOINT_MIDDLE_INTERMEDIATE,
|
||||
XRT_HAND_JOINT_MIDDLE_DISTAL,
|
||||
XRT_HAND_JOINT_MIDDLE_TIP,
|
||||
},
|
||||
{
|
||||
XRT_HAND_JOINT_RING_METACARPAL,
|
||||
XRT_HAND_JOINT_RING_PROXIMAL,
|
||||
XRT_HAND_JOINT_RING_INTERMEDIATE,
|
||||
XRT_HAND_JOINT_RING_DISTAL,
|
||||
XRT_HAND_JOINT_RING_TIP,
|
||||
},
|
||||
{
|
||||
XRT_HAND_JOINT_LITTLE_METACARPAL,
|
||||
XRT_HAND_JOINT_LITTLE_PROXIMAL,
|
||||
XRT_HAND_JOINT_LITTLE_INTERMEDIATE,
|
||||
XRT_HAND_JOINT_LITTLE_DISTAL,
|
||||
XRT_HAND_JOINT_LITTLE_TIP,
|
||||
},
|
||||
};
|
||||
|
||||
namespace ROIProvenance {
|
||||
enum ROIProvenance
|
||||
{
|
||||
HAND_DETECTION,
|
||||
POSE_PREDICTION
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
// Forward declaration for ht_view
|
||||
struct HandTracking;
|
||||
struct ht_view;
|
||||
|
||||
// Using the compiler to stop me from getting 2D space mixed up with 3D space.
|
||||
struct Hand2D
|
||||
{
|
||||
struct xrt_vec2 kps[21];
|
||||
float confidences[21];
|
||||
};
|
||||
|
||||
struct Hand3D
|
||||
{
|
||||
struct xrt_vec3 kps[21];
|
||||
};
|
||||
|
||||
using hand21_2d = std::array<vec2_5, 21>;
|
||||
|
||||
struct projection_instructions
|
||||
{
|
||||
Eigen::Quaternionf rot_quat = Eigen::Quaternionf::Identity();
|
||||
float stereographic_radius = 0;
|
||||
bool flip = false;
|
||||
const t_camera_model_params &dist;
|
||||
|
||||
projection_instructions(const t_camera_model_params &dist) : dist(dist) {}
|
||||
};
|
||||
|
||||
struct model_input_wrap
|
||||
{
|
||||
float *data = nullptr;
|
||||
// int64_t isn't a bug; that's what onnxruntime wants.
|
||||
std::vector<int64_t> dimensions = {};
|
||||
int64_t dimensions[4];
|
||||
size_t num_dimensions = 0;
|
||||
|
||||
OrtValue *tensor = nullptr;
|
||||
const char *name;
|
||||
};
|
||||
|
@ -104,31 +161,37 @@ struct onnx_wrap
|
|||
OrtMemoryInfo *meminfo = nullptr;
|
||||
OrtSession *session = nullptr;
|
||||
|
||||
std::vector<model_input_wrap> wraps;
|
||||
std::vector<model_input_wrap> wraps = {};
|
||||
};
|
||||
|
||||
struct hand_bounding_box
|
||||
// Multipurpose.
|
||||
// * Hand detector writes into center_px, size_px, found and hand_detection_confidence
|
||||
// * Keypoint estimator operates on this to a direction/radius for the stereographic projection, and for the associated
|
||||
// keypoints.
|
||||
struct hand_region_of_interest
|
||||
{
|
||||
xrt_vec2 center;
|
||||
ROIProvenance::ROIProvenance provenance;
|
||||
|
||||
// Either set by the detection model or by predict_new_regions_of_interest/back_project
|
||||
xrt_vec2 center_px;
|
||||
float size_px;
|
||||
|
||||
bool found;
|
||||
float confidence;
|
||||
bool hand_detection_confidence;
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct hand_detection_run_info
|
||||
{
|
||||
ht_view *view;
|
||||
hand_bounding_box *outputs[2];
|
||||
// These are not duplicates of ht_view's regions_of_interest_this_frame!
|
||||
// If some hands are already tracked, we have logic that only copies new ROIs to this frame's regions of
|
||||
// interest.
|
||||
hand_region_of_interest outputs[2];
|
||||
};
|
||||
|
||||
|
||||
struct keypoint_output
|
||||
{
|
||||
Hand2D hand_px_coord;
|
||||
Hand2D hand_tan_space;
|
||||
float confidences[21];
|
||||
};
|
||||
|
||||
struct keypoint_estimation_run_info
|
||||
{
|
||||
ht_view *view;
|
||||
|
@ -144,33 +207,19 @@ struct ht_view
|
|||
|
||||
struct t_camera_extra_info_one_view camera_info;
|
||||
|
||||
cv::Mat distortion;
|
||||
cv::Matx<double, 3, 3> cameraMatrix;
|
||||
cv::Matx33d rotate_camera_to_stereo_camera; // R1 or R2
|
||||
t_camera_model_params hgdist_orig;
|
||||
// With fx, fy, cx, cy scaled to the current camera resolution as appropriate.
|
||||
t_camera_model_params hgdist;
|
||||
|
||||
|
||||
cv::Mat run_model_on_this;
|
||||
cv::Mat debug_out_to_this;
|
||||
|
||||
struct hand_bounding_box bboxes_this_frame[2]; // left, right
|
||||
struct hand_region_of_interest regions_of_interest_this_frame[2]; // left, right
|
||||
|
||||
struct keypoint_estimation_run_info run_info[2];
|
||||
|
||||
struct keypoint_output keypoint_outputs[2];
|
||||
};
|
||||
|
||||
struct hand_history
|
||||
{
|
||||
HistoryBuffer<xrt_hand_joint_set, 5> hands;
|
||||
HistoryBuffer<uint64_t, 5> timestamps;
|
||||
};
|
||||
|
||||
struct output_struct_one_frame
|
||||
{
|
||||
xrt_vec2 left[21];
|
||||
float confidences_left[21];
|
||||
xrt_vec2 right[21];
|
||||
float confidences_right[21];
|
||||
};
|
||||
|
||||
struct hand_size_refinement
|
||||
{
|
||||
|
@ -209,7 +258,6 @@ public:
|
|||
|
||||
float multiply_px_coord_for_undistort;
|
||||
|
||||
bool use_fisheye;
|
||||
|
||||
struct t_stereo_camera_calibration *calib;
|
||||
|
||||
|
@ -240,22 +288,44 @@ public:
|
|||
|
||||
lm::KinematicHandLM *kinematic_hands[2];
|
||||
|
||||
// struct hand_detection_state_tracker st_det[2] = {};
|
||||
// These are produced by the keypoint estimator and consumed by the nonlinear optimizer
|
||||
// left hand, right hand THEN left view, right view
|
||||
struct one_frame_input keypoint_outputs[2];
|
||||
|
||||
// Used to track whether this hand has *ever* been seen during this user's session, so that we can spend some
|
||||
// extra time optimizing their hand size if one of their hands isn't visible for the first bit.
|
||||
bool hand_seen_before[2] = {false, false};
|
||||
|
||||
// Used to:
|
||||
// * see if a hand is currently being tracked.
|
||||
// * If so, don't replace the bounding box with that from a hand detection.
|
||||
// * Also, if both hands are being tracked, we just don't run the hand detector.
|
||||
bool last_frame_hand_detected[2] = {false, false};
|
||||
|
||||
// Used to decide whether to run the keypoint estimator/nonlinear optimizer.
|
||||
bool this_frame_hand_detected[2] = {false, false};
|
||||
|
||||
struct hand_history histories[2];
|
||||
// Used to determine pose-predicted regions of interest. Contains the last five hand keypoint positions, or less
|
||||
// if the hand has just started being tracked.
|
||||
HistoryBuffer<Eigen::Array<float, 3, 21>, 5> history_hands[2] = {};
|
||||
|
||||
// Contains the last 5 timestamps, or less if hand tracking has just started.
|
||||
HistoryBuffer<uint64_t, 5> history_timestamps = {};
|
||||
|
||||
|
||||
// left hand, right hand
|
||||
Eigen::Array<float, 3, 21> pose_predicted_keypoints[2];
|
||||
|
||||
int detection_counter = 0;
|
||||
|
||||
struct hand_size_refinement refinement = {};
|
||||
// Moses hand size is ~0.095; they has big-ish hands so let's do 0.09
|
||||
float target_hand_size = 0.09;
|
||||
float target_hand_size = STANDARD_HAND_SIZE;
|
||||
|
||||
|
||||
xrt_frame *debug_frame;
|
||||
|
||||
|
||||
// This should be removed.
|
||||
void (*keypoint_estimation_run_func)(void *);
|
||||
|
||||
|
||||
|
@ -267,15 +337,25 @@ public:
|
|||
struct
|
||||
{
|
||||
bool new_user_event = false;
|
||||
struct u_var_draggable_f32 after_detection_fac;
|
||||
struct u_var_draggable_f32 dyn_radii_fac;
|
||||
struct u_var_draggable_f32 dyn_joint_y_angle_error;
|
||||
struct u_var_draggable_f32 amount_to_lerp_prediction;
|
||||
struct u_var_draggable_f32 max_permissible_iou;
|
||||
bool scribble_predictions_into_this_frame = false;
|
||||
struct u_var_draggable_f32 amt_use_depth;
|
||||
struct u_var_draggable_f32 mpiou_any;
|
||||
struct u_var_draggable_f32 mpiou_single_detection;
|
||||
struct u_var_draggable_f32 mpiou_double_detection;
|
||||
struct u_var_draggable_f32 max_reprojection_error;
|
||||
struct u_var_draggable_f32 opt_smooth_factor;
|
||||
struct u_var_draggable_f32 max_hand_dist;
|
||||
bool scribble_predictions_into_next_frame = false;
|
||||
bool scribble_keypoint_model_outputs = false;
|
||||
bool scribble_optimizer_outputs = true;
|
||||
bool always_run_detection_model = false;
|
||||
int max_num_outside_view = 3;
|
||||
bool always_run_detection_model = false; // true
|
||||
bool optimize_hand_size = true;
|
||||
int max_num_outside_view = 6;
|
||||
bool enable_pose_predicted_input = true;
|
||||
bool enable_framerate_based_smoothing = false;
|
||||
} tuneable_values;
|
||||
|
||||
|
||||
|
@ -317,4 +397,33 @@ run_keypoint_estimation(void *ptr);
|
|||
void
|
||||
release_onnx_wrap(onnx_wrap *wrap);
|
||||
|
||||
|
||||
void
|
||||
make_projection_instructions(t_camera_model_params &dist,
|
||||
bool flip_after,
|
||||
float expand_val,
|
||||
float twist,
|
||||
Eigen::Array<float, 3, 21> &joints,
|
||||
projection_instructions &out_instructions,
|
||||
hand21_2d &out_hand);
|
||||
|
||||
|
||||
void
|
||||
make_projection_instructions_angular(xrt_vec3 direction_3d,
|
||||
bool flip_after,
|
||||
float angular_radius,
|
||||
float expand_val,
|
||||
float twist,
|
||||
projection_instructions &out_instructions);
|
||||
|
||||
void
|
||||
stereographic_project_image(const t_camera_model_params &dist,
|
||||
const projection_instructions &instructions,
|
||||
cv::Mat &input_image,
|
||||
cv::Mat *debug_image,
|
||||
const cv::Scalar boundary_color,
|
||||
cv::Mat &out);
|
||||
|
||||
|
||||
|
||||
} // namespace xrt::tracking::hand::mercury
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
*/
|
||||
|
||||
#pragma once
|
||||
#include <array>
|
||||
#include "xrt/xrt_defines.h"
|
||||
namespace xrt::tracking::hand::mercury {
|
||||
|
||||
|
@ -17,14 +18,35 @@ namespace xrt::tracking::hand::mercury {
|
|||
// Different from `Scalar` in lm_* templates - those can be `Ceres::Jet`s too.
|
||||
typedef float HandScalar;
|
||||
|
||||
// Used for "2.5D" joint locations, with a 2D ray direction in stereographic-space and a 1D depth relative to the
|
||||
// middle-proximal joint.
|
||||
struct vec2_5
|
||||
{
|
||||
xrt_vec2 pos_2d;
|
||||
float depth_relative_to_midpxm;
|
||||
|
||||
float confidence_xy;
|
||||
float confidence_depth;
|
||||
};
|
||||
|
||||
// Using the compiler to stop me from getting 2D space mixed up with 3D space.
|
||||
using MLOutput2D = std::array<vec2_5, 21>;
|
||||
|
||||
struct one_curl
|
||||
{
|
||||
float value;
|
||||
float variance;
|
||||
};
|
||||
|
||||
// Inputs to kinematic optimizers
|
||||
//!@todo Ask Ryan if adding `= {}` only does something if we do one_frame_one_view bla = {}.
|
||||
struct one_frame_one_view
|
||||
{
|
||||
bool active = true;
|
||||
xrt_vec3 rays[21] = {};
|
||||
HandScalar confidences[21] = {};
|
||||
xrt_quat look_dir;
|
||||
float stereographic_radius;
|
||||
MLOutput2D keypoints_in_scaled_stereographic;
|
||||
one_curl curls[5];
|
||||
};
|
||||
|
||||
struct one_frame_input
|
||||
|
@ -64,4 +86,9 @@ namespace Joint21 {
|
|||
};
|
||||
}
|
||||
|
||||
//!@todo These are not backed up by real anthropometry data; they are just guesstimates. Patches welcome!
|
||||
constexpr HandScalar MIN_HAND_SIZE = 0.06;
|
||||
constexpr HandScalar STANDARD_HAND_SIZE = 0.09;
|
||||
constexpr HandScalar MAX_HAND_SIZE = 0.12;
|
||||
|
||||
} // namespace xrt::tracking::hand::mercury
|
||||
|
|
|
@ -8,11 +8,8 @@
|
|||
*/
|
||||
#pragma once
|
||||
|
||||
// #include <Eigen/Core>
|
||||
// #include <Eigen/Geometry>
|
||||
#include "math/m_mathinclude.h"
|
||||
#include "../kine_common.hpp"
|
||||
#include <type_traits>
|
||||
|
||||
namespace xrt::tracking::hand::mercury::lm {
|
||||
|
||||
|
@ -48,17 +45,21 @@ static constexpr size_t kNumOrientationsInFinger = 4;
|
|||
|
||||
// Not tested/tuned well enough; might make tracking slow.
|
||||
#undef USE_HAND_PLAUSIBILITY
|
||||
// Should work, but our neural nets aren't good enough yet.
|
||||
#undef USE_HAND_CURLS
|
||||
|
||||
#undef RESIDUALS_HACKING
|
||||
|
||||
static constexpr size_t kMetacarpalBoneDim = 3;
|
||||
static constexpr size_t kProximalBoneDim = 2;
|
||||
static constexpr size_t kFingerDim = kProximalBoneDim + kMetacarpalBoneDim + 2;
|
||||
static constexpr size_t kFingerDim = kProximalBoneDim + 2;
|
||||
static constexpr size_t kThumbDim = kMetacarpalBoneDim + 2;
|
||||
static constexpr size_t kHandSizeDim = 1;
|
||||
static constexpr size_t kHandTranslationDim = 3;
|
||||
static constexpr size_t kHandOrientationDim = 3;
|
||||
|
||||
|
||||
|
||||
// HRTC = Hand Residual Temporal Consistency
|
||||
static constexpr size_t kHRTC_HandSize = 1;
|
||||
static constexpr size_t kHRTC_RootBoneTranslation = 3;
|
||||
static constexpr size_t kHRTC_RootBoneOrientation = 3; // Direct difference between the two angle-axis rotations. This
|
||||
|
@ -69,12 +70,21 @@ static constexpr size_t kHRTC_ThumbCurls = 2;
|
|||
|
||||
static constexpr size_t kHRTC_ProximalSimilarity = 2;
|
||||
|
||||
static constexpr size_t kHRTC_FingerMCPSwingTwist = 3;
|
||||
static constexpr size_t kHRTC_FingerMCPSwingTwist = 0;
|
||||
static constexpr size_t kHRTC_FingerPXMSwing = 2;
|
||||
static constexpr size_t kHRTC_FingerCurls = 2;
|
||||
static constexpr size_t kHRTC_CurlSimilarity = 1;
|
||||
|
||||
static constexpr size_t kHandResidualOneSideSize = 21 * 2;
|
||||
|
||||
static constexpr size_t kHandResidualOneSideXY = (kNumNNJoints * 2);
|
||||
static constexpr size_t kHandResidualOneSideDepth = 20; // one less because midxpm joint isn't used
|
||||
#ifdef USE_HAND_CURLS
|
||||
static constexpr size_t kHandResidualOneSideMatchCurls = 4;
|
||||
#else
|
||||
static constexpr size_t kHandResidualOneSideMatchCurls = 0;
|
||||
#endif
|
||||
static constexpr size_t kHandResidualOneSideSize =
|
||||
kHandResidualOneSideXY + kHandResidualOneSideDepth + kHandResidualOneSideMatchCurls;
|
||||
|
||||
static constexpr size_t kHandResidualTemporalConsistencyOneFingerSize = //
|
||||
kHRTC_FingerMCPSwingTwist + //
|
||||
|
@ -97,27 +107,49 @@ static constexpr size_t kHandResidualTemporalConsistencySize = //
|
|||
0;
|
||||
|
||||
|
||||
// Factors to multiply different values by to get a smooth hand trajectory without introducing too much latency
|
||||
class HandStability
|
||||
{
|
||||
public:
|
||||
HandScalar stabilityRoot;
|
||||
HandScalar stabilityCurlRoot;
|
||||
HandScalar stabilityOtherRoot;
|
||||
|
||||
// 1.0 is good, a little jittery.
|
||||
// Anything above 3.0 generally breaks.
|
||||
static constexpr HandScalar kStabilityRoot = 1.0;
|
||||
static constexpr HandScalar kStabilityCurlRoot = kStabilityRoot * 0.03f;
|
||||
static constexpr HandScalar kStabilityOtherRoot = kStabilityRoot * 0.03f;
|
||||
HandScalar stabilityThumbMCPSwing;
|
||||
HandScalar stabilityThumbMCPTwist;
|
||||
|
||||
static constexpr HandScalar kStabilityThumbMCPSwing = kStabilityCurlRoot * 1.5f;
|
||||
static constexpr HandScalar kStabilityThumbMCPTwist = kStabilityCurlRoot * 1.5f;
|
||||
HandScalar stabilityFingerMCPSwing;
|
||||
HandScalar stabilityFingerMCPTwist;
|
||||
|
||||
static constexpr HandScalar kStabilityFingerMCPSwing = kStabilityCurlRoot * 3.0f;
|
||||
static constexpr HandScalar kStabilityFingerMCPTwist = kStabilityCurlRoot * 3.0f;
|
||||
HandScalar stabilityFingerPXMSwingX;
|
||||
HandScalar stabilityFingerPXMSwingY;
|
||||
|
||||
static constexpr HandScalar kStabilityFingerPXMSwingX = kStabilityCurlRoot * 1.0f;
|
||||
static constexpr HandScalar kStabilityFingerPXMSwingY = kStabilityCurlRoot * 1.6f;
|
||||
HandScalar stabilityRootPosition;
|
||||
HandScalar stabilityHandSize;
|
||||
|
||||
static constexpr HandScalar kStabilityRootPosition = kStabilityOtherRoot * 30;
|
||||
static constexpr HandScalar kStabilityHandSize = kStabilityOtherRoot * 1000;
|
||||
HandScalar stabilityHandOrientationZ;
|
||||
HandScalar stabilityHandOrientationXY;
|
||||
HandStability(float root = 15.0f)
|
||||
{
|
||||
this->stabilityRoot = root;
|
||||
this->stabilityCurlRoot = this->stabilityRoot * 0.03f;
|
||||
this->stabilityOtherRoot = this->stabilityRoot * 0.03f;
|
||||
|
||||
static constexpr HandScalar kStabilityHandOrientation = kStabilityOtherRoot * 3;
|
||||
this->stabilityThumbMCPSwing = this->stabilityCurlRoot * 1.5f;
|
||||
this->stabilityThumbMCPTwist = this->stabilityCurlRoot * 1.5f;
|
||||
|
||||
this->stabilityFingerMCPSwing = this->stabilityCurlRoot * 3.0f;
|
||||
this->stabilityFingerMCPTwist = this->stabilityCurlRoot * 3.0f;
|
||||
|
||||
this->stabilityFingerPXMSwingX = this->stabilityCurlRoot * 0.6f;
|
||||
this->stabilityFingerPXMSwingY = this->stabilityCurlRoot * 1.6f;
|
||||
|
||||
this->stabilityRootPosition = this->stabilityOtherRoot * 25;
|
||||
this->stabilityHandSize = this->stabilityOtherRoot * 1000;
|
||||
|
||||
this->stabilityHandOrientationZ = this->stabilityOtherRoot * 0.5;
|
||||
this->stabilityHandOrientationXY = this->stabilityOtherRoot * 0.8;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static constexpr HandScalar kPlausibilityRoot = 1.0;
|
||||
|
@ -154,7 +186,6 @@ calc_input_size(bool optimize_hand_size)
|
|||
return out;
|
||||
}
|
||||
|
||||
|
||||
constexpr size_t
|
||||
calc_residual_size(bool stability, bool optimize_hand_size, int num_views)
|
||||
{
|
||||
|
@ -179,10 +210,10 @@ calc_residual_size(bool stability, bool optimize_hand_size, int num_views)
|
|||
|
||||
template <typename Scalar> struct Quat
|
||||
{
|
||||
Scalar x;
|
||||
Scalar y;
|
||||
Scalar z;
|
||||
Scalar w;
|
||||
Scalar x = {};
|
||||
Scalar y = {};
|
||||
Scalar z = {};
|
||||
Scalar w = {};
|
||||
|
||||
/// Default constructor - DOES NOT INITIALIZE VALUES
|
||||
constexpr Quat() {}
|
||||
|
@ -221,9 +252,9 @@ template <typename Scalar> struct Vec3
|
|||
{
|
||||
// Note that these are not initialized, for performance reasons.
|
||||
// If you want them initialized, use Zero() or something else
|
||||
Scalar x;
|
||||
Scalar y;
|
||||
Scalar z;
|
||||
Scalar x = {};
|
||||
Scalar y = {};
|
||||
Scalar z = {};
|
||||
|
||||
/// Default constructor - DOES NOT INITIALIZE VALUES
|
||||
constexpr Vec3() {}
|
||||
|
@ -254,12 +285,26 @@ template <typename Scalar> struct Vec3
|
|||
{
|
||||
return Vec3(0.f, 0.f, 0.f);
|
||||
}
|
||||
|
||||
// Norm, vector length, whatever.
|
||||
Scalar
|
||||
norm()
|
||||
{
|
||||
Scalar len = (Scalar)(0);
|
||||
|
||||
len += this->x * this->x;
|
||||
len += this->y * this->y;
|
||||
len += this->z * this->z;
|
||||
|
||||
len = sqrt(len);
|
||||
return len;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Scalar> struct Vec2
|
||||
{
|
||||
Scalar x;
|
||||
Scalar y;
|
||||
Scalar x = {};
|
||||
Scalar y = {};
|
||||
|
||||
/// Default constructor - DOES NOT INITIALIZE VALUES
|
||||
constexpr Vec2() noexcept {}
|
||||
|
@ -298,7 +343,7 @@ template <typename Scalar> struct Vec2
|
|||
|
||||
template <typename T> struct ResidualHelper
|
||||
{
|
||||
T *out_residual;
|
||||
T *out_residual = nullptr;
|
||||
size_t out_residual_idx = 0;
|
||||
|
||||
ResidualHelper(T *residual) : out_residual(residual)
|
||||
|
@ -312,7 +357,4 @@ template <typename T> struct ResidualHelper
|
|||
this->out_residual[out_residual_idx++] = value;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace xrt::tracking::hand::mercury::lm
|
||||
|
|
|
@ -48,9 +48,11 @@ void
|
|||
optimizer_run(KinematicHandLM *hand,
|
||||
one_frame_input &observation,
|
||||
bool hand_was_untracked_last_frame,
|
||||
float smoothing_factor, //!<- Unused if this is the first frame
|
||||
bool optimize_hand_size,
|
||||
float target_hand_size,
|
||||
float hand_size_err_mul,
|
||||
float amt_use_depth,
|
||||
xrt_hand_joint_set &out_hand,
|
||||
float &out_hand_size,
|
||||
float &out_reprojection_error);
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -8,48 +8,46 @@
|
|||
* @ingroup tracking
|
||||
*/
|
||||
|
||||
// #include <iostream>
|
||||
// #include <cmath>
|
||||
#include "util/u_logging.h"
|
||||
#include "math/m_api.h"
|
||||
|
||||
#include "lm_interface.hpp"
|
||||
#include "lm_defines.hpp"
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
|
||||
// #include "lm_rotations.hpp"
|
||||
|
||||
namespace xrt::tracking::hand::mercury::lm {
|
||||
|
||||
template <typename T> struct OptimizerMetacarpalBone
|
||||
{
|
||||
Vec2<T> swing;
|
||||
T twist;
|
||||
Vec2<T> swing = {};
|
||||
T twist = {};
|
||||
};
|
||||
|
||||
template <typename T> struct OptimizerFinger
|
||||
{
|
||||
OptimizerMetacarpalBone<T> metacarpal;
|
||||
Vec2<T> proximal_swing;
|
||||
OptimizerMetacarpalBone<T> metacarpal = {};
|
||||
Vec2<T> proximal_swing = {};
|
||||
// Not Vec2.
|
||||
T rots[2];
|
||||
T rots[2] = {};
|
||||
};
|
||||
|
||||
template <typename T> struct OptimizerThumb
|
||||
{
|
||||
OptimizerMetacarpalBone<T> metacarpal;
|
||||
OptimizerMetacarpalBone<T> metacarpal = {};
|
||||
// Again not Vec2.
|
||||
T rots[2];
|
||||
T rots[2] = {};
|
||||
};
|
||||
|
||||
template <typename T> struct OptimizerHand
|
||||
{
|
||||
T hand_size;
|
||||
Vec3<T> wrist_location;
|
||||
Vec3<T> wrist_location = {};
|
||||
// This is constant, a ceres::Rotation.h quat,, taken from last frame.
|
||||
Quat<T> wrist_pre_orientation_quat;
|
||||
Quat<T> wrist_pre_orientation_quat = {};
|
||||
// This is optimized - angle-axis rotation vector. Starts at 0, loss goes up the higher it goes because it
|
||||
// indicates more of a rotation.
|
||||
Vec3<T> wrist_post_orientation_aax;
|
||||
Vec3<T> wrist_post_orientation_aax = {};
|
||||
OptimizerThumb<T> thumb = {};
|
||||
OptimizerFinger<T> finger[4] = {};
|
||||
};
|
||||
|
@ -77,16 +75,18 @@ public:
|
|||
class HandLimit
|
||||
{
|
||||
public:
|
||||
minmax hand_size;
|
||||
minmax hand_size = {};
|
||||
|
||||
minmax thumb_mcp_swing_x, thumb_mcp_swing_y, thumb_mcp_twist;
|
||||
minmax thumb_curls[2];
|
||||
minmax thumb_mcp_swing_x = {};
|
||||
minmax thumb_mcp_swing_y = {};
|
||||
minmax thumb_mcp_twist = {};
|
||||
minmax thumb_curls[2] = {};
|
||||
|
||||
FingerLimit fingers[4];
|
||||
FingerLimit fingers[4] = {};
|
||||
|
||||
HandLimit()
|
||||
{
|
||||
hand_size = {0.095 - 0.03, 0.095 + 0.03};
|
||||
hand_size = {MIN_HAND_SIZE, MAX_HAND_SIZE};
|
||||
|
||||
thumb_mcp_swing_x = {rad<HandScalar>(-60), rad<HandScalar>(60)};
|
||||
thumb_mcp_swing_y = {rad<HandScalar>(-60), rad<HandScalar>(60)};
|
||||
|
@ -97,25 +97,31 @@ public:
|
|||
}
|
||||
|
||||
|
||||
constexpr double margin = 0.09;
|
||||
HandScalar margin = 0.0001;
|
||||
|
||||
fingers[0].mcp_swing_y = {-0.19 - margin, -0.19 + margin};
|
||||
fingers[1].mcp_swing_y = {0.00 - margin, 0.00 + margin};
|
||||
fingers[2].mcp_swing_y = {0.19 - margin, 0.19 + margin};
|
||||
fingers[3].mcp_swing_y = {0.38 - margin, 0.38 + margin};
|
||||
fingers[0].mcp_swing_y = {HandScalar(-0.19) - margin, HandScalar(-0.19) + margin};
|
||||
fingers[1].mcp_swing_y = {HandScalar(0.00) - margin, HandScalar(0.00) + margin};
|
||||
fingers[2].mcp_swing_y = {HandScalar(0.19) - margin, HandScalar(0.19) + margin};
|
||||
fingers[3].mcp_swing_y = {HandScalar(0.38) - margin, HandScalar(0.38) + margin};
|
||||
|
||||
|
||||
fingers[0].mcp_swing_x = {HandScalar(-0.02) - margin, HandScalar(-0.02) + margin};
|
||||
fingers[1].mcp_swing_x = {HandScalar(0.00) - margin, HandScalar(0.00) + margin};
|
||||
fingers[2].mcp_swing_x = {HandScalar(0.02) - margin, HandScalar(0.02) + margin};
|
||||
fingers[3].mcp_swing_x = {HandScalar(0.04) - margin, HandScalar(0.04) + margin};
|
||||
|
||||
|
||||
for (int finger_idx = 0; finger_idx < 4; finger_idx++) {
|
||||
FingerLimit &finger = fingers[finger_idx];
|
||||
|
||||
finger.mcp_swing_x = {rad<HandScalar>(-10), rad<HandScalar>(10)};
|
||||
// finger.mcp_swing_x = {rad<HandScalar>(-0.0001), rad<HandScalar>(0.0001)};
|
||||
finger.mcp_twist = {rad<HandScalar>(-4), rad<HandScalar>(4)};
|
||||
|
||||
finger.pxm_swing_x = {rad<HandScalar>(-100), rad<HandScalar>(20)}; // ??? why is it reversed
|
||||
finger.pxm_swing_y = {rad<HandScalar>(-20), rad<HandScalar>(20)};
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
finger.curls[i] = {rad<HandScalar>(-90), rad<HandScalar>(10)};
|
||||
finger.curls[i] = {rad<HandScalar>(-90), rad<HandScalar>(0)};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -124,9 +130,6 @@ public:
|
|||
static const class HandLimit the_limit = {};
|
||||
|
||||
|
||||
constexpr HandScalar hand_size_min = 0.095 - 0.03;
|
||||
constexpr HandScalar hand_size_max = 0.095 + 0.03;
|
||||
|
||||
template <typename T>
|
||||
inline T
|
||||
LMToModel(T lm, minmax mm)
|
||||
|
@ -169,17 +172,7 @@ OptimizerHandUnpackFromVector(const T *in, bool use_hand_size, T hand_size, Opti
|
|||
out.thumb.rots[1] = LMToModel(in[acc_idx++], the_limit.thumb_curls[1]);
|
||||
|
||||
for (int finger_idx = 0; finger_idx < 4; finger_idx++) {
|
||||
|
||||
out.finger[finger_idx].metacarpal.swing.x =
|
||||
LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].mcp_swing_x);
|
||||
|
||||
out.finger[finger_idx].metacarpal.swing.y =
|
||||
LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].mcp_swing_y);
|
||||
|
||||
out.finger[finger_idx].metacarpal.twist =
|
||||
LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].mcp_twist);
|
||||
|
||||
|
||||
// Note that we are not unpacking the metacarpal swing/twist as it is constant.
|
||||
out.finger[finger_idx].proximal_swing.x =
|
||||
LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].pxm_swing_x);
|
||||
out.finger[finger_idx].proximal_swing.y =
|
||||
|
@ -224,13 +217,7 @@ OptimizerHandPackIntoVector(OptimizerHand<T> &in, bool use_hand_size, T *out)
|
|||
out[acc_idx++] = ModelToLM(in.thumb.rots[1], the_limit.thumb_curls[1]);
|
||||
|
||||
for (int finger_idx = 0; finger_idx < 4; finger_idx++) {
|
||||
out[acc_idx++] =
|
||||
ModelToLM(in.finger[finger_idx].metacarpal.swing.x, the_limit.fingers[finger_idx].mcp_swing_x);
|
||||
out[acc_idx++] =
|
||||
ModelToLM(in.finger[finger_idx].metacarpal.swing.y, the_limit.fingers[finger_idx].mcp_swing_y);
|
||||
out[acc_idx++] =
|
||||
ModelToLM(in.finger[finger_idx].metacarpal.twist, the_limit.fingers[finger_idx].mcp_twist);
|
||||
|
||||
// Note that we are not packing the metacarpal swing/twist as it is constant.
|
||||
out[acc_idx++] =
|
||||
ModelToLM(in.finger[finger_idx].proximal_swing.x, the_limit.fingers[finger_idx].pxm_swing_x);
|
||||
out[acc_idx++] =
|
||||
|
@ -252,18 +239,14 @@ template <typename T>
|
|||
void
|
||||
OptimizerHandInit(OptimizerHand<T> &opt, Quat<T> &pre_rotation)
|
||||
{
|
||||
opt.hand_size = (T)(0.095);
|
||||
opt.hand_size = (T)STANDARD_HAND_SIZE;
|
||||
|
||||
opt.wrist_post_orientation_aax.x = (T)(0);
|
||||
opt.wrist_post_orientation_aax.y = (T)(0);
|
||||
opt.wrist_post_orientation_aax.z = (T)(0);
|
||||
|
||||
// opt.store_wrist_pre_orientation_quat = pre_rotation;
|
||||
|
||||
opt.wrist_pre_orientation_quat.w = (T)pre_rotation.w;
|
||||
opt.wrist_pre_orientation_quat.x = (T)pre_rotation.x;
|
||||
opt.wrist_pre_orientation_quat.y = (T)pre_rotation.y;
|
||||
opt.wrist_pre_orientation_quat.z = (T)pre_rotation.z;
|
||||
opt.wrist_pre_orientation_quat = pre_rotation;
|
||||
|
||||
opt.wrist_location.x = (T)(0);
|
||||
opt.wrist_location.y = (T)(0);
|
||||
|
@ -275,23 +258,28 @@ OptimizerHandInit(OptimizerHand<T> &opt, Quat<T> &pre_rotation)
|
|||
opt.finger[i].metacarpal.swing.x = T(0);
|
||||
opt.finger[i].metacarpal.twist = T(0);
|
||||
|
||||
opt.finger[i].proximal_swing.x = rad<T>((T)(15));
|
||||
opt.finger[i].rots[0] = rad<T>((T)(-5));
|
||||
opt.finger[i].rots[1] = rad<T>((T)(-5));
|
||||
opt.finger[i].proximal_swing.x = T(rad<HandScalar>(15.0f));
|
||||
opt.finger[i].rots[0] = T(rad<HandScalar>(-5));
|
||||
opt.finger[i].rots[1] = T(rad<HandScalar>(-5));
|
||||
}
|
||||
|
||||
opt.thumb.metacarpal.swing.x = (T)(0);
|
||||
opt.thumb.metacarpal.swing.y = (T)(0);
|
||||
opt.thumb.metacarpal.twist = (T)(0);
|
||||
|
||||
opt.thumb.rots[0] = rad<T>((T)(-5));
|
||||
opt.thumb.rots[1] = rad<T>((T)(-59));
|
||||
opt.thumb.rots[0] = T(rad<HandScalar>(-5));
|
||||
opt.thumb.rots[1] = T(rad<HandScalar>(-59));
|
||||
|
||||
opt.finger[0].metacarpal.swing.y = (T)(-0.19);
|
||||
opt.finger[1].metacarpal.swing.y = (T)(0);
|
||||
opt.finger[2].metacarpal.swing.y = (T)(0.19);
|
||||
opt.finger[3].metacarpal.swing.y = (T)(0.38);
|
||||
|
||||
opt.finger[0].metacarpal.swing.x = (T)(-0.02);
|
||||
opt.finger[1].metacarpal.swing.x = (T)(0);
|
||||
opt.finger[2].metacarpal.swing.x = (T)(0.02);
|
||||
opt.finger[3].metacarpal.swing.x = (T)(0.04);
|
||||
|
||||
opt.finger[0].proximal_swing.y = (T)(-0.01);
|
||||
opt.finger[1].proximal_swing.y = (T)(0);
|
||||
opt.finger[2].proximal_swing.y = (T)(0.01);
|
||||
|
@ -312,11 +300,11 @@ OptimizerHandSquashRotations(OptimizerHand<T> &opt, Quat<T> &out_orientation)
|
|||
|
||||
Quat<T> &pre_rotation = opt.wrist_pre_orientation_quat;
|
||||
|
||||
Quat<T> post_rotation;
|
||||
Quat<T> post_rotation = {};
|
||||
|
||||
AngleAxisToQuaternion(opt.wrist_post_orientation_aax, post_rotation);
|
||||
|
||||
Quat<T> tmp_new_pre_rotation;
|
||||
Quat<T> tmp_new_pre_rotation = {};
|
||||
|
||||
QuaternionProduct(pre_rotation, post_rotation, tmp_new_pre_rotation);
|
||||
|
||||
|
|
|
@ -34,14 +34,6 @@ TEST_CASE("LevenbergMarquardt")
|
|||
|
||||
struct one_frame_input input = {};
|
||||
|
||||
for (int i = 0; i < 21; i++) {
|
||||
|
||||
input.views[0].rays[i] = m_vec3_normalize({0, (float)i, -1}); //{0,(float)i,-1};
|
||||
input.views[1].rays[i] = m_vec3_normalize({(float)i, 0, -1});
|
||||
input.views[0].confidences[i] = 1;
|
||||
input.views[1].confidences[i] = 1;
|
||||
}
|
||||
|
||||
lm::KinematicHandLM *hand;
|
||||
|
||||
xrt_pose left_in_right = XRT_POSE_IDENTITY;
|
||||
|
@ -53,7 +45,17 @@ TEST_CASE("LevenbergMarquardt")
|
|||
xrt_hand_joint_set out;
|
||||
float out_hand_size;
|
||||
float out_reprojection_error;
|
||||
lm::optimizer_run(hand, input, true, true, 0.09, 0.5, out, out_hand_size, out_reprojection_error);
|
||||
lm::optimizer_run(hand, //
|
||||
input, //
|
||||
true, //
|
||||
2.0f, //
|
||||
true, //
|
||||
0.09, //
|
||||
0.5, //
|
||||
0.5f, //
|
||||
out, //
|
||||
out_hand_size, //
|
||||
out_reprojection_error);
|
||||
|
||||
CHECK(std::isfinite(out_reprojection_error));
|
||||
CHECK(std::isfinite(out_hand_size));
|
||||
|
|
Loading…
Reference in a new issue