h/mercury: Update hand tracking with new half-artificial model

This commit is contained in:
Moses Turner 2023-01-31 00:58:19 -06:00
parent bfdeaa7d8f
commit e9f79c45bf
13 changed files with 2472 additions and 924 deletions

View file

@ -24,6 +24,25 @@ target_link_libraries(
ONNXRuntime::ONNXRuntime
)
add_library(t_ht_mercury_distorter STATIC hg_image_distorter.cpp)
target_link_libraries(t_ht_mercury_distorter PRIVATE aux_math aux_tracking aux_os aux_util)
target_include_directories(
t_ht_mercury_distorter SYSTEM PRIVATE ${OpenCV_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR}
)
target_link_libraries(
t_ht_mercury_distorter
PRIVATE
aux_math
aux_tracking
aux_os
aux_util
${OpenCV_LIBRARIES}
ONNXRuntime::ONNXRuntime # no, wrong
)
add_library(t_ht_mercury STATIC hg_sync.cpp hg_sync.hpp hg_interface.h kine_common.hpp)
target_link_libraries(
@ -37,7 +56,7 @@ target_link_libraries(
ONNXRuntime::ONNXRuntime
t_ht_mercury_kine_lm
t_ht_mercury_model
# ncnn # Soon...
t_ht_mercury_distorter
${OpenCV_LIBRARIES}
)

View file

@ -0,0 +1,615 @@
// Copyright 2021-2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Utility to do batch stereographic projections of images.
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#include <cmath>
#include <opencv2/core.hpp>
#include <stdio.h>
#include <string>
#include <unistd.h>
#include "math/m_vec3.h"
#include "math/m_vec2.h"
#include "util/u_time.h"
#include "xrt/xrt_defines.h"
#include "math/m_space.h"
#include <filesystem>
#include <fstream>
#include "os/os_time.h"
#include "util/u_logging.h"
#include "tracking/t_tracking.h"
#include "tracking/t_calibration_opencv.hpp"
#include <iostream>
#include <opencv2/opencv.hpp>
//!@todo This is the wrong way of doing it; we should probably be allocating some space up front for each
//! keypoint estimator to scratch into. Patches welcome!
#define EIGEN_STACK_ALLOCATION_LIMIT 128 * 128 * 4 * 3
#include "math/m_eigen_interop.hpp"
#include "hg_sync.hpp"
namespace xrt::tracking::hand::mercury {
constexpr int wsize = 128;
template <typename T> using OutputSizedArray = Eigen::Array<T, wsize, wsize, Eigen::RowMajor>;
using OutputSizedFloatArray = OutputSizedArray<float>;
struct projection_state
{
cv::Mat &input;
Eigen::Map<OutputSizedArray<uint8_t>> distorted_image_eigen;
t_camera_model_params dist;
const projection_instructions &instructions;
projection_state(const projection_instructions &instructions, cv::Mat &input, cv::Mat &output)
: input(input), distorted_image_eigen(output.data, 128, 128), instructions(instructions){};
};
// A private, purpose-optimized version of the Kannalla-Brandt projection function.
static void
project_kb4(const t_camera_model_params &dist, //
const OutputSizedFloatArray &x, //
const OutputSizedFloatArray &y, //
const OutputSizedFloatArray &z, //
OutputSizedFloatArray &out_x, //
OutputSizedFloatArray &out_y)
{
const OutputSizedFloatArray r2 = x * x + y * y;
const OutputSizedFloatArray r = sqrt(r2);
#if 0
const I theta = atan2(r, z);
#else
// This works here but will not work in eg. a nonlinear optimizer, or for more general applications.
// Takes about 200us off the runtime.
// Basically:
// * We can be sure that z won't be negative only because previous hand tracking code checks this for
// us.
// * x,y,z is normalized so we don't have to worry about numerical stability.
// If neither of these were true we'd definitely need atan2.
//
// Grrr, we really need a good library for fast approximations of trigonometric functions.
const OutputSizedFloatArray theta = atan(r / z);
#endif
const OutputSizedFloatArray theta2 = theta * theta;
#if 0
I r_theta = dist.fisheye.k4 * theta2;
r_theta += dist.fisheye.k3;
r_theta *= theta2;
r_theta += dist.fisheye.k2;
r_theta *= theta2;
r_theta += dist.fisheye.k1;
r_theta *= theta2;
r_theta += 1;
r_theta *= theta;
#else
// This version gives the compiler more options to do FMAs and avoid temporaries. Down to floating point
// precision this should give the same result as the above.
OutputSizedFloatArray r_theta =
(((((dist.fisheye.k4 * theta2) + dist.fisheye.k3) * theta2 + dist.fisheye.k2) * theta2 + dist.fisheye.k1) *
theta2 +
1) *
theta;
#endif
const OutputSizedFloatArray mx = x * r_theta / r;
const OutputSizedFloatArray my = y * r_theta / r;
out_x = dist.fx * mx + dist.cx;
out_y = dist.fy * my + dist.cy;
}
Eigen::Vector3f
stereographic_to_direction(float sg_x, float sg_y)
{
float X = sg_x;
float Y = sg_y;
float denom = (1 + X * X + Y * Y);
float x = (2 * X) / denom;
float y = (2 * Y) / denom;
float z = (-1 + X * X + Y * Y) / denom;
// forward is -z
return {x, y, z};
// return {x / -z, y / -z};
}
template <typename T>
T
map_ranges(T value, T from_low, T from_high, T to_low, T to_high)
{
return (value - from_low) * (to_high - to_low) / (from_high - from_low) + to_low;
}
void
naive_remap(OutputSizedArray<int16_t> &image_x,
OutputSizedArray<int16_t> &image_y,
cv::Mat &input,
Eigen::Map<OutputSizedArray<uint8_t>> &output)
{
output = 0;
for (int y = 0; y < wsize; y++) {
for (int x = 0; x < wsize; x++) {
if (image_y(y, x) < 0) {
continue;
}
if (image_y(y, x) >= input.rows) {
continue;
}
if (image_x(y, x) < 0) {
continue;
}
if (image_x(y, x) >= input.cols) {
continue;
}
output(y, x) = input.at<uint8_t>(image_y(y, x), image_x(y, x));
}
}
}
void
StereographicDistort(projection_state &mi)
{
XRT_TRACE_MARKER();
OutputSizedFloatArray sg_x;
OutputSizedFloatArray sg_y;
// Please vectorize me?
if (mi.instructions.flip) {
for (int x = 0; x < wsize; ++x) {
sg_x.col(x).setConstant(map_ranges<float>((float)x, 0.0f, (float)wsize,
(float)mi.instructions.stereographic_radius,
(float)-mi.instructions.stereographic_radius));
}
} else {
for (int x = 0; x < wsize; ++x) {
sg_x.col(x).setConstant(map_ranges<float>((float)x, 0.0f, (float)wsize,
(float)-mi.instructions.stereographic_radius,
(float)mi.instructions.stereographic_radius));
}
}
// Ditto?
for (int y = 0; y < wsize; ++y) {
sg_y.row(y).setConstant(map_ranges<float>((float)y, 0.0f, (float)wsize,
(float)mi.instructions.stereographic_radius,
(float)-mi.instructions.stereographic_radius));
}
// STEREOGRAPHIC DIRECTION TO 3D DIRECTION
// Note: we do not normalize the direction, because we don't need to. :)
OutputSizedFloatArray dir_x;
OutputSizedFloatArray dir_y;
OutputSizedFloatArray dir_z;
#if 0
dir_x = sg_x * 2;
dir_y = sg_y * 2;
#else
// Adding something to itself is faster than multiplying itself by 2
// and unless you have fast-math the compiler won't do it for you. =/
dir_x = sg_x + sg_x;
dir_y = sg_y + sg_y;
#endif
dir_z = (sg_x * sg_x) + (sg_y * sg_y) - 1;
// END STEREOGRAPHIC DIRECTION TO 3D DIRECTION
// QUATERNION ROTATING VECTOR
Eigen::Matrix<OutputSizedFloatArray, 3, 1> rot_dir;
OutputSizedFloatArray uv0;
OutputSizedFloatArray uv1;
OutputSizedFloatArray uv2;
const Eigen::Quaternionf &q = mi.instructions.rot_quat;
uv0 = q.y() * dir_z - q.z() * dir_y;
uv1 = q.z() * dir_x - q.x() * dir_z;
uv2 = q.x() * dir_y - q.y() * dir_x;
uv0 += uv0;
uv1 += uv1;
uv2 += uv2;
rot_dir.x() = dir_x + q.w() * uv0;
rot_dir.y() = dir_y + q.w() * uv1;
rot_dir.z() = dir_z + q.w() * uv2;
rot_dir.x() += q.y() * uv2 - q.z() * uv1;
rot_dir.y() += q.z() * uv0 - q.x() * uv2;
rot_dir.z() += q.x() * uv1 - q.y() * uv0;
// END QUATERNION ROTATING VECTOR
OutputSizedArray<int16_t> image_x;
OutputSizedArray<int16_t> image_y;
OutputSizedFloatArray image_x_f;
OutputSizedFloatArray image_y_f;
//!@todo optimize
rot_dir.y() *= -1;
rot_dir.z() *= -1;
{
XRT_TRACE_IDENT(camera_projection);
switch (mi.dist.model) {
case T_DISTORTION_FISHEYE_KB4:
// This takes 250us vs 500 because of the removed atan2.
project_kb4(mi.dist, rot_dir.x(), rot_dir.y(), rot_dir.z(), image_x_f, image_y_f);
break;
case T_DISTORTION_OPENCV_RADTAN_8:
// Regular C is plenty fast for radtan :)
for (int i = 0; i < image_x_f.rows(); i++) {
for (int j = 0; j < image_x_f.cols(); j++) {
t_camera_models_project(&mi.dist, rot_dir.x()(i, j), rot_dir.y()(i, j),
rot_dir.z()(i, j), &image_x_f(i, j), &image_y_f(i, j));
}
}
break;
default: assert(false);
}
}
image_x = image_x_f.cast<int16_t>();
image_y = image_y_f.cast<int16_t>();
naive_remap(image_x, image_y, mi.input, mi.distorted_image_eigen);
}
bool
slow(projection_state &mi, float x, float y, cv::Point2i &out)
{
float sg_x =
map_ranges<float>(x, 0, wsize, -mi.instructions.stereographic_radius, mi.instructions.stereographic_radius);
float sg_y =
map_ranges<float>(y, 0, wsize, mi.instructions.stereographic_radius, -mi.instructions.stereographic_radius);
Eigen::Vector3f dir = stereographic_to_direction(sg_x, sg_y);
dir = mi.instructions.rot_quat * dir;
dir.y() *= -1;
dir.z() *= -1;
float _x = {};
float _y = {};
bool ret = t_camera_models_project(&mi.dist, dir.x(), dir.y(), dir.z(), &_x, &_y);
out.x = _x;
out.y = _y;
return ret;
}
void
draw_and_clear(cv::Mat img, std::vector<cv::Point> &line_vec, bool good, cv::Scalar color)
{
if (!good) {
color[0] = 255 - color[0];
color[1] = 255 - color[1];
color[2] = 255 - color[2];
}
cv::polylines(img, line_vec, false, color);
line_vec.clear();
}
void
add_or_draw_line(projection_state &mi, //
int x, //
int y, //
std::vector<cv::Point> &line_vec, //
cv::Scalar color, //
bool &good_most_recent, //
bool &started,
cv::Mat &img)
{
cv::Point2i e = {};
bool retval = slow(mi, x, y, e);
if (!started) {
started = true;
good_most_recent = retval;
line_vec.push_back(e);
return;
}
if (retval != good_most_recent) {
line_vec.push_back(e);
draw_and_clear(img, line_vec, good_most_recent, color);
}
good_most_recent = retval;
line_vec.push_back(e);
}
void
draw_boundary(projection_state &mi, cv::Scalar color, cv::Mat img)
{
std::vector<cv::Point> line_vec = {};
bool good_most_recent = true;
bool started = false;
constexpr float step = 16;
// x = 0, y = 0->128
for (int y = 0; y <= wsize; y += step) {
int x = 0;
add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img);
}
// x = 0->128, y = 128
for (int x = step; x <= wsize; x += step) {
int y = wsize;
add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img);
}
// x = 128, y = 128->0
for (int y = wsize - step; y >= 0; y -= step) {
int x = wsize;
add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img);
}
// x = 128->0, y = 0
for (int x = wsize - step; x >= 0; x -= step) {
int y = 0;
add_or_draw_line(mi, x, y, line_vec, color, good_most_recent, started, img);
}
draw_and_clear(img, line_vec, good_most_recent, color);
}
void
project_21_points_unscaled(Eigen::Array<float, 3, 21> &joints_local, Eigen::Quaternionf rot_quat, hand21_2d &out_joints)
{
for (int i = 0; i < 21; i++) {
Eigen::Vector3f direction = joints_local.col(i); //{d.x, d.y, d.z};
direction.normalize();
direction = rot_quat.conjugate() * direction;
float denom = 1 - direction.z();
float sg_x = direction.x() / denom;
float sg_y = direction.y() / denom;
// sg_x *= mi.stereographic_radius;
// sg_y *= mi.stereographic_radius;
out_joints[i].pos_2d.x = sg_x;
out_joints[i].pos_2d.y = sg_y;
}
}
template <typename V2>
void
project_point_scaled(projection_state &mi, Eigen::Vector3f direction, V2 &out_img_pt)
{
direction = mi.instructions.rot_quat.conjugate() * direction;
float denom = 1 - direction.z();
float sg_x = direction.x() / denom;
float sg_y = direction.y() / denom;
out_img_pt.pos_2d.x = map_ranges<float>(sg_x, -mi.instructions.stereographic_radius,
mi.instructions.stereographic_radius, 0, wsize);
out_img_pt.pos_2d.y = map_ranges<float>(sg_y, mi.instructions.stereographic_radius,
-mi.instructions.stereographic_radius, 0, wsize);
}
void
project_21_points_scaled(projection_state &mi, Eigen::Array<float, 3, 21> &joints_local, hand21_2d &out_joints_in_img)
{
for (int i = 0; i < 21; i++) {
project_point_scaled(mi, Eigen::Vector3f(joints_local.col(i)), out_joints_in_img[i]);
}
}
Eigen::Quaternionf
direction(Eigen::Vector3f dir, float twist)
{
Eigen::Quaternionf one = Eigen::Quaternionf().setFromTwoVectors(-Eigen::Vector3f::UnitZ(), dir);
Eigen::Quaternionf two;
two = Eigen::AngleAxisf(twist, -Eigen::Vector3f::UnitZ());
return one * two;
}
void
add_rel_depth(const Eigen::Array<float, 3, 21> &joints, hand21_2d &out_joints_in_img)
{
float hand_size = (Eigen::Vector3f(joints.col(0)) - Eigen::Vector3f(joints.col(9))).norm();
float midpxm_depth = Eigen::Vector3f(joints.col(9)).norm();
for (int i = 0; i < 21; i++) {
float jd = Eigen::Vector3f(joints.col(i)).norm();
out_joints_in_img[i].depth_relative_to_midpxm = (jd - midpxm_depth) / hand_size;
}
}
static float
palm_length(hand21_2d &joints)
{
vec2_5 wrist = joints[0];
vec2_5 middle_proximal = joints[9];
vec2_5 index_proximal = joints[5];
vec2_5 ring_proximal = joints[17];
float fwd = m_vec2_len(wrist.pos_2d - middle_proximal.pos_2d);
float side = m_vec2_len(index_proximal.pos_2d - ring_proximal.pos_2d);
float length = fmaxf(fwd, side);
return length;
}
void
make_projection_instructions(t_camera_model_params &dist,
bool flip_after,
float expand_val,
float twist,
Eigen::Array<float, 3, 21> &joints,
projection_instructions &out_instructions,
hand21_2d &out_hand)
{
out_instructions.flip = flip_after;
Eigen::Vector3f dir = Eigen::Vector3f(joints.col(9)).normalized();
out_instructions.rot_quat = direction(dir, twist);
Eigen::Vector3f old_direction = dir;
// Tested on Dec 7: This converges in 4 iterations max, usually 2.
for (int i = 0; i < 8; i++) {
project_21_points_unscaled(joints, out_instructions.rot_quat, out_hand);
xrt_vec2 min = out_hand[0].pos_2d;
xrt_vec2 max = out_hand[0].pos_2d;
for (int i = 0; i < 21; i++) {
xrt_vec2 pt = out_hand[i].pos_2d;
min.x = fmin(pt.x, min.x);
min.y = fmin(pt.y, min.y);
max.x = fmax(pt.x, max.x);
max.y = fmax(pt.y, max.y);
}
xrt_vec2 center = m_vec2_mul_scalar(min + max, 0.5);
float r = fmax(center.x - min.x, center.y - min.y);
out_instructions.stereographic_radius = r;
Eigen::Vector3f new_direction = stereographic_to_direction(center.x, center.y);
new_direction = out_instructions.rot_quat * new_direction;
out_instructions.rot_quat = direction(new_direction, twist);
if ((old_direction - dir).norm() < 0.0001) {
// We converged
break;
}
old_direction = dir;
}
// This can basically be removed (we will have converged very well in the above), but for correctness's
// sake, let's project one last time.
project_21_points_unscaled(joints, out_instructions.rot_quat, out_hand);
// These are to ensure that the bounding box doesn't get too small around a closed fist.
float palm_l = palm_length(out_hand);
float radius_around_palm = palm_l * 0.5 * (2.2 / 1.65) * expand_val;
out_instructions.stereographic_radius *= expand_val;
out_instructions.stereographic_radius = fmaxf(out_instructions.stereographic_radius, radius_around_palm);
// This is going straight into the (-1, 1)-normalized space
for (int i = 0; i < 21; i++) {
vec2_5 &v = out_hand[i];
v.pos_2d.x = map_ranges<float>(v.pos_2d.x, -out_instructions.stereographic_radius,
out_instructions.stereographic_radius, -1, 1);
//!@todo optimize
if (flip_after) {
v.pos_2d.x *= -1;
}
//!@todo this is probably wrong, should probably be negated
v.pos_2d.y = map_ranges<float>(v.pos_2d.y, out_instructions.stereographic_radius,
-out_instructions.stereographic_radius, -1, 1);
}
add_rel_depth(joints, out_hand);
}
void
make_projection_instructions_angular(xrt_vec3 direction_3d,
bool flip_after,
float angular_radius,
float expand_val,
float twist,
projection_instructions &out_instructions)
{
out_instructions.flip = flip_after;
xrt_vec3 imaginary_direction = {0, sinf(angular_radius), -cosf(angular_radius)};
out_instructions.stereographic_radius = imaginary_direction.y / (1 - imaginary_direction.z);
math_vec3_normalize(&direction_3d);
Eigen::Vector3f dir = xrt::auxiliary::math::map_vec3(direction_3d);
out_instructions.rot_quat = direction(dir, twist);
out_instructions.stereographic_radius *= expand_val;
}
void
stereographic_project_image(const t_camera_model_params &dist,
const projection_instructions &instructions,
cv::Mat &input_image,
cv::Mat *debug_image,
const cv::Scalar boundary_color,
cv::Mat &out)
{
out = cv::Mat(cv::Size(wsize, wsize), CV_8U);
projection_state mi(instructions, input_image, out);
mi.dist = dist;
// Why am I doing it like this????
// mi.stereographic_radius = instructions.stereographic_radius;
// mi.rot_quat = instructions.rot_quat;
// mi.flip = instructions.flip;
StereographicDistort(mi);
if (debug_image) {
draw_boundary(mi, boundary_color, *debug_image);
}
}
} // namespace xrt::tracking::hand::mercury

View file

@ -72,38 +72,18 @@ hsv2rgb(float fH, float fS, float fV)
return {fR * 255.0f, fG * 255.0f, fB * 255.0f};
}
//! @todo Make it take an array of vec2's and give out an array of vec2's, then
// put it in its own target so we don't have to link to OpenCV.
// Oooorrrrr... actually add good undistortion stuff to Monado so that we don't have to depend on OpenCV at all.
XRT_MAYBE_UNUSED static struct xrt_vec2
raycoord(ht_view *htv, struct xrt_vec2 model_out)
{
model_out.x *= htv->hgt->multiply_px_coord_for_undistort;
model_out.y *= htv->hgt->multiply_px_coord_for_undistort;
cv::Mat in_px_coords(1, 1, CV_32FC2);
float *write_in;
write_in = in_px_coords.ptr<float>(0);
write_in[0] = model_out.x;
write_in[1] = model_out.y;
cv::Mat out_ray(1, 1, CV_32FC2);
if (htv->hgt->use_fisheye) {
cv::fisheye::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion);
} else {
cv::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion);
}
float n_x = out_ray.at<float>(0, 0);
float n_y = out_ray.at<float>(0, 1);
return {n_x, n_y};
}
static void
inline void
handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type)
{
cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, intensity, intensity), type);
}
inline void
handSquare(cv::Mat &debug_frame, xrt_vec2 center, float radius, cv::Scalar color)
{
cv::Point2i pt((int)center.x, (int)center.y);
cv::rectangle(debug_frame, cv::Rect(pt - cv::Point2i(radius / 2, radius / 2), cv::Size(radius, radius)), color,
1);
}
} // namespace xrt::tracking::hand::mercury

View file

@ -11,6 +11,7 @@
// https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/fns_candy_style_transfer/fns_candy_style_transfer.c
#include "hg_sync.hpp"
#include "hg_image_math.inl"
#include "hg_numerics_checker.hpp"
#include <filesystem>
@ -29,7 +30,6 @@ namespace xrt::tracking::hand::mercury {
} \
} while (0)
static cv::Matx23f
blackbar(const cv::Mat &in, enum t_camera_orientation rot, cv::Mat &out, xrt_size out_size)
{
@ -134,9 +134,71 @@ argmax(const float *data, int size)
return out_idx;
}
static void
refine_center_of_distribution(
const float *data, int coarse_x, int coarse_y, int w, int h, float *out_refined_x, float *out_refined_y)
static bool
hand_depth_center_of_mass(struct HandTracking *hgt, float data[22], float *out_depth, float *out_confidence)
{
float avg_location_px_coord = 0;
float sum = 0;
for (int i = 0; i < 22; i++) {
data[i] = fmin(1.0, fmax(data[i], 0.0));
sum += data[i];
avg_location_px_coord += data[i] * (float)i;
}
if (sum < 1e-5) {
HG_DEBUG(hgt, "All depth outputs were zero!");
return false;
}
avg_location_px_coord /= sum;
// std::cout << avg_location_px_coord << std::endl;
// bounds check
if (avg_location_px_coord < 0 || avg_location_px_coord > 21) {
HG_DEBUG(hgt, "Very bad! avg_location_px_coord was %f", avg_location_px_coord);
for (int i = 0; i < 22; i++) {
HG_DEBUG(hgt, "%f", data[i]);
}
avg_location_px_coord = fmin(21.0, fmax(0.0, avg_location_px_coord));
return false;
}
// nan check
if (avg_location_px_coord != avg_location_px_coord) {
HG_DEBUG(hgt, "Very bad! avg_location_px_coord was not a number: %f", avg_location_px_coord);
for (int i = 0; i < 22; i++) {
HG_DEBUG(hgt, "%f", data[i]);
}
*out_depth = 0;
*out_confidence = 0;
return false;
}
// printf("%f %d\n", avg_location_px_coord, (int)avg_location_px_coord);
*out_confidence = data[(int)avg_location_px_coord];
float depth_value = avg_location_px_coord + 0.5;
depth_value /= 22;
depth_value -= 0.5;
depth_value *= 2 * 1.5;
*out_depth = depth_value;
return true;
}
static bool
refine_center_of_distribution(struct HandTracking *hgt, //
const float *data, //
int coarse_x, //
int coarse_y, //
int w, //
int h, //
float *out_refined_x, //
float *out_refined_y)
{
// Be VERY suspicious of this function, it's probably not centering correctly.
float sum_of_values = 0;
@ -170,18 +232,16 @@ refine_center_of_distribution(
// Edge case, will fix soon
*out_refined_x = coarse_x;
*out_refined_y = coarse_y;
U_LOG_E("Failed! %d %d %d %d %d", coarse_x, coarse_y, w, h, max_kern_width);
return;
HG_DEBUG(hgt, "Failed! %d %d %d %d %d", coarse_x, coarse_y, w, h, max_kern_width);
return false;
} else {
*out_refined_x = sum_of_values_times_locations_x / sum_of_values;
*out_refined_y = sum_of_values_times_locations_y / sum_of_values;
return true;
}
*out_refined_x = sum_of_values_times_locations_x / sum_of_values;
*out_refined_y = sum_of_values_times_locations_y / sum_of_values;
return;
}
static void
static bool
normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out)
{
data_in.convertTo(data_out, CV_32FC1, 1 / 255.0);
@ -192,7 +252,7 @@ normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out)
if (stddev.at<double>(0, 0) == 0) {
U_LOG_W("Got image with zero standard deviation!");
return;
return false;
}
data_out *= 0.25 / stddev.at<double>(0, 0);
@ -201,6 +261,7 @@ normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out)
//! @todo optimize
cv::meanStdDev(data_out, mean, stddev);
data_out += (0.5 - mean.at<double>(0, 0));
return true;
}
void
@ -228,18 +289,19 @@ setup_model_image_input(HandTracking *hgt, onnx_wrap *wrap, const char *name, in
{
model_input_wrap inputimg = {};
inputimg.name = name;
inputimg.dimensions.push_back(1);
inputimg.dimensions.push_back(1);
inputimg.dimensions.push_back(h);
inputimg.dimensions.push_back(w);
inputimg.dimensions[0] = 1;
inputimg.dimensions[1] = 1;
inputimg.dimensions[2] = h;
inputimg.dimensions[3] = w;
inputimg.num_dimensions = 4;
size_t data_size = w * h * sizeof(float);
inputimg.data = (float *)malloc(data_size);
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
inputimg.data, //
data_size, //
inputimg.dimensions.data(), //
inputimg.dimensions.size(), //
inputimg.dimensions, //
inputimg.num_dimensions, //
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
&inputimg.tensor));
@ -320,12 +382,12 @@ run_hand_detection(void *ptr)
for (int hand_idx = 0; hand_idx < 2; hand_idx++) {
hand_bounding_box *output = info->outputs[hand_idx];
hand_region_of_interest &output = info->outputs[hand_idx];
output->found = hand_exists[hand_idx] > 0.3;
output.found = hand_exists[hand_idx] > 0.3;
if (output->found) {
output->confidence = hand_exists[hand_idx];
if (output.found) {
output.hand_detection_confidence = hand_exists[hand_idx];
xrt_vec2 _pt = {};
_pt.x = math_map_ranges(cx[hand_idx], -1, 1, 0, kDetectionInputSize);
@ -344,14 +406,11 @@ run_hand_detection(void *ptr)
_pt = transformVecBy2x3(_pt, go_back);
output->center = _pt;
output->size_px = size;
output.center_px = _pt;
output.size_px = size;
if (hgt->debug_scribble) {
cv::Point2i pt((int)output->center.x, (int)output->center.y);
cv::rectangle(debug_frame,
cv::Rect(pt - cv::Point2i(size / 2, size / 2), cv::Size(size, size)),
PINK, 1);
handSquare(debug_frame, output.center_px, output.size_px, PINK);
}
}
@ -377,81 +436,151 @@ init_keypoint_estimation(HandTracking *hgt, onnx_wrap *wrap)
std::filesystem::path path = hgt->models_folder;
path /= "grayscale_keypoint_new.onnx";
path /= "grayscale_keypoint_jan18.onnx";
wrap->wraps.clear();
setup_ort_api(hgt, wrap, path);
setup_model_image_input(hgt, wrap, "inputImg", kKeypointInputSize, kKeypointInputSize);
}
wrap->api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
void
calc_src_tri(cv::Point2f center,
cv::Point2f go_right,
cv::Point2f go_down,
enum t_camera_orientation rot,
cv::Point2f out_src_tri[3])
{
cv::Point2f top_left = {center - go_down - go_right};
cv::Point2f bottom_left = {center + go_down - go_right};
cv::Point2f bottom_right = {center + go_down + go_right};
cv::Point2f top_right = {center - go_down + go_right};
switch (rot) {
case CAMERA_ORIENTATION_0: {
OrtSessionOptions *opts = nullptr;
ORT(CreateSessionOptions(&opts));
// top left
out_src_tri[0] = top_left; // {center - go_down - go_right};
// TODO review options, config for threads?
ORT(SetSessionGraphOptimizationLevel(opts, ORT_ENABLE_ALL));
ORT(SetIntraOpNumThreads(opts, 1));
// bottom left
out_src_tri[1] = bottom_left; //{center + go_down - go_right};
// top right
out_src_tri[2] = top_right; //{center - go_down + go_right};
} break;
case CAMERA_ORIENTATION_90: {
// Need to rotate the view back by -90°
// top left (becomes top right)
out_src_tri[0] = top_right;
ORT(CreateEnv(ORT_LOGGING_LEVEL_FATAL, "monado_ht", &wrap->env));
// bottom left (becomes top left)
out_src_tri[1] = top_left;
ORT(CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &wrap->meminfo));
// top right (becomes bottom right)
out_src_tri[2] = bottom_right;
} break;
// HG_DEBUG(this->device, "Loading hand detection model from file '%s'", path.c_str());
ORT(CreateSession(wrap->env, path.c_str(), opts, &wrap->session));
assert(wrap->session != NULL);
case CAMERA_ORIENTATION_180: {
// top left (becomes bottom right)
out_src_tri[0] = bottom_right;
// size_t input_size = wrap->input_shape[0] * wrap->input_shape[1] * wrap->input_shape[2] *
// wrap->input_shape[3];
// bottom left (becomes top right)
out_src_tri[1] = top_right;
// wrap->data = (float *)malloc(input_size * sizeof(float));
{
model_input_wrap inputimg = {};
inputimg.name = "inputImg";
inputimg.dimensions[0] = 1;
inputimg.dimensions[1] = 1;
inputimg.dimensions[2] = 128;
inputimg.dimensions[3] = 128;
inputimg.num_dimensions = 4;
// top right (becomes bottom left)
out_src_tri[2] = bottom_left;
} break;
case CAMERA_ORIENTATION_270: {
// Need to rotate the view clockwise 90°
// top left (becomes bottom left)
out_src_tri[0] = bottom_left; //{center + go_down - go_right};
inputimg.data = (float *)malloc(128 * 128 * sizeof(float)); // SORRY IM BUSY
// bottom left (becomes bottom right)
out_src_tri[1] = bottom_right; //{center + go_down + go_right};
// top right (becomes top left)
out_src_tri[2] = top_left; //{center - go_down - go_right};
} break;
default: assert(false);
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
inputimg.data, //
128 * 128 * sizeof(float), //
inputimg.dimensions, //
inputimg.num_dimensions, //
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
&inputimg.tensor));
assert(inputimg.tensor);
int is_tensor;
ORT(IsTensor(inputimg.tensor, &is_tensor));
assert(is_tensor);
wrap->wraps.push_back(inputimg);
}
{
model_input_wrap inputimg = {};
inputimg.name = "lastKeypoints";
inputimg.dimensions[0] = 1;
inputimg.dimensions[1] = 42;
inputimg.num_dimensions = 2;
inputimg.data = (float *)malloc(1 * 42 * sizeof(float)); // SORRY IM BUSY
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
inputimg.data, //
1 * 42 * sizeof(float), //
inputimg.dimensions, //
inputimg.num_dimensions, //
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
&inputimg.tensor));
assert(inputimg.tensor);
int is_tensor;
ORT(IsTensor(inputimg.tensor, &is_tensor));
assert(is_tensor);
wrap->wraps.push_back(inputimg);
}
{
model_input_wrap inputimg = {};
inputimg.name = "useLastKeypoints";
inputimg.dimensions[0] = 1;
inputimg.num_dimensions = 1;
inputimg.data = (float *)malloc(1 * sizeof(float)); // SORRY IM BUSY
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
inputimg.data, //
1 * sizeof(float), //
inputimg.dimensions, //
inputimg.num_dimensions, //
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
&inputimg.tensor));
assert(inputimg.tensor);
int is_tensor;
ORT(IsTensor(inputimg.tensor, &is_tensor));
assert(is_tensor);
wrap->wraps.push_back(inputimg);
}
wrap->api->ReleaseSessionOptions(opts);
}
enum xrt_hand_joint joints_ml_to_xr[21]{
XRT_HAND_JOINT_WRIST, //
XRT_HAND_JOINT_THUMB_METACARPAL, //
XRT_HAND_JOINT_THUMB_PROXIMAL, //
XRT_HAND_JOINT_THUMB_DISTAL, //
XRT_HAND_JOINT_THUMB_TIP, //
//
XRT_HAND_JOINT_INDEX_PROXIMAL, //
XRT_HAND_JOINT_INDEX_INTERMEDIATE, //
XRT_HAND_JOINT_INDEX_DISTAL, //
XRT_HAND_JOINT_INDEX_TIP, //
//
XRT_HAND_JOINT_MIDDLE_PROXIMAL, //
XRT_HAND_JOINT_MIDDLE_INTERMEDIATE, //
XRT_HAND_JOINT_MIDDLE_DISTAL, //
XRT_HAND_JOINT_MIDDLE_TIP, //
//
XRT_HAND_JOINT_RING_PROXIMAL, //
XRT_HAND_JOINT_RING_INTERMEDIATE, //
XRT_HAND_JOINT_RING_DISTAL, //
XRT_HAND_JOINT_RING_TIP, //
//
XRT_HAND_JOINT_LITTLE_PROXIMAL, //
XRT_HAND_JOINT_LITTLE_INTERMEDIATE, //
XRT_HAND_JOINT_LITTLE_DISTAL, //
XRT_HAND_JOINT_LITTLE_TIP, //
};
void
make_keypoint_heatmap_output(int camera_idx, int hand_idx, int grid_pt_x, int grid_pt_y, float *plane, cv::Mat &out)
{
int root_x = kVisSpacerSize + ((1 + 2 * hand_idx) * (kKeypointInputSize + kVisSpacerSize));
int root_y = kVisSpacerSize + (camera_idx * (kKeypointInputSize + kVisSpacerSize));
int root_x = 8 + ((1 + 2 * hand_idx) * (128 + 8));
int root_y = 8 + ((2 * camera_idx) * (128 + 8));
int org_x = (root_x) + (grid_pt_x * 25);
int org_y = (root_y) + (grid_pt_y * 25);
@ -464,168 +593,383 @@ make_keypoint_heatmap_output(int camera_idx, int hand_idx, int grid_pt_x, int gr
start.copyTo(out(p));
}
void
make_keypoint_depth_heatmap_output(int camera_idx, //
int hand_idx, //
int grid_pt_x, //
int grid_pt_y,
float *plane, //
cv::Mat &out)
{
int root_x = 8 + ((1 + 2 * hand_idx) * (128 + 8));
int root_y = 8 + ((1 + 2 * camera_idx) * (128 + 8));
int org_x = (root_x) + (grid_pt_x * 25);
int org_y = (root_y) + (grid_pt_y * 25);
cv::Rect p = cv::Rect(org_x, org_y, 22, 22);
cv::Mat start(cv::Size(22, 22), CV_32FC1);
float *ptr = (float *)start.data; // Cope cope cope cope cope
for (int i = 0; i < 22; i++) {
for (int j = 0; j < 22; j++) {
ptr[(i * 22) + j] = plane[i];
}
}
start *= 255.0;
start.copyTo(out(p));
}
void
set_predicted_zero(float *data)
{
for (int i = 0; i < 42; i++) {
data[i] = 0.0f;
}
}
void
run_keypoint_estimation(void *ptr)
{
XRT_TRACE_MARKER();
keypoint_estimation_run_info *info = (keypoint_estimation_run_info *)ptr;
keypoint_estimation_run_info info = *(keypoint_estimation_run_info *)ptr;
onnx_wrap *wrap = &info->view->keypoint[info->hand_idx];
struct HandTracking *hgt = info->view->hgt;
onnx_wrap *wrap = &info.view->keypoint[info.hand_idx];
struct HandTracking *hgt = info.view->hgt;
int view_idx = info.view->view;
int hand_idx = info.hand_idx;
one_frame_one_view &this_output = hgt->keypoint_outputs[hand_idx].views[view_idx];
MLOutput2D &px_coord = this_output.keypoints_in_scaled_stereographic;
// Factor out starting here
cv::Mat &debug = info->view->debug_out_to_this;
hand_region_of_interest &output = info.view->regions_of_interest_this_frame[hand_idx];
hand_bounding_box *output = &info->view->bboxes_this_frame[info->hand_idx];
cv::Mat data_128x128_uint8;
cv::Point2f src_tri[3];
cv::Point2f dst_tri[3];
// top-left
cv::Point2f center = {output->center.x, output->center.y};
projection_instructions instr(info.view->hgdist);
instr.rot_quat = Eigen::Quaternionf::Identity();
instr.stereographic_radius = 0.4;
cv::Point2f go_right = {output->size_px / 2, 0};
cv::Point2f go_down = {0, output->size_px / 2};
calc_src_tri(center, go_right, go_down, info->view->camera_info.camera_orientation, src_tri);
/* For the right hand, flip the result horizontally since
* the model is trained on left hands.
* Top left, bottom left, top right */
if (info->hand_idx == 1) {
dst_tri[0] = {kKeypointInputSize, 0};
dst_tri[1] = {kKeypointInputSize, kKeypointInputSize};
dst_tri[2] = {0, 0};
t_camera_model_params dist = info.view->hgdist;
float twist = 0;
if (output.provenance == ROIProvenance::HAND_DETECTION) {
xrt_vec3 center;
xrt_vec3 edges[4];
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, output.center_px.x, output.center_px.y,
&center.x, &center.y, &center.z);
xrt_vec2 r = {output.size_px / 2, 0};
xrt_vec2 d = {0, output.size_px / 2};
xrt_vec2 v;
// note! we do not need to rotate this! it's *already* in camera space.
int acc_idx = 0;
v = output.center_px + r + d;
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x,
&edges[acc_idx].y, &edges[acc_idx].z);
acc_idx++;
v = output.center_px - r + d;
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x,
&edges[acc_idx].y, &edges[acc_idx].z);
acc_idx++;
v = output.center_px - r - d;
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x,
&edges[acc_idx].y, &edges[acc_idx].z);
acc_idx++;
v = output.center_px + r - d;
t_camera_models_unproject_and_flip(&hgt->views[view_idx].hgdist, v.x, v.y, &edges[acc_idx].x,
&edges[acc_idx].y, &edges[acc_idx].z);
float angle = 0;
for (int i = 0; i < 4; i++) {
angle = fmaxf(angle, m_vec3_angle(center, edges[i]));
}
make_projection_instructions_angular(center, hand_idx, angle,
hgt->tuneable_values.after_detection_fac.val, twist, instr);
wrap->wraps[2].data[0] = 0.0f;
set_predicted_zero(wrap->wraps[1].data);
} else {
dst_tri[0] = {0, 0};
dst_tri[1] = {0, kKeypointInputSize};
dst_tri[2] = {kKeypointInputSize, 0};
Eigen::Array<float, 3, 21> keypoints_in_camera;
if (view_idx == 0) {
keypoints_in_camera = hgt->pose_predicted_keypoints[hand_idx];
} else {
for (int i = 0; i < 21; i++) {
Eigen::Quaternionf ori = map_quat(hgt->left_in_right.orientation);
Eigen::Vector3f tmp = hgt->pose_predicted_keypoints[hand_idx].col(i);
tmp = ori * tmp;
tmp += map_vec3(hgt->left_in_right.position);
keypoints_in_camera.col(i) = tmp;
}
}
hand21_2d bleh;
make_projection_instructions(dist, hand_idx, hgt->tuneable_values.dyn_radii_fac.val, twist,
keypoints_in_camera, instr, bleh);
if (hgt->tuneable_values.enable_pose_predicted_input) {
for (int ml_joint_idx = 0; ml_joint_idx < 21; ml_joint_idx++) {
float *data = wrap->wraps[1].data;
data[(ml_joint_idx * 2) + 0] = bleh[ml_joint_idx].pos_2d.x;
data[(ml_joint_idx * 2) + 1] = bleh[ml_joint_idx].pos_2d.y;
// data[(ml_joint_idx * 2) + 2] = bleh[ml_joint_idx].depth_relative_to_midpxm;
}
wrap->wraps[2].data[0] = 1.0f;
} else {
wrap->wraps[2].data[0] = 0.0f;
set_predicted_zero(wrap->wraps[1].data);
}
}
cv::Matx23f go_there = getAffineTransform(src_tri, dst_tri);
cv::Matx23f go_back = getAffineTransform(dst_tri, src_tri); // NOLINT
stereographic_project_image(dist, instr, hgt->views[view_idx].run_model_on_this,
&hgt->views[view_idx].debug_out_to_this, info.hand_idx ? RED : YELLOW,
data_128x128_uint8);
cv::Mat cropped_image_uint8;
xrt::auxiliary::math::map_quat(this_output.look_dir) = instr.rot_quat;
this_output.stereographic_radius = instr.stereographic_radius;
bool is_hand = true;
{
XRT_TRACE_IDENT(transforms);
XRT_TRACE_IDENT(convert_format);
cv::warpAffine(info->view->run_model_on_this, cropped_image_uint8, go_there,
cv::Size(kKeypointInputSize, kKeypointInputSize), cv::INTER_LINEAR);
// here!
cv::Mat data_128x128_float(cv::Size(128, 128), CV_32FC1, wrap->wraps[0].data, 128 * sizeof(float));
cv::Mat cropped_image_float_wrapper(cv::Size(kKeypointInputSize, kKeypointInputSize), //
CV_32FC1, //
wrap->wraps[0].data, //
kKeypointInputSize * sizeof(float));
normalizeGrayscaleImage(cropped_image_uint8, cropped_image_float_wrapper);
is_hand = is_hand && normalizeGrayscaleImage(data_128x128_uint8, data_128x128_float);
}
// Ending here
const OrtValue *inputs[] = {wrap->wraps[0].tensor};
const char *input_names[] = {wrap->wraps[0].name};
OrtValue *output_tensors[] = {nullptr};
const OrtValue *inputs[] = {wrap->wraps[0].tensor, wrap->wraps[1].tensor, wrap->wraps[2].tensor};
const char *input_names[] = {wrap->wraps[0].name, wrap->wraps[1].name, wrap->wraps[2].name};
const char *output_names[] = {"heatmap"};
// OrtValue *output_tensor = nullptr;
OrtValue *output_tensors[] = {nullptr, nullptr, nullptr, nullptr};
const char *output_names[] = {"heatmap_xy", "heatmap_depth", "scalar_extras", "curls"};
{
XRT_TRACE_IDENT(model);
static_assert(ARRAY_SIZE(input_names) == ARRAY_SIZE(inputs));
static_assert(ARRAY_SIZE(output_names) == ARRAY_SIZE(output_tensors));
assert(ARRAY_SIZE(input_names) == ARRAY_SIZE(inputs));
assert(ARRAY_SIZE(output_names) == ARRAY_SIZE(output_tensors));
ORT(Run(wrap->session, nullptr, input_names, inputs, ARRAY_SIZE(input_names), output_names,
ARRAY_SIZE(output_names), output_tensors));
}
// To here
// Interpret model outputs!
float *out_data = nullptr;
ORT(GetTensorMutableData(output_tensors[0], (void **)&out_data));
Hand2D &px_coord = info->view->keypoint_outputs[info->hand_idx].hand_px_coord;
Hand2D &tan_space = info->view->keypoint_outputs[info->hand_idx].hand_tan_space;
float *confidences = info->view->keypoint_outputs[info->hand_idx].hand_tan_space.confidences;
xrt_vec2 *keypoints_global = px_coord.kps;
// I don't know why this was added
// float *confidences = info.view->keypoint_outputs.views[hand_idx].confidences;
size_t plane_size = kKeypointOutputHeatmapSize * kKeypointOutputHeatmapSize;
// This was added for debug scribbling, and should be added back at some point.
// xrt_vec2 *keypoints_global = px_coord.kps;
size_t plane_size = 22 * 22;
for (int i = 0; i < 21; i++) {
float *data = &out_data[i * plane_size];
int out_idx = argmax(data, kKeypointOutputHeatmapSize * kKeypointOutputHeatmapSize);
int row = out_idx / kKeypointOutputHeatmapSize;
int col = out_idx % kKeypointOutputHeatmapSize;
xrt_vec2 loc;
refine_center_of_distribution(data, col, row, kKeypointOutputHeatmapSize, kKeypointOutputHeatmapSize,
&loc.x, &loc.y);
// This will be optimized out if nan checking is disabled in hg_numerics_checker
for (size_t x = 0; x < plane_size; x++) {
CHECK_NOT_NAN(data[i]);
}
// 128.0/22.0f
loc.x *= float(kKeypointInputSize) / float(kKeypointOutputHeatmapSize);
loc.y *= float(kKeypointInputSize) / float(kKeypointOutputHeatmapSize);
loc = transformVecBy2x3(loc, go_back);
int out_idx = argmax(data, 22 * 22);
int row = out_idx / 22;
int col = out_idx % 22;
confidences[i] = data[out_idx];
px_coord.kps[i] = loc;
xrt_vec2 loc = {};
tan_space.kps[i] = raycoord(info->view, loc);
// This is a good start but rethink it. Maybe fail if less than 18/21 joints failed?
// is_hand = is_hand &&
refine_center_of_distribution(hgt, data, col, row, 22, 22, &loc.x, &loc.y);
if (hand_idx == 0) {
px_coord[i].pos_2d.x = math_map_ranges(loc.x, 0, 22, -1, 1);
} else {
px_coord[i].pos_2d.x = math_map_ranges(loc.x, 0, 22, 1, -1);
}
//!@todo when you change this to have +Z-forward
// note note note the flip!!!
px_coord[i].pos_2d.y = math_map_ranges(loc.y, 0, 22, 1, -1);
px_coord[i].confidence_xy = data[out_idx];
}
float *out_data_depth = nullptr;
ORT(GetTensorMutableData(output_tensors[1], (void **)&out_data_depth));
for (int joint_idx = 0; joint_idx < 21; joint_idx++) {
float *p_ptr = &out_data_depth[(joint_idx * 22)];
float depth = 0;
float confidence = 0;
// This function can fail
if (hand_depth_center_of_mass(hgt, p_ptr, &depth, &confidence)) {
px_coord[joint_idx].depth_relative_to_midpxm = depth;
px_coord[joint_idx].confidence_depth = confidence;
} else {
px_coord[joint_idx].depth_relative_to_midpxm = 0;
px_coord[joint_idx].confidence_depth = 0;
}
}
float *out_data_extras = nullptr;
ORT(GetTensorMutableData(output_tensors[2], (void **)&out_data_extras));
float is_hand_explicit = out_data_extras[0];
is_hand_explicit = (1.0) / (1.0 + powf(2.71828182845904523536, -is_hand_explicit));
// When the model is sure, it's _really_ sure.
// Index was fine with 0.99.
// North Star seemed to need 0.97.
if (is_hand_explicit < 0.97) {
U_LOG_D("Not hand! %f", is_hand_explicit);
is_hand = false;
}
this_output.active = is_hand;
float *out_data_curls = nullptr;
ORT(GetTensorMutableData(output_tensors[3], (void **)&out_data_curls));
for (int i = 0; i < 5; i++) {
float curl = out_data_curls[i];
float variance = out_data_curls[5 + i];
// Next two lines directly correspond to py_training/settings.py
// We don't want it to be negative
variance = fabsf(variance);
// We don't want it to be possible to be zero
variance += 0.01;
this_output.curls[i].value = curl;
this_output.curls[i].variance = curl;
}
if (hgt->debug_scribble) {
int data_acc_idx = 0;
int root_x = kVisSpacerSize + ((2 * info->hand_idx) * (kKeypointInputSize + kVisSpacerSize));
int root_y = kVisSpacerSize + (info->view->view * (kKeypointInputSize + kVisSpacerSize));
int root_x = 8 + ((2 * hand_idx) * (128 + 8));
int root_y = 8 + (2 * info.view->view * (128 + 8));
cv::Rect p = cv::Rect(root_x, root_y, kKeypointInputSize, kKeypointInputSize);
cv::Rect p = cv::Rect(root_x, root_y, 128, 128);
cropped_image_uint8.copyTo(hgt->visualizers.mat(p));
data_128x128_uint8.copyTo(hgt->visualizers.mat(p));
make_keypoint_heatmap_output(info.view->view, hand_idx, 0, 0, out_data + (data_acc_idx * plane_size),
hgt->visualizers.mat);
make_keypoint_depth_heatmap_output(info.view->view, hand_idx, 0, 0,
out_data_depth + (data_acc_idx * 22), hgt->visualizers.mat);
make_keypoint_heatmap_output(info->view->view, info->hand_idx, 0, 0,
out_data + (data_acc_idx * plane_size), hgt->visualizers.mat);
data_acc_idx++;
for (int finger = 0; finger < 5; finger++) {
for (int joint = 0; joint < 4; joint++) {
make_keypoint_heatmap_output(info->view->view, info->hand_idx, 1 + joint, finger,
make_keypoint_heatmap_output(info.view->view, hand_idx, 1 + joint, finger,
out_data + (data_acc_idx * plane_size),
hgt->visualizers.mat);
make_keypoint_depth_heatmap_output(info.view->view, hand_idx, 1 + joint, finger,
out_data_depth + (data_acc_idx * 22),
hgt->visualizers.mat);
data_acc_idx++;
}
}
// Hand existence
char amt[5];
snprintf(amt, ARRAY_SIZE(amt), "%.2f", is_hand_explicit);
cv::Point2i text_origin;
text_origin.x = root_x + 128 + 2;
text_origin.y = root_y + 60;
// Clear out what was there before
cv::rectangle(hgt->visualizers.mat, cv::Rect(text_origin - cv::Point2i{0, 25}, cv::Size{30, 30}), {255},
cv::FILLED);
if (hgt->tuneable_values.scribble_keypoint_model_outputs) {
for (int finger = 0; finger < 5; finger++) {
cv::Point last = {(int)keypoints_global[0].x, (int)keypoints_global[0].y};
for (int joint = 0; joint < 4; joint++) {
cv::Point the_new = {(int)keypoints_global[1 + finger * 4 + joint].x,
(int)keypoints_global[1 + finger * 4 + joint].y};
cv::putText(hgt->visualizers.mat, amt, text_origin, cv::FONT_HERSHEY_SIMPLEX, 0.3, {0, 0, 0});
cv::line(debug, last, the_new, colors[info->hand_idx]);
last = the_new;
}
}
// Curls
cv::rectangle(hgt->visualizers.mat, cv::Rect(cv::Point2i{root_x, root_y + 128 + 22}, cv::Size{128, 60}),
{255}, cv::FILLED);
for (int i = 0; i < 5; i++) {
int r = 15;
for (int i = 0; i < 21; i++) {
xrt_vec2 loc = keypoints_global[i];
handDot(debug, loc, 2, (float)(i) / 21.0, 1, 2);
}
cv::Point2i center = {root_x + r + (20 * i), root_y + 128 + 60};
cv::circle(hgt->visualizers.mat, center, 1, {0}, 1);
float c = this_output.curls[i].value * 0.3;
int x = cos(c) * r;
// Remember, OpenCV has (0,0) at top left
int y = -sin(c) * r;
cv::Point2i pt2 = {x, y};
pt2 += center;
cv::circle(hgt->visualizers.mat, pt2, 1, {0}, 1);
cv::line(hgt->visualizers.mat, center, pt2, {0}, 1);
}
}
wrap->api->ReleaseValue(output_tensors[0]);
for (size_t i = 0; i < ARRAY_SIZE(output_tensors); i++) {
wrap->api->ReleaseValue(output_tensors[i]);
}
}
void
@ -640,4 +984,4 @@ release_onnx_wrap(onnx_wrap *wrap)
wrap->api->ReleaseEnv(wrap->env);
}
} // namespace xrt::tracking::hand::mercury
} // namespace xrt::tracking::hand::mercury

View file

@ -0,0 +1,62 @@
// Copyright 2021-2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Simple util for setting floating-point exceptions and checking for NaNs.
* @author Moses Turner <moses@collabora.com>
* @ingroup drv_ht
*/
#undef PEDANTIC_NAN_CHECKS
#undef NAN_EXCEPTIONS
#ifdef NAN_EXCEPTIONS
#include <cfenv>
#endif
namespace xrt::tracking::hand::mercury::numerics_checker {
#ifdef PEDANTIC_NAN_CHECKS
#define CHECK_NOT_NAN(val) \
do { \
if (val != val) { \
U_LOG_E(" was NAN at %d", __LINE__); \
assert(false); \
} \
\
} while (0)
#else
#define CHECK_NOT_NAN(val) (void)val;
#endif
#ifdef NAN_EXCEPTIONS
static int ex = FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID;
static inline void
set_floating_exceptions()
{
// NO: FE_UNDERFLOW, FE_INEXACT
// https://stackoverflow.com/questions/60731382/c-setting-floating-point-exception-environment
feenableexcept(ex); // Uncomment this for version 2
}
static inline void
remove_floating_exceptions()
{
fedisableexcept(ex);
}
#else
static inline void
set_floating_exceptions()
{}
static inline void
remove_floating_exceptions()
{}
#endif
} // namespace xrt::tracking::hand::mercury::numerics_checker

File diff suppressed because it is too large Load diff

View file

@ -12,9 +12,10 @@
#include "hg_interface.h"
#include "tracking/t_calibration_opencv.hpp"
#include "tracking/t_hand_tracking.h"
#include "tracking/t_camera_models.h"
#include "xrt/xrt_defines.h"
#include "xrt/xrt_frame.h"
@ -22,6 +23,7 @@
#include "math/m_vec2.h"
#include "math/m_vec3.h"
#include "math/m_mathinclude.h"
#include "math/m_eigen_interop.hpp"
#include "util/u_frame_times_widget.h"
#include "util/u_logging.h"
@ -48,10 +50,10 @@
#include "kine_lm/lm_interface.hpp"
namespace xrt::tracking::hand::mercury {
using namespace xrt::auxiliary::util;
using namespace xrt::auxiliary::math;
#define HG_TRACE(hgt, ...) U_LOG_IFL_T(hgt->log_level, __VA_ARGS__)
#define HG_DEBUG(hgt, ...) U_LOG_IFL_D(hgt->log_level, __VA_ARGS__)
@ -68,30 +70,85 @@ static constexpr uint16_t kVisSpacerSize = 8;
static const cv::Scalar RED(255, 30, 30);
static const cv::Scalar YELLOW(255, 255, 0);
static const cv::Scalar PINK(255, 0, 255);
static const cv::Scalar GREEN(0, 255, 0);
static const cv::Scalar colors[2] = {YELLOW, RED};
constexpr enum xrt_hand_joint joints_5x5_to_26[5][5] = {
{
XRT_HAND_JOINT_WRIST,
XRT_HAND_JOINT_THUMB_METACARPAL,
XRT_HAND_JOINT_THUMB_PROXIMAL,
XRT_HAND_JOINT_THUMB_DISTAL,
XRT_HAND_JOINT_THUMB_TIP,
},
{
XRT_HAND_JOINT_INDEX_METACARPAL,
XRT_HAND_JOINT_INDEX_PROXIMAL,
XRT_HAND_JOINT_INDEX_INTERMEDIATE,
XRT_HAND_JOINT_INDEX_DISTAL,
XRT_HAND_JOINT_INDEX_TIP,
},
{
XRT_HAND_JOINT_MIDDLE_METACARPAL,
XRT_HAND_JOINT_MIDDLE_PROXIMAL,
XRT_HAND_JOINT_MIDDLE_INTERMEDIATE,
XRT_HAND_JOINT_MIDDLE_DISTAL,
XRT_HAND_JOINT_MIDDLE_TIP,
},
{
XRT_HAND_JOINT_RING_METACARPAL,
XRT_HAND_JOINT_RING_PROXIMAL,
XRT_HAND_JOINT_RING_INTERMEDIATE,
XRT_HAND_JOINT_RING_DISTAL,
XRT_HAND_JOINT_RING_TIP,
},
{
XRT_HAND_JOINT_LITTLE_METACARPAL,
XRT_HAND_JOINT_LITTLE_PROXIMAL,
XRT_HAND_JOINT_LITTLE_INTERMEDIATE,
XRT_HAND_JOINT_LITTLE_DISTAL,
XRT_HAND_JOINT_LITTLE_TIP,
},
};
namespace ROIProvenance {
enum ROIProvenance
{
HAND_DETECTION,
POSE_PREDICTION
};
}
// Forward declaration for ht_view
struct HandTracking;
struct ht_view;
// Using the compiler to stop me from getting 2D space mixed up with 3D space.
struct Hand2D
{
struct xrt_vec2 kps[21];
float confidences[21];
};
struct Hand3D
{
struct xrt_vec3 kps[21];
};
using hand21_2d = std::array<vec2_5, 21>;
struct projection_instructions
{
Eigen::Quaternionf rot_quat = Eigen::Quaternionf::Identity();
float stereographic_radius = 0;
bool flip = false;
const t_camera_model_params &dist;
projection_instructions(const t_camera_model_params &dist) : dist(dist) {}
};
struct model_input_wrap
{
float *data = nullptr;
// int64_t isn't a bug; that's what onnxruntime wants.
std::vector<int64_t> dimensions = {};
int64_t dimensions[4];
size_t num_dimensions = 0;
OrtValue *tensor = nullptr;
const char *name;
};
@ -104,31 +161,37 @@ struct onnx_wrap
OrtMemoryInfo *meminfo = nullptr;
OrtSession *session = nullptr;
std::vector<model_input_wrap> wraps;
std::vector<model_input_wrap> wraps = {};
};
struct hand_bounding_box
// Multipurpose.
// * Hand detector writes into center_px, size_px, found and hand_detection_confidence
// * Keypoint estimator operates on this to a direction/radius for the stereographic projection, and for the associated
// keypoints.
struct hand_region_of_interest
{
xrt_vec2 center;
ROIProvenance::ROIProvenance provenance;
// Either set by the detection model or by predict_new_regions_of_interest/back_project
xrt_vec2 center_px;
float size_px;
bool found;
float confidence;
bool hand_detection_confidence;
};
struct hand_detection_run_info
{
ht_view *view;
hand_bounding_box *outputs[2];
// These are not duplicates of ht_view's regions_of_interest_this_frame!
// If some hands are already tracked, we have logic that only copies new ROIs to this frame's regions of
// interest.
hand_region_of_interest outputs[2];
};
struct keypoint_output
{
Hand2D hand_px_coord;
Hand2D hand_tan_space;
float confidences[21];
};
struct keypoint_estimation_run_info
{
ht_view *view;
@ -144,33 +207,19 @@ struct ht_view
struct t_camera_extra_info_one_view camera_info;
cv::Mat distortion;
cv::Matx<double, 3, 3> cameraMatrix;
cv::Matx33d rotate_camera_to_stereo_camera; // R1 or R2
t_camera_model_params hgdist_orig;
// With fx, fy, cx, cy scaled to the current camera resolution as appropriate.
t_camera_model_params hgdist;
cv::Mat run_model_on_this;
cv::Mat debug_out_to_this;
struct hand_bounding_box bboxes_this_frame[2]; // left, right
struct hand_region_of_interest regions_of_interest_this_frame[2]; // left, right
struct keypoint_estimation_run_info run_info[2];
struct keypoint_output keypoint_outputs[2];
};
struct hand_history
{
HistoryBuffer<xrt_hand_joint_set, 5> hands;
HistoryBuffer<uint64_t, 5> timestamps;
};
struct output_struct_one_frame
{
xrt_vec2 left[21];
float confidences_left[21];
xrt_vec2 right[21];
float confidences_right[21];
};
struct hand_size_refinement
{
@ -209,7 +258,6 @@ public:
float multiply_px_coord_for_undistort;
bool use_fisheye;
struct t_stereo_camera_calibration *calib;
@ -240,22 +288,44 @@ public:
lm::KinematicHandLM *kinematic_hands[2];
// struct hand_detection_state_tracker st_det[2] = {};
// These are produced by the keypoint estimator and consumed by the nonlinear optimizer
// left hand, right hand THEN left view, right view
struct one_frame_input keypoint_outputs[2];
// Used to track whether this hand has *ever* been seen during this user's session, so that we can spend some
// extra time optimizing their hand size if one of their hands isn't visible for the first bit.
bool hand_seen_before[2] = {false, false};
// Used to:
// * see if a hand is currently being tracked.
// * If so, don't replace the bounding box with that from a hand detection.
// * Also, if both hands are being tracked, we just don't run the hand detector.
bool last_frame_hand_detected[2] = {false, false};
// Used to decide whether to run the keypoint estimator/nonlinear optimizer.
bool this_frame_hand_detected[2] = {false, false};
struct hand_history histories[2];
// Used to determine pose-predicted regions of interest. Contains the last five hand keypoint positions, or less
// if the hand has just started being tracked.
HistoryBuffer<Eigen::Array<float, 3, 21>, 5> history_hands[2] = {};
// Contains the last 5 timestamps, or less if hand tracking has just started.
HistoryBuffer<uint64_t, 5> history_timestamps = {};
// left hand, right hand
Eigen::Array<float, 3, 21> pose_predicted_keypoints[2];
int detection_counter = 0;
struct hand_size_refinement refinement = {};
// Moses hand size is ~0.095; they has big-ish hands so let's do 0.09
float target_hand_size = 0.09;
float target_hand_size = STANDARD_HAND_SIZE;
xrt_frame *debug_frame;
// This should be removed.
void (*keypoint_estimation_run_func)(void *);
@ -267,15 +337,25 @@ public:
struct
{
bool new_user_event = false;
struct u_var_draggable_f32 after_detection_fac;
struct u_var_draggable_f32 dyn_radii_fac;
struct u_var_draggable_f32 dyn_joint_y_angle_error;
struct u_var_draggable_f32 amount_to_lerp_prediction;
struct u_var_draggable_f32 max_permissible_iou;
bool scribble_predictions_into_this_frame = false;
struct u_var_draggable_f32 amt_use_depth;
struct u_var_draggable_f32 mpiou_any;
struct u_var_draggable_f32 mpiou_single_detection;
struct u_var_draggable_f32 mpiou_double_detection;
struct u_var_draggable_f32 max_reprojection_error;
struct u_var_draggable_f32 opt_smooth_factor;
struct u_var_draggable_f32 max_hand_dist;
bool scribble_predictions_into_next_frame = false;
bool scribble_keypoint_model_outputs = false;
bool scribble_optimizer_outputs = true;
bool always_run_detection_model = false;
int max_num_outside_view = 3;
bool always_run_detection_model = false; // true
bool optimize_hand_size = true;
int max_num_outside_view = 6;
bool enable_pose_predicted_input = true;
bool enable_framerate_based_smoothing = false;
} tuneable_values;
@ -317,4 +397,33 @@ run_keypoint_estimation(void *ptr);
void
release_onnx_wrap(onnx_wrap *wrap);
void
make_projection_instructions(t_camera_model_params &dist,
bool flip_after,
float expand_val,
float twist,
Eigen::Array<float, 3, 21> &joints,
projection_instructions &out_instructions,
hand21_2d &out_hand);
void
make_projection_instructions_angular(xrt_vec3 direction_3d,
bool flip_after,
float angular_radius,
float expand_val,
float twist,
projection_instructions &out_instructions);
void
stereographic_project_image(const t_camera_model_params &dist,
const projection_instructions &instructions,
cv::Mat &input_image,
cv::Mat *debug_image,
const cv::Scalar boundary_color,
cv::Mat &out);
} // namespace xrt::tracking::hand::mercury

View file

@ -8,6 +8,7 @@
*/
#pragma once
#include <array>
#include "xrt/xrt_defines.h"
namespace xrt::tracking::hand::mercury {
@ -17,14 +18,35 @@ namespace xrt::tracking::hand::mercury {
// Different from `Scalar` in lm_* templates - those can be `Ceres::Jet`s too.
typedef float HandScalar;
// Used for "2.5D" joint locations, with a 2D ray direction in stereographic-space and a 1D depth relative to the
// middle-proximal joint.
struct vec2_5
{
xrt_vec2 pos_2d;
float depth_relative_to_midpxm;
float confidence_xy;
float confidence_depth;
};
// Using the compiler to stop me from getting 2D space mixed up with 3D space.
using MLOutput2D = std::array<vec2_5, 21>;
struct one_curl
{
float value;
float variance;
};
// Inputs to kinematic optimizers
//!@todo Ask Ryan if adding `= {}` only does something if we do one_frame_one_view bla = {}.
struct one_frame_one_view
{
bool active = true;
xrt_vec3 rays[21] = {};
HandScalar confidences[21] = {};
xrt_quat look_dir;
float stereographic_radius;
MLOutput2D keypoints_in_scaled_stereographic;
one_curl curls[5];
};
struct one_frame_input
@ -64,4 +86,9 @@ namespace Joint21 {
};
}
//!@todo These are not backed up by real anthropometry data; they are just guesstimates. Patches welcome!
constexpr HandScalar MIN_HAND_SIZE = 0.06;
constexpr HandScalar STANDARD_HAND_SIZE = 0.09;
constexpr HandScalar MAX_HAND_SIZE = 0.12;
} // namespace xrt::tracking::hand::mercury

View file

@ -8,11 +8,8 @@
*/
#pragma once
// #include <Eigen/Core>
// #include <Eigen/Geometry>
#include "math/m_mathinclude.h"
#include "../kine_common.hpp"
#include <type_traits>
namespace xrt::tracking::hand::mercury::lm {
@ -48,17 +45,21 @@ static constexpr size_t kNumOrientationsInFinger = 4;
// Not tested/tuned well enough; might make tracking slow.
#undef USE_HAND_PLAUSIBILITY
// Should work, but our neural nets aren't good enough yet.
#undef USE_HAND_CURLS
#undef RESIDUALS_HACKING
static constexpr size_t kMetacarpalBoneDim = 3;
static constexpr size_t kProximalBoneDim = 2;
static constexpr size_t kFingerDim = kProximalBoneDim + kMetacarpalBoneDim + 2;
static constexpr size_t kFingerDim = kProximalBoneDim + 2;
static constexpr size_t kThumbDim = kMetacarpalBoneDim + 2;
static constexpr size_t kHandSizeDim = 1;
static constexpr size_t kHandTranslationDim = 3;
static constexpr size_t kHandOrientationDim = 3;
// HRTC = Hand Residual Temporal Consistency
static constexpr size_t kHRTC_HandSize = 1;
static constexpr size_t kHRTC_RootBoneTranslation = 3;
static constexpr size_t kHRTC_RootBoneOrientation = 3; // Direct difference between the two angle-axis rotations. This
@ -69,12 +70,21 @@ static constexpr size_t kHRTC_ThumbCurls = 2;
static constexpr size_t kHRTC_ProximalSimilarity = 2;
static constexpr size_t kHRTC_FingerMCPSwingTwist = 3;
static constexpr size_t kHRTC_FingerMCPSwingTwist = 0;
static constexpr size_t kHRTC_FingerPXMSwing = 2;
static constexpr size_t kHRTC_FingerCurls = 2;
static constexpr size_t kHRTC_CurlSimilarity = 1;
static constexpr size_t kHandResidualOneSideSize = 21 * 2;
static constexpr size_t kHandResidualOneSideXY = (kNumNNJoints * 2);
static constexpr size_t kHandResidualOneSideDepth = 20; // one less because midxpm joint isn't used
#ifdef USE_HAND_CURLS
static constexpr size_t kHandResidualOneSideMatchCurls = 4;
#else
static constexpr size_t kHandResidualOneSideMatchCurls = 0;
#endif
static constexpr size_t kHandResidualOneSideSize =
kHandResidualOneSideXY + kHandResidualOneSideDepth + kHandResidualOneSideMatchCurls;
static constexpr size_t kHandResidualTemporalConsistencyOneFingerSize = //
kHRTC_FingerMCPSwingTwist + //
@ -97,27 +107,49 @@ static constexpr size_t kHandResidualTemporalConsistencySize = //
0;
// Factors to multiply different values by to get a smooth hand trajectory without introducing too much latency
class HandStability
{
public:
HandScalar stabilityRoot;
HandScalar stabilityCurlRoot;
HandScalar stabilityOtherRoot;
// 1.0 is good, a little jittery.
// Anything above 3.0 generally breaks.
static constexpr HandScalar kStabilityRoot = 1.0;
static constexpr HandScalar kStabilityCurlRoot = kStabilityRoot * 0.03f;
static constexpr HandScalar kStabilityOtherRoot = kStabilityRoot * 0.03f;
HandScalar stabilityThumbMCPSwing;
HandScalar stabilityThumbMCPTwist;
static constexpr HandScalar kStabilityThumbMCPSwing = kStabilityCurlRoot * 1.5f;
static constexpr HandScalar kStabilityThumbMCPTwist = kStabilityCurlRoot * 1.5f;
HandScalar stabilityFingerMCPSwing;
HandScalar stabilityFingerMCPTwist;
static constexpr HandScalar kStabilityFingerMCPSwing = kStabilityCurlRoot * 3.0f;
static constexpr HandScalar kStabilityFingerMCPTwist = kStabilityCurlRoot * 3.0f;
HandScalar stabilityFingerPXMSwingX;
HandScalar stabilityFingerPXMSwingY;
static constexpr HandScalar kStabilityFingerPXMSwingX = kStabilityCurlRoot * 1.0f;
static constexpr HandScalar kStabilityFingerPXMSwingY = kStabilityCurlRoot * 1.6f;
HandScalar stabilityRootPosition;
HandScalar stabilityHandSize;
static constexpr HandScalar kStabilityRootPosition = kStabilityOtherRoot * 30;
static constexpr HandScalar kStabilityHandSize = kStabilityOtherRoot * 1000;
HandScalar stabilityHandOrientationZ;
HandScalar stabilityHandOrientationXY;
HandStability(float root = 15.0f)
{
this->stabilityRoot = root;
this->stabilityCurlRoot = this->stabilityRoot * 0.03f;
this->stabilityOtherRoot = this->stabilityRoot * 0.03f;
static constexpr HandScalar kStabilityHandOrientation = kStabilityOtherRoot * 3;
this->stabilityThumbMCPSwing = this->stabilityCurlRoot * 1.5f;
this->stabilityThumbMCPTwist = this->stabilityCurlRoot * 1.5f;
this->stabilityFingerMCPSwing = this->stabilityCurlRoot * 3.0f;
this->stabilityFingerMCPTwist = this->stabilityCurlRoot * 3.0f;
this->stabilityFingerPXMSwingX = this->stabilityCurlRoot * 0.6f;
this->stabilityFingerPXMSwingY = this->stabilityCurlRoot * 1.6f;
this->stabilityRootPosition = this->stabilityOtherRoot * 25;
this->stabilityHandSize = this->stabilityOtherRoot * 1000;
this->stabilityHandOrientationZ = this->stabilityOtherRoot * 0.5;
this->stabilityHandOrientationXY = this->stabilityOtherRoot * 0.8;
}
};
static constexpr HandScalar kPlausibilityRoot = 1.0;
@ -154,7 +186,6 @@ calc_input_size(bool optimize_hand_size)
return out;
}
constexpr size_t
calc_residual_size(bool stability, bool optimize_hand_size, int num_views)
{
@ -179,10 +210,10 @@ calc_residual_size(bool stability, bool optimize_hand_size, int num_views)
template <typename Scalar> struct Quat
{
Scalar x;
Scalar y;
Scalar z;
Scalar w;
Scalar x = {};
Scalar y = {};
Scalar z = {};
Scalar w = {};
/// Default constructor - DOES NOT INITIALIZE VALUES
constexpr Quat() {}
@ -221,9 +252,9 @@ template <typename Scalar> struct Vec3
{
// Note that these are not initialized, for performance reasons.
// If you want them initialized, use Zero() or something else
Scalar x;
Scalar y;
Scalar z;
Scalar x = {};
Scalar y = {};
Scalar z = {};
/// Default constructor - DOES NOT INITIALIZE VALUES
constexpr Vec3() {}
@ -254,12 +285,26 @@ template <typename Scalar> struct Vec3
{
return Vec3(0.f, 0.f, 0.f);
}
// Norm, vector length, whatever.
Scalar
norm()
{
Scalar len = (Scalar)(0);
len += this->x * this->x;
len += this->y * this->y;
len += this->z * this->z;
len = sqrt(len);
return len;
}
};
template <typename Scalar> struct Vec2
{
Scalar x;
Scalar y;
Scalar x = {};
Scalar y = {};
/// Default constructor - DOES NOT INITIALIZE VALUES
constexpr Vec2() noexcept {}
@ -298,7 +343,7 @@ template <typename Scalar> struct Vec2
template <typename T> struct ResidualHelper
{
T *out_residual;
T *out_residual = nullptr;
size_t out_residual_idx = 0;
ResidualHelper(T *residual) : out_residual(residual)
@ -312,7 +357,4 @@ template <typename T> struct ResidualHelper
this->out_residual[out_residual_idx++] = value;
}
};
} // namespace xrt::tracking::hand::mercury::lm

View file

@ -48,9 +48,11 @@ void
optimizer_run(KinematicHandLM *hand,
one_frame_input &observation,
bool hand_was_untracked_last_frame,
float smoothing_factor, //!<- Unused if this is the first frame
bool optimize_hand_size,
float target_hand_size,
float hand_size_err_mul,
float amt_use_depth,
xrt_hand_joint_set &out_hand,
float &out_hand_size,
float &out_reprojection_error);

File diff suppressed because it is too large Load diff

View file

@ -8,48 +8,46 @@
* @ingroup tracking
*/
// #include <iostream>
// #include <cmath>
#include "util/u_logging.h"
#include "math/m_api.h"
#include "lm_interface.hpp"
#include "lm_defines.hpp"
#include <cmath>
#include <iostream>
// #include "lm_rotations.hpp"
namespace xrt::tracking::hand::mercury::lm {
template <typename T> struct OptimizerMetacarpalBone
{
Vec2<T> swing;
T twist;
Vec2<T> swing = {};
T twist = {};
};
template <typename T> struct OptimizerFinger
{
OptimizerMetacarpalBone<T> metacarpal;
Vec2<T> proximal_swing;
OptimizerMetacarpalBone<T> metacarpal = {};
Vec2<T> proximal_swing = {};
// Not Vec2.
T rots[2];
T rots[2] = {};
};
template <typename T> struct OptimizerThumb
{
OptimizerMetacarpalBone<T> metacarpal;
OptimizerMetacarpalBone<T> metacarpal = {};
// Again not Vec2.
T rots[2];
T rots[2] = {};
};
template <typename T> struct OptimizerHand
{
T hand_size;
Vec3<T> wrist_location;
Vec3<T> wrist_location = {};
// This is constant, a ceres::Rotation.h quat,, taken from last frame.
Quat<T> wrist_pre_orientation_quat;
Quat<T> wrist_pre_orientation_quat = {};
// This is optimized - angle-axis rotation vector. Starts at 0, loss goes up the higher it goes because it
// indicates more of a rotation.
Vec3<T> wrist_post_orientation_aax;
Vec3<T> wrist_post_orientation_aax = {};
OptimizerThumb<T> thumb = {};
OptimizerFinger<T> finger[4] = {};
};
@ -77,16 +75,18 @@ public:
class HandLimit
{
public:
minmax hand_size;
minmax hand_size = {};
minmax thumb_mcp_swing_x, thumb_mcp_swing_y, thumb_mcp_twist;
minmax thumb_curls[2];
minmax thumb_mcp_swing_x = {};
minmax thumb_mcp_swing_y = {};
minmax thumb_mcp_twist = {};
minmax thumb_curls[2] = {};
FingerLimit fingers[4];
FingerLimit fingers[4] = {};
HandLimit()
{
hand_size = {0.095 - 0.03, 0.095 + 0.03};
hand_size = {MIN_HAND_SIZE, MAX_HAND_SIZE};
thumb_mcp_swing_x = {rad<HandScalar>(-60), rad<HandScalar>(60)};
thumb_mcp_swing_y = {rad<HandScalar>(-60), rad<HandScalar>(60)};
@ -97,25 +97,31 @@ public:
}
constexpr double margin = 0.09;
HandScalar margin = 0.0001;
fingers[0].mcp_swing_y = {-0.19 - margin, -0.19 + margin};
fingers[1].mcp_swing_y = {0.00 - margin, 0.00 + margin};
fingers[2].mcp_swing_y = {0.19 - margin, 0.19 + margin};
fingers[3].mcp_swing_y = {0.38 - margin, 0.38 + margin};
fingers[0].mcp_swing_y = {HandScalar(-0.19) - margin, HandScalar(-0.19) + margin};
fingers[1].mcp_swing_y = {HandScalar(0.00) - margin, HandScalar(0.00) + margin};
fingers[2].mcp_swing_y = {HandScalar(0.19) - margin, HandScalar(0.19) + margin};
fingers[3].mcp_swing_y = {HandScalar(0.38) - margin, HandScalar(0.38) + margin};
fingers[0].mcp_swing_x = {HandScalar(-0.02) - margin, HandScalar(-0.02) + margin};
fingers[1].mcp_swing_x = {HandScalar(0.00) - margin, HandScalar(0.00) + margin};
fingers[2].mcp_swing_x = {HandScalar(0.02) - margin, HandScalar(0.02) + margin};
fingers[3].mcp_swing_x = {HandScalar(0.04) - margin, HandScalar(0.04) + margin};
for (int finger_idx = 0; finger_idx < 4; finger_idx++) {
FingerLimit &finger = fingers[finger_idx];
finger.mcp_swing_x = {rad<HandScalar>(-10), rad<HandScalar>(10)};
// finger.mcp_swing_x = {rad<HandScalar>(-0.0001), rad<HandScalar>(0.0001)};
finger.mcp_twist = {rad<HandScalar>(-4), rad<HandScalar>(4)};
finger.pxm_swing_x = {rad<HandScalar>(-100), rad<HandScalar>(20)}; // ??? why is it reversed
finger.pxm_swing_y = {rad<HandScalar>(-20), rad<HandScalar>(20)};
for (int i = 0; i < 2; i++) {
finger.curls[i] = {rad<HandScalar>(-90), rad<HandScalar>(10)};
finger.curls[i] = {rad<HandScalar>(-90), rad<HandScalar>(0)};
}
}
}
@ -124,9 +130,6 @@ public:
static const class HandLimit the_limit = {};
constexpr HandScalar hand_size_min = 0.095 - 0.03;
constexpr HandScalar hand_size_max = 0.095 + 0.03;
template <typename T>
inline T
LMToModel(T lm, minmax mm)
@ -169,17 +172,7 @@ OptimizerHandUnpackFromVector(const T *in, bool use_hand_size, T hand_size, Opti
out.thumb.rots[1] = LMToModel(in[acc_idx++], the_limit.thumb_curls[1]);
for (int finger_idx = 0; finger_idx < 4; finger_idx++) {
out.finger[finger_idx].metacarpal.swing.x =
LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].mcp_swing_x);
out.finger[finger_idx].metacarpal.swing.y =
LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].mcp_swing_y);
out.finger[finger_idx].metacarpal.twist =
LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].mcp_twist);
// Note that we are not unpacking the metacarpal swing/twist as it is constant.
out.finger[finger_idx].proximal_swing.x =
LMToModel(in[acc_idx++], the_limit.fingers[finger_idx].pxm_swing_x);
out.finger[finger_idx].proximal_swing.y =
@ -224,13 +217,7 @@ OptimizerHandPackIntoVector(OptimizerHand<T> &in, bool use_hand_size, T *out)
out[acc_idx++] = ModelToLM(in.thumb.rots[1], the_limit.thumb_curls[1]);
for (int finger_idx = 0; finger_idx < 4; finger_idx++) {
out[acc_idx++] =
ModelToLM(in.finger[finger_idx].metacarpal.swing.x, the_limit.fingers[finger_idx].mcp_swing_x);
out[acc_idx++] =
ModelToLM(in.finger[finger_idx].metacarpal.swing.y, the_limit.fingers[finger_idx].mcp_swing_y);
out[acc_idx++] =
ModelToLM(in.finger[finger_idx].metacarpal.twist, the_limit.fingers[finger_idx].mcp_twist);
// Note that we are not packing the metacarpal swing/twist as it is constant.
out[acc_idx++] =
ModelToLM(in.finger[finger_idx].proximal_swing.x, the_limit.fingers[finger_idx].pxm_swing_x);
out[acc_idx++] =
@ -252,18 +239,14 @@ template <typename T>
void
OptimizerHandInit(OptimizerHand<T> &opt, Quat<T> &pre_rotation)
{
opt.hand_size = (T)(0.095);
opt.hand_size = (T)STANDARD_HAND_SIZE;
opt.wrist_post_orientation_aax.x = (T)(0);
opt.wrist_post_orientation_aax.y = (T)(0);
opt.wrist_post_orientation_aax.z = (T)(0);
// opt.store_wrist_pre_orientation_quat = pre_rotation;
opt.wrist_pre_orientation_quat.w = (T)pre_rotation.w;
opt.wrist_pre_orientation_quat.x = (T)pre_rotation.x;
opt.wrist_pre_orientation_quat.y = (T)pre_rotation.y;
opt.wrist_pre_orientation_quat.z = (T)pre_rotation.z;
opt.wrist_pre_orientation_quat = pre_rotation;
opt.wrist_location.x = (T)(0);
opt.wrist_location.y = (T)(0);
@ -275,23 +258,28 @@ OptimizerHandInit(OptimizerHand<T> &opt, Quat<T> &pre_rotation)
opt.finger[i].metacarpal.swing.x = T(0);
opt.finger[i].metacarpal.twist = T(0);
opt.finger[i].proximal_swing.x = rad<T>((T)(15));
opt.finger[i].rots[0] = rad<T>((T)(-5));
opt.finger[i].rots[1] = rad<T>((T)(-5));
opt.finger[i].proximal_swing.x = T(rad<HandScalar>(15.0f));
opt.finger[i].rots[0] = T(rad<HandScalar>(-5));
opt.finger[i].rots[1] = T(rad<HandScalar>(-5));
}
opt.thumb.metacarpal.swing.x = (T)(0);
opt.thumb.metacarpal.swing.y = (T)(0);
opt.thumb.metacarpal.twist = (T)(0);
opt.thumb.rots[0] = rad<T>((T)(-5));
opt.thumb.rots[1] = rad<T>((T)(-59));
opt.thumb.rots[0] = T(rad<HandScalar>(-5));
opt.thumb.rots[1] = T(rad<HandScalar>(-59));
opt.finger[0].metacarpal.swing.y = (T)(-0.19);
opt.finger[1].metacarpal.swing.y = (T)(0);
opt.finger[2].metacarpal.swing.y = (T)(0.19);
opt.finger[3].metacarpal.swing.y = (T)(0.38);
opt.finger[0].metacarpal.swing.x = (T)(-0.02);
opt.finger[1].metacarpal.swing.x = (T)(0);
opt.finger[2].metacarpal.swing.x = (T)(0.02);
opt.finger[3].metacarpal.swing.x = (T)(0.04);
opt.finger[0].proximal_swing.y = (T)(-0.01);
opt.finger[1].proximal_swing.y = (T)(0);
opt.finger[2].proximal_swing.y = (T)(0.01);
@ -312,11 +300,11 @@ OptimizerHandSquashRotations(OptimizerHand<T> &opt, Quat<T> &out_orientation)
Quat<T> &pre_rotation = opt.wrist_pre_orientation_quat;
Quat<T> post_rotation;
Quat<T> post_rotation = {};
AngleAxisToQuaternion(opt.wrist_post_orientation_aax, post_rotation);
Quat<T> tmp_new_pre_rotation;
Quat<T> tmp_new_pre_rotation = {};
QuaternionProduct(pre_rotation, post_rotation, tmp_new_pre_rotation);

View file

@ -34,14 +34,6 @@ TEST_CASE("LevenbergMarquardt")
struct one_frame_input input = {};
for (int i = 0; i < 21; i++) {
input.views[0].rays[i] = m_vec3_normalize({0, (float)i, -1}); //{0,(float)i,-1};
input.views[1].rays[i] = m_vec3_normalize({(float)i, 0, -1});
input.views[0].confidences[i] = 1;
input.views[1].confidences[i] = 1;
}
lm::KinematicHandLM *hand;
xrt_pose left_in_right = XRT_POSE_IDENTITY;
@ -53,7 +45,17 @@ TEST_CASE("LevenbergMarquardt")
xrt_hand_joint_set out;
float out_hand_size;
float out_reprojection_error;
lm::optimizer_run(hand, input, true, true, 0.09, 0.5, out, out_hand_size, out_reprojection_error);
lm::optimizer_run(hand, //
input, //
true, //
2.0f, //
true, //
0.09, //
0.5, //
0.5f, //
out, //
out_hand_size, //
out_reprojection_error);
CHECK(std::isfinite(out_reprojection_error));
CHECK(std::isfinite(out_hand_size));