Add Mercury grayscale hand tracking!

This commit is contained in:
Moses Turner 2022-03-23 18:29:52 -05:00
parent 28b53689f4
commit 00be5d0551
12 changed files with 2919 additions and 0 deletions

View file

@ -2,6 +2,7 @@
# SPDX-License-Identifier: BSL-1.0
add_subdirectory(old_rgb)
add_subdirectory(mercury)
###

View file

@ -0,0 +1,49 @@
# Copyright 2019-2022, Collabora, Ltd.
# SPDX-License-Identifier: BSL-1.0
# Mercury hand tracking library!
add_library(
t_ht_mercury_kine STATIC
kine/kinematic_interface.hpp
kine/kinematic_main.cpp
)
target_link_libraries(
t_ht_mercury_kine
PRIVATE
aux_math
aux_tracking
aux_os
aux_util
)
target_include_directories(t_ht_mercury_kine SYSTEM PRIVATE ${EIGEN3_INCLUDE_DIR})
add_library(
t_ht_mercury STATIC
hg_interface.h
hg_model.hpp
hg_sync.cpp
hg_sync.hpp
)
target_link_libraries(
t_ht_mercury
PUBLIC aux-includes xrt-external-cjson
PRIVATE
aux_math
aux_tracking
aux_os
aux_util
aux_gstreamer
ONNXRuntime::ONNXRuntime
t_ht_mercury_kine
# ncnn # Soon...
${OpenCV_LIBRARIES}
)
if(XRT_HAVE_OPENCV)
target_include_directories(t_ht_mercury SYSTEM PRIVATE ${OpenCV_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR})
target_link_libraries(t_ht_mercury PUBLIC ${OpenCV_LIBRARIES})
endif()

View file

@ -0,0 +1,174 @@
// Copyright 2021-2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Helper header for drawing and image transforms
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#pragma once
#include "hg_sync.hpp"
namespace xrt::tracking::hand::mercury {
/*!
* This is a template so that we can use xrt_vec3 or xrt_vec2.
* Please don't use this for anything other than xrt_vec3 or xrt_vec2!
*/
template <typename T>
T
transformVecBy2x3(T in, cv::Matx23f warp_back)
{
T rr;
rr.x = (in.x * warp_back(0, 0)) + (in.y * warp_back(0, 1)) + warp_back(0, 2);
rr.y = (in.x * warp_back(1, 0)) + (in.y * warp_back(1, 1)) + warp_back(1, 2);
return rr;
}
cv::Scalar
hsv2rgb(float fH, float fS, float fV)
{
const float fC = fV * fS; // Chroma
const float fHPrime = fmod(fH / 60.0, 6);
const float fX = fC * (1 - fabs(fmod(fHPrime, 2) - 1));
const float fM = fV - fC;
float fR, fG, fB;
if (0 <= fHPrime && fHPrime < 1) {
fR = fC;
fG = fX;
fB = 0;
} else if (1 <= fHPrime && fHPrime < 2) {
fR = fX;
fG = fC;
fB = 0;
} else if (2 <= fHPrime && fHPrime < 3) {
fR = 0;
fG = fC;
fB = fX;
} else if (3 <= fHPrime && fHPrime < 4) {
fR = 0;
fG = fX;
fB = fC;
} else if (4 <= fHPrime && fHPrime < 5) {
fR = fX;
fG = 0;
fB = fC;
} else if (5 <= fHPrime && fHPrime < 6) {
fR = fC;
fG = 0;
fB = fX;
} else {
fR = 0;
fG = 0;
fB = 0;
}
fR += fM;
fG += fM;
fB += fM;
return {fR * 255.0f, fG * 255.0f, fB * 255.0f};
}
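// Worked example of the piecewise HSV->RGB mapping above: hsv2rgb(120.0f, 1.0f, 1.0f)
// gives chroma fC = 1, fHPrime = 2 (so the [2,3) branch), fX = 0, fM = 0, and returns
// {0, 255, 0} - pure green, in the {R, G, B} order this function uses.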
struct xrt_vec2
raycoord(ht_view *htv, struct xrt_vec2 model_out)
{
model_out.x *= htv->htd->multiply_px_coord_for_undistort;
model_out.y *= htv->htd->multiply_px_coord_for_undistort;
cv::Mat in_px_coords(1, 1, CV_32FC2);
float *write_in;
write_in = in_px_coords.ptr<float>(0);
write_in[0] = model_out.x;
write_in[1] = model_out.y;
cv::Mat out_ray(1, 1, CV_32FC2);
if (htv->htd->use_fisheye) {
cv::fisheye::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion);
} else {
cv::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion);
}
float n_x = out_ray.at<float>(0, 0);
float n_y = out_ray.at<float>(0, 1);
struct xrt_vec3 n = {n_x, n_y, 1.0f};
cv::Matx33f R = htv->rotate_camera_to_stereo_camera;
struct xrt_vec3 o = {
(n.x * R(0, 0)) + (n.y * R(0, 1)) + (n.z * R(0, 2)),
(n.x * R(1, 0)) + (n.y * R(1, 1)) + (n.z * R(1, 2)),
(n.x * R(2, 0)) + (n.y * R(2, 1)) + (n.z * R(2, 2)),
};
math_vec3_scalar_mul(1.0f / o.z, &o);
return {o.x, o.y};
}
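// Worked example of the above, assuming zero distortion: a pixel at the optical
// center undistorts to out_ray = {0, 0}, so n = {0, 0, 1} and o = R * n is just the
// third column of rotate_camera_to_stereo_camera; after dividing by o.z the returned
// tangent-space point is {R(0,2) / R(2,2), R(1,2) / R(2,2)}. These tangents in the
// shared rectified frame are what the disparity math in hg_sync.cpp consumes.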
cv::Matx23f
blackbar(const cv::Mat &in, cv::Mat &out, xrt_size out_size)
{
#if 1
// Easy to think about, always right, but pretty slow:
// Get a matrix from the original to the scaled down / blackbar'd image, then get one that goes back.
// Then just warpAffine() it.
// Easy in programmer time - never have to worry about off by one, special cases. We can come back and optimize
// later.
// Do the black bars need to be on top and bottom, or on left and right?
float scale_down_w = (float)out_size.w / (float)in.cols; // 128/1280 = 0.1
float scale_down_h = (float)out_size.h / (float)in.rows; // 128/800 = 0.16
float scale_down = fmin(scale_down_w, scale_down_h); // 0.1
float width_inside = (float)in.cols * scale_down;
float height_inside = (float)in.rows * scale_down;
float translate_x = (out_size.w - width_inside) / 2; // should be 0 for 1280x800
float translate_y = (out_size.h - height_inside) / 2; // should be (128-80)/2 = 24 for a 1280x800 input and 128x128 output
cv::Matx23f go;
// clang-format off
go(0,0) = scale_down; go(0,1) = 0.0f; go(0,2) = translate_x;
go(1,0) = 0.0f; go(1,1) = scale_down; go(1,2) = translate_y;
// clang-format on
cv::warpAffine(in, out, go, cv::Size(out_size.w, out_size.h));
cv::Matx23f ret;
// clang-format off
ret(0,0) = 1.0f/scale_down; ret(0,1) = 0.0f; ret(0,2) = -translate_x/scale_down;
ret(1,0) = 0.0f; ret(1,1) = 1.0f/scale_down; ret(1,2) = -translate_y/scale_down;
// clang-format on
return ret;
#else
// Fast, always wrong if the input isn't square. You'd end up using something like this, plus some
// copyMakeBorder if you want to optimize.
if (aspect_ratio_input == aspect_ratio_output) {
cv::resize(in, out, {out_size.w, out_size.h});
cv::Matx23f ret;
float scale_from_out_to_in = (float)in.cols / (float)out_size.w;
// clang-format off
ret(0,0) = scale_from_out_to_in; ret(0,1) = 0.0f; ret(0,2) = 0.0f;
ret(1,0) = 0.0f; ret(1,1) = scale_from_out_to_in; ret(1,2) = 0.0f;
// clang-format on
cv::imshow("hi", out);
cv::waitKey(1);
return ret;
}
assert(!"Uh oh! Unimplemented!");
return {};
#endif
}
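#if 0
// Usage sketch for the above (hypothetical values, never compiled or called):
// letterbox a 1280x800 frame into 320x240, then map a point found in the small image
// back into full-frame pixels with the returned matrix.
cv::Mat full(cv::Size(1280, 800), CV_8UC1, cv::Scalar(0));
cv::Mat letterboxed;
cv::Matx23f back = blackbar(full, letterboxed, {320, 240});
// scale_down = min(320/1280, 240/800) = 0.25, so 20px black bars on top and bottom.
xrt_vec2 in_small = {160.0f, 120.0f};
xrt_vec2 in_full = transformVecBy2x3(in_small, back); // = {640, 400}
#endif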
void
handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type)
{
cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, intensity, intensity), type);
}
} // namespace xrt::tracking::hand::mercury

View file

@ -0,0 +1,34 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Public interface of Mercury hand tracking.
* @author Jakob Bornecrantz <jakob@collabora.com>
* @author Moses Turner <moses@collabora.com>
* @ingroup aux_tracking
*/
#pragma once
#include "tracking/t_tracking.h"
#include "tracking/t_hand_tracking.h"
#ifdef __cplusplus
extern "C" {
#endif
enum mercury_output_space
{
MERCURY_OUTPUT_SPACE_LEFT_CAMERA,
MERCURY_OUTPUT_SPACE_CENTER_OF_STEREO_CAMERA,
};
/*!
* Create Mercury hand tracking pipeline.
*
* @ingroup aux_tracking
*/
struct t_hand_tracking_sync *
t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib, enum mercury_output_space output_space);
#ifdef __cplusplus
} // extern "C"
#endif

View file

@ -0,0 +1,887 @@
// Copyright 2021-2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Mercury ML models!
* @author Moses Turner <moses@collabora.com>
* @ingroup drv_ht
*/
// Many C API things were stolen from here (MIT license):
// https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/fns_candy_style_transfer/fns_candy_style_transfer.c
#pragma once
#include "hg_sync.hpp"
#include "hg_image_math.hpp"
#include <filesystem>
#include <array>
namespace xrt::tracking::hand::mercury {
cv::Scalar RED(255, 30, 30);
cv::Scalar YELLOW(255, 255, 0);
cv::Scalar colors[2] = {YELLOW, RED};
#define ORT(expr) \
do { \
OrtStatus *status = wrap->api->expr; \
if (status != nullptr) { \
const char *msg = wrap->api->GetErrorMessage(status); \
HT_ERROR(htd, "[%s:%d]: %s\n", __FILE__, __LINE__, msg); \
wrap->api->ReleaseStatus(status); \
assert(false); \
} \
} while (0)
static bool
argmax(const float *data, int size, int *out_idx)
{
float max_value = -1.0f;
bool found = false;
for (int i = 0; i < size; i++) {
if (data[i] > max_value) {
max_value = data[i];
*out_idx = i;
found = true;
}
}
return found;
}
void
refine_center_of_distribution(
const float *data, int coarse_x, int coarse_y, int w, int h, float *out_refined_x, float *out_refined_y)
{
// Be VERY suspicious of this function, it's probably not centering correctly.
float sum_of_values = 0;
float sum_of_values_times_locations_x = 0;
float sum_of_values_times_locations_y = 0;
int max_kern_width = 10;
//!@todo this is stupid and has at least one edge case, make it more readable and link to a jupyter notebook
int kern_width_x = std::max(0, std::min(coarse_x, std::min(max_kern_width, abs(coarse_x - w) - 1)));
int kern_width_y = std::max(0, std::min(coarse_y, std::min(max_kern_width, abs(coarse_y - h) - 1)));
int min_x = coarse_x - kern_width_x;
int max_x = coarse_x + kern_width_x;
int min_y = coarse_y - kern_width_y;
int max_y = coarse_y + kern_width_y;
for (int y = min_y; y <= max_y; y++) {
for (int x = min_x; x <= max_x; x++) {
int acc = (y * w) + x;
float val = data[acc];
sum_of_values += val;
sum_of_values_times_locations_y += val * ((float)y + 0.5);
sum_of_values_times_locations_x += val * ((float)x + 0.5);
}
}
if (sum_of_values == 0) {
// Edge case, will fix soon
*out_refined_x = coarse_x;
*out_refined_y = coarse_y;
U_LOG_E("Failed! %d %d %d %d %d", coarse_x, coarse_y, w, h, max_kern_width);
return;
}
*out_refined_x = sum_of_values_times_locations_x / sum_of_values;
*out_refined_y = sum_of_values_times_locations_y / sum_of_values;
return;
}
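// The refinement above is a weighted centroid over a window around the argmax: with
// example kernel values {1 at x=3, 3 at x=5} on one row, refined_x = (1*3.5 + 3*5.5) / 4 = 5.0.
// The +0.5 terms treat each heatmap cell as a unit square sampled at its center.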
static float
average_size(const float *data, const float *data_loc, int coarse_x, int coarse_y, int w, int h)
{
float sum = 0.0;
float sum_of_values = 0;
int max_kern_width = 10;
int min_x = std::max(0, coarse_x - max_kern_width);
int max_x = std::min(w, coarse_x + max_kern_width);
int min_y = std::max(0, coarse_y - max_kern_width);
int max_y = std::min(h, coarse_y + max_kern_width);
assert(min_x >= 0);
assert(max_x <= w);
assert(min_y >= 0);
assert(max_y <= h);
for (int y = min_y; y < max_y; y++) {
for (int x = min_x; x < max_x; x++) {
int acc = (y * w) + x;
float val = data[acc];
float val_loc = data_loc[acc];
sum += 1 * val_loc;
sum_of_values += val * val_loc;
}
}
assert(sum != 0);
return sum_of_values / sum;
}
static void
normalizeGrayscaleImage(cv::Mat &data_in, cv::Mat &data_out)
{
data_in.convertTo(data_out, CV_32FC1, 1 / 255.0);
cv::Mat mean;
cv::Mat stddev;
cv::meanStdDev(data_out, mean, stddev);
data_out *= 0.25 / stddev.at<double>(0, 0);
// Calculate it again; the mean has changed. Yes, we don't need to, but it's easy.
//!@optimize
cv::meanStdDev(data_out, mean, stddev);
data_out += (0.5 - mean.at<double>(0, 0));
}
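// Net effect of the above: data_out ends up with a standard deviation of 0.25 and a
// mean of 0.5, whatever the exposure of the input image was. The second meanStdDev()
// call is only needed because the multiply also shifts the mean.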
static void
init_hand_detection(HandTracking *htd, onnx_wrap *wrap)
{
std::filesystem::path path = htd->models_folder;
path /= "grayscale_detection.onnx";
wrap->input_name = "input_image_grayscale";
wrap->input_shape[0] = 1;
wrap->input_shape[1] = 1;
wrap->input_shape[2] = 240;
wrap->input_shape[3] = 320;
wrap->api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
OrtSessionOptions *opts = nullptr;
ORT(CreateSessionOptions(&opts));
ORT(SetSessionGraphOptimizationLevel(opts, ORT_ENABLE_ALL));
ORT(SetIntraOpNumThreads(opts, 1));
ORT(CreateEnv(ORT_LOGGING_LEVEL_FATAL, "monado_ht", &wrap->env));
ORT(CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &wrap->meminfo));
ORT(CreateSession(wrap->env, path.c_str(), opts, &wrap->session));
assert(wrap->session != NULL);
size_t input_size = wrap->input_shape[0] * wrap->input_shape[1] * wrap->input_shape[2] * wrap->input_shape[3];
wrap->data = (float *)malloc(input_size * sizeof(float));
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
wrap->data, //
input_size * sizeof(float), //
wrap->input_shape, //
4, //
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
&wrap->tensor));
assert(wrap->tensor);
int is_tensor;
ORT(IsTensor(wrap->tensor, &is_tensor));
assert(is_tensor);
wrap->api->ReleaseSessionOptions(opts);
}
void
run_hand_detection(void *ptr)
{
XRT_TRACE_MARKER();
ht_view *view = (ht_view *)ptr;
HandTracking *htd = view->htd;
onnx_wrap *wrap = &view->detection;
cv::Mat &data_400x640 = view->run_model_on_this;
cv::Mat _240x320_uint8;
xrt_size desire;
desire.h = 240;
desire.w = 320;
cv::Matx23f go_back = blackbar(data_400x640, _240x320_uint8, desire);
cv::Mat _240x320(cv::Size(320, 240), CV_32FC1, wrap->data, 320 * sizeof(float));
normalizeGrayscaleImage(_240x320_uint8, _240x320);
const char *output_name = "hand_locations_radii";
OrtValue *output_tensor = nullptr;
ORT(Run(wrap->session, nullptr, &wrap->input_name, &wrap->tensor, 1, &output_name, 1, &output_tensor));
float *out_data = nullptr;
ORT(GetTensorMutableData(output_tensor, (void **)&out_data));
size_t plane_size = 80 * 60;
for (int hand_idx = 0; hand_idx < 2; hand_idx++) {
const float *this_side_data = out_data + hand_idx * plane_size * 2;
int max_idx;
det_output *output = &view->det_outputs[hand_idx];
output->found = argmax(this_side_data, 4800, &max_idx) && this_side_data[max_idx] > 0.3;
if (output->found) {
int row = max_idx / 80;
int col = max_idx % 80;
float size = average_size(this_side_data + plane_size, this_side_data, col, row, 80, 60);
// Model output size is normalized to [0, 1]; multiply by image width and a tuned factor.
constexpr float fac = 2.0f;
size *= 320 * fac;
size *= m_vec2_len({go_back(0, 0), go_back(0, 1)});
float refined_x, refined_y;
refine_center_of_distribution(this_side_data, col, row, 80, 60, &refined_x, &refined_y);
cv::Mat &debug_frame = view->debug_out_to_this;
xrt_vec2 _pt = {refined_x * 4, refined_y * 4};
_pt = transformVecBy2x3(_pt, go_back);
output->center = _pt;
output->size_px = size;
if (htd->debug_scribble) {
cv::Point2i pt(_pt.x, _pt.y);
cv::rectangle(debug_frame,
cv::Rect(pt - cv::Point2i(size / 2, size / 2), cv::Size(size, size)),
colors[hand_idx], 1);
}
}
}
wrap->api->ReleaseValue(output_tensor);
}
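// Layout assumed for the detection model's output above: two hands, each with two
// 80x60 planes packed back to back - plane 0 is the hand-location heatmap that
// argmax runs over (80 * 60 = 4800), plane 1 holds the normalized hand radius that
// average_size() samples. 0.3 is the "hand actually present" confidence threshold.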
static void
init_keypoint_estimation(HandTracking *htd, onnx_wrap *wrap)
{
std::filesystem::path path = htd->models_folder;
path /= "grayscale_keypoint_simdr.onnx";
wrap->input_name = "inputImg";
wrap->input_shape[0] = 1;
wrap->input_shape[1] = 1;
wrap->input_shape[2] = 128;
wrap->input_shape[3] = 128;
wrap->api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
OrtSessionOptions *opts = nullptr;
ORT(CreateSessionOptions(&opts));
ORT(SetSessionGraphOptimizationLevel(opts, ORT_ENABLE_ALL));
ORT(SetIntraOpNumThreads(opts, 1));
ORT(CreateEnv(ORT_LOGGING_LEVEL_FATAL, "monado_ht", &wrap->env));
ORT(CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &wrap->meminfo));
ORT(CreateSession(wrap->env, path.c_str(), opts, &wrap->session));
assert(wrap->session != NULL);
size_t input_size = wrap->input_shape[0] * wrap->input_shape[1] * wrap->input_shape[2] * wrap->input_shape[3];
wrap->data = (float *)malloc(input_size * sizeof(float));
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
wrap->data, //
input_size * sizeof(float), //
wrap->input_shape, //
4, //
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
&wrap->tensor));
assert(wrap->tensor);
int is_tensor;
ORT(IsTensor(wrap->tensor, &is_tensor));
assert(is_tensor);
wrap->api->ReleaseSessionOptions(opts);
}
void
run_keypoint_estimation(void *ptr)
{
// data has already been filled with what we're meant to detect on
XRT_TRACE_MARKER();
keypoint_estimation_run_info *info = (keypoint_estimation_run_info *)ptr;
onnx_wrap *wrap = &info->view->keypoint[info->hand_idx];
struct HandTracking *htd = info->view->htd;
cv::Mat &debug = info->view->debug_out_to_this;
det_output *output = &info->view->det_outputs[info->hand_idx];
cv::Point2f src_tri[3];
cv::Point2f dst_tri[3];
// top-left
cv::Point2f center = {output->center.x, output->center.y};
cv::Point2f go_right = {output->size_px / 2, 0};
cv::Point2f go_down = {0, output->size_px / 2};
if (info->hand_idx == 1) {
go_right *= -1;
}
// top left
src_tri[0] = {center - go_down - go_right};
// bottom left
src_tri[1] = {center + go_down - go_right};
// top right
src_tri[2] = {center - go_down + go_right};
dst_tri[0] = {0, 0};
dst_tri[1] = {0, 128};
dst_tri[2] = {128, 0};
cv::Matx23f go_there = getAffineTransform(src_tri, dst_tri);
cv::Matx23f go_back = getAffineTransform(dst_tri, src_tri);
cv::Mat data_128x128_uint8;
cv::warpAffine(info->view->run_model_on_this, data_128x128_uint8, go_there, cv::Size(128, 128),
cv::INTER_LINEAR);
cv::Mat data_128x128_float(cv::Size(128, 128), CV_32FC1, wrap->data, 128 * sizeof(float));
normalizeGrayscaleImage(data_128x128_uint8, data_128x128_float);
const char *output_names[2] = {"x_axis_hmap", "y_axis_hmap"};
OrtValue *output_tensor[2] = {nullptr, nullptr};
ORT(Run(wrap->session, nullptr, &wrap->input_name, &wrap->tensor, 1, output_names, 2, output_tensor));
// To here
float *out_data_x = nullptr;
float *out_data_y = nullptr;
ORT(GetTensorMutableData(output_tensor[0], (void **)&out_data_x));
ORT(GetTensorMutableData(output_tensor[1], (void **)&out_data_y));
Hand2D &px_coord = info->view->keypoint_outputs[info->hand_idx].hand_px_coord;
Hand2D &tan_space = info->view->keypoint_outputs[info->hand_idx].hand_tan_space;
xrt_vec2 *keypoints_global = px_coord.kps;
cv::Mat x(cv::Size(128, 21), CV_32FC1, out_data_x);
cv::Mat y(cv::Size(128, 21), CV_32FC1, out_data_y);
for (int i = 0; i < 21; i++) {
int loc_x;
int loc_y;
argmax(&out_data_x[i * 128], 128, &loc_x);
argmax(&out_data_y[i * 128], 128, &loc_y);
xrt_vec2 loc;
loc.x = loc_x;
loc.y = loc_y;
loc = transformVecBy2x3(loc, go_back);
px_coord.kps[i] = loc;
tan_space.kps[i] = raycoord(info->view, loc);
}
if (htd->debug_scribble) {
for (int finger = 0; finger < 5; finger++) {
cv::Point last = {(int)keypoints_global[0].x, (int)keypoints_global[0].y};
for (int joint = 0; joint < 4; joint++) {
cv::Point the_new = {(int)keypoints_global[1 + finger * 4 + joint].x,
(int)keypoints_global[1 + finger * 4 + joint].y};
cv::line(debug, last, the_new, colors[info->hand_idx]);
last = the_new;
}
}
for (int i = 0; i < 21; i++) {
xrt_vec2 loc = keypoints_global[i];
handDot(debug, loc, 2, (float)(i) / 21.0, 1, 2);
}
}
wrap->api->ReleaseValue(output_tensor[0]);
wrap->api->ReleaseValue(output_tensor[1]);
}
static void
init_keypoint_estimation_new(HandTracking *htd, onnx_wrap *wrap)
{
std::filesystem::path path = htd->models_folder;
path /= "grayscale_keypoint_new.onnx";
// input_names = {"input_image_grayscale"};
wrap->input_name = "inputImg";
wrap->input_shape[0] = 1;
wrap->input_shape[1] = 1;
wrap->input_shape[2] = 128;
wrap->input_shape[3] = 128;
wrap->api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
OrtSessionOptions *opts = nullptr;
ORT(CreateSessionOptions(&opts));
// TODO review options, config for threads?
ORT(SetSessionGraphOptimizationLevel(opts, ORT_ENABLE_ALL));
ORT(SetIntraOpNumThreads(opts, 1));
ORT(CreateEnv(ORT_LOGGING_LEVEL_FATAL, "monado_ht", &wrap->env));
ORT(CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &wrap->meminfo));
// HT_DEBUG(this->device, "Loading hand detection model from file '%s'", path.c_str());
ORT(CreateSession(wrap->env, path.c_str(), opts, &wrap->session));
assert(wrap->session != NULL);
size_t input_size = wrap->input_shape[0] * wrap->input_shape[1] * wrap->input_shape[2] * wrap->input_shape[3];
wrap->data = (float *)malloc(input_size * sizeof(float));
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
wrap->data, //
input_size * sizeof(float), //
wrap->input_shape, //
4, //
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
&wrap->tensor));
assert(wrap->tensor);
int is_tensor;
ORT(IsTensor(wrap->tensor, &is_tensor));
assert(is_tensor);
wrap->api->ReleaseSessionOptions(opts);
}
void
run_keypoint_estimation_new(void *ptr)
{
XRT_TRACE_MARKER();
keypoint_estimation_run_info *info = (keypoint_estimation_run_info *)ptr;
onnx_wrap *wrap = &info->view->keypoint[info->hand_idx];
struct HandTracking *htd = info->view->htd;
// Factor out starting here
cv::Mat &debug = info->view->debug_out_to_this;
det_output *output = &info->view->det_outputs[info->hand_idx];
cv::Point2f src_tri[3];
cv::Point2f dst_tri[3];
// top-left
cv::Point2f center = {output->center.x, output->center.y};
cv::Point2f go_right = {output->size_px / 2, 0};
cv::Point2f go_down = {0, output->size_px / 2};
if (info->hand_idx == 1) {
go_right *= -1;
}
// top left
src_tri[0] = {center - go_down - go_right};
// bottom left
src_tri[1] = {center + go_down - go_right};
// top right
src_tri[2] = {center - go_down + go_right};
dst_tri[0] = {0, 0};
dst_tri[1] = {0, 128};
dst_tri[2] = {128, 0};
cv::Matx23f go_there = getAffineTransform(src_tri, dst_tri);
cv::Matx23f go_back = getAffineTransform(dst_tri, src_tri);
{
XRT_TRACE_IDENT(transforms);
cv::Mat data_128x128_uint8;
cv::warpAffine(info->view->run_model_on_this, data_128x128_uint8, go_there, cv::Size(128, 128),
cv::INTER_LINEAR);
cv::Mat data_128x128_float(cv::Size(128, 128), CV_32FC1, wrap->data, 128 * sizeof(float));
normalizeGrayscaleImage(data_128x128_uint8, data_128x128_float);
}
// Ending here
const char *output_names[1] = {"heatmap"};
OrtValue *output_tensor = nullptr;
{
XRT_TRACE_IDENT(model);
ORT(Run(wrap->session, nullptr, &wrap->input_name, &wrap->tensor, 1, output_names, 1, &output_tensor));
}
// To here
float *out_data = nullptr;
ORT(GetTensorMutableData(output_tensor, (void **)&out_data));
Hand2D &px_coord = info->view->keypoint_outputs[info->hand_idx].hand_px_coord;
Hand2D &tan_space = info->view->keypoint_outputs[info->hand_idx].hand_tan_space;
xrt_vec2 *keypoints_global = px_coord.kps;
size_t plane_size = 22 * 22;
for (int i = 0; i < 21; i++) {
float *data = &out_data[i * plane_size];
int out_idx = 0;
argmax(data, 22 * 22, &out_idx);
int row = out_idx / 22;
int col = out_idx % 22;
xrt_vec2 loc;
refine_center_of_distribution(data, col, row, 22, 22, &loc.x, &loc.y);
loc.x *= 128.0f / 22.0f;
loc.y *= 128.0f / 22.0f;
loc = transformVecBy2x3(loc, go_back);
px_coord.kps[i] = loc;
tan_space.kps[i] = raycoord(info->view, loc);
}
if (htd->debug_scribble) {
for (int finger = 0; finger < 5; finger++) {
cv::Point last = {(int)keypoints_global[0].x, (int)keypoints_global[0].y};
for (int joint = 0; joint < 4; joint++) {
cv::Point the_new = {(int)keypoints_global[1 + finger * 4 + joint].x,
(int)keypoints_global[1 + finger * 4 + joint].y};
cv::line(debug, last, the_new, colors[info->hand_idx]);
last = the_new;
}
}
for (int i = 0; i < 21; i++) {
xrt_vec2 loc = keypoints_global[i];
handDot(debug, loc, 2, (float)(i) / 21.0, 1, 2);
}
}
wrap->api->ReleaseValue(output_tensor);
}
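// Output layout assumed above: 21 heatmaps of 22x22 each (hence plane_size), one per
// Joint21 keypoint. The argmax cell is refined to sub-cell precision, scaled by
// 128/22 back into crop pixels, then mapped through go_back into view pixels.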
void
release_onnx_wrap(onnx_wrap *wrap)
{
wrap->api->ReleaseMemoryInfo(wrap->meminfo);
wrap->api->ReleaseSession(wrap->session);
wrap->api->ReleaseValue(wrap->tensor);
wrap->api->ReleaseEnv(wrap->env);
free(wrap->data);
}
#ifdef USE_NCNN
int
ncnn_extractor_input_wrap(ncnn_extractor_t ex, const char *name, const ncnn_mat_t mat)
{
XRT_TRACE_MARKER();
return ncnn_extractor_input(ex, name, mat);
}
int
ncnn_extractor_extract_wrap(ncnn_extractor_t ex, const char *name, ncnn_mat_t *mat)
{
XRT_TRACE_MARKER();
return ncnn_extractor_extract(ex, name, mat);
}
ncnn_mat_t
ncnn_mat_from_pixels_resize_wrap(const unsigned char *pixels,
int type,
int w,
int h,
int stride,
int target_width,
int target_height,
ncnn_allocator_t allocator)
{
XRT_TRACE_MARKER();
return ncnn_mat_from_pixels_resize(pixels, type, w, h, stride, target_width, target_height, allocator);
}
void
ncnn_mat_substract_mean_normalize_wrap(ncnn_mat_t mat, const float *mean_vals, const float *norm_vals)
{
XRT_TRACE_MARKER();
return ncnn_mat_substract_mean_normalize(mat, mean_vals, norm_vals);
}
void
run_hand_detection_ncnn(void *ptr)
{
ht_view *view = (ht_view *)ptr;
HandTracking *htd = view->htd;
onnx_wrap *wrap = &view->detection;
cv::Mat &data_400x640 = view->run_model_on_this;
ncnn_mat_t in = ncnn_mat_from_pixels_resize(view->run_model_on_this.data, NCNN_MAT_PIXEL_GRAY, 640, 400,
view->run_model_on_this.step[0], 320, 240, NULL);
const float norm_vals[3] = {1 / 255.0, 1 / 255.0, 1 / 255.0};
ncnn_mat_substract_mean_normalize_wrap(in, 0, norm_vals);
ncnn_option_t opt = ncnn_option_create();
ncnn_option_set_use_vulkan_compute(opt, 1);
ncnn_extractor_t ex = ncnn_extractor_create(htd->net);
ncnn_extractor_set_option(ex, opt);
ncnn_extractor_input_wrap(ex, "input_image_grayscale", in);
ncnn_mat_t out;
ncnn_extractor_extract_wrap(ex, "hand_locations_radii", &out);
#if 0
{
int out_dims = ncnn_mat_get_dims(out);
const int out_w = ncnn_mat_get_w(out);
const int out_h = ncnn_mat_get_h(out);
const int out_d = ncnn_mat_get_d(out);
const int out_c = ncnn_mat_get_c(out);
U_LOG_E("out: %d: %d %d %d %d", out_dims, out_w, out_h, out_d, out_c);
}
{
int out_dims = ncnn_mat_get_dims(in);
const int out_w = ncnn_mat_get_w(in);
const int out_h = ncnn_mat_get_h(in);
const int out_d = ncnn_mat_get_d(in);
const int out_c = ncnn_mat_get_c(in);
U_LOG_E("in: %d: %d %d %d %d", out_dims, out_w, out_h, out_d, out_c);
}
#endif
const float *out_data = (const float *)ncnn_mat_get_data(out);
size_t plane_size = 80 * 60;
cv::Scalar colors[2] = {{255, 255, 0}, {255, 0, 0}};
for (int hand_idx = 0; hand_idx < 2; hand_idx++) {
const float *this_side_data = out_data + hand_idx * plane_size * 2;
int max_idx;
det_output *output = &view->det_outputs[hand_idx];
output->found = argmax(this_side_data, 4800, &max_idx) && this_side_data[max_idx] > 0.3;
if (output->found) {
int row = max_idx / 80;
int col = max_idx % 80;
output->size_px = average_size(this_side_data + plane_size, this_side_data, col, row, 80, 60);
// output->size_px *= 640 * 1.6;
// output->size_px *= 640 * 1.8;
output->size_px *= 640 * 2.0;
float size = output->size_px;
float refined_x, refined_y;
refine_center_of_distribution(this_side_data, col, row, 80, 60, &refined_x, &refined_y);
output->center.x = refined_x * 8.0;
output->center.y = refined_y * 6.6666666666666666667;
// cv::Point2i pt(refined_x * 16.0 * .5, refined_y * 13.333333333333334 * .5);
cv::Point2i pt(output->center.x, output->center.y);
cv::Mat &debug_frame = view->debug_out_to_this;
cv::rectangle(debug_frame, cv::Rect(pt - cv::Point2i(size / 2, size / 2), cv::Size(size, size)),
colors[hand_idx], 1);
}
}
ncnn_extractor_destroy(ex);
ncnn_mat_destroy(in);
ncnn_mat_destroy(out);
}
void
run_keypoint_estimation_new_ncnn(void *ptr)
{
XRT_TRACE_MARKER();
keypoint_estimation_run_info *info = (keypoint_estimation_run_info *)ptr;
struct HandTracking *htd = info->view->htd;
// Factor out starting here
cv::Mat &debug = info->view->debug_out_to_this;
det_output *output = &info->view->det_outputs[info->hand_idx];
cv::Point2f src_tri[3];
cv::Point2f dst_tri[3];
// top-left
cv::Point2f center = {output->center.x, output->center.y};
cv::Point2f go_right = {output->size_px / 2, 0};
cv::Point2f go_down = {0, output->size_px / 2};
if (info->hand_idx == 1) {
go_right *= -1;
}
// top left
src_tri[0] = {center - go_down - go_right};
// bottom left
src_tri[1] = {center + go_down - go_right};
// top right
src_tri[2] = {center - go_down + go_right};
dst_tri[0] = {0, 0};
dst_tri[1] = {0, 128};
dst_tri[2] = {128, 0};
cv::Matx23f go_there = getAffineTransform(src_tri, dst_tri);
cv::Matx23f go_back = getAffineTransform(dst_tri, src_tri);
XRT_TRACE_IDENT(transforms);
cv::Mat data_128x128_uint8;
cv::warpAffine(info->view->run_model_on_this, data_128x128_uint8, go_there, cv::Size(128, 128),
cv::INTER_LINEAR);
// cv::Mat data_128x128_float(cv::Size(128, 128), CV_32FC1);
// normalizeGrayscaleImage(data_128x128_uint8, data_128x128_float);
ncnn_mat_t in = ncnn_mat_from_pixels(data_128x128_uint8.data, NCNN_MAT_PIXEL_GRAY, 128, 128,
data_128x128_uint8.step[0], NULL);
const float norm_vals[3] = {1 / 255.0, 1 / 255.0, 1 / 255.0};
ncnn_mat_substract_mean_normalize_wrap(in, 0, norm_vals);
ncnn_option_t opt = ncnn_option_create();
ncnn_option_set_use_vulkan_compute(opt, 1);
ncnn_extractor_t ex = ncnn_extractor_create(htd->net_keypoint);
ncnn_extractor_set_option(ex, opt);
ncnn_extractor_input_wrap(ex, "inputImg", in);
ncnn_mat_t out;
ncnn_extractor_extract_wrap(ex, "heatmap", &out);
// Ending here
const float *out_data = (const float *)ncnn_mat_get_data(out);
Hand2D &px_coord = info->view->keypoint_outputs[info->hand_idx].hand_px_coord;
Hand2D &tan_space = info->view->keypoint_outputs[info->hand_idx].hand_tan_space;
xrt_vec2 *keypoints_global = px_coord.kps;
size_t plane_size = 22 * 22;
for (int i = 0; i < 21; i++) {
const float *data = &out_data[i * plane_size];
int out_idx = 0;
argmax(data, 22 * 22, &out_idx);
int row = out_idx / 22;
int col = out_idx % 22;
xrt_vec2 loc;
refine_center_of_distribution(data, col, row, 22, 22, &loc.x, &loc.y);
loc.x *= 128.0f / 22.0f;
loc.y *= 128.0f / 22.0f;
loc = transformVecBy2x3(loc, go_back);
px_coord.kps[i] = loc;
tan_space.kps[i] = raycoord(info->view, loc);
}
if (htd->debug_scribble) {
for (int finger = 0; finger < 5; finger++) {
cv::Point last = {(int)keypoints_global[0].x, (int)keypoints_global[0].y};
for (int joint = 0; joint < 4; joint++) {
cv::Point the_new = {(int)keypoints_global[1 + finger * 4 + joint].x,
(int)keypoints_global[1 + finger * 4 + joint].y};
cv::line(debug, last, the_new, colors[info->hand_idx]);
last = the_new;
}
}
for (int i = 0; i < 21; i++) {
xrt_vec2 loc = keypoints_global[i];
handDot(debug, loc, 2, (float)(i) / 21.0, 1, 2);
}
}
ncnn_extractor_destroy(ex);
ncnn_mat_destroy(in);
ncnn_mat_destroy(out);
}
#endif
} // namespace xrt::tracking::hand::mercury

View file

@ -0,0 +1,533 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Mercury hand tracking main file.
* @author Jakob Bornecrantz <jakob@collabora.com>
* @author Moses Turner <moses@collabora.com>
* @author Nick Klingensmith <programmerpichu@gmail.com>
* @ingroup tracking
*/
#include "hg_sync.hpp"
#include "hg_model.hpp"
namespace xrt::tracking::hand::mercury {
// Flags to tell state tracker that these are indeed valid joints
static const enum xrt_space_relation_flags valid_flags_ht = (enum xrt_space_relation_flags)(
XRT_SPACE_RELATION_ORIENTATION_VALID_BIT | XRT_SPACE_RELATION_ORIENTATION_TRACKED_BIT |
XRT_SPACE_RELATION_POSITION_VALID_BIT | XRT_SPACE_RELATION_POSITION_TRACKED_BIT);
static void
hgJointDisparityMath(struct HandTracking *htd, Hand2D *hand_in_left, Hand2D *hand_in_right, Hand3D *out_hand)
{
for (int i = 0; i < 21; i++) {
// Believe it or not, this is where the 3D stuff happens!
float t = htd->baseline / (hand_in_left->kps[i].x - hand_in_right->kps[i].x);
out_hand->kps[i].z = -t;
out_hand->kps[i].x = (hand_in_left->kps[i].x * t);
out_hand->kps[i].y = -hand_in_left->kps[i].y * t;
out_hand->kps[i].x += htd->baseline + (hand_in_right->kps[i].x * t);
out_hand->kps[i].y += -hand_in_right->kps[i].y * t;
out_hand->kps[i].x *= .5;
out_hand->kps[i].y *= .5;
}
}
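// Worked example of the disparity math above, with a hypothetical 6.4cm baseline: if
// a keypoint sits at tangent-space x = 0.10 in the left view and x = 0.04 in the
// right, then t = 0.064 / 0.06 ~= 1.07 and kps[i].z ~= -1.07m. The x/y terms average
// the two per-camera estimates (note the baseline shift for the right camera), which
// is why they are halved at the end.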
/*!
* Setup helper functions.
*/
static bool
getCalibration(struct HandTracking *htd, t_stereo_camera_calibration *calibration)
{
xrt::auxiliary::tracking::StereoCameraCalibrationWrapper wrap(calibration);
xrt_vec3 trans = {(float)wrap.camera_translation_mat(0, 0), (float)wrap.camera_translation_mat(1, 0),
(float)wrap.camera_translation_mat(2, 0)};
htd->baseline = m_vec3_len(trans);
// Note, this assumes camera 0 is the left camera and camera 1 is the right camera.
// If you find one with the opposite arrangement, you'll need to invert htd->baseline, and look at
// hgJointDisparityMath
htd->use_fisheye = wrap.view[0].use_fisheye;
cv::Matx34d P1;
cv::Matx34d P2;
cv::Matx44d Q;
// We only want R1 and R2, we don't care about anything else
if (htd->use_fisheye) {
cv::fisheye::stereoRectify(wrap.view[0].intrinsics_mat, // cameraMatrix1
wrap.view[0].distortion_fisheye_mat, // distCoeffs1
wrap.view[1].intrinsics_mat, // cameraMatrix2
wrap.view[1].distortion_fisheye_mat, // distCoeffs2
wrap.view[0].image_size_pixels_cv, // imageSize*
wrap.camera_rotation_mat, // R
wrap.camera_translation_mat, // T
htd->views[0].rotate_camera_to_stereo_camera, // R1
htd->views[1].rotate_camera_to_stereo_camera, // R2
P1, // P1
P2, // P2
Q, // Q
0, // flags
cv::Size()); // newImageSize
} else {
cv::stereoRectify(wrap.view[0].intrinsics_mat, // cameraMatrix1
wrap.view[0].distortion_mat, // distCoeffs1
wrap.view[1].intrinsics_mat, // cameraMatrix2
wrap.view[1].distortion_mat, // distCoeffs2
wrap.view[0].image_size_pixels_cv, // imageSize*
wrap.camera_rotation_mat, // R
wrap.camera_translation_mat, // T
htd->views[0].rotate_camera_to_stereo_camera, // R1
htd->views[1].rotate_camera_to_stereo_camera, // R2
P1, // P1
P2, // P2
Q, // Q
0, // flags
-1.0f, // alpha
cv::Size(), // newImageSize
NULL, // validPixROI1
NULL); // validPixROI2
}
// Good enough guess that view 0 and view 1 are the same size.
for (int i = 0; i < 2; i++) {
htd->views[i].cameraMatrix = wrap.view[i].intrinsics_mat;
if (htd->use_fisheye) {
htd->views[i].distortion = wrap.view[i].distortion_fisheye_mat;
} else {
htd->views[i].distortion = wrap.view[i].distortion_mat;
}
}
htd->one_view_size_px.w = wrap.view[0].image_size_pixels.w;
htd->one_view_size_px.h = wrap.view[0].image_size_pixels.h;
cv::Matx33d rotate_stereo_camera_to_left_camera = htd->views[0].rotate_camera_to_stereo_camera.inv();
xrt_matrix_3x3 s;
s.v[0] = rotate_stereo_camera_to_left_camera(0, 0);
s.v[1] = rotate_stereo_camera_to_left_camera(0, 1);
s.v[2] = rotate_stereo_camera_to_left_camera(0, 2);
s.v[3] = rotate_stereo_camera_to_left_camera(1, 0);
s.v[4] = rotate_stereo_camera_to_left_camera(1, 1);
s.v[5] = rotate_stereo_camera_to_left_camera(1, 2);
s.v[6] = rotate_stereo_camera_to_left_camera(2, 0);
s.v[7] = rotate_stereo_camera_to_left_camera(2, 1);
s.v[8] = rotate_stereo_camera_to_left_camera(2, 2);
xrt_quat tmp;
math_quat_from_matrix_3x3(&s, &tmp);
// Weird that I have to invert this quat, right? I think at some point - like probably just above this - I must
// have swapped row-major and col-major - remember, if you transpose a rotation matrix, you get its inverse.
// Doesn't matter that I don't understand - non-inverted looks definitely wrong, inverted looks definitely
// right.
math_quat_invert(&tmp, &htd->stereo_camera_to_left_camera);
return true;
}
static void
getModelsFolder(struct HandTracking *htd)
{
// Please bikeshed me on this! I don't know where the best place to put this stuff is!
#if 0
char exec_location[1024] = {};
readlink("/proc/self/exe", exec_location, 1024);
HT_DEBUG(htd, "Exec at %s\n", exec_location);
int end = 0;
while (exec_location[end] != '\0') {
HT_DEBUG(htd, "%d", end);
end++;
}
while (exec_location[end] != '/' && end != 0) {
HT_DEBUG(htd, "%d %c", end, exec_location[end]);
exec_location[end] = '\0';
end--;
}
strcat(exec_location, "../share/monado/hand-tracking-models/");
strcpy(htd->startup_config.model_slug, exec_location);
#else
const char *xdg_home = getenv("XDG_CONFIG_HOME");
const char *home = getenv("HOME");
if (xdg_home != NULL) {
strcpy(htd->models_folder, xdg_home);
} else if (home != NULL) {
strcpy(htd->models_folder, home);
} else {
assert(false);
}
strcat(htd->models_folder, "/.local/share/monado/hand-tracking-models/");
#endif
}
static void
applyThumbIndexDrag(Hand3D *hand)
{
// TERRIBLE HACK.
// Puts the thumb and pointer a bit closer together to be better at triggering XR clients' pinch detection.
static const float max_radius = 0.05;
static const float min_radius = 0.00;
// no min drag, min drag always 0.
static const float max_drag = 0.85f;
xrt_vec3 thumb = hand->kps[THMB_TIP];
xrt_vec3 index = hand->kps[INDX_TIP];
xrt_vec3 ttp = index - thumb;
float length = m_vec3_len(ttp);
if ((length > max_radius)) {
return;
}
float amount = math_map_ranges(length, min_radius, max_radius, max_drag, 0.0f);
hand->kps[THMB_TIP] = m_vec3_lerp(thumb, index, amount * 0.5f);
hand->kps[INDX_TIP] = m_vec3_lerp(index, thumb, amount * 0.5f);
}
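// Worked example: with the tips 2.5cm apart (half of max_radius), math_map_ranges()
// linearly remaps 0.025 from [0, 0.05] into [0.85, 0], giving amount = 0.425; each
// tip then lerps 21.25% of the way toward the other, closing ~42.5% of the gap.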
static void
applyJointWidths(struct HandTracking *htd, struct xrt_hand_joint_set *set)
{
// Thanks to Nick Klingensmith for this idea
struct xrt_hand_joint_value *gr = set->values.hand_joint_set_default;
static const float finger_joint_size[5] = {0.022f, 0.021f, 0.022f, 0.021f, 0.02f};
static const float hand_finger_size[4] = {1.0f, 1.0f, 0.83f, 0.75f};
static const float thumb_size[4] = {0.016f, 0.014f, 0.012f, 0.012f};
float mul = 1.0f;
for (int i = XRT_HAND_JOINT_THUMB_METACARPAL; i <= XRT_HAND_JOINT_THUMB_TIP; i++) {
int j = i - XRT_HAND_JOINT_THUMB_METACARPAL;
gr[i].radius = thumb_size[j] * mul;
}
for (int finger = 0; finger < 4; finger++) {
for (int joint = 0; joint < 5; joint++) {
int set_idx = finger * 5 + joint + XRT_HAND_JOINT_INDEX_METACARPAL;
float val = finger_joint_size[joint] * hand_finger_size[finger] * .5 * mul;
gr[set_idx].radius = val;
}
}
// The radius of each joint is the distance from the joint to the skin in meters. -OpenXR spec.
set->values.hand_joint_set_default[XRT_HAND_JOINT_PALM].radius =
.032f * .5f; // Measured my palm thickness with calipers
set->values.hand_joint_set_default[XRT_HAND_JOINT_WRIST].radius =
.040f * .5f; // Measured my wrist thickness with calipers
}
/*
*
* Member functions.
*
*/
HandTracking::HandTracking()
{
this->base.process = &HandTracking::cCallbackProcess;
this->base.destroy = &HandTracking::cCallbackDestroy;
}
HandTracking::~HandTracking()
{
release_onnx_wrap(&this->views[0].keypoint[0]);
release_onnx_wrap(&this->views[0].keypoint[1]);
release_onnx_wrap(&this->views[0].detection);
release_onnx_wrap(&this->views[1].keypoint[0]);
release_onnx_wrap(&this->views[1].keypoint[1]);
release_onnx_wrap(&this->views[1].detection);
u_worker_group_reference(&this->group, NULL);
t_stereo_camera_calibration_reference(&this->calib, NULL);
free_kinematic_hand(&this->kinematic_hands[0]);
free_kinematic_hand(&this->kinematic_hands[1]);
u_var_remove_root((void *)&this->base);
u_frame_times_widget_teardown(&this->ft_widget);
}
void
HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync,
struct xrt_frame *left_frame,
struct xrt_frame *right_frame,
struct xrt_hand_joint_set *out_left_hand,
struct xrt_hand_joint_set *out_right_hand,
uint64_t *out_timestamp_ns)
{
XRT_TRACE_MARKER();
HandTracking *htd = (struct HandTracking *)ht_sync;
htd->current_frame_timestamp = left_frame->timestamp;
struct xrt_hand_joint_set *out_xrt_hands[2] = {out_left_hand, out_right_hand};
/*
* Setup views.
*/
assert(left_frame->width == right_frame->width);
assert(left_frame->height == right_frame->height);
const int full_height = left_frame->height;
const int full_width = left_frame->width * 2;
const int view_width = htd->one_view_size_px.w;
const int view_height = htd->one_view_size_px.h;
assert(full_height == view_height);
htd->multiply_px_coord_for_undistort = 1.0f;
const cv::Size full_size = cv::Size(full_width, full_height);
const cv::Size view_size = cv::Size(view_width, view_height);
const cv::Point view_offsets[2] = {cv::Point(0, 0), cv::Point(view_width, 0)};
htd->views[0].run_model_on_this = cv::Mat(view_size, CV_8UC1, left_frame->data, left_frame->stride);
htd->views[1].run_model_on_this = cv::Mat(view_size, CV_8UC1, right_frame->data, right_frame->stride);
*out_timestamp_ns = htd->current_frame_timestamp; // No filtering, fine to do this now. Also just a reminder
// that this took you 2 HOURS TO DEBUG THAT ONE TIME.
htd->debug_scribble = u_sink_debug_is_active(&htd->debug_sink);
cv::Mat debug_output = {};
xrt_frame *debug_frame = nullptr;
if (htd->debug_scribble) {
u_frame_create_one_off(XRT_FORMAT_R8G8B8, full_width, full_height, &debug_frame);
debug_frame->timestamp = htd->current_frame_timestamp;
debug_output = cv::Mat(full_size, CV_8UC3, debug_frame->data, debug_frame->stride);
cv::cvtColor(htd->views[0].run_model_on_this, debug_output(cv::Rect(view_offsets[0], view_size)),
cv::COLOR_GRAY2BGR);
cv::cvtColor(htd->views[1].run_model_on_this, debug_output(cv::Rect(view_offsets[1], view_size)),
cv::COLOR_GRAY2BGR);
htd->views[0].debug_out_to_this = debug_output(cv::Rect(view_offsets[0], view_size));
htd->views[1].debug_out_to_this = debug_output(cv::Rect(view_offsets[1], view_size));
}
u_worker_group_push(htd->group, run_hand_detection, &htd->views[0]);
u_worker_group_push(htd->group, run_hand_detection, &htd->views[1]);
u_worker_group_wait_all(htd->group);
for (int hand_idx = 0; hand_idx < 2; hand_idx++) {
if (!(htd->views[0].det_outputs[hand_idx].found && htd->views[1].det_outputs[hand_idx].found)) {
// If we don't find this hand in *both* views
out_xrt_hands[hand_idx]->is_active = false;
continue;
}
out_xrt_hands[hand_idx]->is_active = true;
for (int view_idx = 0; view_idx < 2; view_idx++) {
struct keypoint_estimation_run_info &inf = htd->views[view_idx].run_info[hand_idx];
inf.view = &htd->views[view_idx];
inf.hand_idx = hand_idx;
u_worker_group_push(htd->group, htd->keypoint_estimation_run_func,
&htd->views[view_idx].run_info[hand_idx]);
}
}
u_worker_group_wait_all(htd->group);
for (int hand_idx = 0; hand_idx < 2; hand_idx++) {
if (!out_xrt_hands[hand_idx]->is_active) {
htd->last_frame_hand_detected[hand_idx] = false;
continue;
}
kine::kinematic_hand_4f *hand = htd->kinematic_hands[hand_idx];
if (!htd->last_frame_hand_detected[hand_idx]) {
kine::init_hardcoded_statics(hand, htd->hand_size / 100.0f);
}
Hand2D *hand_in_left_view = &htd->views[0].keypoint_outputs[hand_idx].hand_tan_space;
Hand2D *hand_in_right_view = &htd->views[1].keypoint_outputs[hand_idx].hand_tan_space;
Hand3D hand_3d;
struct xrt_hand_joint_set *put_in_set = out_xrt_hands[hand_idx];
hgJointDisparityMath(htd, hand_in_left_view, hand_in_right_view, &hand_3d);
applyThumbIndexDrag(&hand_3d);
applyJointWidths(htd, put_in_set);
kine::optimize_new_frame(hand_3d.kps, hand, put_in_set, hand_idx);
math_pose_identity(&put_in_set->hand_pose.pose);
switch (htd->output_space) {
case MERCURY_OUTPUT_SPACE_LEFT_CAMERA: {
put_in_set->hand_pose.pose.orientation = htd->stereo_camera_to_left_camera;
} break;
case MERCURY_OUTPUT_SPACE_CENTER_OF_STEREO_CAMERA: {
put_in_set->hand_pose.pose.orientation.w = 1.0;
put_in_set->hand_pose.pose.position.x = -htd->baseline / 2;
} break;
}
put_in_set->hand_pose.relation_flags = valid_flags_ht;
}
u_frame_times_widget_push_sample(&htd->ft_widget, htd->current_frame_timestamp);
if (htd->debug_scribble) {
u_sink_debug_push_frame(&htd->debug_sink, debug_frame);
xrt_frame_reference(&debug_frame, NULL);
}
}
void
HandTracking::cCallbackDestroy(t_hand_tracking_sync *ht_sync)
{
HandTracking *ht_ptr = &HandTracking::fromC(ht_sync);
delete ht_ptr;
}
} // namespace xrt::tracking::hand::mercury
/*
*
* 'Exported' functions.
*
*/
extern "C" t_hand_tracking_sync *
t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib, mercury_output_space output_space)
{
XRT_TRACE_MARKER();
auto htd = new xrt::tracking::hand::mercury::HandTracking();
// Setup logging first. We like logging.
htd->log_level = xrt::tracking::hand::mercury::debug_get_log_option_mercury_log();
bool use_simdr = xrt::tracking::hand::mercury::debug_get_bool_option_mercury_use_simdr_keypoint();
htd->output_space = output_space;
/*
* Get configuration
*/
assert(calib != NULL);
htd->calib = NULL;
// We have to reference it; getCalibration points at it.
t_stereo_camera_calibration_reference(&htd->calib, calib);
getCalibration(htd, calib);
getModelsFolder(htd);
#ifdef USE_NCNN
{
htd->net = ncnn_net_create();
ncnn_option_t opt = ncnn_option_create();
ncnn_option_set_use_vulkan_compute(opt, 1);
ncnn_net_set_option(htd->net, opt);
ncnn_net_load_param(htd->net, "/3/clones/ncnn/batch_size_2.param");
ncnn_net_load_model(htd->net, "/3/clones/ncnn/batch_size_2.bin");
}
{
htd->net_keypoint = ncnn_net_create();
ncnn_option_t opt = ncnn_option_create();
ncnn_option_set_use_vulkan_compute(opt, 1);
ncnn_net_set_option(htd->net_keypoint, opt);
ncnn_net_load_param(
htd->net_keypoint,
"/home/moses/.local/share/monado/hand-tracking-models/grayscale_keypoint_new.param");
ncnn_net_load_model(htd->net_keypoint,
"/home/moses/.local/share/monado/hand-tracking-models/grayscale_keypoint_new.bin");
}
#endif
htd->views[0].htd = htd;
htd->views[1].htd = htd; // :)
init_hand_detection(htd, &htd->views[0].detection);
init_hand_detection(htd, &htd->views[1].detection);
if (use_simdr) {
init_keypoint_estimation(htd, &htd->views[0].keypoint[0]);
init_keypoint_estimation(htd, &htd->views[0].keypoint[1]);
init_keypoint_estimation(htd, &htd->views[1].keypoint[0]);
init_keypoint_estimation(htd, &htd->views[1].keypoint[1]);
htd->keypoint_estimation_run_func = xrt::tracking::hand::mercury::run_keypoint_estimation;
} else {
init_keypoint_estimation_new(htd, &htd->views[0].keypoint[0]);
init_keypoint_estimation_new(htd, &htd->views[0].keypoint[1]);
init_keypoint_estimation_new(htd, &htd->views[1].keypoint[0]);
init_keypoint_estimation_new(htd, &htd->views[1].keypoint[1]);
htd->keypoint_estimation_run_func = xrt::tracking::hand::mercury::run_keypoint_estimation_new;
}
htd->views[0].view = 0;
htd->views[1].view = 1;
int num_threads = 4;
htd->pool = u_worker_thread_pool_create(num_threads - 1, num_threads);
htd->group = u_worker_group_create(htd->pool);
htd->hand_size = 9.5864;
xrt::tracking::hand::mercury::kine::alloc_kinematic_hand(&htd->kinematic_hands[0]);
xrt::tracking::hand::mercury::kine::alloc_kinematic_hand(&htd->kinematic_hands[1]);
u_frame_times_widget_init(&htd->ft_widget, 10.0f, 10.0f);
u_var_add_root(htd, "Camera-based Hand Tracker", true);
u_var_add_f32(htd, &htd->hand_size, "Hand size (Centimeters between wrist and middle-proximal joint)");
u_var_add_ro_f32(htd, &htd->ft_widget.fps, "FPS!");
u_var_add_f32_timing(htd, htd->ft_widget.debug_var, "times!");
u_var_add_sink_debug(htd, &htd->debug_sink, "i");
HT_DEBUG(htd, "Hand Tracker initialized!");
return &htd->base;
}

View file

@ -0,0 +1,238 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Mercury main header!
* @author Jakob Bornecrantz <jakob@collabora.com>
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#pragma once
#include "hg_interface.h"
#include "tracking/t_calibration_opencv.hpp"
#include "xrt/xrt_defines.h"
#include "xrt/xrt_frame.h"
#include "math/m_api.h"
#include "math/m_vec2.h"
#include "math/m_vec3.h"
#include "math/m_mathinclude.h"
#include "util/u_frame_times_widget.h"
#include "util/u_logging.h"
#include "util/u_sink.h"
#include "util/u_template_historybuf.hpp"
#include "util/u_worker.h"
#include "util/u_trace_marker.h"
#include "util/u_debug.h"
#include "util/u_frame.h"
#include "util/u_var.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <opencv2/opencv.hpp>
#include <core/session/onnxruntime_c_api.h>
#include "kine/kinematic_interface.hpp"
namespace xrt::tracking::hand::mercury {
using namespace xrt::auxiliary::util;
DEBUG_GET_ONCE_LOG_OPTION(mercury_log, "MERCURY_LOG", U_LOGGING_WARN)
DEBUG_GET_ONCE_BOOL_OPTION(mercury_use_simdr_keypoint, "MERCURY_USE_SIMDR_KEYPOINT", false)
#define HT_TRACE(htd, ...) U_LOG_IFL_T(htd->log_level, __VA_ARGS__)
#define HT_DEBUG(htd, ...) U_LOG_IFL_D(htd->log_level, __VA_ARGS__)
#define HT_INFO(htd, ...) U_LOG_IFL_I(htd->log_level, __VA_ARGS__)
#define HT_WARN(htd, ...) U_LOG_IFL_W(htd->log_level, __VA_ARGS__)
#define HT_ERROR(htd, ...) U_LOG_IFL_E(htd->log_level, __VA_ARGS__)
#undef USE_NCNN
// Forward declaration for ht_view
struct HandTracking;
struct ht_view;
enum Joint21
{
WRIST = 0,
THMB_MCP = 1,
THMB_PXM = 2,
THMB_DST = 3,
THMB_TIP = 4,
INDX_PXM = 5,
INDX_INT = 6,
INDX_DST = 7,
INDX_TIP = 8,
MIDL_PXM = 9,
MIDL_INT = 10,
MIDL_DST = 11,
MIDL_TIP = 12,
RING_PXM = 13,
RING_INT = 14,
RING_DST = 15,
RING_TIP = 16,
LITL_PXM = 17,
LITL_INT = 18,
LITL_DST = 19,
LITL_TIP = 20
};
// Using the compiler to stop me from getting 2D space mixed up with 3D space.
struct Hand2D
{
struct xrt_vec2 kps[21];
};
struct Hand3D
{
struct xrt_vec3 kps[21];
};
struct onnx_wrap
{
const OrtApi *api = nullptr;
OrtEnv *env = nullptr;
OrtMemoryInfo *meminfo = nullptr;
OrtSession *session = nullptr;
OrtValue *tensor = nullptr;
float *data;
int64_t input_shape[4];
const char *input_name;
};
struct det_output
{
xrt_vec2 center;
float size_px;
bool found;
};
struct keypoint_output
{
Hand2D hand_px_coord;
Hand2D hand_tan_space;
};
struct keypoint_estimation_run_info
{
ht_view *view;
bool hand_idx;
};
struct ht_view
{
HandTracking *htd;
onnx_wrap detection;
onnx_wrap keypoint[2];
int view;
cv::Mat distortion;
cv::Matx<double, 3, 3> cameraMatrix;
cv::Matx33d rotate_camera_to_stereo_camera; // R1 or R2
cv::Mat run_model_on_this;
cv::Mat debug_out_to_this;
struct det_output det_outputs[2]; // left, right
struct keypoint_estimation_run_info run_info[2]; // Stupid
struct keypoint_output keypoint_outputs[2];
};
/*!
* Main class of Mercury hand tracking.
*
* @ingroup aux_tracking
*/
struct HandTracking
{
public:
// Base thing, has to be first.
t_hand_tracking_sync base = {};
struct u_sink_debug debug_sink = {};
float multiply_px_coord_for_undistort;
bool use_fisheye;
enum mercury_output_space output_space;
struct t_stereo_camera_calibration *calib;
struct xrt_size one_view_size_px = {};
#ifdef USE_NCNN
ncnn_net_t net;
ncnn_net_t net_keypoint;
#endif
struct ht_view views[2] = {};
u_worker_thread_pool *pool;
u_worker_group *group;
float hand_size;
float baseline = {};
struct xrt_quat stereo_camera_to_left_camera = {};
uint64_t current_frame_timestamp = {}; // SUPER dumb.
// Change this whenever you want
volatile bool debug_scribble = true;
char models_folder[1024];
enum u_logging_level log_level = U_LOGGING_INFO;
kine::kinematic_hand_4f *kinematic_hands[2];
bool last_frame_hand_detected[2];
xrt_frame *debug_frame;
void (*keypoint_estimation_run_func)(void *);
u_frame_times_widget ft_widget;
public:
explicit HandTracking();
~HandTracking();
static inline HandTracking &
fromC(t_hand_tracking_sync *ht_sync)
{
return *reinterpret_cast<HandTracking *>(ht_sync);
}
static void
cCallbackProcess(struct t_hand_tracking_sync *ht_sync,
struct xrt_frame *left_frame,
struct xrt_frame *right_frame,
struct xrt_hand_joint_set *out_left_hand,
struct xrt_hand_joint_set *out_right_hand,
uint64_t *out_timestamp_ns);
static void
cCallbackDestroy(t_hand_tracking_sync *ht_sync);
};
} // namespace xrt::tracking::hand::mercury

View file

@ -0,0 +1,211 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Defines for kinematic model
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#pragma once
#include "math/m_api.h"
#include <stddef.h>
#include <unistd.h>
#include <Eigen/Core>
#include <Eigen/Geometry>
#include "math/m_eigen_interop.hpp"
#include "math/m_api.h"
#include "math/m_space.h"
#include "math/m_vec3.h"
#include "util/u_trace_marker.h"
#include "os/os_time.h"
#include "util/u_time.h"
#include <iostream>
#include <vector>
#include <stdio.h>
#include <stddef.h>
#include <unistd.h>
using namespace xrt::auxiliary::math;
namespace xrt::tracking::hand::mercury::kine {
// Not doing enum class, I *want* to allow implicit conversions.
namespace Joint21 {
enum Joint21
{
WRIST = 0,
THMB_MCP = 1,
THMB_PXM = 2,
THMB_DST = 3,
THMB_TIP = 4,
INDX_PXM = 5,
INDX_INT = 6,
INDX_DST = 7,
INDX_TIP = 8,
MIDL_PXM = 9,
MIDL_INT = 10,
MIDL_DST = 11,
MIDL_TIP = 12,
RING_PXM = 13,
RING_INT = 14,
RING_DST = 15,
RING_TIP = 16,
LITL_PXM = 17,
LITL_INT = 18,
LITL_DST = 19,
LITL_TIP = 20
};
}
enum class HandJoint26KP : int
{
PALM = 0,
WRIST = 1,
THUMB_METACARPAL = 2,
THUMB_PROXIMAL = 3,
THUMB_DISTAL = 4,
THUMB_TIP = 5,
INDEX_METACARPAL = 6,
INDEX_PROXIMAL = 7,
INDEX_INTERMEDIATE = 8,
INDEX_DISTAL = 9,
INDEX_TIP = 10,
MIDDLE_METACARPAL = 11,
MIDDLE_PROXIMAL = 12,
MIDDLE_INTERMEDIATE = 13,
MIDDLE_DISTAL = 14,
MIDDLE_TIP = 15,
RING_METACARPAL = 16,
RING_PROXIMAL = 17,
RING_INTERMEDIATE = 18,
RING_DISTAL = 19,
RING_TIP = 20,
LITTLE_METACARPAL = 21,
LITTLE_PROXIMAL = 22,
LITTLE_INTERMEDIATE = 23,
LITTLE_DISTAL = 24,
LITTLE_TIP = 25,
};
enum HandFinger
{
HF_THUMB = 0,
HF_INDEX = 1,
HF_MIDDLE = 2,
HF_RING = 3,
HF_LITTLE = 4,
};
enum FingerBone
{
// FB_CARPAL,
FB_METACARPAL,
FB_PROXIMAL,
FB_INTERMEDIATE,
FB_DISTAL
};
enum ThumbBone
{
// TB_CARPAL,
TB_METACARPAL,
TB_PROXIMAL,
TB_DISTAL
};
struct wct_t
{
float waggle;
float curl;
float twist;
};
// IGNORE THE FIRST BONE in the wrist.
struct bone_t
{
// will always be 0, 0, -(some amount) for mcp, pxm, int, dst
// will be random amounts for carpal bones
Eigen::Vector3f trans_from_last_joint;
wct_t rot_to_next_joint_wct;
Eigen::Quaternionf rot_to_next_joint_quat;
// Translation from last joint to this joint, rotation that takes last joint's -z and points it at next joint
Eigen::Affine3f bone_relation;
// Imagine it like transforming an object at the origin to this bone's position/orientation.
Eigen::Affine3f world_pose;
struct
{
Eigen::Affine3f *world_pose;
Eigen::Affine3f *bone_relation;
} parent;
wct_t joint_limit_min;
wct_t joint_limit_max;
// What keypoint out of the ML model does this correspond to?
Joint21::Joint21 keypoint_idx_21;
// float static_weight
// float model_confidence_weight ?
};
// translation: wrist to mcp (-z and x). rotation: from wrist space to metacarpal space
// translation: mcp to pxm (just -z). rotation: from mcp to pxm space
struct finger_t
{
bone_t bones[5];
};
typedef struct kinematic_hand_4f
{
// The distance from the wrist to the middle-proximal joint - this sets the overall hand size.
float size;
// Wrist pose, scaled by size.
Eigen::Affine3f wrist_relation;
finger_t fingers[5];
xrt_vec3 t_jts[21];
Eigen::Matrix<float, 3, 21> t_jts_as_mat;
Eigen::Matrix<float, 3, 21> kinematic;
// // Pointers to the locations of
// struct xrt_vec3 *loc_ptrs[21];
} kinematic_hand_4f;
#define CONTINUE_IF_HIDDEN_THUMB \
if (finger_idx == 0 && bone_idx == 0) { \
continue; \
}
} // namespace xrt::tracking::hand::mercury::kine

View file

@ -0,0 +1,230 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Setter-upper for kinematic model
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#pragma once
#include "kinematic_defines.hpp"
#include "kinematic_tiny_math.hpp"
namespace xrt::tracking::hand::mercury::kine {
void
bone_update_quat_and_matrix(struct bone_t *bone)
{
wct_to_quat(bone->rot_to_next_joint_wct, (xrt_quat *)&bone->rot_to_next_joint_quat);
// To make sure that the bottom-right cell is 1.0f. You can do this somewhere else (just once) for speed
bone->bone_relation.setIdentity();
bone->bone_relation.translation() = bone->trans_from_last_joint;
bone->bone_relation.linear() = Eigen::Matrix3f(bone->rot_to_next_joint_quat);
}
void
eval_chain(std::vector<const Eigen::Affine3f *> &chain, Eigen::Affine3f &out)
{
out.setIdentity();
for (const Eigen::Affine3f *new_matrix : chain) {
// I have NO IDEA if it's correct to left-multiply or right-multiply.
// I need to go to school for linear algebra for a lot longer...
out = out * (*new_matrix);
}
}
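// For what it's worth: with the chain ordered root-to-leaf, accumulating as
// out = out * M gives M_root * ... * M_leaf, which is the parent-first composition
// Eigen expects for column vectors - the same order _statics_init_world_poses()
// uses below (parent.world_pose * bone_relation).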
void
_statics_init_world_parents(kinematic_hand_4f *hand)
{
for (int finger = 0; finger < 5; finger++) {
finger_t *of = &hand->fingers[finger];
of->bones[0].parent.world_pose = &hand->wrist_relation;
// Does this make any sense? Be careful here...
of->bones[0].parent.bone_relation = &hand->wrist_relation;
for (int bone = 1; bone < 5; bone++) {
of->bones[bone].parent.world_pose = &of->bones[bone - 1].world_pose;
of->bones[bone].parent.bone_relation = &of->bones[bone - 1].bone_relation;
}
}
}
void
_statics_init_world_poses(kinematic_hand_4f *hand)
{
XRT_TRACE_MARKER();
for (int finger = 0; finger < 5; finger++) {
finger_t *of = &hand->fingers[finger];
for (int bone = 0; bone < 5; bone++) {
of->bones[bone].world_pose =
(*of->bones[bone].parent.world_pose) * of->bones[bone].bone_relation;
}
}
}
void
_statics_init_loc_ptrs(kinematic_hand_4f *hand)
{
hand->fingers[0].bones[1].keypoint_idx_21 = Joint21::THMB_MCP;
hand->fingers[0].bones[2].keypoint_idx_21 = Joint21::THMB_PXM;
hand->fingers[0].bones[3].keypoint_idx_21 = Joint21::THMB_DST;
hand->fingers[0].bones[4].keypoint_idx_21 = Joint21::THMB_TIP;
hand->fingers[1].bones[1].keypoint_idx_21 = Joint21::INDX_PXM;
hand->fingers[1].bones[2].keypoint_idx_21 = Joint21::INDX_INT;
hand->fingers[1].bones[3].keypoint_idx_21 = Joint21::INDX_DST;
hand->fingers[1].bones[4].keypoint_idx_21 = Joint21::INDX_TIP;
hand->fingers[2].bones[1].keypoint_idx_21 = Joint21::MIDL_PXM;
hand->fingers[2].bones[2].keypoint_idx_21 = Joint21::MIDL_INT;
hand->fingers[2].bones[3].keypoint_idx_21 = Joint21::MIDL_DST;
hand->fingers[2].bones[4].keypoint_idx_21 = Joint21::MIDL_TIP;
hand->fingers[3].bones[1].keypoint_idx_21 = Joint21::RING_PXM;
hand->fingers[3].bones[2].keypoint_idx_21 = Joint21::RING_INT;
hand->fingers[3].bones[3].keypoint_idx_21 = Joint21::RING_DST;
hand->fingers[3].bones[4].keypoint_idx_21 = Joint21::RING_TIP;
hand->fingers[4].bones[1].keypoint_idx_21 = Joint21::LITL_PXM;
hand->fingers[4].bones[2].keypoint_idx_21 = Joint21::LITL_INT;
hand->fingers[4].bones[3].keypoint_idx_21 = Joint21::LITL_DST;
hand->fingers[4].bones[4].keypoint_idx_21 = Joint21::LITL_TIP;
}
void
_statics_joint_limits(kinematic_hand_4f *hand)
{
{
finger_t *t = &hand->fingers[0];
t->bones[1].joint_limit_max.waggle = rad(70);
t->bones[1].joint_limit_min.waggle = rad(-70);
t->bones[1].joint_limit_max.curl = rad(70);
t->bones[1].joint_limit_min.curl = rad(-70);
t->bones[1].joint_limit_max.twist = rad(40);
t->bones[1].joint_limit_min.twist = rad(-40);
//
t->bones[2].joint_limit_max.waggle = rad(0);
t->bones[2].joint_limit_min.waggle = rad(0);
t->bones[2].joint_limit_max.curl = rad(50);
t->bones[2].joint_limit_min.curl = rad(-100);
t->bones[2].joint_limit_max.twist = rad(0);
t->bones[2].joint_limit_min.twist = rad(0);
//
t->bones[3].joint_limit_max.waggle = rad(0);
t->bones[3].joint_limit_min.waggle = rad(0);
t->bones[3].joint_limit_max.curl = rad(50);
t->bones[3].joint_limit_min.curl = rad(-100);
t->bones[3].joint_limit_max.twist = rad(0);
t->bones[3].joint_limit_min.twist = rad(0);
}
}
// Exported:
void
init_hardcoded_statics(kinematic_hand_4f *hand, float size)
{
memset(hand, 0, sizeof(kinematic_hand_4f));
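// Note: the memset just zeroes the struct; every pose matrix is rebuilt below before use.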
hand->size = size;
hand->wrist_relation.setIdentity();
hand->wrist_relation.linear() *= hand->size;
{
finger_t *t = &hand->fingers[0];
// Hidden extra bone at the thumb root that keeps the per-finger loops uniform. Note the extra rotation angling the thumb column away from the fingers.
t->bones[0].rot_to_next_joint_wct = {rad(-45), rad(-10), rad(-70)};
t->bones[0].trans_from_last_joint = {0.33097, 0, -0.25968};
t->bones[1].rot_to_next_joint_wct = {0, rad(-5), 0};
t->bones[2].rot_to_next_joint_wct = {0, rad(-25), 0};
t->bones[2].trans_from_last_joint.z() = -0.389626;
t->bones[3].rot_to_next_joint_wct = {0, rad(-25), 0};
t->bones[3].trans_from_last_joint.z() = -0.311176;
t->bones[4].trans_from_last_joint.z() = -0.232195;
}
float wagg = -0.19f;
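// Phalanx lengths (proximal, intermediate, distal) in hand-size units for index through little; copied into trans_from_last_joint.z() of bones 2..4 below. Negative because the fingers extend along -Z.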
float finger_joints[4][3] = {
{
-0.365719,
-0.231581,
-0.201790,
},
{
-0.404486,
-0.247749,
-0.210121,
},
{
-0.365639,
-0.225666,
-0.187089,
},
{
-0.278197,
-0.176178,
-0.157566,
},
};
for (int finger = HF_INDEX; finger <= HF_LITTLE; finger++) {
finger_t *of = &hand->fingers[finger];
of->bones[0].rot_to_next_joint_wct.waggle = wagg;
wagg += 0.19f;
of->bones[FB_PROXIMAL].rot_to_next_joint_wct.curl = rad(-5);
of->bones[FB_INTERMEDIATE].rot_to_next_joint_wct.curl = rad(-5);
of->bones[FB_DISTAL].rot_to_next_joint_wct.curl = rad(-5);
for (int i = 0; i < 3; i++) {
int bone = i + 2;
of->bones[bone].trans_from_last_joint.z() = finger_joints[finger - 1][i];
}
}
hand->fingers[1].bones[1].trans_from_last_joint.z() = -0.66;
hand->fingers[2].bones[1].trans_from_last_joint.z() = -0.645;
hand->fingers[3].bones[1].trans_from_last_joint.z() = -0.58;
hand->fingers[4].bones[1].trans_from_last_joint.z() = -0.52;
hand->fingers[HF_INDEX].bones[0].trans_from_last_joint = {0.16926, 0, -0.34437};
hand->fingers[HF_MIDDLE].bones[0].trans_from_last_joint = {0.034639, 0, -0.35573};
hand->fingers[HF_RING].bones[0].trans_from_last_joint = {-0.063625, 0, -0.34164};
hand->fingers[HF_LITTLE].bones[0].trans_from_last_joint = {-0.1509, 0, -0.30373};
for (int finger_idx = 0; finger_idx < 5; finger_idx++) {
for (int bone_idx = 0; bone_idx < 5; bone_idx++) {
bone_update_quat_and_matrix(&hand->fingers[finger_idx].bones[bone_idx]);
}
}
_statics_init_world_parents(hand);
_statics_init_world_poses(hand);
_statics_init_loc_ptrs(hand);
_statics_joint_limits(hand);
}
} // namespace xrt::tracking::hand::mercury::kine

View file

@@ -0,0 +1,34 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Interface for kinematic model
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#pragma once
#include "math/m_api.h"
#include "xrt/xrt_defines.h"
namespace xrt::tracking::hand::mercury::kine {
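// Opaque optimizer state for one kinematic hand.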
struct kinematic_hand_4f;
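// Allocate a new kinematic hand; release it with free_kinematic_hand().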
void
alloc_kinematic_hand(kinematic_hand_4f **out_kinematic_hand);
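// Fit the model to 21 observed joint positions and write the result into out_set; hand_idx is 0 for the left hand, 1 for the right.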
void
optimize_new_frame(xrt_vec3 model_joints_3d[21],
kinematic_hand_4f *hand,
struct xrt_hand_joint_set *out_set,
int hand_idx);
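// Reset the hand to the hardcoded rest pose; size scales the whole hand (presumably in metres).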
void
init_hardcoded_statics(kinematic_hand_4f *hand, float size = 0.095864f);
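// Free a hand allocated with alloc_kinematic_hand().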
void
free_kinematic_hand(kinematic_hand_4f **kinematic_hand);
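/*
 * Minimal usage sketch (hypothetical caller; `model_joints_3d` stands in for
 * the 21 joint positions estimated by the model, error handling omitted):
 *
 *   kinematic_hand_4f *hand = NULL;
 *   alloc_kinematic_hand(&hand);
 *   init_hardcoded_statics(hand); // or pass a measured hand size
 *
 *   struct xrt_hand_joint_set set;
 *   optimize_new_frame(model_joints_3d, hand, &set, 0); // 0 = left, 1 = right
 *
 *   free_kinematic_hand(&hand);
 */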
} // namespace xrt::tracking::hand::mercury::kine

View file

@@ -0,0 +1,473 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Main code for kinematic model
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#include "kinematic_defines.hpp"
#include "kinematic_tiny_math.hpp"
#include "kinematic_hand_init.hpp"
#include <Eigen/Core>
#include <Eigen/LU>
#include <Eigen/SVD>
#include <Eigen/src/Geometry/Umeyama.h>
namespace xrt::tracking::hand::mercury::kine {
static void
_two_set_ele(Eigen::Matrix<float, 3, 21> &out_joints, const Eigen::Affine3f &jt, int idx)
{
// Copy one joint's world translation into the corresponding column.
out_joints.col(idx) = jt.translation();
}
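// "two" rigidly re-fits the whole hand: gather the model's 21 joint positions, solve for the wrist transform that best aligns them with the observed joints, and refresh the world poses.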
static void
two(struct kinematic_hand_4f *hand)
{
XRT_TRACE_MARKER();
int i = 0;
_two_set_ele(hand->kinematic, hand->wrist_relation, i);
for (int finger_idx = 0; finger_idx < 5; finger_idx++) {
for (int bone_idx = 1; bone_idx < 5; bone_idx++) {
i++;
_two_set_ele(hand->kinematic, hand->fingers[finger_idx].bones[bone_idx].world_pose, i);
}
}
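// Eigen::umeyama solves for the least-squares similarity transform between the two point sets; with the last argument false the scale is fixed at 1, so this is the best rigid fit of the model onto the observations.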
Eigen::Affine3f aff3d;
aff3d = Eigen::umeyama(hand->kinematic, hand->t_jts_as_mat, false);
hand->wrist_relation = aff3d * hand->wrist_relation;
_statics_init_world_poses(hand);
}
#if 0
#define DOT_EPSILON 0.000001f
Eigen::Quaternionf
fast_simple_rotation(const Eigen::Vector3f &from_un, const Eigen::Vector3f &to_un)
{
// Slooowwwwwww....
Eigen::Vector3f from = from_un.normalized();
Eigen::Vector3f to = to_un.normalized();
Eigen::Vector3f axis = from.cross(to);
float dot = from.dot(to);
if (dot < (-1.0f + DOT_EPSILON)) {
return Eigen::Quaternionf(0, 1, 0, 0);
};
Eigen::Quaternionf result((dot + 1.0f) * 0.5f, axis.x() * 0.5f, axis.y() * 0.5f, axis.z() * 0.5f); // Eigen's scalar ctor is (w, x, y, z).
result.normalize();
return result;
}
Eigen::Matrix3f
moses_fast_simple_rotation(const Eigen::Vector3f &from_un, const Eigen::Vector3f &to_un)
{
// Slooowwwwwww....
Eigen::Vector3f from = from_un.normalized();
Eigen::Vector3f to = to_un.normalized();
Eigen::Vector3f axis = from.cross(to).normalized();
float angle = acos(from.dot(to));
// U_LOG_E("HERE %f", angle);
// std::cout << to << std::endl;
// std::cout << from << std::endl;
// std::cout << axis << std::endl;
// std::cout << angle << std::endl;
Eigen::AngleAxisf ax;
ax.axis() = axis;
ax.angle() = angle;
return ax.matrix();
}
#endif
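// One cyclic-coordinate-descent-style step: average the observed targets and the current model positions of all child joints, bring both centroids into bone-local space, and rotate the bone so they line up. (clamp_to_x_axis_rotation is currently unused; callers apply the clamps separately afterwards.)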
static void
do_it_for_bone(struct kinematic_hand_4f *hand, int finger_idx, int bone_idx, bool clamp_to_x_axis_rotation)
{
finger_t *of = &hand->fingers[finger_idx];
bone_t *bone = &hand->fingers[finger_idx].bones[bone_idx];
int num_children = 0;
Eigen::Vector3f kine = Eigen::Vector3f::Zero();
Eigen::Vector3f target = Eigen::Vector3f::Zero();
for (int idx = bone_idx + 1; idx < 5; idx++) {
num_children++;
target += map_vec3(hand->t_jts[of->bones[idx].keypoint_idx_21]);
kine += of->bones[idx].world_pose.translation();
}
kine *= 1.0f / (float)num_children;
target *= 1.0f / (float)num_children;
kine = bone->world_pose.inverse() * kine;
target = bone->world_pose.inverse() * target;
kine.normalize();
target.normalize();
Eigen::Matrix3f rot = Eigen::Quaternionf().setFromTwoVectors(kine, target).matrix();
bone->bone_relation.linear() = bone->bone_relation.linear() * rot;
}
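// Constrain a hinge: rotate the bone's local frame so its rotation is purely about the X axis, then optionally clamp that hinge angle to [min_angle, max_angle].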
static void
clamp_to_x_axis(struct kinematic_hand_4f *hand,
int finger_idx,
int bone_idx,
bool clamp_angle = false,
float min_angle = 0,
float max_angle = 0)
{
bone_t *bone = &hand->fingers[finger_idx].bones[bone_idx];
// U_LOG_E("before_anything");
// std::cout << bone->bone_relation.linear().matrix() << std::endl;
Eigen::Vector3f new_x = bone->bone_relation.linear() * Eigen::Vector3f::UnitX();
Eigen::Matrix3f correction_rot =
Eigen::Quaternionf().setFromTwoVectors(new_x.normalized(), Eigen::Vector3f::UnitX()).matrix();
// U_LOG_E("correction");
// std::cout << correction_rot << std::endl;
// U_LOG_E("correction times new_x");
// std::cout << correction_rot * new_x << "\n";
// U_LOG_E("before, where does relation point x");
// std::cout << bone->bone_relation.rotation() * Eigen::Vector3f::UnitX() << "\n";
// Weird that we're left-multiplying here, I don't know why. But it works.
bone->bone_relation.linear() = correction_rot * bone->bone_relation.linear();
// U_LOG_E("after_correction");
// std::cout << bone->bone_relation.linear() << std::endl;
// U_LOG_E("after, where does relation point x");
// std::cout << bone->bone_relation.linear() * Eigen::Vector3f::UnitX() << "\n";
if (clamp_angle) {
//!@optimize: get rid of 1 and 2, we only need 0.
// Signed hinge angle: asin of the X component of cross(-Z, rot * -Z).
// U_LOG_E("before X clamp");
// std::cout << bone->bone_relation.linear() << "\n";
// auto euler = bone->bone_relation.linear().eulerAngles(0, 1, 2);
Eigen::Vector3f cross =
(-Eigen::Vector3f::UnitZ()).cross(bone->bone_relation.linear() * (-Eigen::Vector3f::UnitZ()));
// std::cout << bone->bone_relation.linear() << "\n";
// U_LOG_E("eluer before clamp: %f %f %f %f %f", min_angle, max_angle, euler(0), euler(1), euler(2));
// U_LOG_E("eluer before clamp: %f %f %f %f %f %f", euler(0), euler(1), euler(2), cross(0), cross(1) ,
// cross(2));
//!@optimize Move the asin into constexpr land
// (Better: precompute the sine of the joint limits and clamp cross(0) directly, skipping the asin.)
float rotation_value = asin(cross(0));
//!@bug If the value falls outside the allowed range, it should snap to whichever limit it is *closest to* in rotation space; for angles that wrap around, that is not the same as a plain scalar clamp.
clamp(&rotation_value, min_angle, max_angle);
// U_LOG_E("eluer after clamp: %f", rotation_value);
Eigen::Matrix3f n;
n = Eigen::AngleAxisf(rotation_value, Eigen::Vector3f::UnitX());
bone->bone_relation.linear() = n;
// U_LOG_E("after X clamp");
// std::cout << n << "\n";
}
}
// This is effectively a swing-twist decomposition about the Z axis: peel off the twist, clamp it, then clamp the swing in tangent space.
static void
clamp_proximals(struct kinematic_hand_4f *hand,
int finger_idx,
int bone_idx,
float max_swing_angle = 0,
float tanangle_left = tan(rad(-20)),
float tanangle_right = tan(rad(20)),
float tanangle_curled = tan(rad(-89)), // Careful: tan() diverges as this limit approaches -90 degrees.
float tanangle_uncurled = tan(rad(30)))
{
bone_t *bone = &hand->fingers[finger_idx].bones[bone_idx];
Eigen::Matrix3f rot = bone->bone_relation.linear();
// U_LOG_E("rot");
// std::cout << rot << "\n";
Eigen::Vector3f our_z = rot * (-Eigen::Vector3f::UnitZ());
Eigen::Matrix3f simple = Eigen::Quaternionf().setFromTwoVectors(-Eigen::Vector3f::UnitZ(), our_z).matrix();
// U_LOG_E("simple");
// std::cout << simple << "\n";
Eigen::Matrix3f twist = rot * simple.inverse();
// U_LOG_E("twist");
// std::cout << twist << "\n";
// U_LOG_E("twist_axis");
Eigen::AngleAxisf twist_aax = Eigen::AngleAxisf(twist);
// std::cout << twist_aax.axis() << "\n";
// U_LOG_E("twist_angle");
// std::cout << twist_aax.angle() << "\n";
// U_LOG_E("all together now");
// std::cout << twist * simple << "\n";
if (fabs(twist_aax.angle()) > max_swing_angle) {
// Clamp the twist to ±max_swing_angle, keeping its sign.
twist_aax.angle() = max_swing_angle * (twist_aax.angle() / fabs(twist_aax.angle()));
// U_LOG_E("clamping twist %f", twist_aax.angle());
// std::cout << twist << "\n";
// std::cout << twist_aax.toRotationMatrix() << "\n";
}
if (our_z.z() > 0) {
//!@bug We need smarter joint limiting, limiting via tanangles is not acceptable as joints can rotate
//! outside of the 180 degree hemisphere.
our_z.z() = -0.000001f;
}
our_z *= -1.0f / our_z.z();
clamp(&our_z.x(), tanangle_left, tanangle_right);
clamp(&our_z.y(), tanangle_curled, tanangle_uncurled);
simple = Eigen::Quaternionf().setFromTwoVectors(-Eigen::Vector3f::UnitZ(), our_z.normalized()).matrix();
bone->bone_relation.linear() = twist_aax * simple;
}
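// One solver pass for a non-thumb finger: ball-joint fits for the metacarpal and proximal (with tight swing/tangent limits), then hinge fits for the intermediate and distal bones, refreshing world poses after each step.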
static void
do_it_for_finger(struct kinematic_hand_4f *hand, int finger_idx)
{
do_it_for_bone(hand, finger_idx, 0, false);
clamp_proximals(hand, finger_idx, 0, rad(4.0), tan(rad(-30)), tan(rad(30)), tan(rad(-10)), tan(rad(10)));
_statics_init_world_poses(hand);
do_it_for_bone(hand, finger_idx, 1, true);
clamp_proximals(hand, finger_idx, 1, rad(4.0));
_statics_init_world_poses(hand);
do_it_for_bone(hand, finger_idx, 2, true);
clamp_to_x_axis(hand, finger_idx, 2, true, rad(-90), rad(10));
_statics_init_world_poses(hand);
do_it_for_bone(hand, finger_idx, 3, true);
clamp_to_x_axis(hand, finger_idx, 3, true, rad(-90), rad(10));
_statics_init_world_poses(hand);
}
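// Main solver loop: alternate the global wrist re-fit with per-bone passes for the thumb and per-finger passes for the rest, for a fixed 15 iterations, then re-fit the wrist one last time.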
static void
optimize(kinematic_hand_4f *hand)
{
for (int i = 0; i < 15; i++) {
two(hand);
do_it_for_bone(hand, 0, 1, false);
clamp_proximals(hand, 0, 1, rad(70), tan(rad(-40)), tan(rad(40)), tan(rad(-40)), tan(rad(40)));
_statics_init_world_poses(hand);
do_it_for_bone(hand, 0, 2, true);
clamp_to_x_axis(hand, 0, 2, true, rad(-90), rad(40));
_statics_init_world_poses(hand);
do_it_for_bone(hand, 0, 3, true);
clamp_to_x_axis(hand, 0, 3, true, rad(-90), rad(40));
_statics_init_world_poses(hand);
two(hand);
do_it_for_finger(hand, 1);
do_it_for_finger(hand, 2);
do_it_for_finger(hand, 3);
do_it_for_finger(hand, 4);
}
two(hand);
}
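// Write one Eigen pose into the joint set, marking position and orientation as valid and tracked.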
static void
make_joint_at_matrix_left_hand(int idx, Eigen::Affine3f &pose, struct xrt_hand_joint_set *hand)
{
hand->values.hand_joint_set_default[idx].relation.relation_flags = (enum xrt_space_relation_flags)(
XRT_SPACE_RELATION_ORIENTATION_VALID_BIT | XRT_SPACE_RELATION_ORIENTATION_TRACKED_BIT |
XRT_SPACE_RELATION_POSITION_VALID_BIT | XRT_SPACE_RELATION_POSITION_TRACKED_BIT);
Eigen::Vector3f v = pose.translation();
hand->values.hand_joint_set_default[idx].relation.pose.position.x = v.x();
hand->values.hand_joint_set_default[idx].relation.pose.position.y = v.y();
hand->values.hand_joint_set_default[idx].relation.pose.position.z = v.z();
Eigen::Quaternionf q;
q = pose.rotation();
hand->values.hand_joint_set_default[idx].relation.pose.orientation.x = q.x();
hand->values.hand_joint_set_default[idx].relation.pose.orientation.y = q.y();
hand->values.hand_joint_set_default[idx].relation.pose.orientation.z = q.z();
hand->values.hand_joint_set_default[idx].relation.pose.orientation.w = q.w();
}
static void
make_joint_at_matrix_right_hand(int idx, Eigen::Affine3f &pose, struct xrt_hand_joint_set *hand)
{
hand->values.hand_joint_set_default[idx].relation.relation_flags = (enum xrt_space_relation_flags)(
XRT_SPACE_RELATION_ORIENTATION_VALID_BIT | XRT_SPACE_RELATION_ORIENTATION_TRACKED_BIT |
XRT_SPACE_RELATION_POSITION_VALID_BIT | XRT_SPACE_RELATION_POSITION_TRACKED_BIT);
Eigen::Vector3f v = pose.translation();
hand->values.hand_joint_set_default[idx].relation.pose.position.x = -v.x();
hand->values.hand_joint_set_default[idx].relation.pose.position.y = v.y();
hand->values.hand_joint_set_default[idx].relation.pose.position.z = v.z();
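// Mirror the rotation across the X = 0 plane by conjugation: R' = S * R * S with S = diag(-1, 1, 1). Negating the first row (S * R) and then the first column keeps the determinant +1, so R' is still a proper rotation.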
Eigen::Matrix3f rotation = pose.rotation();
Eigen::Matrix3f mirror_on_x = Eigen::Matrix3f::Identity();
mirror_on_x(0, 0) = -1;
rotation = mirror_on_x * rotation;
rotation(0, 0) *= -1;
rotation(1, 0) *= -1;
rotation(2, 0) *= -1;
Eigen::Quaternionf q;
q = rotation;
hand->values.hand_joint_set_default[idx].relation.pose.orientation.x = q.x();
hand->values.hand_joint_set_default[idx].relation.pose.orientation.y = q.y();
hand->values.hand_joint_set_default[idx].relation.pose.orientation.z = q.z();
hand->values.hand_joint_set_default[idx].relation.pose.orientation.w = q.w();
}
static void
make_joint_at_matrix(int idx, Eigen::Affine3f &pose, struct xrt_hand_joint_set *hand, int hand_idx)
{
if (hand_idx == 0) {
make_joint_at_matrix_left_hand(idx, pose, hand);
} else {
make_joint_at_matrix_right_hand(idx, pose, hand);
}
}
// Exported:
void
optimize_new_frame(xrt_vec3 model_joints_3d[21],
kinematic_hand_4f *hand,
struct xrt_hand_joint_set *out_set,
int hand_idx)
{
// Intake poses! The model is left-handed, so right-hand input is mirrored across the X axis.
for (int i = 0; i < 21; i++) {
if (hand_idx == 0) {
hand->t_jts[i] = model_joints_3d[i];
} else {
hand->t_jts[i].x = -model_joints_3d[i].x;
hand->t_jts[i].y = model_joints_3d[i].y;
hand->t_jts[i].z = model_joints_3d[i].z;
}
hand->t_jts_as_mat(0, i) = hand->t_jts[i].x;
hand->t_jts_as_mat(1, i) = hand->t_jts[i].y;
hand->t_jts_as_mat(2, i) = hand->t_jts[i].z;
}
// do the math!
optimize(hand);
// Convert it to xrt_hand_joint_set!
make_joint_at_matrix(XRT_HAND_JOINT_WRIST, hand->wrist_relation, out_set, hand_idx);
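// Palm joint: orientation of the middle-finger metacarpal, positioned halfway between the middle metacarpal and proximal joints.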
Eigen::Affine3f palm_relation;
palm_relation.linear() = hand->fingers[2].bones[0].world_pose.linear();
palm_relation.translation() = Eigen::Vector3f::Zero();
palm_relation.translation() += hand->fingers[2].bones[0].world_pose.translation() / 2;
palm_relation.translation() += hand->fingers[2].bones[1].world_pose.translation() / 2;
make_joint_at_matrix(XRT_HAND_JOINT_PALM, palm_relation, out_set, hand_idx);
int start = XRT_HAND_JOINT_THUMB_METACARPAL;
for (int finger_idx = 0; finger_idx < 5; finger_idx++) {
for (int bone_idx = 0; bone_idx < 5; bone_idx++) {
CONTINUE_IF_HIDDEN_THUMB;
if (!(finger_idx == 0 && bone_idx == 0)) {
make_joint_at_matrix(start++, hand->fingers[finger_idx].bones[bone_idx].world_pose,
out_set, hand_idx);
}
}
}
out_set->is_active = true;
}
void
alloc_kinematic_hand(kinematic_hand_4f **out_kinematic_hand)
{
kinematic_hand_4f *h = new kinematic_hand_4f;
*out_kinematic_hand = h;
}
void
free_kinematic_hand(kinematic_hand_4f **kinematic_hand)
{
delete *kinematic_hand;
*kinematic_hand = nullptr; // Null the caller's pointer so a stale use is caught early.
}
} // namespace xrt::tracking::hand::mercury::kine

View file

@@ -0,0 +1,55 @@
// Copyright 2022, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Math for kinematic model
* @author Moses Turner <moses@collabora.com>
* @ingroup tracking
*/
#pragma once
#include "kinematic_defines.hpp"
namespace xrt::tracking::hand::mercury::kine {
// Waggle-curl-twist: waggle is side-to-side about Y, curl is flexion about X, twist is about Z, composed in that order.
static inline void
wct_to_quat(wct_t wct, struct xrt_quat *out)
{
XRT_TRACE_MARKER();
xrt_vec3 waggle_axis = {0, 1, 0};
xrt_quat just_waggle;
math_quat_from_angle_vector(wct.waggle, &waggle_axis, &just_waggle);
xrt_vec3 curl_axis = {1, 0, 0};
xrt_quat just_curl;
math_quat_from_angle_vector(wct.curl, &curl_axis, &just_curl);
xrt_vec3 twist_axis = {0, 0, 1};
xrt_quat just_twist;
math_quat_from_angle_vector(wct.twist, &twist_axis, &just_twist);
//!@optimize This should be a matrix multiplication...
*out = just_waggle;
math_quat_rotate(out, &just_curl, out);
math_quat_rotate(out, &just_twist, out);
}
// Inlines.
static inline float
rad(double degrees)
{
return degrees * (M_PI / 180.0);
}
static inline void
clamp(float *in, float min, float max)
{
*in = fminf(max, fmaxf(min, *in));
}
static inline void
clamp_to_r(float *in, float c, float r)
{
clamp(in, c - r, c + r);
}
} // namespace xrt::tracking::hand::mercury::kine