mirror of
https://gitlab.freedesktop.org/monado/monado.git
synced 2025-01-01 12:46:12 +00:00
d/ht: Switch to new get_hand_tracking signature and update tracking
This commit is contained in:
parent
322d5b8f2d
commit
5abd3b3570
|
@ -267,7 +267,7 @@ if(XRT_BUILD_DRIVER_HANDTRACKING)
|
|||
ht/templates/NaivePermutationSort.hpp
|
||||
)
|
||||
add_library(drv_ht STATIC ${HT_SOURCE_FILES})
|
||||
target_link_libraries(drv_ht PRIVATE xrt-interfaces aux_os aux_util aux_math ONNXRuntime::ONNXRuntime ${OpenCV_LIBRARIES})
|
||||
target_link_libraries(drv_ht PRIVATE xrt-interfaces aux_os aux_util aux_math aux_gstreamer ONNXRuntime::ONNXRuntime ${OpenCV_LIBRARIES})
|
||||
target_include_directories(drv_ht PRIVATE ${OpenCV_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR})
|
||||
list(APPEND ENABLED_DRIVERS ht)
|
||||
endif()
|
||||
|
|
|
@ -9,6 +9,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "cjson/cJSON.h"
|
||||
#include "math/m_filter_one_euro.h"
|
||||
#include "os/os_time.h"
|
||||
#include "util/u_frame.h"
|
||||
|
||||
#include "templates/NaivePermutationSort.hpp"
|
||||
|
@ -17,7 +20,9 @@
|
|||
#include "ht_models.hpp"
|
||||
#include "ht_hand_math.hpp"
|
||||
#include "ht_image_math.hpp"
|
||||
#include "util/u_time.h"
|
||||
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
|
||||
|
@ -42,8 +47,20 @@ htProcessJoint(struct ht_device *htd,
|
|||
static float
|
||||
errHistory2D(HandHistory2DBBox *past, Palm7KP *present)
|
||||
{
|
||||
return (m_vec2_len(*past->wrist_unfiltered[0] - present->kps[WRIST_7KP]) +
|
||||
m_vec2_len(*past->middle_unfiltered[0] - present->kps[MIDDLE_7KP]));
|
||||
if (!past->htAlgorithm_approves) {
|
||||
// U_LOG_E("Returning big number because htAlgorithm told me to!");
|
||||
return 100000000000000000000000000000.0f;
|
||||
}
|
||||
float sum_of_lengths = m_vec2_len(*past->wrist_unfiltered[0] - *past->middle_unfiltered[0]) +
|
||||
m_vec2_len(present->kps[WRIST_7KP] - present->kps[MIDDLE_7KP]);
|
||||
|
||||
float sum_of_distances = (m_vec2_len(*past->wrist_unfiltered[0] - present->kps[WRIST_7KP]) +
|
||||
m_vec2_len(*past->middle_unfiltered[0] - present->kps[MIDDLE_7KP]));
|
||||
|
||||
|
||||
float final = sum_of_distances / sum_of_lengths;
|
||||
|
||||
return final;
|
||||
}
|
||||
|
||||
static std::vector<Hand2D>
|
||||
|
@ -74,7 +91,7 @@ htImageToKeypoints(struct ht_view *htv)
|
|||
used_histories, used_detections,
|
||||
|
||||
history_indices, detection_indices, dontuse,
|
||||
errHistory2D);
|
||||
errHistory2D, 1.0f);
|
||||
|
||||
// Here's the trick - we use the associated bbox_filter to get an output but *never commit* the noisy 128x128
|
||||
// detection; instead later on we commit the (hopefully) nicer palm and wrist from the 224x224 keypoint
|
||||
|
@ -82,12 +99,14 @@ htImageToKeypoints(struct ht_view *htv)
|
|||
|
||||
// Add extra detections!
|
||||
for (size_t i = 0; i < used_detections.size(); i++) {
|
||||
if (used_detections[i] == false) {
|
||||
if ((used_detections[i] == false) && hand_detections[i].confidence > 0.65) {
|
||||
// Confidence to get in the door is 0.65, confidence to stay in is 0.3
|
||||
HandHistory2DBBox hist_new = {};
|
||||
m_filter_euro_vec2_init(&hist_new.m_filter_middle, FCMIN_BBOX, FCMIN_D_BB0X, BETA_BB0X);
|
||||
m_filter_euro_vec2_init(&hist_new.m_filter_wrist, FCMIN_BBOX, FCMIN_D_BB0X, BETA_BB0X);
|
||||
m_filter_euro_vec2_init(&hist_new.m_filter_center, FCMIN_BBOX_POSITION, FCMIN_D_BB0X_POSITION,
|
||||
BETA_BB0X_POSITION);
|
||||
m_filter_euro_vec2_init(&hist_new.m_filter_direction, FCMIN_BBOX_ORIENTATION,
|
||||
FCMIN_D_BB0X_ORIENTATION, BETA_BB0X_ORIENTATION);
|
||||
|
||||
// this leaks, on august 24
|
||||
htv->bbox_histories.push_back(hist_new);
|
||||
history_indices.push_back(htv->bbox_histories.size() - 1);
|
||||
detection_indices.push_back(i);
|
||||
|
@ -98,7 +117,9 @@ htImageToKeypoints(struct ht_view *htv)
|
|||
for (size_t i = 0; i < history_indices.size(); i++) {
|
||||
HandHistory2DBBox *hist_of_interest = &htv->bbox_histories[history_indices[i]];
|
||||
hist_of_interest->wrist_unfiltered.push(hand_detections[detection_indices[i]].kps[WRIST_7KP]);
|
||||
hist_of_interest->index_unfiltered.push(hand_detections[detection_indices[i]].kps[INDEX_7KP]);
|
||||
hist_of_interest->middle_unfiltered.push(hand_detections[detection_indices[i]].kps[MIDDLE_7KP]);
|
||||
hist_of_interest->pinky_unfiltered.push(hand_detections[detection_indices[i]].kps[LITTLE_7KP]);
|
||||
// Eh do the rest later
|
||||
}
|
||||
|
||||
|
@ -136,17 +157,23 @@ htImageToKeypoints(struct ht_view *htv)
|
|||
for (size_t i = 0; i < htv->bbox_histories.size(); i++) { //(BBoxHistory * entry : htv->bbox_histories) {
|
||||
HandHistory2DBBox *entry = &htv->bbox_histories[i];
|
||||
cv::Mat hand_rect = cv::Mat(224, 224, CV_8UC3);
|
||||
xrt_vec2 goodenough_middle;
|
||||
xrt_vec2 goodenough_wrist;
|
||||
|
||||
m_filter_euro_vec2_run_no_commit(&entry->m_filter_middle, htv->htd->current_frame_timestamp,
|
||||
entry->middle_unfiltered[0], &goodenough_middle);
|
||||
m_filter_euro_vec2_run_no_commit(&entry->m_filter_wrist, htv->htd->current_frame_timestamp,
|
||||
entry->wrist_unfiltered[0], &goodenough_wrist);
|
||||
|
||||
rotatedRectFromJoints(htv, goodenough_middle, goodenough_wrist, &blah[i]);
|
||||
xrt_vec2 unfiltered_middle;
|
||||
xrt_vec2 unfiltered_direction;
|
||||
|
||||
|
||||
centerAndRotationFromJoints(htv, entry->wrist_unfiltered[0], entry->index_unfiltered[0],
|
||||
entry->middle_unfiltered[0], entry->pinky_unfiltered[0], &unfiltered_middle,
|
||||
&unfiltered_direction);
|
||||
|
||||
xrt_vec2 filtered_middle;
|
||||
xrt_vec2 filtered_direction;
|
||||
|
||||
m_filter_euro_vec2_run_no_commit(&entry->m_filter_center, htv->htd->current_frame_timestamp,
|
||||
&unfiltered_middle, &filtered_middle);
|
||||
m_filter_euro_vec2_run_no_commit(&entry->m_filter_direction, htv->htd->current_frame_timestamp,
|
||||
&unfiltered_direction, &filtered_direction);
|
||||
|
||||
rotatedRectFromJoints(htv, filtered_middle, filtered_direction, &blah[i]);
|
||||
|
||||
warpAffine(raw_input, hand_rect, blah[i].warp_there, hand_rect.size());
|
||||
|
||||
|
@ -180,29 +207,39 @@ htImageToKeypoints(struct ht_view *htv)
|
|||
in_image_px_coords.kps[i] = rr;
|
||||
|
||||
in_image_ray_coords.kps[i] = raycoord(htv, rr);
|
||||
if (htd->debug_scribble) {
|
||||
if (htd->debug_scribble && htd->dynamic_config.scribble_2d_keypoints) {
|
||||
handDot(htv->debug_out_to_this, {rr.x, rr.y}, fmax((-vec.z + 100 - 20) * .08, 2),
|
||||
((float)i) / 21.0f, cv::FILLED);
|
||||
((float)i) / 21.0f, 0.95f, cv::FILLED);
|
||||
}
|
||||
}
|
||||
xrt_vec2 middle_in_px_coords = {in_image_px_coords.kps[MIDL_PXM].x, in_image_px_coords.kps[MIDL_PXM].y};
|
||||
xrt_vec2 wrist_in_px_coords = {in_image_px_coords.kps[WRIST].x, in_image_px_coords.kps[WRIST].y};
|
||||
xrt_vec2 index_in_px_coords = {in_image_px_coords.kps[INDX_PXM].x, in_image_px_coords.kps[INDX_PXM].y};
|
||||
xrt_vec2 middle_in_px_coords = {in_image_px_coords.kps[MIDL_PXM].x, in_image_px_coords.kps[MIDL_PXM].y};
|
||||
xrt_vec2 little_in_px_coords = {in_image_px_coords.kps[LITL_PXM].x, in_image_px_coords.kps[LITL_PXM].y};
|
||||
xrt_vec2 dontuse;
|
||||
m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_wrist, htv->htd->current_frame_timestamp,
|
||||
&wrist_in_px_coords, &dontuse);
|
||||
|
||||
m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_middle, htv->htd->current_frame_timestamp,
|
||||
&middle_in_px_coords, &dontuse);
|
||||
xrt_vec2 unfiltered_middle, unfiltered_direction;
|
||||
|
||||
centerAndRotationFromJoints(htv, &wrist_in_px_coords, &index_in_px_coords, &middle_in_px_coords,
|
||||
&little_in_px_coords, &unfiltered_middle, &unfiltered_direction);
|
||||
|
||||
m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_center, htv->htd->current_frame_timestamp,
|
||||
&unfiltered_middle, &dontuse);
|
||||
|
||||
m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_direction, htv->htd->current_frame_timestamp,
|
||||
&unfiltered_direction, &dontuse);
|
||||
|
||||
output.push_back(in_image_ray_coords);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
#if defined(JSON_OUTPUT)
|
||||
#if defined(EXPERIMENTAL_DATASET_RECORDING)
|
||||
|
||||
static void
|
||||
jsonAddJoint(cJSON *into_this, xrt_pose loc, const char *name)
|
||||
{
|
||||
|
||||
cJSON *container = cJSON_CreateObject();
|
||||
cJSON *joint_loc = cJSON_CreateArray();
|
||||
cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.x));
|
||||
|
@ -224,64 +261,119 @@ jsonAddJoint(cJSON *into_this, xrt_pose loc, const char *name)
|
|||
cJSON_AddItemToObject(into_this, name, container);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
jsonAddSet(struct ht_device *htd)
|
||||
void
|
||||
jsonMaybeAddSomeHands(struct ht_device *htd, bool err)
|
||||
{
|
||||
cJSON *two_hand_container = cJSON_CreateObject();
|
||||
static const char *keys[] = {
|
||||
"wrist", "palm",
|
||||
if (!htd->tracking_should_record_dataset) {
|
||||
return;
|
||||
}
|
||||
cJSON *j_this_frame = cJSON_CreateObject();
|
||||
cJSON_AddItemToObject(j_this_frame, "seq_since_start", cJSON_CreateNumber(htd->gst.current_index));
|
||||
cJSON_AddItemToObject(j_this_frame, "seq_src", cJSON_CreateNumber(htd->frame_for_process->source_sequence));
|
||||
cJSON_AddItemToObject(j_this_frame, "ts", cJSON_CreateNumber(htd->gst.last_frame_ns));
|
||||
|
||||
"thumb_mcp", "thumb_pxm", "thumb_dst", "thumb_tip",
|
||||
cJSON *j_hands_in_frame = cJSON_AddArrayToObject(j_this_frame, "detected_hands");
|
||||
if (!err) {
|
||||
for (size_t idx_hand = 0; idx_hand < htd->histories_3d.size(); idx_hand++) {
|
||||
cJSON *j_hand_in_frame = cJSON_CreateObject();
|
||||
|
||||
"index_mcp", "index_pxm", "index_int", "index_dst", "index_tip",
|
||||
cJSON *j_uuid = cJSON_CreateNumber(htd->histories_3d[idx_hand].uuid);
|
||||
cJSON_AddItemToObject(j_hand_in_frame, "uuid", j_uuid);
|
||||
|
||||
"middle_mcp", "middle_pxm", "middle_int", "middle_dst", "middle_tip",
|
||||
cJSON *j_handedness = cJSON_CreateNumber(htd->histories_3d[idx_hand].handedness);
|
||||
cJSON_AddItemToObject(j_hand_in_frame, "handedness", j_handedness);
|
||||
|
||||
"ring_mcp", "ring_pxm", "ring_int", "ring_dst", "ring_tip",
|
||||
static const char *keys[21] = {
|
||||
"WRIST",
|
||||
|
||||
"little_mcp", "little_pxm", "little_int", "little_dst", "little_tip",
|
||||
};
|
||||
static const char *sides_names[] = {
|
||||
"left",
|
||||
"right",
|
||||
};
|
||||
for (int side = 0; side < 2; side++) {
|
||||
struct xrt_hand_joint_set *set = &htd->hands_for_openxr[side];
|
||||
if (!set->is_active) {
|
||||
cJSON_AddNullToObject(two_hand_container, sides_names[side]);
|
||||
} else {
|
||||
cJSON *hand_obj = cJSON_CreateObject();
|
||||
for (int i = 0; i < 26; i++) {
|
||||
const char *key = keys[i];
|
||||
xrt_pose pose = set->values.hand_joint_set_default[i].relation.pose;
|
||||
jsonAddJoint(hand_obj, pose, key);
|
||||
"THMB_MCP", "THMB_PXM", "THMB_DST", "THMB_TIP",
|
||||
|
||||
"INDX_PXM", "INDX_INT", "INDX_DST", "INDX_TIP",
|
||||
|
||||
"MIDL_PXM", "MIDL_INT", "MIDL_DST", "MIDL_TIP",
|
||||
|
||||
"RING_PXM", "RING_INT", "RING_DST", "RING_TIP",
|
||||
|
||||
"LITL_PXM", "LITL_INT", "LITL_DST", "LITL_TIP",
|
||||
};
|
||||
|
||||
for (int idx_joint = 0; idx_joint < 21; idx_joint++) {
|
||||
// const char* key = keys[idx_joint];
|
||||
cJSON *j_vec3 = cJSON_AddArrayToObject(j_hand_in_frame, keys[idx_joint]);
|
||||
cJSON_AddItemToArray(
|
||||
j_vec3,
|
||||
cJSON_CreateNumber(
|
||||
htd->histories_3d[idx_hand].last_hands_unfiltered[0]->kps[idx_joint].x));
|
||||
cJSON_AddItemToArray(
|
||||
j_vec3,
|
||||
cJSON_CreateNumber(
|
||||
htd->histories_3d[idx_hand].last_hands_unfiltered[0]->kps[idx_joint].y));
|
||||
cJSON_AddItemToArray(
|
||||
j_vec3,
|
||||
cJSON_CreateNumber(
|
||||
htd->histories_3d[idx_hand].last_hands_unfiltered[0]->kps[idx_joint].z));
|
||||
}
|
||||
cJSON_AddItemToObject(two_hand_container, sides_names[side], hand_obj);
|
||||
|
||||
|
||||
cJSON_AddItemToArray(j_hands_in_frame, j_hand_in_frame);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(JSON_OUTPUT)
|
||||
cJSON_AddItemToArray(htd->output_array, two_hand_container);
|
||||
#endif
|
||||
cJSON_AddItemToArray(htd->output_array, j_this_frame);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
static void
|
||||
htBailThisFrame(struct ht_device *htd)
|
||||
htExitFrame(struct ht_device *htd,
|
||||
bool err,
|
||||
struct xrt_hand_joint_set final_hands_ordered_by_handedness[2],
|
||||
uint64_t timestamp)
|
||||
{
|
||||
|
||||
os_mutex_lock(&htd->openxr_hand_data_mediator);
|
||||
htd->hands_for_openxr[0].is_active = false;
|
||||
htd->hands_for_openxr[1].is_active = false;
|
||||
#if defined(JSON_OUTPUT)
|
||||
json_add_set(htd);
|
||||
#endif
|
||||
if (err) {
|
||||
htd->hands_for_openxr[0].is_active = false;
|
||||
htd->hands_for_openxr[1].is_active = false;
|
||||
} else {
|
||||
memcpy(&htd->hands_for_openxr[0], &final_hands_ordered_by_handedness[0],
|
||||
sizeof(struct xrt_hand_joint_set));
|
||||
memcpy(&htd->hands_for_openxr[1], &final_hands_ordered_by_handedness[1],
|
||||
sizeof(struct xrt_hand_joint_set));
|
||||
htd->hands_for_openxr_timestamp = timestamp;
|
||||
HT_DEBUG(htd, "Adding ts %zu", htd->hands_for_openxr_timestamp);
|
||||
}
|
||||
os_mutex_unlock(&htd->openxr_hand_data_mediator);
|
||||
#ifdef EXPERIMENTAL_DATASET_RECORDING
|
||||
if (htd->tracking_should_record_dataset) {
|
||||
// Add nothing-entry to json file.
|
||||
jsonMaybeAddSomeHands(htd, err);
|
||||
htd->gst.current_index++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Fills out_hand->kps[0..20] from the matching keypoints of one hand as seen in the left view
// (hand_in_left) and the right view (hand_in_right).
// Per keypoint: t = htd->baseline / (left.x - right.x); z is -t; x and y are each the mean of a
// left-view term and a right-view term (see the inline notes below).
// Only the kps array of out_hand is written here; other Hand3D fields are left untouched.
// NOTE(review): there is no guard for left.x == right.x (zero disparity); presumably t is then
// non-finite - confirm that callers reject such hands afterwards.
static void
|
||||
htJointDisparityMath(struct ht_device *htd, Hand2D *hand_in_left, Hand2D *hand_in_right, Hand3D *out_hand)
|
||||
{
|
||||
for (int i = 0; i < 21; i++) {
|
||||
// Believe it or not, this is where the 3D stuff happens!
|
||||
float t = htd->baseline / (hand_in_left->kps[i].x - hand_in_right->kps[i].x);
|
||||

||||
// Depth is the negated t.
out_hand->kps[i].z = -t;
|
||||

||||
// Left-view contribution to x and y (y is negated).
out_hand->kps[i].x = (hand_in_left->kps[i].x * t);
|
||||
out_hand->kps[i].y = -hand_in_left->kps[i].y * t;
|
||||

||||
// Right-view contribution; x additionally has htd->baseline added to it.
out_hand->kps[i].x += htd->baseline + (hand_in_right->kps[i].x * t);
|
||||
out_hand->kps[i].y += -hand_in_right->kps[i].y * t;
|
||||

||||
// Halve the sums, i.e. average the two views' contributions.
out_hand->kps[i].x *= .5;
|
||||
out_hand->kps[i].y *= .5;
|
||||
}
|
||||
}
|
||||
int64_t last_frame, this_frame;
|
||||
|
||||
static void
|
||||
|
@ -289,6 +381,23 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
{
|
||||
XRT_TRACE_MARKER();
|
||||
|
||||
#ifdef EXPERIMENTAL_DATASET_RECORDING
|
||||
|
||||
if (htd->tracking_should_record_dataset) {
|
||||
U_LOG_E("PUSHING!");
|
||||
uint64_t start = os_monotonic_get_ns();
|
||||
xrt_sink_push_frame(htd->gst.sink, htd->frame_for_process);
|
||||
uint64_t end = os_monotonic_get_ns();
|
||||
|
||||
if ((end - start) > 0.1 * U_TIME_1MS_IN_NS) {
|
||||
U_LOG_E("Encoder overloaded!");
|
||||
}
|
||||
|
||||
htd->gst.offset_ns = gstreamer_sink_get_timestamp_offset(htd->gst.gs);
|
||||
htd->gst.last_frame_ns = htd->frame_for_process->timestamp - htd->gst.offset_ns;
|
||||
}
|
||||
#endif
|
||||
|
||||
htd->current_frame_timestamp = htd->frame_for_process->timestamp;
|
||||
|
||||
int64_t start, end;
|
||||
|
@ -304,7 +413,7 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
const int view_width = htd->camera.one_view_size_px.w;
|
||||
const int view_height = htd->camera.one_view_size_px.h;
|
||||
|
||||
assert(full_width == view_width * 2);
|
||||
// assert(full_width == view_width * 2);
|
||||
assert(full_height == view_height);
|
||||
|
||||
const cv::Size full_size = cv::Size(full_width, full_height);
|
||||
|
@ -315,8 +424,10 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
htd->views[0].run_model_on_this = full_frame(cv::Rect(view_offsets[0], view_size));
|
||||
htd->views[1].run_model_on_this = full_frame(cv::Rect(view_offsets[1], view_size));
|
||||
|
||||
htd->mat_for_process = &full_frame;
|
||||
|
||||
// Check this every frame. We really, really, really don't want it to ever suddenly be null.
|
||||
htd->debug_scribble = htd->debug_sink != nullptr;
|
||||
htd->debug_scribble = htd->debug_sink.sink != nullptr;
|
||||
|
||||
cv::Mat debug_output = {};
|
||||
xrt_frame *debug_frame = nullptr; // only use if htd->debug_scribble
|
||||
|
@ -370,7 +481,7 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
uint64_t timestamp = htd->frame_for_process->timestamp;
|
||||
|
||||
if (htd->debug_scribble) {
|
||||
htd->debug_sink->push_frame(htd->debug_sink, debug_frame);
|
||||
u_sink_debug_push_frame(&htd->debug_sink, debug_frame);
|
||||
xrt_frame_reference(&debug_frame, NULL);
|
||||
}
|
||||
|
||||
|
@ -378,89 +489,73 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
// In the long run, this'll be a silly thing - we shouldn't always take the detection model's word for it
|
||||
// especially when part of the pipeline is an arbitrary confidence threshold.
|
||||
if (hands_in_left_view.size() == 0 || hands_in_right_view.size() == 0) {
|
||||
htBailThisFrame(htd);
|
||||
htExitFrame(htd, true, NULL, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// Figure out how to match hands up across views.
|
||||
// Construct a matrix, where the rows are left view hands and the cols are right view hands.
|
||||
// For each cell, compute an error that's just the difference in Y ray coordinates of all the 21 keypoints. With
|
||||
// perfect cameras + models, these differences will be zero. Anything with a high difference is not the same
|
||||
// hand observed across views. For each cell, make a datatype that is: the error, the left view hand index, the
|
||||
// right view hand index. Put these in an array, sort them by lowest error. Iterate over this sorted list (not
|
||||
// in matrix-land anymore), assigning left view hands to right view hands as you go. For any elements that are
|
||||
// trying to assign an already-assigned hand, skip them. At the end, check for any hands that went un-assigned;
|
||||
// forget about those.
|
||||
|
||||
// In the future, maybe we should go forward with several hand associations if there are two that are close,
|
||||
// keep track of which associations are mutually exclusive, and drop the one that fits the kinematic model less
|
||||
// well? Or drop the one that matches with previous measurements less well? Getting raw 3D poses out of line
|
||||
// intersection is not expensive.
|
||||
|
||||
// Known issue: If you put both of your hands at exactly the same height, it will not do the right thing. Won't fix
|
||||
// right now; need to upstream *something* first.
|
||||
|
||||
std::vector<bool> left_hands_taken;
|
||||
std::vector<bool> right_hands_taken;
|
||||
|
||||
std::vector<size_t> l_indices_in_order;
|
||||
std::vector<size_t> r_indices_in_order;
|
||||
std::vector<float> y_disparity_error_in_order;
|
||||
|
||||
naive_sort_permutation_by_error<Hand2D, Hand2D>(
|
||||
// Inputs
|
||||
hands_in_left_view, hands_in_right_view,
|
||||
|
||||
// Outputs
|
||||
left_hands_taken, right_hands_taken,
|
||||
|
||||
l_indices_in_order, r_indices_in_order, y_disparity_error_in_order, errHandDisparity);
|
||||
|
||||
std::vector<Hand2D> associated_in_left;
|
||||
std::vector<Hand2D> associated_in_right;
|
||||
|
||||
|
||||
for (size_t i = 0; i < l_indices_in_order.size(); i++) {
|
||||
associated_in_left.push_back(hands_in_left_view[i]);
|
||||
associated_in_right.push_back(hands_in_right_view[i]);
|
||||
}
|
||||
std::vector<Hand3D> possible_3d_hands;
|
||||
|
||||
// for every possible combination of hands in left view and hands in right view,
|
||||
for (size_t idx_l = 0; idx_l < hands_in_left_view.size(); idx_l++) {
|
||||
for (size_t idx_r = 0; idx_r < hands_in_right_view.size(); idx_r++) {
|
||||
Hand3D cur_hand = {};
|
||||
|
||||
std::vector<Hand3D> hands_unfiltered; //(associated_in_left.size());
|
||||
Hand2D &left_2d = hands_in_left_view[idx_l];
|
||||
Hand2D &right_2d = hands_in_right_view[idx_r];
|
||||
|
||||
for (size_t hand_idx = 0; hand_idx < associated_in_left.size(); hand_idx++) {
|
||||
|
||||
Hand3D cur_hand;
|
||||
|
||||
for (int i = 0; i < 21; i++) {
|
||||
float t = htd->baseline /
|
||||
(associated_in_left[hand_idx].kps[i].x - associated_in_right[hand_idx].kps[i].x);
|
||||
// float x, y;
|
||||
|
||||
cur_hand.kps[i].z = -t;
|
||||
|
||||
cur_hand.kps[i].x = (associated_in_left[hand_idx].kps[i].x * t); //-(htd->baseline * 0.5f);
|
||||
cur_hand.kps[i].y = -associated_in_left[hand_idx].kps[i].y * t;
|
||||
// Calculate a 3D hand for this combination
|
||||
htJointDisparityMath(htd, &hands_in_left_view[idx_l], &hands_in_right_view[idx_r], &cur_hand);
|
||||
cur_hand.timestamp = timestamp;
|
||||
cur_hand.rejected_by_smush = false;
|
||||
|
||||
// soon! average with hand in right view.
|
||||
cur_hand.kps[i].x += htd->baseline + (associated_in_right[hand_idx].kps[i].x * t);
|
||||
cur_hand.kps[i].y += -associated_in_right[hand_idx].kps[i].y * t;
|
||||
cur_hand.idx_l = idx_l;
|
||||
cur_hand.idx_r = idx_r;
|
||||
|
||||
cur_hand.kps[i].x *= .5;
|
||||
cur_hand.kps[i].y *= .5;
|
||||
}
|
||||
// Calculate a y-disparity for this combination
|
||||
cur_hand.y_disparity_error = errHandDisparity(&left_2d, &right_2d);
|
||||
|
||||
if (rejectBadHand(&cur_hand)) { // reject hands!!!
|
||||
cur_hand.y_disparity_error = y_disparity_error_in_order[hand_idx];
|
||||
hands_unfiltered.push_back(cur_hand);
|
||||
} else {
|
||||
HT_DEBUG(htd, "Rejected bad hand!"); // This probably could be a warn ...
|
||||
possible_3d_hands.push_back(cur_hand);
|
||||
}
|
||||
}
|
||||
|
||||
// Okay now do the exact same thing but with present and past instead of with left view and right view. Lotsa
|
||||
// code but hey this is hard stuff.
|
||||
HT_DEBUG(htd, "Starting with %zu hands!", possible_3d_hands.size());
|
||||
|
||||
// For each pair of 3D hands we just made
|
||||
for (size_t idx_one = 0; idx_one < possible_3d_hands.size(); idx_one++) {
|
||||
for (size_t idx_two = 0; idx_two < possible_3d_hands.size(); idx_two++) {
|
||||
if ((idx_one <= idx_two)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// See if this pair is suspiciously close together.
|
||||
// If it is, then this pairing is wrong - this is what was causing the "hands smushing together"
|
||||
// issue - we weren't catching these reliably.
|
||||
float errr = sumOfHandJointDistances(&possible_3d_hands[idx_one], &possible_3d_hands[idx_two]);
|
||||
HT_TRACE(htd, "%zu %zu is smush %f", idx_one, idx_two, errr);
|
||||
if (errr < 0.03f * 21.0f) {
|
||||
possible_3d_hands[idx_one].rejected_by_smush = true;
|
||||
possible_3d_hands[idx_two].rejected_by_smush = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Hand3D> hands_unfiltered;
|
||||
|
||||
for (Hand3D hand : possible_3d_hands) {
|
||||
// If none of these are false, then all our heuristics indicate this is a real hand, so we add it to our
|
||||
// list of real hands.
|
||||
bool selected = !hand.rejected_by_smush && //
|
||||
hand.y_disparity_error < 1.0f && //
|
||||
rejectTooClose(htd, &hand) && //
|
||||
rejectTooFar(htd, &hand) && //
|
||||
rejectTinyPalm(htd, &hand);
|
||||
if (selected) {
|
||||
HT_TRACE(htd, "Pushing back with y-error %f", hand.y_disparity_error);
|
||||
hands_unfiltered.push_back(hand);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::vector<bool> past_hands_taken;
|
||||
|
@ -471,19 +566,22 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
std::vector<float> flow_errors;
|
||||
|
||||
|
||||
float max_dist_between_frames = 1.0f;
|
||||
|
||||
naive_sort_permutation_by_error<HandHistory3D, Hand3D>(htd->histories_3d, // past
|
||||
hands_unfiltered, // present
|
||||
|
||||
|
||||
// outputs
|
||||
past_hands_taken, present_hands_taken, past_indices,
|
||||
present_indices, flow_errors, errHandHistory
|
||||
present_indices, flow_errors, errHandHistory,
|
||||
(max_dist_between_frames * 21.0f)
|
||||
|
||||
);
|
||||
|
||||
|
||||
for (size_t i = 0; i < past_indices.size(); i++) {
|
||||
htd->histories_3d[past_indices[i]].last_hands.push(hands_unfiltered[present_indices[i]]);
|
||||
htd->histories_3d[past_indices[i]].last_hands_unfiltered.push(hands_unfiltered[present_indices[i]]);
|
||||
}
|
||||
// The above may not do anything, because we'll start out with no hand histories! All the numbers of elements
|
||||
// should be zero.
|
||||
|
@ -493,8 +591,10 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
if (present_hands_taken[i] == false) {
|
||||
// if this hand never got assigned to a history
|
||||
HandHistory3D history_new;
|
||||
history_new.uuid = rand(); // Not a great uuid, huh? Good enough for us, this only has to be
|
||||
// unique across say an hour period max.
|
||||
handEuroFiltersInit(&history_new, FCMIN_HAND, FCMIN_D_HAND, BETA_HAND);
|
||||
history_new.last_hands.push(hands_unfiltered[i]);
|
||||
history_new.last_hands_unfiltered.push(hands_unfiltered[i]);
|
||||
// history_new.
|
||||
htd->histories_3d.push_back(
|
||||
history_new); // Add something to the end - don't initialize any of it.
|
||||
|
@ -511,27 +611,56 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
|
||||
if (htd->histories_3d.size() == 0) {
|
||||
HT_DEBUG(htd, "Bailing");
|
||||
htBailThisFrame(htd);
|
||||
htExitFrame(htd, true, NULL, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t num_hands = htd->histories_3d.size();
|
||||
if (num_hands > 2) {
|
||||
HT_WARN(htd, "More than two hands observed (%zu)! Expect bugginess!",
|
||||
num_hands); // this is quite bad, but rarely happens.
|
||||
}
|
||||
// if (num_hands > 2) {
|
||||
HT_DEBUG(htd, "Ending with %zu hands!",
|
||||
num_hands); // this is quite bad, but rarely happens.
|
||||
// }
|
||||
|
||||
// Here, we go back to our bbox_histories and remove the histories for any bounding boxes that never turned into
|
||||
// good hands.
|
||||
|
||||
// Iterate over all hands we're keeping track of, compute their current handedness.
|
||||
std::vector<size_t> valid_2d_idxs[2];
|
||||
|
||||
|
||||
for (size_t i = 0; i < htd->histories_3d.size(); i++) {
|
||||
// U_LOG_E("Valid hand %zu l_idx %i r_idx %i", i, htd->histories_3d[i].last_hands[0]->idx_l,
|
||||
// htd->histories_3d[i].last_hands[0]->idx_r);
|
||||
valid_2d_idxs[0].push_back(htd->histories_3d[i].last_hands_unfiltered[0]->idx_l);
|
||||
valid_2d_idxs[1].push_back(htd->histories_3d[i].last_hands_unfiltered[0]->idx_r);
|
||||
handednessHandHistory3D(&htd->histories_3d[i]);
|
||||
}
|
||||
|
||||
// Almost certainly not the cleanest way of doing this but leave me alone
|
||||
// Per camera view
|
||||
for (int view = 0; view < 2; view++) {
|
||||
// Per entry in bbox_histories
|
||||
for (size_t hist_idx = 0; hist_idx < htd->views[view].bbox_histories.size(); hist_idx++) {
|
||||
// See if this entry in bbox_histories ever turned into a 3D hand. If not, we notify (in a very
|
||||
// silly way) htImageToKeypoints that it should go away because it was an erroneous detection.
|
||||
for (size_t valid_idx : valid_2d_idxs[view]) {
|
||||
if (valid_idx == hist_idx) {
|
||||
htd->views[view].bbox_histories[hist_idx].htAlgorithm_approves = true;
|
||||
break;
|
||||
} else {
|
||||
htd->views[view].bbox_histories[hist_idx].htAlgorithm_approves = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Whoo! Okay, now we have some unfiltered hands in htd->histories_3d[i].last_hands[0]! Euro filter them!
|
||||
|
||||
std::vector<Hand3D> filtered_hands(num_hands);
|
||||
|
||||
for (size_t hand_index = 0; hand_index < num_hands; hand_index++) {
|
||||
filtered_hands[hand_index] = handEuroFiltersRun(&htd->histories_3d[hand_index]);
|
||||
handEuroFiltersRun(htd, &htd->histories_3d[hand_index], &filtered_hands[hand_index]);
|
||||
htd->histories_3d[hand_index].last_hands_filtered.push(filtered_hands[hand_index]);
|
||||
applyThumbIndexDrag(&filtered_hands[hand_index]);
|
||||
filtered_hands[hand_index].handedness = htd->histories_3d[hand_index].handedness;
|
||||
}
|
||||
|
@ -568,7 +697,6 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
for (size_t i = 0; (i < xr_indices.size()); i++) {
|
||||
Hand3D *hand = hands_to_use[i];
|
||||
|
||||
|
||||
struct xrt_hand_joint_set *put_in_set = &final_hands_ordered_by_handedness[xr_indices[i]];
|
||||
|
||||
xrt_vec3 wrist = hand->kps[0];
|
||||
|
@ -599,56 +727,49 @@ htRunAlgorithm(struct ht_device *htd)
|
|||
|
||||
|
||||
|
||||
// clang-format off
|
||||
htProcessJoint(htd, palm, put_in_set, XRT_HAND_JOINT_PALM);
|
||||
|
||||
htProcessJoint(htd,palm, put_in_set, XRT_HAND_JOINT_PALM);
|
||||
htProcessJoint(htd, hand->kps[0], put_in_set, XRT_HAND_JOINT_WRIST);
|
||||
htProcessJoint(htd, hand->kps[1], put_in_set, XRT_HAND_JOINT_THUMB_METACARPAL);
|
||||
htProcessJoint(htd, hand->kps[2], put_in_set, XRT_HAND_JOINT_THUMB_PROXIMAL);
|
||||
htProcessJoint(htd, hand->kps[3], put_in_set, XRT_HAND_JOINT_THUMB_DISTAL);
|
||||
htProcessJoint(htd, hand->kps[4], put_in_set, XRT_HAND_JOINT_THUMB_TIP);
|
||||
|
||||
htProcessJoint(htd,hand->kps[0], put_in_set, XRT_HAND_JOINT_WRIST);
|
||||
htProcessJoint(htd,hand->kps[1], put_in_set, XRT_HAND_JOINT_THUMB_METACARPAL);
|
||||
htProcessJoint(htd,hand->kps[2], put_in_set, XRT_HAND_JOINT_THUMB_PROXIMAL);
|
||||
htProcessJoint(htd,hand->kps[3], put_in_set, XRT_HAND_JOINT_THUMB_DISTAL);
|
||||
htProcessJoint(htd,hand->kps[4], put_in_set, XRT_HAND_JOINT_THUMB_TIP);
|
||||
htProcessJoint(htd, index_metacarpal, put_in_set, XRT_HAND_JOINT_INDEX_METACARPAL);
|
||||
htProcessJoint(htd, hand->kps[5], put_in_set, XRT_HAND_JOINT_INDEX_PROXIMAL);
|
||||
htProcessJoint(htd, hand->kps[6], put_in_set, XRT_HAND_JOINT_INDEX_INTERMEDIATE);
|
||||
htProcessJoint(htd, hand->kps[7], put_in_set, XRT_HAND_JOINT_INDEX_DISTAL);
|
||||
htProcessJoint(htd, hand->kps[8], put_in_set, XRT_HAND_JOINT_INDEX_TIP);
|
||||
|
||||
htProcessJoint(htd,index_metacarpal, put_in_set, XRT_HAND_JOINT_INDEX_METACARPAL);
|
||||
htProcessJoint(htd,hand->kps[5], put_in_set, XRT_HAND_JOINT_INDEX_PROXIMAL);
|
||||
htProcessJoint(htd,hand->kps[6], put_in_set, XRT_HAND_JOINT_INDEX_INTERMEDIATE);
|
||||
htProcessJoint(htd,hand->kps[7], put_in_set, XRT_HAND_JOINT_INDEX_DISTAL);
|
||||
htProcessJoint(htd,hand->kps[8], put_in_set, XRT_HAND_JOINT_INDEX_TIP);
|
||||
htProcessJoint(htd, middle_metacarpal, put_in_set, XRT_HAND_JOINT_MIDDLE_METACARPAL);
|
||||
htProcessJoint(htd, hand->kps[9], put_in_set, XRT_HAND_JOINT_MIDDLE_PROXIMAL);
|
||||
htProcessJoint(htd, hand->kps[10], put_in_set, XRT_HAND_JOINT_MIDDLE_INTERMEDIATE);
|
||||
htProcessJoint(htd, hand->kps[11], put_in_set, XRT_HAND_JOINT_MIDDLE_DISTAL);
|
||||
htProcessJoint(htd, hand->kps[12], put_in_set, XRT_HAND_JOINT_MIDDLE_TIP);
|
||||
|
||||
htProcessJoint(htd,middle_metacarpal, put_in_set, XRT_HAND_JOINT_MIDDLE_METACARPAL);
|
||||
htProcessJoint(htd,hand->kps[9], put_in_set, XRT_HAND_JOINT_MIDDLE_PROXIMAL);
|
||||
htProcessJoint(htd,hand->kps[10], put_in_set, XRT_HAND_JOINT_MIDDLE_INTERMEDIATE);
|
||||
htProcessJoint(htd,hand->kps[11], put_in_set, XRT_HAND_JOINT_MIDDLE_DISTAL);
|
||||
htProcessJoint(htd,hand->kps[12], put_in_set, XRT_HAND_JOINT_MIDDLE_TIP);
|
||||
|
||||
htProcessJoint(htd,ring_metacarpal, put_in_set, XRT_HAND_JOINT_RING_METACARPAL);
|
||||
htProcessJoint(htd,hand->kps[13], put_in_set, XRT_HAND_JOINT_RING_PROXIMAL);
|
||||
htProcessJoint(htd,hand->kps[14], put_in_set, XRT_HAND_JOINT_RING_INTERMEDIATE);
|
||||
htProcessJoint(htd,hand->kps[15], put_in_set, XRT_HAND_JOINT_RING_DISTAL);
|
||||
htProcessJoint(htd,hand->kps[16], put_in_set, XRT_HAND_JOINT_RING_TIP);
|
||||
htProcessJoint(htd, ring_metacarpal, put_in_set, XRT_HAND_JOINT_RING_METACARPAL);
|
||||
htProcessJoint(htd, hand->kps[13], put_in_set, XRT_HAND_JOINT_RING_PROXIMAL);
|
||||
htProcessJoint(htd, hand->kps[14], put_in_set, XRT_HAND_JOINT_RING_INTERMEDIATE);
|
||||
htProcessJoint(htd, hand->kps[15], put_in_set, XRT_HAND_JOINT_RING_DISTAL);
|
||||
htProcessJoint(htd, hand->kps[16], put_in_set, XRT_HAND_JOINT_RING_TIP);
|
||||
|
||||
htProcessJoint(htd, pinky_metacarpal, put_in_set, XRT_HAND_JOINT_LITTLE_METACARPAL);
|
||||
htProcessJoint(htd,hand->kps[17], put_in_set, XRT_HAND_JOINT_LITTLE_PROXIMAL);
|
||||
htProcessJoint(htd,hand->kps[18], put_in_set, XRT_HAND_JOINT_LITTLE_INTERMEDIATE);
|
||||
htProcessJoint(htd,hand->kps[19], put_in_set, XRT_HAND_JOINT_LITTLE_DISTAL);
|
||||
htProcessJoint(htd,hand->kps[20], put_in_set, XRT_HAND_JOINT_LITTLE_TIP);
|
||||
put_in_set->is_active = true;
|
||||
math_pose_identity(&put_in_set->hand_pose.pose);
|
||||
htProcessJoint(htd, hand->kps[17], put_in_set, XRT_HAND_JOINT_LITTLE_PROXIMAL);
|
||||
htProcessJoint(htd, hand->kps[18], put_in_set, XRT_HAND_JOINT_LITTLE_INTERMEDIATE);
|
||||
htProcessJoint(htd, hand->kps[19], put_in_set, XRT_HAND_JOINT_LITTLE_DISTAL);
|
||||
htProcessJoint(htd, hand->kps[20], put_in_set, XRT_HAND_JOINT_LITTLE_TIP);
|
||||
|
||||
put_in_set->is_active = true;
|
||||
math_pose_identity(&put_in_set->hand_pose.pose);
|
||||
|
||||
|
||||
put_in_set->hand_pose.pose.orientation = htd->stereo_camera_to_left_camera;
|
||||
|
||||
put_in_set->hand_pose.relation_flags = valid_flags_ht;
|
||||
// clang-format on
|
||||
|
||||
applyJointWidths(put_in_set);
|
||||
applyJointOrientations(put_in_set, xr_indices[i]);
|
||||
}
|
||||
|
||||
|
||||
// For some reason, final_hands_ordered_by_handedness[0] is active but the other is inactive.
|
||||
|
||||
os_mutex_lock(&htd->openxr_hand_data_mediator);
|
||||
memcpy(&htd->hands_for_openxr[0], &final_hands_ordered_by_handedness[0], sizeof(struct xrt_hand_joint_set));
|
||||
memcpy(&htd->hands_for_openxr[1], &final_hands_ordered_by_handedness[1], sizeof(struct xrt_hand_joint_set));
|
||||
|
||||
#if defined(JSON_OUTPUT)
|
||||
json_add_set(htd);
|
||||
#endif
|
||||
os_mutex_unlock(&htd->openxr_hand_data_mediator);
|
||||
htExitFrame(htd, false, final_hands_ordered_by_handedness, filtered_hands[0].timestamp);
|
||||
}
|
||||
|
|
|
@ -8,7 +8,15 @@
|
|||
* @ingroup drv_ht
|
||||
*/
|
||||
|
||||
#include "gstreamer/gst_pipeline.h"
|
||||
#include "gstreamer/gst_sink.h"
|
||||
#include "ht_interface.h"
|
||||
#include "ht_driver.hpp"
|
||||
|
||||
#include "../depthai/depthai_interface.h"
|
||||
|
||||
#include "xrt/xrt_defines.h"
|
||||
#include "xrt/xrt_frame.h"
|
||||
#include "xrt/xrt_frameserver.h"
|
||||
|
||||
#include "os/os_time.h"
|
||||
|
@ -33,7 +41,6 @@
|
|||
|
||||
#include "templates/NaivePermutationSort.hpp"
|
||||
|
||||
#include "ht_driver.hpp"
|
||||
#include "ht_algorithm.hpp"
|
||||
|
||||
#include <cjson/cJSON.h>
|
||||
|
@ -86,7 +93,7 @@ getCalibration(struct ht_device *htd, t_stereo_camera_calibration *calibration)
|
|||
wrap.view[0].distortion_mat, // distCoeffs1
|
||||
wrap.view[1].intrinsics_mat, // cameraMatrix2
|
||||
wrap.view[1].distortion_mat, // distCoeffs2
|
||||
cv::Size(960, 960), // imageSize
|
||||
wrap.view[0].image_size_pixels_cv, // imageSize*
|
||||
wrap.camera_rotation_mat, // R
|
||||
wrap.camera_translation_mat, // T
|
||||
htd->views[0].rotate_camera_to_stereo_camera, // R1
|
||||
|
@ -100,6 +107,7 @@ getCalibration(struct ht_device *htd, t_stereo_camera_calibration *calibration)
|
|||
NULL, // validPixROI1
|
||||
NULL); // validPixROI2
|
||||
|
||||
//* Good enough guess that view 0 and view 1 are the same size.
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
htd->views[i].cameraMatrix = wrap.view[i].intrinsics_mat;
|
||||
|
@ -107,6 +115,10 @@ getCalibration(struct ht_device *htd, t_stereo_camera_calibration *calibration)
|
|||
htd->views[i].distortion = wrap.view[i].distortion_fisheye_mat;
|
||||
}
|
||||
|
||||
htd->camera.one_view_size_px.w = wrap.view[0].image_size_pixels.w;
|
||||
htd->camera.one_view_size_px.h = wrap.view[0].image_size_pixels.h;
|
||||
|
||||
|
||||
cv::Matx33d rotate_stereo_camera_to_left_camera = htd->views[0].rotate_camera_to_stereo_camera.inv();
|
||||
|
||||
xrt_matrix_3x3 s;
|
||||
|
@ -140,20 +152,57 @@ getCalibration(struct ht_device *htd, t_stereo_camera_calibration *calibration)
|
|||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
getStartupConfig(struct ht_device *htd, const cJSON *startup_config)
|
||||
{
|
||||
const cJSON *palm_detection_type = u_json_get(startup_config, "palm_detection_model");
|
||||
const cJSON *keypoint_estimation_type = u_json_get(startup_config, "keypoint_estimation_model");
|
||||
const cJSON *uvc_wire_format = u_json_get(startup_config, "uvc_wire_format");
|
||||
|
||||
// IsString does its own null-checking
|
||||
if (cJSON_IsString(palm_detection_type)) {
|
||||
bool is_collabora = (strcmp(cJSON_GetStringValue(palm_detection_type), "collabora") == 0);
|
||||
bool is_mediapipe = (strcmp(cJSON_GetStringValue(palm_detection_type), "mediapipe") == 0);
|
||||
if (!is_collabora && !is_mediapipe) {
|
||||
HT_WARN(htd, "Unknown palm detection type %s - should be \"collabora\" or \"mediapipe\"",
|
||||
cJSON_GetStringValue(palm_detection_type));
|
||||
}
|
||||
htd->startup_config.palm_detection_use_mediapipe = is_mediapipe;
|
||||
}
|
||||
|
||||
if (cJSON_IsString(keypoint_estimation_type)) {
|
||||
bool is_collabora = (strcmp(cJSON_GetStringValue(keypoint_estimation_type), "collabora") == 0);
|
||||
bool is_mediapipe = (strcmp(cJSON_GetStringValue(keypoint_estimation_type), "mediapipe") == 0);
|
||||
if (!is_collabora && !is_mediapipe) {
|
||||
HT_WARN(htd, "Unknown keypoint estimation type %s - should be \"collabora\" or \"mediapipe\"",
|
||||
cJSON_GetStringValue(keypoint_estimation_type));
|
||||
}
|
||||
htd->startup_config.keypoint_estimation_use_mediapipe = is_mediapipe;
|
||||
}
|
||||
|
||||
if (cJSON_IsString(uvc_wire_format)) {
|
||||
bool is_yuv = (strcmp(cJSON_GetStringValue(uvc_wire_format), "yuv") == 0);
|
||||
bool is_mjpeg = (strcmp(cJSON_GetStringValue(uvc_wire_format), "mjpeg") == 0);
|
||||
if (!is_yuv && !is_mjpeg) {
|
||||
HT_WARN(htd, "Unknown wire format type %s - should be \"yuv\" or \"mjpeg\"",
|
||||
cJSON_GetStringValue(uvc_wire_format));
|
||||
}
|
||||
if (is_yuv) {
|
||||
HT_DEBUG(htd, "Using YUYV422!");
|
||||
htd->startup_config.desired_format = XRT_FORMAT_YUYV422;
|
||||
} else {
|
||||
HT_DEBUG(htd, "Using MJPEG!");
|
||||
htd->startup_config.desired_format = XRT_FORMAT_MJPEG;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
getUserConfig(struct ht_device *htd)
|
||||
{
|
||||
// The game here is to avoid bugs + be paranoid, not to be fast. If you see something that seems "slow" - don't
|
||||
// fix it. Any of the tracking code is way stickier than this could ever be.
|
||||
|
||||
// Set defaults
|
||||
// Admit defeat: for now, Mediapipe's are still better than ours.
|
||||
htd->runtime_config.palm_detection_use_mediapipe = true;
|
||||
htd->runtime_config.keypoint_estimation_use_mediapipe = true;
|
||||
|
||||
// Make sure you build DebugOptimized!
|
||||
htd->runtime_config.desired_format = XRT_FORMAT_YUYV422;
|
||||
|
||||
struct u_config_json config_json = {};
|
||||
|
||||
u_config_json_open_or_create_main_file(&config_json);
|
||||
|
@ -166,52 +215,130 @@ getUserConfig(struct ht_device *htd)
|
|||
return;
|
||||
}
|
||||
|
||||
cJSON *palm_detection_type = cJSON_GetObjectItemCaseSensitive(ht_config_json, "palm_detection_model");
|
||||
cJSON *keypoint_estimation_type = cJSON_GetObjectItemCaseSensitive(ht_config_json, "keypoint_estimation_model");
|
||||
cJSON *uvc_wire_format = cJSON_GetObjectItemCaseSensitive(ht_config_json, "uvc_wire_format");
|
||||
// Don't get it twisted: initializing these to NULL is not cargo-culting.
|
||||
// Uninitialized values on the stack aren't guaranteed to be 0, so these could end up pointing to what we
|
||||
// *think* is a valid address but what is *not* one.
|
||||
char *startup_config_string = NULL;
|
||||
char *dynamic_config_string = NULL;
|
||||
|
||||
// IsString does its own null-checking
|
||||
if (cJSON_IsString(palm_detection_type)) {
|
||||
bool is_collabora = (strcmp(palm_detection_type->valuestring, "collabora") == 0);
|
||||
bool is_mediapipe = (strcmp(palm_detection_type->valuestring, "mediapipe") == 0);
|
||||
if (!is_collabora && !is_mediapipe) {
|
||||
HT_WARN(htd, "Unknown palm detection type %s - should be \"collabora\" or \"mediapipe\"",
|
||||
palm_detection_type->valuestring);
|
||||
{
|
||||
const cJSON *startup_config_string_json = u_json_get(ht_config_json, "startup_config_index");
|
||||
if (cJSON_IsString(startup_config_string_json)) {
|
||||
startup_config_string = cJSON_GetStringValue(startup_config_string_json);
|
||||
}
|
||||
|
||||
const cJSON *dynamic_config_string_json = u_json_get(ht_config_json, "dynamic_config_index");
|
||||
if (cJSON_IsString(dynamic_config_string_json)) {
|
||||
dynamic_config_string = cJSON_GetStringValue(dynamic_config_string_json);
|
||||
}
|
||||
htd->runtime_config.palm_detection_use_mediapipe = is_mediapipe;
|
||||
}
|
||||
|
||||
if (cJSON_IsString(keypoint_estimation_type)) {
|
||||
bool is_collabora = (strcmp(keypoint_estimation_type->valuestring, "collabora") == 0);
|
||||
bool is_mediapipe = (strcmp(keypoint_estimation_type->valuestring, "mediapipe") == 0);
|
||||
if (!is_collabora && !is_mediapipe) {
|
||||
HT_WARN(htd, "Unknown keypoint estimation type %s - should be \"collabora\" or \"mediapipe\"",
|
||||
keypoint_estimation_type->valuestring);
|
||||
}
|
||||
htd->runtime_config.keypoint_estimation_use_mediapipe = is_mediapipe;
|
||||
if (startup_config_string != NULL) {
|
||||
const cJSON *startup_config_obj =
|
||||
u_json_get(u_json_get(ht_config_json, "startup_configs"), startup_config_string);
|
||||
getStartupConfig(htd, startup_config_obj);
|
||||
}
|
||||
|
||||
if (cJSON_IsString(uvc_wire_format)) {
|
||||
bool is_yuv = (strcmp(cJSON_GetStringValue(uvc_wire_format), "yuv") == 0);
|
||||
bool is_mjpeg = (strcmp(cJSON_GetStringValue(uvc_wire_format), "mjpeg") == 0);
|
||||
if (!is_yuv && !is_mjpeg) {
|
||||
HT_WARN(htd, "Unknown wire format type %s - should be \"yuv\" or \"mjpeg\"",
|
||||
cJSON_GetStringValue(uvc_wire_format));
|
||||
}
|
||||
if (is_yuv) {
|
||||
HT_DEBUG(htd, "Using YUYV422!");
|
||||
htd->runtime_config.desired_format = XRT_FORMAT_YUYV422;
|
||||
} else {
|
||||
HT_DEBUG(htd, "Using MJPEG!");
|
||||
htd->runtime_config.desired_format = XRT_FORMAT_MJPEG;
|
||||
if (dynamic_config_string != NULL) {
|
||||
const cJSON *dynamic_config_obj =
|
||||
u_json_get(u_json_get(ht_config_json, "dynamic_configs"), dynamic_config_string);
|
||||
{
|
||||
ht_dynamic_config *hdc = &htd->dynamic_config;
|
||||
// Do the thing
|
||||
u_json_get_string_into_array(u_json_get(dynamic_config_obj, "name"), hdc->name, 64);
|
||||
|
||||
u_json_get_float(u_json_get(dynamic_config_obj, "hand_fc_min"), &hdc->hand_fc_min.val);
|
||||
u_json_get_float(u_json_get(dynamic_config_obj, "hand_fc_min_d"), &hdc->hand_fc_min_d.val);
|
||||
u_json_get_float(u_json_get(dynamic_config_obj, "hand_beta"), &hdc->hand_beta.val);
|
||||
|
||||
u_json_get_float(u_json_get(dynamic_config_obj, "nms_iou"), &hdc->nms_iou.val);
|
||||
u_json_get_float(u_json_get(dynamic_config_obj, "nms_threshold"), &hdc->nms_threshold.val);
|
||||
|
||||
u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_nms_detections"),
|
||||
&hdc->scribble_nms_detections);
|
||||
u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_raw_detections"),
|
||||
&hdc->scribble_raw_detections);
|
||||
u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_2d_keypoints"),
|
||||
&hdc->scribble_2d_keypoints);
|
||||
u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_bounding_box"),
|
||||
&hdc->scribble_bounding_box);
|
||||
|
||||
U_LOG_E("Hey %s %s", dynamic_config_string, cJSON_Print(dynamic_config_obj));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
cJSON_Delete(config_json.root);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
userConfigSetDefaults(struct ht_device *htd)
|
||||
{
|
||||
// Admit defeat: for now, Mediapipe's are still better than ours.
|
||||
htd->startup_config.palm_detection_use_mediapipe = true;
|
||||
htd->startup_config.keypoint_estimation_use_mediapipe = true;
|
||||
|
||||
// Make sure you build DebugOptimized!
|
||||
htd->startup_config.desired_format = XRT_FORMAT_YUYV422;
|
||||
|
||||
|
||||
ht_dynamic_config *hdc = &htd->dynamic_config;
|
||||
|
||||
hdc->scribble_nms_detections = true;
|
||||
hdc->scribble_raw_detections = false;
|
||||
hdc->scribble_2d_keypoints = true;
|
||||
hdc->scribble_bounding_box = false;
|
||||
|
||||
hdc->hand_fc_min.min = 0.0f;
|
||||
hdc->hand_fc_min.max = 50.0f;
|
||||
hdc->hand_fc_min.step = 0.05f;
|
||||
hdc->hand_fc_min.val = FCMIN_HAND;
|
||||
|
||||
hdc->hand_fc_min_d.min = 0.0f;
|
||||
hdc->hand_fc_min_d.max = 50.0f;
|
||||
hdc->hand_fc_min_d.step = 0.05f;
|
||||
hdc->hand_fc_min_d.val = FCMIN_D_HAND;
|
||||
|
||||
|
||||
hdc->hand_beta.min = 0.0f;
|
||||
hdc->hand_beta.max = 50.0f;
|
||||
hdc->hand_beta.step = 0.05f;
|
||||
hdc->hand_beta.val = BETA_HAND;
|
||||
|
||||
hdc->max_vel.min = 0.0f;
|
||||
hdc->max_vel.max = 50.0f;
|
||||
hdc->max_vel.step = 0.05f;
|
||||
hdc->max_vel.val = 30.0f; // 30 m/s; about 108 kph. If your hand is going this fast, our tracking failing is the
|
||||
// least of your problems.
|
||||
|
||||
hdc->max_acc.min = 0.0f;
|
||||
hdc->max_acc.max = 100.0f;
|
||||
hdc->max_acc.step = 0.1f;
|
||||
hdc->max_acc.val = 100.0f; // 100 m/s^2; about 10 Gs. Ditto.
|
||||
|
||||
hdc->nms_iou.min = 0.0f;
|
||||
hdc->nms_iou.max = 1.0f;
|
||||
hdc->nms_iou.step = 0.01f;
|
||||
|
||||
|
||||
hdc->nms_threshold.min = 0.0f;
|
||||
hdc->nms_threshold.max = 1.0f;
|
||||
hdc->nms_threshold.step = 0.01f;
|
||||
|
||||
hdc->new_detection_threshold.min = 0.0f;
|
||||
hdc->new_detection_threshold.max = 1.0f;
|
||||
hdc->new_detection_threshold.step = 0.01f;
|
||||
|
||||
|
||||
hdc->nms_iou.val = 0.05f;
|
||||
hdc->nms_threshold.val = 0.3f;
|
||||
hdc->new_detection_threshold.val = 0.6f;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
getModelsFolder(struct ht_device *htd)
|
||||
{
|
||||
|
@ -235,21 +362,136 @@ getModelsFolder(struct ht_device *htd)
|
|||
}
|
||||
|
||||
strcat(exec_location, "../share/monado/hand-tracking-models/");
|
||||
strcpy(htd->runtime_config.model_slug, exec_location);
|
||||
strcpy(htd->startup_config.model_slug, exec_location);
|
||||
#else
|
||||
const char *xdg_home = getenv("XDG_CONFIG_HOME");
|
||||
const char *home = getenv("HOME");
|
||||
if (xdg_home != NULL) {
|
||||
strcpy(htd->runtime_config.model_slug, xdg_home);
|
||||
strcpy(htd->startup_config.model_slug, xdg_home);
|
||||
} else if (home != NULL) {
|
||||
strcpy(htd->runtime_config.model_slug, home);
|
||||
strcpy(htd->startup_config.model_slug, home);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
strcat(htd->runtime_config.model_slug, "/.local/share/monado/hand-tracking-models/");
|
||||
strcat(htd->startup_config.model_slug, "/.local/share/monado/hand-tracking-models/");
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(EXPERIMENTAL_DATASET_RECORDING)
|
||||
|
||||
static void
|
||||
htStartJsonCB(void *ptr)
|
||||
{
|
||||
struct ht_device *htd = (struct ht_device *)ptr;
|
||||
HT_INFO(htd, "Magic button pressed!");
|
||||
|
||||
// Wait for the hand tracker to be totally done with the current frame, then make it wait trying to relock this
|
||||
// mutex for us to be done.
|
||||
os_mutex_lock(&htd->unlocked_between_frames);
|
||||
|
||||
if (htd->tracking_should_record_dataset == false) {
|
||||
// Then we're starting up the pipeline.
|
||||
HT_INFO(htd, "Starting dataset recording!");
|
||||
|
||||
|
||||
const char *source_name = "source_name";
|
||||
char pipeline_string[2048];
|
||||
|
||||
/*
|
||||
None (0) – No preset
|
||||
ultrafast (1) – ultrafast
|
||||
superfast (2) – superfast
|
||||
veryfast (3) – veryfast
|
||||
faster (4) – faster
|
||||
fast (5) – fast
|
||||
medium (6) – medium
|
||||
slow (7) – slow
|
||||
slower (8) – slower
|
||||
veryslow (9) – veryslow
|
||||
placebo (10) – placebo
|
||||
*/
|
||||
|
||||
#if 0
|
||||
snprintf(pipeline_string, //
|
||||
sizeof(pipeline_string), //
|
||||
"appsrc name=\"%s\" ! "
|
||||
"queue ! "
|
||||
"videoconvert ! "
|
||||
"queue ! "
|
||||
"x264enc pass=qual quantizer=0 tune=film bitrate=\"%s\" speed-preset=\"%s\" ! "
|
||||
"h264parse ! "
|
||||
"queue ! "
|
||||
"mp4mux ! "
|
||||
"filesink location=\"%s\"",
|
||||
source_name, "16384", "fast", "/tmp/moses.mp4");
|
||||
#elif 1
|
||||
snprintf(pipeline_string, //
|
||||
sizeof(pipeline_string), //
|
||||
"appsrc name=\"%s\" ! "
|
||||
"queue ! "
|
||||
"videoconvert ! "
|
||||
"queue ! "
|
||||
"x264enc pass=quant quantizer=20 tune=\"film\" speed-preset=\"veryfast\" ! "
|
||||
"h264parse ! "
|
||||
"queue ! "
|
||||
"matroskamux ! "
|
||||
"filesink location=\"%s\"",
|
||||
source_name, "/tmp/moses.mkv");
|
||||
#elif 1
|
||||
snprintf(pipeline_string, //
|
||||
sizeof(pipeline_string), //
|
||||
"appsrc name=\"%s\" ! "
|
||||
"queue ! "
|
||||
"videoconvert ! "
|
||||
"x265enc ! "
|
||||
"h265parse ! "
|
||||
"matroskamux ! "
|
||||
"filesink location=\"%s\"",
|
||||
source_name, "/tmp/moses.mkv");
|
||||
#endif
|
||||
|
||||
gstreamer_pipeline_create_from_string(&htd->gst.xfctx, pipeline_string, &htd->gst.gp);
|
||||
|
||||
gstreamer_sink_create_with_pipeline(htd->gst.gp, 2560, 800, XRT_FORMAT_R8G8B8, source_name,
|
||||
&htd->gst.gs, &htd->gst.sink);
|
||||
gstreamer_pipeline_play(htd->gst.gp);
|
||||
|
||||
|
||||
htd->gst.output_root = cJSON_CreateObject();
|
||||
htd->gst.output_array = cJSON_CreateArray();
|
||||
cJSON_AddItemToObject(htd->gst.output_root, "hand_array", htd->gst.output_array);
|
||||
|
||||
strcpy(htd->gui.start_json_record.label, "Stop recording and save dataset!");
|
||||
htd->gst.current_index = 0;
|
||||
htd->tracking_should_record_dataset = true;
|
||||
|
||||
} else {
|
||||
// Then the pipeline was created sometime in the past and we have to destroy it + save everything to a
|
||||
// file.
|
||||
|
||||
gstreamer_pipeline_stop(htd->gst.gp);
|
||||
|
||||
xrt_frame_context_destroy_nodes(&htd->gst.xfctx);
|
||||
|
||||
|
||||
cJSON_AddNumberToObject(htd->gst.output_root, "num_frames", htd->gst.current_index);
|
||||
cJSON_AddNumberToObject(htd->gst.output_root, "length_ns", htd->gst.last_frame_ns);
|
||||
const char *string = cJSON_Print(htd->gst.output_root);
|
||||
FILE *fp;
|
||||
fp = fopen("/tmp/moses.json", "w");
|
||||
fprintf(fp, "%s", string);
|
||||
fclose(fp);
|
||||
cJSON_Delete(htd->gst.output_root);
|
||||
|
||||
strcpy(htd->gui.start_json_record.label, "Start recording dataset!");
|
||||
htd->tracking_should_record_dataset = false;
|
||||
}
|
||||
|
||||
// We're done; let the hand tracker go about its business
|
||||
os_mutex_unlock(&htd->unlocked_between_frames);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
on_video_device(struct xrt_prober *xp,
|
||||
struct xrt_prober_device *pdev,
|
||||
|
@ -266,7 +508,6 @@ on_video_device(struct xrt_prober *xp,
|
|||
if (product != NULL && manufacturer != NULL) {
|
||||
if ((strcmp(product, "3D Camera") == 0) && (strcmp(manufacturer, "Etron Technology, Inc.") == 0)) {
|
||||
xrt_prober_open_video_device(xp, pdev, &htd->camera.xfctx, &htd->camera.xfs);
|
||||
htd->found_camera = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -284,13 +525,13 @@ ht_sink_push_frame(struct xrt_frame_sink *xs, struct xrt_frame *xf)
|
|||
assert(xf != NULL);
|
||||
|
||||
if (!htd->tracking_should_die) {
|
||||
os_mutex_lock(&htd->dying_breath);
|
||||
os_mutex_lock(&htd->unlocked_between_frames);
|
||||
|
||||
xrt_frame_reference(&htd->frame_for_process, xf);
|
||||
htRunAlgorithm(htd);
|
||||
xrt_frame_reference(&htd->frame_for_process, NULL); // Could let go of it a little earlier but nah
|
||||
|
||||
os_mutex_unlock(&htd->dying_breath);
|
||||
os_mutex_unlock(&htd->unlocked_between_frames);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -328,9 +569,9 @@ static void
|
|||
ht_device_get_hand_tracking(struct xrt_device *xdev,
|
||||
enum xrt_input_name name,
|
||||
uint64_t at_timestamp_ns,
|
||||
struct xrt_hand_joint_set *out_value)
|
||||
struct xrt_hand_joint_set *out_value,
|
||||
uint64_t *out_timestamp_ns)
|
||||
{
|
||||
// Note! Currently, this totally ignores at_timestamp_ns. We need a better interface.
|
||||
struct ht_device *htd = ht_device(xdev);
|
||||
|
||||
if (name != XRT_INPUT_GENERIC_HAND_TRACKING_LEFT && name != XRT_INPUT_GENERIC_HAND_TRACKING_RIGHT) {
|
||||
|
@ -343,6 +584,8 @@ ht_device_get_hand_tracking(struct xrt_device *xdev,
|
|||
|
||||
os_mutex_lock(&htd->openxr_hand_data_mediator);
|
||||
memcpy(out_value, &htd->hands_for_openxr[hand_index], sizeof(struct xrt_hand_joint_set));
|
||||
// Instead of pose-predicting, we tell the caller that this joint set is a little old
|
||||
*out_timestamp_ns = htd->hands_for_openxr_timestamp;
|
||||
os_mutex_unlock(&htd->openxr_hand_data_mediator);
|
||||
}
|
||||
|
||||
|
@ -354,22 +597,14 @@ ht_device_destroy(struct xrt_device *xdev)
|
|||
|
||||
|
||||
xrt_frame_context_destroy_nodes(&htd->camera.xfctx);
|
||||
#ifdef EXPERIMENTAL_DATASET_RECORDING
|
||||
xrt_frame_context_destroy_nodes(&htd->gst.xfctx);
|
||||
#endif
|
||||
htd->tracking_should_die = true;
|
||||
|
||||
// Lock this mutex so we don't try to free things as they're being used on the last iteration
|
||||
os_mutex_lock(&htd->dying_breath);
|
||||
os_mutex_lock(&htd->unlocked_between_frames);
|
||||
destroyOnnx(htd);
|
||||
#if defined(JSON_OUTPUT)
|
||||
const char *string = cJSON_Print(htd->output_root);
|
||||
FILE *fp;
|
||||
fp = fopen("/1/2handtrack/aug12.json", "w");
|
||||
|
||||
|
||||
fprintf(fp, "%s", string);
|
||||
fclose(fp);
|
||||
cJSON_Delete(htd->output_root);
|
||||
#endif
|
||||
|
||||
// Remove the variable tracking.
|
||||
u_var_remove_root(htd);
|
||||
|
||||
|
@ -389,6 +624,7 @@ ht_device_destroy(struct xrt_device *xdev)
|
|||
extern "C" struct xrt_device *
|
||||
ht_device_create(struct xrt_prober *xp, struct t_stereo_camera_calibration *calib)
|
||||
{
|
||||
enum ht_run_type run_type = HT_RUN_TYPE_VALVE_INDEX;
|
||||
XRT_TRACE_MARKER();
|
||||
enum u_device_alloc_flags flags = U_DEVICE_ALLOC_NO_FLAGS;
|
||||
|
||||
|
@ -401,28 +637,36 @@ ht_device_create(struct xrt_prober *xp, struct t_stereo_camera_calibration *cali
|
|||
// Setup logging first. We like logging.
|
||||
htd->ll = debug_get_log_option_ht_log();
|
||||
|
||||
// Get configuration
|
||||
/*
|
||||
* Get configuration
|
||||
*/
|
||||
|
||||
assert(calib != NULL);
|
||||
htd->run_type = run_type;
|
||||
getCalibration(htd, calib);
|
||||
// Set defaults - most people won't have a config json and it won't get past here.
|
||||
userConfigSetDefaults(htd);
|
||||
getUserConfig(htd);
|
||||
getModelsFolder(htd);
|
||||
|
||||
// Add xrt_frame_sink and xrt_frame_node implementations
|
||||
/*
|
||||
* Add our xrt_frame_sink and xrt_frame_node implementations to ourselves
|
||||
*/
|
||||
|
||||
htd->sink.push_frame = &ht_sink_push_frame;
|
||||
htd->node.break_apart = &ht_node_break_apart;
|
||||
htd->node.destroy = &ht_node_destroy;
|
||||
|
||||
// Add ourselves to the frame context
|
||||
xrt_frame_context_add(&htd->camera.xfctx, &htd->node);
|
||||
|
||||
|
||||
htd->camera.one_view_size_px.w = 960;
|
||||
htd->camera.one_view_size_px.h = 960;
|
||||
|
||||
htd->camera.prober = xp;
|
||||
htd->camera.xfs = NULL; // paranoia
|
||||
|
||||
xrt_prober_list_video_devices(htd->camera.prober, on_video_device, htd);
|
||||
|
||||
|
||||
if (!htd->found_camera) {
|
||||
if (htd->camera.xfs == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -444,7 +688,7 @@ ht_device_create(struct xrt_prober *xp, struct t_stereo_camera_calibration *cali
|
|||
htd->base.tracking_origin->offset.orientation.w = 1.0f;
|
||||
|
||||
os_mutex_init(&htd->openxr_hand_data_mediator);
|
||||
os_mutex_init(&htd->dying_breath);
|
||||
os_mutex_init(&htd->unlocked_between_frames);
|
||||
|
||||
htd->base.update_inputs = ht_device_update_inputs;
|
||||
htd->base.get_hand_tracking = ht_device_get_hand_tracking;
|
||||
|
@ -463,16 +707,8 @@ ht_device_create(struct xrt_prober *xp, struct t_stereo_camera_calibration *cali
|
|||
htd->base.position_tracking_supported = true;
|
||||
htd->base.hand_tracking_supported = true;
|
||||
|
||||
#if defined(JSON_OUTPUT)
|
||||
htd->output_root = cJSON_CreateObject();
|
||||
htd->output_array = cJSON_CreateArray();
|
||||
cJSON_AddItemToObject(htd->output_root, "hand_array", htd->output_array);
|
||||
#endif
|
||||
|
||||
struct xrt_frame_sink *tmp = &htd->sink;
|
||||
|
||||
u_var_add_root(htd, "Camera based Hand Tracker", true);
|
||||
u_var_add_ro_text(htd, htd->base.str, "Name");
|
||||
|
||||
// This puts u_sink_create_to_r8g8b8_or_l8 on its own thread, so that nothing gets backed up if it runs slower
|
||||
// than the native camera framerate.
|
||||
|
@ -480,7 +716,7 @@ ht_device_create(struct xrt_prober *xp, struct t_stereo_camera_calibration *cali
|
|||
|
||||
// Converts images (we'd expect YUV422 or MJPEG) to R8G8B8. Can take a long time, especially on unoptimized
|
||||
// builds. If it's really slow, triple-check that you built Monado with optimizations!
|
||||
u_sink_create_to_r8g8b8_or_l8(&htd->camera.xfctx, tmp, &tmp);
|
||||
u_sink_create_format_converter(&htd->camera.xfctx, XRT_FORMAT_R8G8B8, tmp, &tmp);
|
||||
|
||||
// Puts the hand tracking code on its own thread, so that nothing upstream of it gets backed up if the hand
|
||||
// tracking code runs slower than the upstream framerate.
|
||||
|
@ -497,7 +733,7 @@ ht_device_create(struct xrt_prober *xp, struct t_stereo_camera_calibration *cali
|
|||
uint32_t selected_mode = 0;
|
||||
|
||||
for (; selected_mode < count; selected_mode++) {
|
||||
if (modes[selected_mode].format == htd->runtime_config.desired_format) {
|
||||
if (modes[selected_mode].format == htd->startup_config.desired_format) {
|
||||
found_mode = true;
|
||||
break;
|
||||
}
|
||||
|
@ -510,13 +746,30 @@ ht_device_create(struct xrt_prober *xp, struct t_stereo_camera_calibration *cali
|
|||
|
||||
free(modes);
|
||||
|
||||
u_var_add_root(htd, "Camera-based Hand Tracker", true);
|
||||
|
||||
xrt_fs_stream_start(htd->camera.xfs, tmp, XRT_FS_CAPTURE_TYPE_TRACKING, selected_mode);
|
||||
u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_fc_min, "hand_fc_min");
|
||||
u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_fc_min_d, "hand_fc_min_d");
|
||||
u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_beta, "hand_beta");
|
||||
u_var_add_draggable_f32(htd, &htd->dynamic_config.nms_iou, "nms_iou");
|
||||
u_var_add_draggable_f32(htd, &htd->dynamic_config.nms_threshold, "nms_threshold");
|
||||
u_var_add_draggable_f32(htd, &htd->dynamic_config.new_detection_threshold, "new_detection_threshold");
|
||||
|
||||
#if 0
|
||||
u_var_add_sink(htd, &htd->debug_sink, "Debug visualization");
|
||||
u_var_add_bool(htd, &htd->dynamic_config.scribble_raw_detections, "Scribble raw detections");
|
||||
u_var_add_bool(htd, &htd->dynamic_config.scribble_nms_detections, "Scribble NMS detections");
|
||||
u_var_add_bool(htd, &htd->dynamic_config.scribble_2d_keypoints, "Scribble 2D keypoints");
|
||||
u_var_add_bool(htd, &htd->dynamic_config.scribble_bounding_box, "Scribble bounding box");
|
||||
|
||||
#ifdef EXPERIMENTAL_DATASET_RECORDING
|
||||
htd->gui.start_json_record.ptr = htd;
|
||||
htd->gui.start_json_record.cb = htStartJsonCB;
|
||||
strcpy(htd->gui.start_json_record.label, "Start recording dataset!");
|
||||
u_var_add_button(htd, &htd->gui.start_json_record, "");
|
||||
#endif
|
||||
|
||||
u_var_add_sink_debug(htd, &htd->debug_sink, "i");
|
||||
|
||||
xrt_fs_stream_start(htd->camera.xfs, tmp, XRT_FS_CAPTURE_TYPE_TRACKING, selected_mode);
|
||||
|
||||
HT_DEBUG(htd, "Hand Tracker initialized!");
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ht_interface.h"
|
||||
#include "os/os_threading.h"
|
||||
|
||||
#include "xrt/xrt_device.h"
|
||||
|
@ -28,6 +29,11 @@
|
|||
|
||||
#include "util/u_template_historybuf.hpp"
|
||||
|
||||
#ifdef XRT_HAVE_GST
|
||||
#include "gstreamer/gst_pipeline.h"
|
||||
#include "gstreamer/gst_sink.h"
|
||||
#endif
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
#include "core/session/onnxruntime_c_api.h"
|
||||
|
@ -35,6 +41,7 @@
|
|||
#include <future>
|
||||
#include <vector>
|
||||
|
||||
using namespace xrt::auxiliary::util;
|
||||
|
||||
DEBUG_GET_ONCE_LOG_OPTION(ht_log, "HT_LOG", U_LOGGING_WARN)
|
||||
|
||||
|
@ -44,7 +51,8 @@ DEBUG_GET_ONCE_LOG_OPTION(ht_log, "HT_LOG", U_LOGGING_WARN)
|
|||
#define HT_WARN(htd, ...) U_LOG_XDEV_IFL_W(&htd->base, htd->ll, __VA_ARGS__)
|
||||
#define HT_ERROR(htd, ...) U_LOG_XDEV_IFL_E(&htd->base, htd->ll, __VA_ARGS__)
|
||||
|
||||
using namespace xrt::auxiliary::util;
|
||||
// #define ht_
|
||||
|
||||
|
||||
// To make clang-tidy happy
|
||||
#define opencv_distortion_param_num 4
|
||||
|
@ -54,11 +62,20 @@ using namespace xrt::auxiliary::util;
|
|||
* Compile-time defines to choose where to get camera frames from and what kind of output to give out
|
||||
*
|
||||
*/
|
||||
#undef JSON_OUTPUT
|
||||
#undef EXPERIMENTAL_DATASET_RECORDING
|
||||
|
||||
#define FCMIN_BBOX_ORIENTATION 3.0f
|
||||
#define FCMIN_D_BB0X_ORIENTATION 10.0f
|
||||
#define BETA_BB0X_ORIENTATION 0.0f
|
||||
|
||||
// #define FCMIN_BBOX_POSITION 15.0f
|
||||
// #define FCMIN_D_BB0X_POSITION 12.0f
|
||||
// #define BETA_BB0X_POSITION 0.3f
|
||||
|
||||
#define FCMIN_BBOX_POSITION 30.0f
|
||||
#define FCMIN_D_BB0X_POSITION 25.0f
|
||||
#define BETA_BB0X_POSITION 0.6f
|
||||
|
||||
#define FCMIN_BBOX 3.0f
|
||||
#define FCMIN_D_BB0X 10.0f
|
||||
#define BETA_BB0X 0.0f
|
||||
|
||||
|
||||
#define FCMIN_HAND 4.0f
|
||||
|
@ -113,6 +130,18 @@ enum HandJoint21Keypoint
|
|||
struct Palm7KP
|
||||
{
|
||||
struct xrt_vec2 kps[7];
|
||||
float confidence; // BETWEEN 0 and 1. okay???? okay????!???
|
||||
};
|
||||
|
||||
struct DetectionModelOutput
|
||||
{
|
||||
float rotation;
|
||||
float size;
|
||||
xrt_vec2 center;
|
||||
Palm7KP palm;
|
||||
|
||||
cv::Matx23f warp_there;
|
||||
cv::Matx23f warp_back;
|
||||
};
|
||||
|
||||
// To keep you on your toes. *Don't* think the 2D hand is the same as the 3D!
|
||||
|
@ -127,38 +156,51 @@ struct Hand3D
|
|||
struct xrt_vec3 kps[21];
|
||||
float y_disparity_error;
|
||||
float flow_error;
|
||||
int idx_l;
|
||||
int idx_r;
|
||||
bool rejected_by_smush; // init to false.
|
||||
|
||||
float handedness;
|
||||
uint64_t timestamp;
|
||||
};
|
||||
|
||||
|
||||
struct DetectionModelOutput
|
||||
{
|
||||
float rotation;
|
||||
float size;
|
||||
xrt_vec2 center;
|
||||
xrt_vec2 wrist;
|
||||
cv::Matx23f warp_there;
|
||||
cv::Matx23f warp_back;
|
||||
};
|
||||
|
||||
struct HandHistory3D
|
||||
{
|
||||
// Index 0 is current frame, index 1 is last frame, index 2 is second to last frame.
|
||||
// No particular reason to keep the last 5 frames. we only really only use the current and last one.
|
||||
float handedness;
|
||||
HistoryBuffer<Hand3D, 5> last_hands;
|
||||
bool have_prev_hand = false;
|
||||
double prev_dy;
|
||||
uint64_t prev_ts_for_alpha; // also in last_hands_unfiltered[0] but go away.
|
||||
|
||||
uint64_t first_ts;
|
||||
uint64_t prev_filtered_ts;
|
||||
|
||||
HistoryBuffer<Hand3D, 10> last_hands_unfiltered;
|
||||
HistoryBuffer<Hand3D, 10> last_hands_filtered;
|
||||
|
||||
// Euro filter for 21kps.
|
||||
m_filter_euro_vec3 filters[21];
|
||||
int uuid;
|
||||
};
|
||||
|
||||
struct HandHistory2DBBox
|
||||
{
|
||||
m_filter_euro_vec2 m_filter_wrist;
|
||||
m_filter_euro_vec2 m_filter_middle;
|
||||
// Ugh, I should definitely iterate these somehow...
|
||||
// m_filter_euro_vec2 m_filter_wrist;
|
||||
// m_filter_euro_vec2 m_filter_index;
|
||||
// m_filter_euro_vec2 m_filter_middle;
|
||||
// m_filter_euro_vec2 m_filter_pinky;
|
||||
|
||||
m_filter_euro_vec2 m_filter_center;
|
||||
m_filter_euro_vec2 m_filter_direction;
|
||||
|
||||
HistoryBuffer<xrt_vec2, 50> wrist_unfiltered;
|
||||
HistoryBuffer<xrt_vec2, 50> index_unfiltered;
|
||||
HistoryBuffer<xrt_vec2, 50> middle_unfiltered;
|
||||
HistoryBuffer<xrt_vec2, 50> pinky_unfiltered;
|
||||
bool htAlgorithm_approves = false;
|
||||
};
|
||||
|
||||
|
||||
|
@ -202,6 +244,39 @@ struct ht_view
|
|||
Hand2D (*run_keypoint_model)(struct ht_view *htv, cv::Mat img);
|
||||
};
|
||||
|
||||
/*!
 * How much of the detection output should be drawn ("scribbled").
 */
enum ht_detection_scribble
{
	HT_DETECTION_SCRIBBLE_ALL = 0,  //!< Draw everything.
	HT_DETECTION_SCRIBBLE_SOME = 1, //!< Draw a subset.
	HT_DETECTION_SCRIBBLE_NONE = 2, //!< Draw nothing.
};
|
||||
|
||||
struct ht_dynamic_config
|
||||
{
|
||||
char name[64];
|
||||
struct u_var_draggable_f32 hand_fc_min;
|
||||
struct u_var_draggable_f32 hand_fc_min_d;
|
||||
struct u_var_draggable_f32 hand_beta;
|
||||
struct u_var_draggable_f32 max_vel;
|
||||
struct u_var_draggable_f32 max_acc;
|
||||
struct u_var_draggable_f32 nms_iou;
|
||||
struct u_var_draggable_f32 nms_threshold;
|
||||
struct u_var_draggable_f32 new_detection_threshold;
|
||||
bool scribble_raw_detections;
|
||||
bool scribble_nms_detections;
|
||||
bool scribble_2d_keypoints;
|
||||
bool scribble_bounding_box;
|
||||
};
|
||||
|
||||
struct ht_startup_config
|
||||
{
|
||||
bool palm_detection_use_mediapipe = false;
|
||||
bool keypoint_estimation_use_mediapipe = false;
|
||||
enum xrt_format desired_format;
|
||||
char model_slug[1024];
|
||||
};
|
||||
|
||||
// This is all ad-hoc! Review very welcome!
|
||||
struct ht_device
|
||||
{
|
||||
struct xrt_device base;
|
||||
|
@ -211,7 +286,7 @@ struct ht_device
|
|||
struct xrt_frame_sink sink;
|
||||
struct xrt_frame_node node;
|
||||
|
||||
struct xrt_frame_sink *debug_sink; // this must be bad.
|
||||
struct u_sink_debug debug_sink; // this must be bad.
|
||||
|
||||
|
||||
struct
|
||||
|
@ -227,19 +302,39 @@ struct ht_device
|
|||
struct xrt_size one_view_size_px;
|
||||
} camera;
|
||||
|
||||
bool found_camera;
|
||||
|
||||
|
||||
#if defined(EXPERIMENTAL_DATASET_RECORDING)
|
||||
struct
|
||||
{
|
||||
struct u_var_button start_json_record;
|
||||
} gui;
|
||||
struct
|
||||
{
|
||||
struct gstreamer_pipeline *gp;
|
||||
struct gstreamer_sink *gs;
|
||||
struct xrt_frame_sink *sink;
|
||||
struct xrt_frame_context xfctx;
|
||||
uint64_t offset_ns;
|
||||
uint64_t last_frame_ns;
|
||||
uint64_t current_index;
|
||||
|
||||
cJSON *output_root;
|
||||
cJSON *output_array;
|
||||
} gst;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
const OrtApi *ort_api;
|
||||
OrtEnv *ort_env;
|
||||
|
||||
struct xrt_frame *frame_for_process;
|
||||
cv::Mat *mat_for_process;
|
||||
|
||||
struct ht_view views[2];
|
||||
|
||||
// These are all we need - R and T aren't of interest to us.
|
||||
// [2];
|
||||
float baseline;
|
||||
|
||||
struct xrt_quat stereo_camera_to_left_camera;
|
||||
|
||||
uint64_t current_frame_timestamp; // SUPER dumb.
|
||||
|
@ -248,25 +343,25 @@ struct ht_device
|
|||
|
||||
struct os_mutex openxr_hand_data_mediator;
|
||||
struct xrt_hand_joint_set hands_for_openxr[2];
|
||||
uint64_t hands_for_openxr_timestamp;
|
||||
|
||||
// Only change these when you have unlocked_between_frames, i.e. when the hand tracker is between frames.
|
||||
bool tracking_should_die;
|
||||
struct os_mutex dying_breath;
|
||||
bool tracking_should_record_dataset;
|
||||
struct os_mutex unlocked_between_frames;
|
||||
|
||||
// Change this whenever you want
|
||||
bool debug_scribble = true;
|
||||
|
||||
ht_run_type run_type;
|
||||
|
||||
#if defined(JSON_OUTPUT)
|
||||
cJSON *output_root;
|
||||
cJSON *output_array;
|
||||
#endif
|
||||
|
||||
struct
|
||||
{
|
||||
bool palm_detection_use_mediapipe;
|
||||
bool keypoint_estimation_use_mediapipe;
|
||||
enum xrt_format desired_format;
|
||||
char model_slug[1024];
|
||||
} runtime_config;
|
||||
|
||||
struct ht_startup_config startup_config;
|
||||
struct ht_dynamic_config dynamic_config;
|
||||
|
||||
|
||||
int dynamic_config_to_use;
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
#include "math/m_vec3.h"
|
||||
|
||||
#include "ht_driver.hpp"
|
||||
#include "util/u_time.h"
|
||||
#include "xrt/xrt_defines.h"
|
||||
|
||||
|
||||
const int num_real_joints = 21;
|
||||
|
@ -21,35 +23,33 @@ const int num_real_joints = 21;
|
|||
static float
|
||||
errHandDisparity(Hand2D *left_rays, Hand2D *right_rays)
|
||||
{
|
||||
float error = 0.0f;
|
||||
float error_y_diff = 0.0f;
|
||||
for (int i = 0; i < 21; i++) {
|
||||
float diff = fabsf(left_rays->kps[i].y - right_rays->kps[i].y);
|
||||
float diff_y = fabsf(left_rays->kps[i].y - right_rays->kps[i].y);
|
||||
// Big question about what's the best loss function. Gut feeling was "I should be using sum of squared
|
||||
// errors" but I don't really know. Using just sum of errors for now. Ideally it'd also be not very
|
||||
// sensitive to one or two really bad outliers.
|
||||
error += diff;
|
||||
error_y_diff += diff_y;
|
||||
}
|
||||
return error;
|
||||
// U_LOG_E("stereo camera err is %f, y_disparity is %f", err_stereo_camera, error_y_diff);
|
||||
return error_y_diff;
|
||||
}
|
||||
|
||||
|
||||
static float
|
||||
errHandFlow(Hand3D *prev, Hand3D *next)
|
||||
sumOfHandJointDistances(Hand3D *one, Hand3D *two)
|
||||
{
|
||||
float error = 0.0f;
|
||||
float dist = 0.0f;
|
||||
for (int i = 0; i < num_real_joints; i++) {
|
||||
xrt_vec3 first = prev->kps[i];
|
||||
xrt_vec3 second = next->kps[i];
|
||||
error += m_vec3_len(m_vec3_sub(second, first));
|
||||
dist += m_vec3_len(one->kps[i] - two->kps[i]);
|
||||
}
|
||||
return error;
|
||||
return dist;
|
||||
}
|
||||
|
||||
static float
|
||||
errHandHistory(HandHistory3D *history_hand, Hand3D *present_hand)
|
||||
{
|
||||
// Remember we never have to deal with an empty hand. Can always access the last element.
|
||||
return errHandFlow(history_hand->last_hands[0], present_hand);
|
||||
return sumOfHandJointDistances(history_hand->last_hands_unfiltered[0], present_hand);
|
||||
}
|
||||
|
||||
|
||||
|
@ -59,7 +59,7 @@ applyJointWidths(struct xrt_hand_joint_set *set)
|
|||
// Thanks to Nick Klingensmith for this idea
|
||||
struct xrt_hand_joint_value *gr = set->values.hand_joint_set_default;
|
||||
|
||||
const float hand_joint_size[5] = {0.022f, 0.021f, 0.022f, 0.021f, 0.02f};
|
||||
const float finger_joint_size[5] = {0.022f, 0.021f, 0.022f, 0.021f, 0.02f};
|
||||
const float hand_finger_size[5] = {1.0f, 1.0f, 0.83f, 0.75f};
|
||||
|
||||
const float thumb_size[4] = {0.016f, 0.014f, 0.012f, 0.012f};
|
||||
|
@ -73,10 +73,15 @@ applyJointWidths(struct xrt_hand_joint_set *set)
|
|||
for (int finger = 0; finger < 4; finger++) {
|
||||
for (int joint = 0; joint < 5; joint++) {
|
||||
int set_idx = finger * 5 + joint + XRT_HAND_JOINT_INDEX_METACARPAL;
|
||||
float val = hand_joint_size[joint] * hand_finger_size[finger] * .5 * mul;
|
||||
float val = finger_joint_size[joint] * hand_finger_size[finger] * .5 * mul;
|
||||
gr[set_idx].radius = val;
|
||||
}
|
||||
}
|
||||
// The radius of each joint is the distance from the joint to the skin in meters. -OpenXR spec.
|
||||
set->values.hand_joint_set_default[XRT_HAND_JOINT_PALM].radius =
|
||||
.032f * .5f; // Measured my palm thickness with calipers
|
||||
set->values.hand_joint_set_default[XRT_HAND_JOINT_WRIST].radius =
|
||||
.040f * .5f; // Measured my wrist thickness with calipers
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -84,11 +89,11 @@ applyThumbIndexDrag(Hand3D *hand)
|
|||
{
|
||||
// TERRIBLE HACK.
|
||||
// Puts the thumb and pointer a bit closer together to be better at triggering XR clients' pinch detection.
|
||||
const float max_radius = 0.09; // 9 centimeters.
|
||||
const float max_radius = 0.05;
|
||||
const float min_radius = 0.00;
|
||||
|
||||
// no min drag, min drag always 0.
|
||||
const float max_drag = 0.75f;
|
||||
const float max_drag = 0.85f;
|
||||
|
||||
xrt_vec3 thumb = hand->kps[THMB_TIP];
|
||||
xrt_vec3 index = hand->kps[INDX_TIP];
|
||||
|
@ -243,7 +248,7 @@ static void
|
|||
handednessHandHistory3D(HandHistory3D *history)
|
||||
{
|
||||
|
||||
float inter = handednessJointSet(history->last_hands[0]);
|
||||
float inter = handednessJointSet(history->last_hands_unfiltered[0]);
|
||||
|
||||
if ((fabsf(inter) > 0.3f) || (fabsf(history->handedness) < 0.3f)) {
|
||||
history->handedness += inter;
|
||||
|
@ -264,47 +269,163 @@ handEuroFiltersInit(HandHistory3D *history, double fc_min, double fc_min_d, doub
|
|||
}
|
||||
}
|
||||
|
||||
static Hand3D
|
||||
handEuroFiltersRun(HandHistory3D *history)
|
||||
/*!
 * Smoothing factor of a first-order low-pass filter.
 *
 * Computes alpha = 1 / (1 + tau / dt) with tau = 1 / (2 * pi * Fc), rearranged
 * as r / (r + 1) with r = 2 * pi * Fc * dt so only one division is needed.
 *
 * @param Fc Cutoff frequency.
 * @param dt Time step; dt == 0 yields alpha == 0.
 * @return The blend weight to give the newest sample in exp_smooth().
 */
static double
calc_smoothing_alpha(double Fc, double dt)
{
	const double omega_dt = 2.0 * M_PI * Fc * dt;
	return omega_dt / (omega_dt + 1.0);
}
|
||||
|
||||
/*!
 * One step of exponential smoothing.
 *
 * @param alpha  Weight of the new sample; (1 - alpha) is the weight of the previous value.
 * @param y      New sample.
 * @param prev_y Previous smoothed value.
 * @return alpha * y + (1 - alpha) * prev_y
 */
static double
exp_smooth(double alpha, double y, double prev_y)
{
	const double fresh_part = alpha * y;
	const double carried_part = (1.0 - alpha) * prev_y;
	return fresh_part + carried_part;
}
|
||||
|
||||
void
|
||||
handEuroFiltersRun(struct ht_device *htd, HandHistory3D *f, Hand3D *out_hand)
|
||||
{
|
||||
// Assume present hand is in element 0!
|
||||
Hand3D hand;
|
||||
for (int i = 0; i < 21; i++) {
|
||||
m_filter_euro_vec3_run(&history->filters[i], history->last_hands[0]->timestamp,
|
||||
&history->last_hands[0]->kps[i], &hand.kps[i]);
|
||||
#if 0
|
||||
// float vals[4] = {0.5, 0.33, 0.1, 0.07};
|
||||
float vals[4] = {0.9, 0.09, 0.009, 0.001};
|
||||
int m = f->last_hands_unfiltered.length-1;
|
||||
double ts_out = (vals[0] * (double)f->last_hands_unfiltered[std::min(m,0)]->timestamp) +
|
||||
(vals[1] * (double)f->last_hands_unfiltered[std::min(m,1)]->timestamp) +
|
||||
(vals[2] * (double)f->last_hands_unfiltered[std::min(m,2)]->timestamp) +
|
||||
(vals[3] * (double)f->last_hands_unfiltered[std::min(m,3)]->timestamp);
|
||||
out_hand->timestamp = (uint64_t)ts_out;
|
||||
|
||||
for (int kp_idx = 0; kp_idx < 21; kp_idx++) {
|
||||
for (int hist_idx = 0; hist_idx < 4; hist_idx++) {
|
||||
float *in_y_arr = (float *)&f->last_hands_unfiltered[std::min(m,hist_idx)]->kps[kp_idx];
|
||||
float *out_y_arr = (float *)&out_hand->kps[kp_idx];
|
||||
for (int i = 0; i < 3; i++) {
|
||||
out_y_arr[i] += in_y_arr[i] * vals[hist_idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
return hand;
|
||||
#elif 0
|
||||
for (int i = 0; i < 21; i++) {
|
||||
m_filter_euro_vec3_run(&f->filters[i], f->last_hands_unfiltered[0]->timestamp,
|
||||
&f->last_hands_unfiltered[0]->kps[i], &out_hand->kps[i]);
|
||||
}
|
||||
// conspicuously wrong!
|
||||
out_hand->timestamp = f->last_hands_unfiltered[0]->timestamp;
|
||||
#else
|
||||
|
||||
if (!f->have_prev_hand) {
|
||||
f->last_hands_filtered.push(*f->last_hands_unfiltered[0]);
|
||||
uint64_t ts = f->last_hands_unfiltered[0]->timestamp;
|
||||
f->prev_ts_for_alpha = ts;
|
||||
f->first_ts = ts;
|
||||
f->prev_filtered_ts = ts;
|
||||
f->prev_dy = 0;
|
||||
f->have_prev_hand = true;
|
||||
*out_hand = *f->last_hands_unfiltered[0];
|
||||
}
|
||||
uint64_t ts = f->last_hands_unfiltered[0]->timestamp;
|
||||
double dt, alpha_d;
|
||||
dt = (double)(ts - f->prev_ts_for_alpha) / U_TIME_1S_IN_NS;
|
||||
|
||||
double abs_dy =
|
||||
(sumOfHandJointDistances(f->last_hands_unfiltered[0], f->last_hands_filtered[0]) / 21.0f) * 0.7f;
|
||||
alpha_d = calc_smoothing_alpha(htd->dynamic_config.hand_fc_min_d.val, dt);
|
||||
|
||||
double alpha, fc_cutoff;
|
||||
f->prev_dy = exp_smooth(alpha_d, abs_dy, f->prev_dy);
|
||||
|
||||
fc_cutoff = htd->dynamic_config.hand_fc_min.val + htd->dynamic_config.hand_beta.val * f->prev_dy;
|
||||
alpha = calc_smoothing_alpha(fc_cutoff, dt);
|
||||
HT_DEBUG(htd, "dt is %f, abs_dy is %f, alpha is %f", dt, abs_dy, alpha);
|
||||
|
||||
for (int i = 0; i < 21; i++) {
|
||||
out_hand->kps[i].x =
|
||||
exp_smooth(alpha, f->last_hands_unfiltered[0]->kps[i].x, f->last_hands_filtered[0]->kps[i].x);
|
||||
out_hand->kps[i].y =
|
||||
exp_smooth(alpha, f->last_hands_unfiltered[0]->kps[i].y, f->last_hands_filtered[0]->kps[i].y);
|
||||
out_hand->kps[i].z =
|
||||
exp_smooth(alpha, f->last_hands_unfiltered[0]->kps[i].z, f->last_hands_filtered[0]->kps[i].z);
|
||||
}
|
||||
double prev_ts_offset = (double)(f->prev_filtered_ts - f->first_ts);
|
||||
double current_ts_offset = (double)(ts - f->first_ts);
|
||||
double new_filtered_ts_offset = exp_smooth(alpha, current_ts_offset, prev_ts_offset);
|
||||
uint64_t new_filtered_ts = (uint64_t)(new_filtered_ts_offset) + f->first_ts;
|
||||
out_hand->timestamp = new_filtered_ts;
|
||||
f->prev_filtered_ts = out_hand->timestamp;
|
||||
f->prev_ts_for_alpha = ts; // NOT the filtered timestamp. NO.
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool
|
||||
rejectTooFarOrTooClose(Hand3D *hand)
|
||||
rejectTooFar(struct ht_device *htd, Hand3D *hand)
|
||||
{
|
||||
const float max_dist_from_camera_sqrd =
|
||||
2.f * 2.f; // If you ever run into somebody with 2-meter-long arms, let me know!
|
||||
const float min_dist_from_camera_sqrd = 0.05f * 0.05f;
|
||||
const float max_dist = 1.0f; // this sucks too - make it bigger if you can.
|
||||
const float max_dist_from_camera_sqrd = max_dist * max_dist;
|
||||
for (int i = 0; i < 21; i++) {
|
||||
xrt_vec3 pos = hand->kps[i];
|
||||
float len = m_vec3_len_sqrd(pos); // Faster.
|
||||
if (len > max_dist_from_camera_sqrd) {
|
||||
return false;
|
||||
U_LOG_W("Hand is somewhere we wouldn't expect!");
|
||||
}
|
||||
if (len < min_dist_from_camera_sqrd) {
|
||||
return false;
|
||||
}
|
||||
if (pos.z > 0.0f) { // remember negative-Z is forward!
|
||||
return false;
|
||||
goto reject;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
||||
reject:
|
||||
HT_TRACE(htd, "Rejected too far!");
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
rejectBadHand(Hand3D *hand)
|
||||
rejectTooClose(struct ht_device *htd, Hand3D *hand)
|
||||
{
|
||||
if (!rejectTooFarOrTooClose(hand)) {
|
||||
return false;
|
||||
const float min_dist = 0.12f; // Be a bit aggressive here - it's nice to not let people see our tracking fail
|
||||
// when the hands are way too close
|
||||
const float min_dist_from_camera_sqrd = min_dist * min_dist;
|
||||
|
||||
for (int i = 0; i < 21; i++) {
|
||||
xrt_vec3 pos = hand->kps[i];
|
||||
float len = m_vec3_len_sqrd(pos); // Faster.
|
||||
if (len < min_dist_from_camera_sqrd) {
|
||||
goto reject;
|
||||
}
|
||||
if (pos.z > min_dist) { // remember negative-Z is forward!
|
||||
goto reject;
|
||||
}
|
||||
}
|
||||
// todo: add lots of checks! finger length, fingers bending backwards, etc.
|
||||
return true;
|
||||
|
||||
reject:
|
||||
HT_TRACE(htd, "Rejected too close!");
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
rejectTinyPalm(struct ht_device *htd, Hand3D *hand)
|
||||
{
|
||||
// This one sucks, because some people really have tiny hands. If at some point you can stop using it, stop
|
||||
// using it.
|
||||
// Weird scoping so that we can still do gotos
|
||||
|
||||
{
|
||||
float len = m_vec3_len(hand->kps[WRIST] - hand->kps[INDX_PXM]);
|
||||
if ((len < 0.03f || len > 0.25f)) {
|
||||
goto reject;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
float len = m_vec3_len(hand->kps[WRIST] - hand->kps[MIDL_PXM]);
|
||||
if (len < 0.03f || len > 0.25f) {
|
||||
goto reject;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
reject:
|
||||
HT_TRACE(htd, "Rejected because too big or too small!");
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -111,8 +111,8 @@ blackbar(cv::Mat &in, cv::Mat &out, xrt_size out_size)
|
|||
// Easy to think about, always right, but pretty slow:
|
||||
// Get a matrix from the original to the scaled down / blackbar'd image, then get one that goes back.
|
||||
// Then just warpAffine() it.
|
||||
// More expensive on the computer, but cheap in programmer time - I'm somewhat allergic to thinking in pixel
|
||||
// coordinates. We can come back and optimize later.
|
||||
// Easy in programmer time - never have to worry about off by one, special cases. We can come back and optimize
|
||||
// later.
|
||||
|
||||
// Do the black bars need to be on top and bottom, or on left and right?
|
||||
float scale_down_w = (float)out_size.w / (float)in.cols; // 128/1280 = 0.1
|
||||
|
@ -153,6 +153,8 @@ blackbar(cv::Mat &in, cv::Mat &out, xrt_size out_size)
|
|||
ret(0,0) = scale_from_out_to_in; ret(0,1) = 0.0f; ret(0,2) = 0.0f;
|
||||
ret(1,0) = 0.0f; ret(1,1) = scale_from_out_to_in; ret(1,2) = 0.0f;
|
||||
// clang-format on
|
||||
cv::imshow("hi", out);
|
||||
cv::waitKey(1);
|
||||
return ret;
|
||||
}
|
||||
assert(!"Uh oh! Unimplemented!");
|
||||
|
@ -177,13 +179,19 @@ transformVecBy2x3(T in, cv::Matx23f warp_back)
|
|||
|
||||
//! Draw some dots. Factors out some boilerplate.
|
||||
static void
|
||||
handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, int type)
|
||||
handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type)
|
||||
{
|
||||
cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, 1.0f, 1.0f), type);
|
||||
cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, intensity, intensity), type);
|
||||
}
|
||||
|
||||
static DetectionModelOutput
|
||||
rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 middle, xrt_vec2 wrist, DetectionModelOutput *out)
|
||||
static void
|
||||
centerAndRotationFromJoints(struct ht_view *htv,
|
||||
const xrt_vec2 *wrist,
|
||||
const xrt_vec2 *index,
|
||||
const xrt_vec2 *middle,
|
||||
const xrt_vec2 *little,
|
||||
xrt_vec2 *out_center,
|
||||
xrt_vec2 *out_wrist_to_middle)
|
||||
{
|
||||
// Close to what Mediapipe does, but slightly different - just uses the middle proximal instead of "estimating"
|
||||
// it from the pinky and index.
|
||||
|
@ -193,17 +201,25 @@ rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 middle, xrt_vec2 wrist, Dete
|
|||
// Feel free to look at the way MP does it, you can see it's different.
|
||||
// https://github.com/google/mediapipe/blob/master/mediapipe/modules/holistic_landmark/calculators/hand_detections_from_pose_to_rects_calculator.cc
|
||||
|
||||
struct xrt_vec2 hand_center = middle; // Middle proximal, straight-up.
|
||||
// struct xrt_vec2 hand_center = m_vec2_mul_scalar(middle, 0.5) + m_vec2_mul_scalar(index, 0.5*(2.0f/3.0f)) +
|
||||
// m_vec2_mul_scalar(little, 0.5f*((1.0f/3.0f))); // Middle proximal, straight-up.
|
||||
// U_LOG_E("%f %f %f %f %f %f %f %f ", wrist.x, wrist.y, index.x, index.y, middle.x, middle.y, little.x,
|
||||
// little.y);
|
||||
*out_center = m_vec2_lerp(*middle, m_vec2_lerp(*index, *little, 1.0f / 3.0f), 0.25f);
|
||||
|
||||
struct xrt_vec2 wrist_to_middle = middle - wrist;
|
||||
*out_wrist_to_middle = *out_center - *wrist;
|
||||
}
|
||||
|
||||
float box_size = m_vec2_len(wrist_to_middle) * 2.0f * 1.7f;
|
||||
static DetectionModelOutput
|
||||
rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 center, xrt_vec2 wrist_to_middle, DetectionModelOutput *out)
|
||||
{
|
||||
float box_size = m_vec2_len(wrist_to_middle) * 2.0f * 1.73f;
|
||||
|
||||
double rot = atan2(wrist_to_middle.x, wrist_to_middle.y) * (-180.0f / M_PI);
|
||||
|
||||
out->rotation = rot;
|
||||
out->size = box_size;
|
||||
out->center = hand_center;
|
||||
out->center = center;
|
||||
|
||||
cv::RotatedRect rrect =
|
||||
cv::RotatedRect(cv::Point2f(out->center.x, out->center.y), cv::Size2f(out->size, out->size), out->rotation);
|
||||
|
@ -211,10 +227,14 @@ rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 middle, xrt_vec2 wrist, Dete
|
|||
|
||||
cv::Point2f vertices[4];
|
||||
rrect.points(vertices);
|
||||
if (htv->htd->debug_scribble) {
|
||||
for (int i = 0; i < 4; i++)
|
||||
line(htv->debug_out_to_this, vertices[i], vertices[(i + 1) % 4], cv::Scalar(i * 63, i * 63, 0),
|
||||
2);
|
||||
if (htv->htd->debug_scribble && htv->htd->dynamic_config.scribble_bounding_box) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
cv::Scalar b = cv::Scalar(10, 30, 30);
|
||||
if (i == 3) {
|
||||
b = cv::Scalar(255, 255, 0);
|
||||
}
|
||||
cv::line(htv->debug_out_to_this, vertices[i], vertices[(i + 1) % 4], b, 2);
|
||||
}
|
||||
}
|
||||
// topright is 0. bottomright is 1. bottomleft is 2. topleft is 3.
|
||||
|
||||
|
@ -225,7 +245,7 @@ rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 middle, xrt_vec2 wrist, Dete
|
|||
out->warp_there = getAffineTransform(src_tri, dest_tri);
|
||||
out->warp_back = getAffineTransform(dest_tri, src_tri);
|
||||
|
||||
out->wrist = wrist;
|
||||
// out->wrist = wrist;
|
||||
|
||||
return *out;
|
||||
}
|
||||
|
|
|
@ -12,13 +12,26 @@
|
|||
|
||||
#include "xrt/xrt_device.h"
|
||||
|
||||
#include "tracking/t_tracking.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct t_stereo_camera_calibration;
|
||||
/*!
 * Which hardware setup the hand tracker runs on.
 */
enum ht_run_type
{
	/*! Valve Index camera, found through the frameserver prober; joint poses are in the space of the left
	 *  (unrectified) camera. */
	HT_RUN_TYPE_VALVE_INDEX = 0,
	/*! North Star with a hard-coded depthai stereo camera; joint poses are in the space of the "center" of
	 *  the stereo camera. */
	HT_RUN_TYPE_NORTH_STAR = 1,
};
|
||||
// YES this is stupid. PLEASE bikeshed me on this when the time comes, this is terrible.
|
||||
|
||||
// With Valve Index, we use the frameserver prober and look for the Valve Index camera, and we give the joint poses out
|
||||
// in the space of the left (unrectified) camera.
|
||||
|
||||
// With North Star, (really just Moses's headset :)) we hard-code to opening up a depthai_fs_stereo_rgb and give the
|
||||
// joint poses out in the space of the "center" of the stereo camera. (Why? Because I don't have exact extrinsics from
|
||||
// the NS "eyes" to the cameras. Less code this way.)
|
||||
|
||||
/*!
|
||||
* @defgroup drv_ht Camera based hand tracking
|
||||
|
|
|
@ -501,6 +501,7 @@ runKeypointEstimator(struct ht_view *htv, cv::Mat img)
|
|||
return dumb;
|
||||
}
|
||||
|
||||
#undef HEAVY_SCRIBBLE
|
||||
|
||||
|
||||
static std::vector<Palm7KP>
|
||||
|
@ -524,14 +525,13 @@ runHandDetector(struct ht_view *htv, cv::Mat &raw_input)
|
|||
|
||||
std::vector<float> real_thing(size);
|
||||
|
||||
if (htv->htd->runtime_config.palm_detection_use_mediapipe) {
|
||||
if (htv->htd->startup_config.palm_detection_use_mediapipe) {
|
||||
std::vector<uint8_t> combined_planes(size);
|
||||
planarize(img, combined_planes.data());
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
float val = (float)combined_planes[i];
|
||||
real_thing[i] = (val - mean) / std;
|
||||
}
|
||||
// Hope it was worth it...
|
||||
} else {
|
||||
|
||||
assert(img.isContinuous());
|
||||
|
@ -592,7 +592,8 @@ runHandDetector(struct ht_view *htv, cv::Mat &raw_input)
|
|||
float score0 = classificators[i];
|
||||
float score = 1.0 / (1.0 + exp(-score0));
|
||||
|
||||
if (score > 0.6) {
|
||||
// Let a lot of detections in - they'll be slowly rejected later
|
||||
if (score > htv->htd->dynamic_config.nms_threshold.val) {
|
||||
// Boundary box.
|
||||
NMSPalm det;
|
||||
|
||||
|
@ -635,6 +636,24 @@ runHandDetector(struct ht_view *htv, cv::Mat &raw_input)
|
|||
detections.push_back(det);
|
||||
count++;
|
||||
|
||||
if (htv->htd->debug_scribble && (htv->htd->dynamic_config.scribble_raw_detections)) {
|
||||
xrt_vec2 center = transformVecBy2x3(xrt_vec2{cx, cy}, back_from_blackbar);
|
||||
|
||||
float sz = det.bbox.w * scale_factor;
|
||||
|
||||
cv::rectangle(
|
||||
htv->debug_out_to_this,
|
||||
{(int)(center.x - (sz / 2)), (int)(center.y - (sz / 2)), (int)sz, (int)sz},
|
||||
hsv2rgb(0.0f, math_map_ranges(det.confidence, 0.0f, 1.0f, 1.5f, -0.1f),
|
||||
math_map_ranges(det.confidence, 0.0f, 1.0f, 0.2f, 1.4f)),
|
||||
1);
|
||||
|
||||
for (int i = 0; i < 7; i++) {
|
||||
handDot(htv->debug_out_to_this, transformVecBy2x3(kps[i], back_from_blackbar),
|
||||
det.confidence * 7, ((float)i) * (360.0f / 7.0f), det.confidence, 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
int square = fmax(w, h);
|
||||
|
@ -647,20 +666,28 @@ runHandDetector(struct ht_view *htv, cv::Mat &raw_input)
|
|||
goto cleanup;
|
||||
}
|
||||
|
||||
nms_palms = filterBoxesWeightedAvg(detections);
|
||||
nms_palms = filterBoxesWeightedAvg(detections, htv->htd->dynamic_config.nms_iou.val);
|
||||
|
||||
|
||||
|
||||
for (NMSPalm cooler : nms_palms) {
|
||||
|
||||
// Display box
|
||||
|
||||
struct xrt_vec2 tl = {cooler.bbox.cx - cooler.bbox.w / 2, cooler.bbox.cy - cooler.bbox.h / 2};
|
||||
struct xrt_vec2 bob = transformVecBy2x3(tl, back_from_blackbar);
|
||||
float sz = cooler.bbox.w / scale_factor;
|
||||
float sz = cooler.bbox.w * scale_factor;
|
||||
|
||||
if (htv->htd->debug_scribble) {
|
||||
cv::rectangle(htv->debug_out_to_this, {(int)bob.x, (int)bob.y, (int)sz, (int)sz}, {0, 0, 255},
|
||||
5);
|
||||
if (htv->htd->debug_scribble && htv->htd->dynamic_config.scribble_nms_detections) {
|
||||
cv::rectangle(htv->debug_out_to_this, {(int)bob.x, (int)bob.y, (int)sz, (int)sz},
|
||||
hsv2rgb(180.0f, math_map_ranges(cooler.confidence, 0.0f, 1.0f, 0.8f, -0.1f),
|
||||
math_map_ranges(cooler.confidence, 0.0f, 1.0f, 0.2f, 1.4f)),
|
||||
2);
|
||||
for (int i = 0; i < 7; i++) {
|
||||
handDot(htv->debug_out_to_this,
|
||||
transformVecBy2x3(cooler.keypoints[i], back_from_blackbar),
|
||||
cooler.confidence * 14, ((float)i) * (360.0f / 7.0f), cooler.confidence, 3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -669,11 +696,8 @@ runHandDetector(struct ht_view *htv, cv::Mat &raw_input)
|
|||
for (int i = 0; i < 7; i++) {
|
||||
struct xrt_vec2 b = cooler.keypoints[i];
|
||||
this_element.kps[i] = transformVecBy2x3(b, back_from_blackbar);
|
||||
if (htv->htd->debug_scribble) {
|
||||
handDot(htv->debug_out_to_this, this_element.kps[i], 5, ((float)i) * (360.0f / 7.0f),
|
||||
2);
|
||||
}
|
||||
}
|
||||
this_element.confidence = cooler.confidence;
|
||||
|
||||
output.push_back(this_element);
|
||||
}
|
||||
|
@ -689,7 +713,7 @@ cleanup:
|
|||
static void
|
||||
addSlug(struct ht_device *htd, const char *suffix, char *out)
|
||||
{
|
||||
strcpy(out, htd->runtime_config.model_slug);
|
||||
strcpy(out, htd->startup_config.model_slug);
|
||||
strcat(out, suffix);
|
||||
}
|
||||
|
||||
|
@ -706,7 +730,7 @@ initKeypointEstimator(struct ht_device *htd, ht_view *htv)
|
|||
ORT_CHECK(g_ort, g_ort->SetIntraOpNumThreads(opts, 1));
|
||||
|
||||
char modelLocation[1024];
|
||||
if (htd->runtime_config.keypoint_estimation_use_mediapipe) {
|
||||
if (htd->startup_config.keypoint_estimation_use_mediapipe) {
|
||||
addSlug(htd, "hand_landmark_MEDIAPIPE.onnx", modelLocation);
|
||||
} else {
|
||||
addSlug(htd, "hand_landmark_COLLABORA.onnx", modelLocation);
|
||||
|
@ -750,7 +774,7 @@ initHandDetector(struct ht_device *htd, ht_view *htv)
|
|||
// Hard-coded. Even though you can use the ONNX runtime's API to dynamically figure these out, that doesn't make
|
||||
// any sense because these don't change between runs, and if you are swapping models you have to do much more
|
||||
// than just change the input/output names.
|
||||
if (htd->runtime_config.palm_detection_use_mediapipe) {
|
||||
if (htd->startup_config.palm_detection_use_mediapipe) {
|
||||
addSlug(htd, "palm_detection_MEDIAPIPE.onnx", modelLocation);
|
||||
model_hd->input_shape.push_back(1);
|
||||
model_hd->input_shape.push_back(3);
|
||||
|
|
|
@ -75,7 +75,7 @@ boxIOU(const Box &a, const Box &b)
|
|||
static NMSPalm
|
||||
weightedAvgBoxes(std::vector<NMSPalm> &detections)
|
||||
{
|
||||
float weight = 0.0f;
|
||||
float weight = 0.0f; // or, sum_confidences.
|
||||
float cx = 0.0f;
|
||||
float cy = 0.0f;
|
||||
float size = 0.0f;
|
||||
|
@ -100,6 +100,26 @@ weightedAvgBoxes(std::vector<NMSPalm> &detections)
|
|||
out.keypoints[i].x /= weight;
|
||||
out.keypoints[i].y /= weight;
|
||||
}
|
||||
|
||||
|
||||
float bare_confidence = weight / detections.size();
|
||||
|
||||
// desmos \frac{1}{1+e^{-.5x}}-.5
|
||||
|
||||
float steep = 0.2;
|
||||
float cent = 0.5;
|
||||
|
||||
float exp = detections.size();
|
||||
|
||||
float sigmoid_addendum = (1.0f / (1.0f + pow(M_E, (-steep * exp)))) - cent;
|
||||
|
||||
float diff_bare_to_one = 1.0f - bare_confidence;
|
||||
|
||||
out.confidence = bare_confidence + (sigmoid_addendum * diff_bare_to_one);
|
||||
|
||||
// U_LOG_E("Bare %f num %f sig %f diff %f out %f", bare_confidence, exp, sigmoid_addendum, diff_bare_to_one,
|
||||
// out.confidence);
|
||||
|
||||
out.bbox.cx = cx;
|
||||
out.bbox.cy = cy;
|
||||
out.bbox.w = size;
|
||||
|
@ -108,7 +128,7 @@ weightedAvgBoxes(std::vector<NMSPalm> &detections)
|
|||
}
|
||||
|
||||
static std::vector<NMSPalm>
|
||||
filterBoxesWeightedAvg(std::vector<NMSPalm> &detections)
|
||||
filterBoxesWeightedAvg(std::vector<NMSPalm> &detections, float min_iou = 0.1f)
|
||||
{
|
||||
std::vector<std::vector<NMSPalm>> overlaps;
|
||||
std::vector<NMSPalm> outs;
|
||||
|
@ -123,7 +143,7 @@ filterBoxesWeightedAvg(std::vector<NMSPalm> &detections)
|
|||
// U_LOG_D("IOU is %f\n", iou);
|
||||
// U_LOG_D("Outs box is %f %f %f %f", outs[i].bbox.cx, outs[i].bbox.cy, outs[i].bbox.w,
|
||||
// outs[i].bbox.h)
|
||||
if (iou > 0.1f) {
|
||||
if (iou > min_iou) {
|
||||
// This one intersects with the whole thing
|
||||
overlaps[i].push_back(detection);
|
||||
outs[i] = weightedAvgBoxes(overlaps[i]);
|
||||
|
|
|
@ -49,7 +49,8 @@ naive_sort_permutation_by_error(
|
|||
std::vector<size_t> &out_indices_2,
|
||||
std::vector<float> &out_errs,
|
||||
|
||||
float (*calc_error)(Tp_1 *one, Tp_2 *two))
|
||||
float (*calc_error)(Tp_1 *one, Tp_2 *two),
|
||||
float max_err = std::numeric_limits<float>::max())
|
||||
{
|
||||
used_1 = std::vector<bool>(in_1.size()); // silly? Unsure.
|
||||
used_2 = std::vector<bool>(in_2.size());
|
||||
|
@ -76,7 +77,7 @@ naive_sort_permutation_by_error(
|
|||
|
||||
for (size_t i = 0; i < associations.size(); i++) {
|
||||
psort_atom_t chonk = associations[i];
|
||||
if (used_1[chonk.idx_1] || used_2[chonk.idx_2]) {
|
||||
if (used_1[chonk.idx_1] || used_2[chonk.idx_2] || (chonk.err > max_err)) {
|
||||
continue;
|
||||
}
|
||||
used_1[chonk.idx_1] = true;
|
||||
|
@ -87,4 +88,4 @@ naive_sort_permutation_by_error(
|
|||
|
||||
out_errs.push_back(chonk.err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue