h/mercury: Remove really old keypoint estimator and NCNN stuff

This commit is contained in:
Moses Turner 2022-11-02 17:28:37 -05:00 committed by Moses Turner
parent ede2292690
commit 32da562bc5
3 changed files with 6 additions and 477 deletions

View file

@ -400,166 +400,6 @@ init_keypoint_estimation(HandTracking *hgt, onnx_wrap *wrap)
std::filesystem::path path = hgt->models_folder;
path /= "grayscale_keypoint_simdr.onnx";
wrap->input_name = "inputImg";
wrap->input_shape[0] = 1;
wrap->input_shape[1] = 1;
wrap->input_shape[2] = 128;
wrap->input_shape[3] = 128;
wrap->api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
OrtSessionOptions *opts = nullptr;
ORT(CreateSessionOptions(&opts));
ORT(SetSessionGraphOptimizationLevel(opts, ORT_ENABLE_ALL));
ORT(SetIntraOpNumThreads(opts, 1));
ORT(CreateEnv(ORT_LOGGING_LEVEL_FATAL, "monado_ht", &wrap->env));
ORT(CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &wrap->meminfo));
ORT(CreateSession(wrap->env, path.c_str(), opts, &wrap->session));
assert(wrap->session != NULL);
size_t input_size = wrap->input_shape[0] * wrap->input_shape[1] * wrap->input_shape[2] * wrap->input_shape[3];
wrap->data = (float *)malloc(input_size * sizeof(float));
ORT(CreateTensorWithDataAsOrtValue(wrap->meminfo, //
wrap->data, //
input_size * sizeof(float), //
wrap->input_shape, //
4, //
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, //
&wrap->tensor));
assert(wrap->tensor);
int is_tensor;
ORT(IsTensor(wrap->tensor, &is_tensor));
assert(is_tensor);
wrap->api->ReleaseSessionOptions(opts);
}
void
run_keypoint_estimation(void *ptr)
{
// data has already been filled with what we're meant to detect on
XRT_TRACE_MARKER();
keypoint_estimation_run_info *info = (keypoint_estimation_run_info *)ptr;
onnx_wrap *wrap = &info->view->keypoint[info->hand_idx];
struct HandTracking *hgt = info->view->hgt;
cv::Mat &debug = info->view->debug_out_to_this;
hand_bounding_box *output = &info->view->bboxes_this_frame[info->hand_idx];
cv::Point2f src_tri[3];
cv::Point2f dst_tri[3];
// top-left
cv::Point2f center = {output->center.x, output->center.y};
cv::Point2f go_right = {output->size_px / 2, 0};
cv::Point2f go_down = {0, output->size_px / 2};
if (info->hand_idx == 1) {
go_right *= -1;
}
// top left
src_tri[0] = {center - go_down - go_right};
// bottom left
src_tri[1] = {center + go_down - go_right};
// top right
src_tri[2] = {center - go_down + go_right};
dst_tri[0] = {0, 0};
dst_tri[1] = {0, 128};
dst_tri[2] = {128, 0};
cv::Matx23f go_there = getAffineTransform(src_tri, dst_tri);
cv::Matx23f go_back = getAffineTransform(dst_tri, src_tri);
cv::Mat data_128x128_uint8;
cv::warpAffine(info->view->run_model_on_this, data_128x128_uint8, go_there, cv::Size(128, 128),
cv::INTER_LINEAR);
cv::Mat data_128x128_float(cv::Size(128, 128), CV_32FC1, wrap->data, 128 * sizeof(float));
normalizeGrayscaleImage(data_128x128_uint8, data_128x128_float);
const char *output_names[2] = {"x_axis_hmap", "y_axis_hmap"};
OrtValue *output_tensor[2] = {nullptr, nullptr};
ORT(Run(wrap->session, nullptr, &wrap->input_name, &wrap->tensor, 1, output_names, 2, output_tensor));
// To here
float *out_data_x = nullptr;
float *out_data_y = nullptr;
ORT(GetTensorMutableData(output_tensor[0], (void **)&out_data_x));
ORT(GetTensorMutableData(output_tensor[1], (void **)&out_data_y));
Hand2D &px_coord = info->view->keypoint_outputs[info->hand_idx].hand_px_coord;
Hand2D &tan_space = info->view->keypoint_outputs[info->hand_idx].hand_tan_space;
xrt_vec2 *keypoints_global = px_coord.kps;
cv::Mat x(cv::Size(128, 21), CV_32FC1, out_data_x);
cv::Mat y(cv::Size(128, 21), CV_32FC1, out_data_y);
for (int i = 0; i < 21; i++) {
int loc_x = argmax(&out_data_x[i * 128], 128);
int loc_y = argmax(&out_data_y[i * 128], 128);
xrt_vec2 loc;
loc.x = loc_x;
loc.y = loc_y;
loc = transformVecBy2x3(loc, go_back);
px_coord.kps[i] = loc;
tan_space.kps[i] = raycoord(info->view, loc);
}
if (hgt->debug_scribble && hgt->tuneable_values.scribble_keypoint_model_outputs) {
for (int finger = 0; finger < 5; finger++) {
cv::Point last = {(int)keypoints_global[0].x, (int)keypoints_global[0].y};
for (int joint = 0; joint < 4; joint++) {
cv::Point the_new = {(int)keypoints_global[1 + finger * 4 + joint].x,
(int)keypoints_global[1 + finger * 4 + joint].y};
cv::line(debug, last, the_new, colors[info->hand_idx]);
last = the_new;
}
}
for (int i = 0; i < 21; i++) {
xrt_vec2 loc = keypoints_global[i];
handDot(debug, loc, 2, (float)(i) / 21.0, 1, 2);
}
}
wrap->api->ReleaseValue(output_tensor[0]);
wrap->api->ReleaseValue(output_tensor[1]);
}
void
init_keypoint_estimation_new(HandTracking *hgt, onnx_wrap *wrap)
{
std::filesystem::path path = hgt->models_folder;
path /= "grayscale_keypoint_new.onnx";
// input_names = {"input_image_grayscale"};
@ -693,7 +533,7 @@ make_keypoint_heatmap_output(int camera_idx, int hand_idx, int grid_pt_x, int gr
void
run_keypoint_estimation_new(void *ptr)
run_keypoint_estimation(void *ptr)
{
XRT_TRACE_MARKER();
keypoint_estimation_run_info *info = (keypoint_estimation_run_info *)ptr;
@ -851,263 +691,4 @@ release_onnx_wrap(onnx_wrap *wrap)
free(wrap->data);
}
#ifdef USE_NCNN
int
ncnn_extractor_input_wrap(ncnn_extractor_t ex, const char *name, const ncnn_mat_t mat)
{
XRT_TRACE_MARKER();
return ncnn_extractor_input(ex, name, mat);
}
int
ncnn_extractor_extract_wrap(ncnn_extractor_t ex, const char *name, ncnn_mat_t *mat)
{
XRT_TRACE_MARKER();
return ncnn_extractor_extract(ex, name, mat);
}
ncnn_mat_t
ncnn_mat_from_pixels_resize_wrap(const unsigned char *pixels,
int type,
int w,
int h,
int stride,
int target_width,
int target_height,
ncnn_allocator_t allocator)
{
XRT_TRACE_MARKER();
return ncnn_mat_from_pixels_resize(pixels, type, w, h, stride, target_width, target_height, allocator);
}
void
ncnn_mat_substract_mean_normalize_wrap(ncnn_mat_t mat, const float *mean_vals, const float *norm_vals)
{
XRT_TRACE_MARKER();
return ncnn_mat_substract_mean_normalize(mat, mean_vals, norm_vals);
}
void
run_hand_detection_ncnn(void *ptr)
{
ht_view *view = (ht_view *)ptr;
HandTracking *hgt = view->hgt;
onnx_wrap *wrap = &view->detection;
cv::Mat &data_400x640 = view->run_model_on_this;
ncnn_mat_t in = ncnn_mat_from_pixels_resize(view->run_model_on_this.data, NCNN_MAT_PIXEL_GRAY, 640, 400,
view->run_model_on_this.step[0], 320, 240, NULL);
const float norm_vals[3] = {1 / 255.0, 1 / 255.0, 1 / 255.0};
ncnn_mat_substract_mean_normalize_wrap(in, 0, norm_vals);
ncnn_option_t opt = ncnn_option_create();
ncnn_option_set_use_vulkan_compute(opt, 1);
ncnn_extractor_t ex = ncnn_extractor_create(hgt->net);
ncnn_extractor_set_option(ex, opt);
ncnn_extractor_input_wrap(ex, "input_image_grayscale", in);
ncnn_mat_t out;
ncnn_extractor_extract_wrap(ex, "hand_locations_radii", &out);
#if 0
{
int out_dims = ncnn_mat_get_dims(out);
const int out_w = ncnn_mat_get_w(out);
const int out_h = ncnn_mat_get_h(out);
const int out_d = ncnn_mat_get_d(out);
const int out_c = ncnn_mat_get_c(out);
U_LOG_E("out: %d: %d %d %d %d", out_dims, out_w, out_h, out_d, out_c);
}
{
int out_dims = ncnn_mat_get_dims(in);
const int out_w = ncnn_mat_get_w(in);
const int out_h = ncnn_mat_get_h(in);
const int out_d = ncnn_mat_get_d(in);
const int out_c = ncnn_mat_get_c(in);
U_LOG_E("in: %d: %d %d %d %d", out_dims, out_w, out_h, out_d, out_c);
}
#endif
const float *out_data = (const float *)ncnn_mat_get_data(out);
size_t plane_size = 80 * 60;
cv::Scalar colors[2] = {{255, 255, 0}, {255, 0, 0}};
for (int hand_idx = 0; hand_idx < 2; hand_idx++) {
const float *this_side_data = out_data + hand_idx * plane_size * 2;
int max_idx;
det_output *output = &view->det_outputs[hand_idx];
output->found = argmax(this_side_data, 4800, &max_idx) && this_side_data[max_idx] > 0.3;
if (output->found) {
int row = max_idx / 80;
int col = max_idx % 80;
output->size_px = average_size(this_side_data + plane_size, this_side_data, col, row, 80, 60);
// output->size_px *= 640 * 1.6;
// output->size_px *= 640 * 1.8;
output->size_px *= 640 * 2.0;
float size = output->size_px;
float refined_x, refined_y;
refine_center_of_distribution(this_side_data, col, row, 80, 60, &refined_x, &refined_y);
output->center.x = refined_x * 8.0;
output->center.y = refined_y * 6.6666666666666666667;
// cv::Point2i pt(refined_x * 16.0 * .5, refined_y * 13.333333333333334 * .5);
cv::Point2i pt(output->center.x, output->center.y);
cv::Mat &debug_frame = view->debug_out_to_this;
cv::rectangle(debug_frame, cv::Rect(pt - cv::Point2i(size / 2, size / 2), cv::Size(size, size)),
colors[hand_idx], 1);
}
}
ncnn_extractor_destroy(ex);
ncnn_mat_destroy(in);
ncnn_mat_destroy(out);
}
void
run_keypoint_estimation_new_ncnn(void *ptr)
{
XRT_TRACE_MARKER();
keypoint_estimation_run_info *info = (keypoint_estimation_run_info *)ptr;
struct HandTracking *hgt = info->view->hgt;
// Factor out starting here
cv::Mat &debug = info->view->debug_out_to_this;
det_output *output = &info->view->det_outputs[info->hand_idx];
cv::Point2f src_tri[3];
cv::Point2f dst_tri[3];
// top-left
cv::Point2f center = {output->center.x, output->center.y};
cv::Point2f go_right = {output->size_px / 2, 0};
cv::Point2f go_down = {0, output->size_px / 2};
if (info->hand_idx == 1) {
go_right *= -1;
}
// top left
src_tri[0] = {center - go_down - go_right};
// bottom left
src_tri[1] = {center + go_down - go_right};
// top right
src_tri[2] = {center - go_down + go_right};
dst_tri[0] = {0, 0};
dst_tri[1] = {0, 128};
dst_tri[2] = {128, 0};
cv::Matx23f go_there = getAffineTransform(src_tri, dst_tri);
cv::Matx23f go_back = getAffineTransform(dst_tri, src_tri);
XRT_TRACE_IDENT(transforms);
cv::Mat data_128x128_uint8;
cv::warpAffine(info->view->run_model_on_this, data_128x128_uint8, go_there, cv::Size(128, 128),
cv::INTER_LINEAR);
// cv::Mat data_128x128_float(cv::Size(128, 128), CV_32FC1);
// normalizeGrayscaleImage(data_128x128_uint8, data_128x128_float);
ncnn_mat_t in = ncnn_mat_from_pixels(data_128x128_uint8.data, NCNN_MAT_PIXEL_GRAY, 128, 128,
data_128x128_uint8.step[0], NULL);
const float norm_vals[3] = {1 / 255.0, 1 / 255.0, 1 / 255.0};
ncnn_mat_substract_mean_normalize_wrap(in, 0, norm_vals);
ncnn_option_t opt = ncnn_option_create();
ncnn_option_set_use_vulkan_compute(opt, 1);
ncnn_extractor_t ex = ncnn_extractor_create(hgt->net_keypoint);
ncnn_extractor_set_option(ex, opt);
ncnn_extractor_input_wrap(ex, "inputImg", in);
ncnn_mat_t out;
ncnn_extractor_extract_wrap(ex, "heatmap", &out);
// Ending here
const float *out_data = (const float *)ncnn_mat_get_data(out);
Hand2D &px_coord = info->view->keypoint_outputs[info->hand_idx].hand_px_coord;
Hand2D &tan_space = info->view->keypoint_outputs[info->hand_idx].hand_tan_space;
xrt_vec2 *keypoints_global = px_coord.kps;
size_t plane_size = 22 * 22;
for (int i = 0; i < 21; i++) {
const float *data = &out_data[i * plane_size];
int out_idx = 0;
argmax(data, 22 * 22, &out_idx);
int row = out_idx / 22;
int col = out_idx % 22;
xrt_vec2 loc;
refine_center_of_distribution(data, col, row, 22, 22, &loc.x, &loc.y);
loc.x *= 128.0f / 22.0f;
loc.y *= 128.0f / 22.0f;
loc = transformVecBy2x3(loc, go_back);
px_coord.kps[i] = loc;
tan_space.kps[i] = raycoord(info->view, loc);
}
if (hgt->debug_scribble) {
for (int finger = 0; finger < 5; finger++) {
cv::Point last = {(int)keypoints_global[0].x, (int)keypoints_global[0].y};
for (int joint = 0; joint < 4; joint++) {
cv::Point the_new = {(int)keypoints_global[1 + finger * 4 + joint].x,
(int)keypoints_global[1 + finger * 4 + joint].y};
cv::line(debug, last, the_new, colors[info->hand_idx]);
last = the_new;
}
}
for (int i = 0; i < 21; i++) {
xrt_vec2 loc = keypoints_global[i];
handDot(debug, loc, 2, (float)(i) / 21.0, 1, 2);
}
}
ncnn_extractor_destroy(ex);
ncnn_mat_destroy(in);
ncnn_mat_destroy(out);
}
#endif
} // namespace xrt::tracking::hand::mercury

View file

@ -24,7 +24,6 @@ namespace xrt::tracking::hand::mercury {
#define DEG_TO_RAD(DEG) (DEG * M_PI / 180.)
DEBUG_GET_ONCE_LOG_OPTION(mercury_log, "MERCURY_LOG", U_LOGGING_WARN)
DEBUG_GET_ONCE_BOOL_OPTION(mercury_use_simdr_keypoint, "MERCURY_USE_SIMDR_KEYPOINT", false)
// Flags to tell state tracker that these are indeed valid joints
static const enum xrt_space_relation_flags valid_flags_ht = (enum xrt_space_relation_flags)(
@ -962,7 +961,6 @@ t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
// Setup logging first. We like logging.
hgt->log_level = xrt::tracking::hand::mercury::debug_get_log_option_mercury_log();
bool use_simdr = xrt::tracking::hand::mercury::debug_get_bool_option_mercury_use_simdr_keypoint();
/*
* Get configuration
@ -975,32 +973,6 @@ t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
getCalibration(hgt, calib);
getModelsFolder(hgt);
#ifdef USE_NCNN
{
hgt->net = ncnn_net_create();
ncnn_option_t opt = ncnn_option_create();
ncnn_option_set_use_vulkan_compute(opt, 1);
ncnn_net_set_option(hgt->net, opt);
ncnn_net_load_param(hgt->net, "/3/clones/ncnn/batch_size_2.param");
ncnn_net_load_model(hgt->net, "/3/clones/ncnn/batch_size_2.bin");
}
{
hgt->net_keypoint = ncnn_net_create();
ncnn_option_t opt = ncnn_option_create();
ncnn_option_set_use_vulkan_compute(opt, 1);
ncnn_net_set_option(hgt->net_keypoint, opt);
ncnn_net_load_param(
hgt->net_keypoint,
"/home/moses/.local/share/monado/hand-tracking-models/grayscale_keypoint_new.param");
ncnn_net_load_model(hgt->net_keypoint,
"/home/moses/.local/share/monado/hand-tracking-models/grayscale_keypoint_new.bin");
}
#endif
hgt->views[0].hgt = hgt;
hgt->views[1].hgt = hgt; // :)
@ -1011,21 +983,12 @@ t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
init_hand_detection(hgt, &hgt->views[0].detection);
init_hand_detection(hgt, &hgt->views[1].detection);
if (use_simdr) {
init_keypoint_estimation(hgt, &hgt->views[0].keypoint[0]);
init_keypoint_estimation(hgt, &hgt->views[0].keypoint[1]);
init_keypoint_estimation(hgt, &hgt->views[1].keypoint[0]);
init_keypoint_estimation(hgt, &hgt->views[1].keypoint[1]);
hgt->keypoint_estimation_run_func = xrt::tracking::hand::mercury::run_keypoint_estimation;
} else {
init_keypoint_estimation_new(hgt, &hgt->views[0].keypoint[0]);
init_keypoint_estimation_new(hgt, &hgt->views[0].keypoint[1]);
init_keypoint_estimation_new(hgt, &hgt->views[1].keypoint[0]);
init_keypoint_estimation_new(hgt, &hgt->views[1].keypoint[1]);
hgt->keypoint_estimation_run_func = xrt::tracking::hand::mercury::run_keypoint_estimation_new;
}
hgt->views[0].view = 0;
hgt->views[1].view = 1;

View file

@ -67,8 +67,6 @@ static const cv::Scalar PINK(255, 0, 255);
static const cv::Scalar colors[2] = {YELLOW, RED};
#undef USE_NCNN
// Forward declaration for ht_view
struct HandTracking;
struct ht_view;
@ -208,11 +206,6 @@ public:
// So that we can calibrate cameras at 1280x800 but ship images over USB at 640x400
struct xrt_size last_frame_one_view_size_px = {};
#ifdef USE_NCNN
ncnn_net_t net;
ncnn_net_t net_keypoint;
#endif
struct ht_view views[2] = {};
struct model_output_visualizers visualizers;
@ -307,17 +300,9 @@ run_hand_detection(void *ptr);
void
init_keypoint_estimation(HandTracking *hgt, onnx_wrap *wrap);
void
run_keypoint_estimation(void *ptr);
void
init_keypoint_estimation_new(HandTracking *hgt, onnx_wrap *wrap);
void
run_keypoint_estimation_new(void *ptr);
void
release_onnx_wrap(onnx_wrap *wrap);