d/ht: split ht_image_math into source and header

Simon Zeni authored on 2021-11-04 19:29:17 -04:00, committed by Moses Turner
parent 44b46a28bf
commit 83630efc3d
4 changed files with 259 additions and 222 deletions

src/xrt/drivers/CMakeLists.txt

@@ -217,7 +217,7 @@ if(XRT_BUILD_DRIVER_HANDTRACKING)
ht/ht_interface.h
ht/ht_models.hpp
ht/ht_hand_math.cpp
ht/ht_image_math.hpp
ht/ht_image_math.cpp
ht/ht_nms.hpp
ht/templates/NaivePermutationSort.hpp)
target_link_libraries(drv_ht PRIVATE xrt-interfaces aux_os aux_util aux_math aux_gstreamer ONNXRuntime::ONNXRuntime ${OpenCV_LIBRARIES})

src/xrt/drivers/ht/ht_image_math.cpp

@@ -0,0 +1,241 @@
// Copyright 2021, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
* @file
* @brief Helper math to do things with images for the camera-based hand tracker
* @author Moses Turner <moses@collabora.com>
* @ingroup drv_ht
*/
#include "math/m_vec2.h"
#include "math/m_vec3.h"
#include "ht_image_math.hpp"
#include <opencv2/imgproc.hpp>
#include <opencv2/core/mat.hpp>
#include <opencv2/core/types.hpp>

#include <cassert> // assert() in planarize()
#include <cstring> // memcpy() in planarize()
#include <cmath>   // fmod(), fabs(), fmin(), atan2()

cv::Scalar
hsv2rgb(float fH, float fS, float fV)
{
const float fC = fV * fS; // Chroma
const float fHPrime = fmod(fH / 60.0, 6);
const float fX = fC * (1 - fabs(fmod(fHPrime, 2) - 1));
const float fM = fV - fC;
float fR, fG, fB;
if (0 <= fHPrime && fHPrime < 1) {
fR = fC;
fG = fX;
fB = 0;
} else if (1 <= fHPrime && fHPrime < 2) {
fR = fX;
fG = fC;
fB = 0;
} else if (2 <= fHPrime && fHPrime < 3) {
fR = 0;
fG = fC;
fB = fX;
} else if (3 <= fHPrime && fHPrime < 4) {
fR = 0;
fG = fX;
fB = fC;
} else if (4 <= fHPrime && fHPrime < 5) {
fR = fX;
fG = 0;
fB = fC;
} else if (5 <= fHPrime && fHPrime < 6) {
fR = fC;
fG = 0;
fB = fX;
} else {
fR = 0;
fG = 0;
fB = 0;
}
fR += fM;
fG += fM;
fB += fM;
return {fR * 255.0f, fG * 255.0f, fB * 255.0f};
}
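
A quick sanity check of the conversion; these values follow directly from the branches above:

cv::Scalar red = hsv2rgb(0.0f, 1.0f, 1.0f);     // hue 0 -> {255, 0, 0}
cv::Scalar green = hsv2rgb(120.0f, 1.0f, 1.0f); // hue 120 -> {0, 255, 0}
cv::Scalar gray = hsv2rgb(0.0f, 0.0f, 0.5f);    // zero saturation -> {127.5, 127.5, 127.5}
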
struct xrt_vec3
raycoord(struct ht_view *htv, struct xrt_vec3 model_out)
{
cv::Mat in_px_coords(1, 1, CV_32FC2);
float *write_in;
write_in = in_px_coords.ptr<float>(0);
write_in[0] = model_out.x;
write_in[1] = model_out.y;
cv::Mat out_ray(1, 1, CV_32FC2);
cv::fisheye::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion);
float n_x = out_ray.at<float>(0, 0);
float n_y = out_ray.at<float>(0, 1);
struct xrt_vec3 n = {n_x, n_y, 1.0f};
cv::Matx33f R = htv->rotate_camera_to_stereo_camera;
struct xrt_vec3 o = {
(n.x * R(0, 0)) + (n.y * R(0, 1)) + (n.z * R(0, 2)),
(n.x * R(1, 0)) + (n.y * R(1, 1)) + (n.z * R(1, 2)),
(n.x * R(2, 0)) + (n.y * R(2, 1)) + (n.z * R(2, 2)),
};
math_vec3_scalar_mul(1.0f / o.z, &o);
return o;
}
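
A hedged usage sketch - the ht_view pointer and the keypoint are assumed to come from the tracking loop; the returned ray has z normalized to 1, so x and y act as tangent-plane coordinates in the stereo-camera frame:

struct xrt_vec3 px_coord = {640.0f, 400.0f, 0.0f}; // hypothetical keypoint in distorted pixel space
struct xrt_vec3 ray = raycoord(htv, px_coord);     // ray.z == 1.0f after the final rescale
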
cv::Matx23f
blackbar(cv::Mat &in, cv::Mat &out, xrt_size out_size)
{
#if 1
// Easy to think about, always right, but pretty slow:
// Get a matrix from the original to the scaled down / blackbar'd image, then get one that goes back.
// Then just warpAffine() it.
// Easy in programmer time - never have to worry about off by one, special cases. We can come back and optimize
// later.
// Do the black bars need to be on top and bottom, or on left and right?
float scale_down_w = (float)out_size.w / (float)in.cols; // 128/1280 = 0.1
float scale_down_h = (float)out_size.h / (float)in.rows; // 128/800 = 0.16
float scale_down = fmin(scale_down_w, scale_down_h); // 0.1
float width_inside = (float)in.cols * scale_down;
float height_inside = (float)in.rows * scale_down;
float translate_x = (out_size.w - width_inside) / 2; // should be 0 for 1280x800
float translate_y = (out_size.h - height_inside) / 2; // should be (128 - 80) / 2 = 24 for 1280x800 in, 128x128 out
cv::Matx23f go;
// clang-format off
go(0,0) = scale_down; go(0,1) = 0.0f; go(0,2) = translate_x;
go(1,0) = 0.0f; go(1,1) = scale_down; go(1,2) = translate_y;
// clang-format on
cv::warpAffine(in, out, go, cv::Size(out_size.w, out_size.h));
cv::Matx23f ret;
// clang-format off
ret(0,0) = 1.0f/scale_down; ret(0,1) = 0.0f; ret(0,2) = -translate_x/scale_down;
ret(1,0) = 0.0f; ret(1,1) = 1.0f/scale_down; ret(1,2) = -translate_y/scale_down;
// clang-format on
return ret;
#else
// Fast, always wrong if the input isn't square. You'd end up using something like this, plus some
// copyMakeBorder if you want to optimize.
if (aspect_ratio_input == aspect_ratio_output) {
cv::resize(in, out, {out_size.w, out_size.h});
cv::Matx23f ret;
float scale_from_out_to_in = (float)in.cols / (float)out_size.w;
// clang-format off
ret(0,0) = scale_from_out_to_in; ret(0,1) = 0.0f; ret(0,2) = 0.0f;
ret(1,0) = 0.0f; ret(1,1) = scale_from_out_to_in; ret(1,2) = 0.0f;
// clang-format on
cv::imshow("hi", out);
cv::waitKey(1);
return ret;
}
assert(!"Uh oh! Unimplemented!");
return {};
#endif
}
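
Working the 1280x800 -> 128x128 case from the comments through the returned matrix (frame here is a hypothetical input):

cv::Mat frame(800, 1280, CV_8UC3); // hypothetical camera frame
cv::Mat square;
cv::Matx23f back = blackbar(frame, square, {128, 128});
// scale_down = 0.1, translate = (0, 24), so back = [[10, 0, 0], [0, 10, -240]]:
// output (64, 64) maps to input (640, 400) - center maps back to center.
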
void
handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type)
{
cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, intensity, intensity), type);
}
void
centerAndRotationFromJoints(struct ht_view *htv,
const xrt_vec2 *wrist,
const xrt_vec2 *index,
const xrt_vec2 *middle,
const xrt_vec2 *little,
xrt_vec2 *out_center,
xrt_vec2 *out_wrist_to_middle)
{
// Close to what Mediapipe does, but slightly different - just uses the middle proximal instead of "estimating"
// it from the pinky and index.
// At the end of the day I should probably do that basis-vector filtering thing to get a nicer middle metacarpal
// from 6 keypoints (not the thumb proximal). Or should I? Distortion might make it not worth it. Hmm.
// Feel free to look at the way MP does it, you can see it's different.
// https://github.com/google/mediapipe/blob/master/mediapipe/modules/holistic_landmark/calculators/hand_detections_from_pose_to_rects_calculator.cc
// struct xrt_vec2 hand_center = m_vec2_mul_scalar(middle, 0.5) + m_vec2_mul_scalar(index, 0.5*(2.0f/3.0f)) +
// m_vec2_mul_scalar(little, 0.5f*((1.0f/3.0f))); // Middle proximal, straight-up.
// U_LOG_E("%f %f %f %f %f %f %f %f ", wrist.x, wrist.y, index.x, index.y, middle.x, middle.y, little.x,
// little.y);
*out_center = m_vec2_lerp(*middle, m_vec2_lerp(*index, *little, 1.0f / 3.0f), 0.25f);
*out_wrist_to_middle = *out_center - *wrist;
}
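
Expanding the nested lerps (m_vec2_lerp(a, b, t) = (1 - t) * a + t * b) shows the fixed weighting this picks:

// *out_center = 0.75 * middle + (1/6) * index + (1/12) * little
// i.e. anchored mostly on the middle proximal, as the comment above describes.
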
struct DetectionModelOutput
rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 center, xrt_vec2 wrist_to_middle, DetectionModelOutput *out)
{
float box_size = m_vec2_len(wrist_to_middle) * 2.0f * 1.73f;
double rot = atan2(wrist_to_middle.x, wrist_to_middle.y) * (-180.0f / M_PI);
out->rotation = rot;
out->size = box_size;
out->center = center;
cv::RotatedRect rrect =
cv::RotatedRect(cv::Point2f(out->center.x, out->center.y), cv::Size2f(out->size, out->size), out->rotation);
cv::Point2f vertices[4];
rrect.points(vertices);
if (htv->htd->debug_scribble && htv->htd->dynamic_config.scribble_bounding_box) {
for (int i = 0; i < 4; i++) {
cv::Scalar b = cv::Scalar(10, 30, 30);
if (i == 3) {
b = cv::Scalar(255, 255, 0);
}
cv::line(htv->debug_out_to_this, vertices[i], vertices[(i + 1) % 4], b, 2);
}
}
// topright is 0. bottomright is 1. bottomleft is 2. topleft is 3.
cv::Point2f src_tri[3] = {vertices[3], vertices[2], vertices[1]}; // top-left, bottom-left, bottom-right
cv::Point2f dest_tri[3] = {cv::Point2f(0, 0), cv::Point2f(0, 224), cv::Point2f(224, 224)};
out->warp_there = getAffineTransform(src_tri, dest_tri);
out->warp_back = getAffineTransform(dest_tri, src_tri);
// out->wrist = wrist;
return *out;
}
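
The two affines round-trip between image space and the 224x224 crop fed to the keypoint model. A hedged sketch of mapping a landmark back, assuming warp_back is stored as (or converts to) the cv::Matx23f that transformVecBy2x3() in ht_image_math.hpp takes:

xrt_vec2 in_model = {112.0f, 112.0f}; // hypothetical landmark at the crop center
xrt_vec2 in_image = transformVecBy2x3(in_model, out->warp_back);
// in_image lands on out->center, since the affine maps the crop onto the rotated box.
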
void
planarize(cv::Mat &input, uint8_t *output)
{
// output better be the right size, because we are not doing any bounds checking!
assert(input.isContinuous());
int lix = input.cols;
int liy = input.rows;
cv::Mat planes[3];
cv::split(input, planes);
cv::Mat red = planes[0];
cv::Mat green = planes[1];
cv::Mat blue = planes[2];
memcpy(output, red.data, lix * liy);
memcpy(output + (lix * liy), green.data, lix * liy);
memcpy(output + (lix * liy * 2), blue.data, lix * liy);
}
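
This turns OpenCV's interleaved HWC layout into the planar CHW layout the network input wants; sizing is entirely on the caller, e.g. (img_224 is a hypothetical continuous 224x224 CV_8UC3 crop):

static uint8_t chw[3 * 224 * 224]; // one plane per channel, matching the three memcpy()s above
planarize(img_224, chw);
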

src/xrt/drivers/ht/ht_image_math.hpp

@@ -9,164 +9,31 @@
#pragma once
#include "xrt/xrt_defines.h"
#include "math/m_api.h"
#include "math/m_vec2.h"
#include "math/m_vec3.h"
#include "ht_driver.hpp"
#include <opencv2/calib3d.hpp>
#include <opencv2/core/matx.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/core/mat.hpp>
#include <opencv2/core/types.hpp>
struct ht_view;
static cv::Scalar
hsv2rgb(float fH, float fS, float fV)
{
float fC = fV * fS; // Chroma
float fHPrime = fmod(fH / 60.0, 6);
float fX = fC * (1 - fabs(fmod(fHPrime, 2) - 1));
float fM = fV - fC;
float fR, fG, fB;
if (0 <= fHPrime && fHPrime < 1) {
fR = fC;
fG = fX;
fB = 0;
} else if (1 <= fHPrime && fHPrime < 2) {
fR = fX;
fG = fC;
fB = 0;
} else if (2 <= fHPrime && fHPrime < 3) {
fR = 0;
fG = fC;
fB = fX;
} else if (3 <= fHPrime && fHPrime < 4) {
fR = 0;
fG = fX;
fB = fC;
} else if (4 <= fHPrime && fHPrime < 5) {
fR = fX;
fG = 0;
fB = fC;
} else if (5 <= fHPrime && fHPrime < 6) {
fR = fC;
fG = 0;
fB = fX;
} else {
fR = 0;
fG = 0;
fB = 0;
}
fR += fM;
fG += fM;
fB += fM;
return {fR * 255.0f, fG * 255.0f, fB * 255.0f};
}
static xrt_vec3
raycoord(struct ht_view *htv, struct xrt_vec3 model_out)
{
cv::Mat in_px_coords(1, 1, CV_32FC2);
float *write_in;
write_in = in_px_coords.ptr<float>(0);
write_in[0] = model_out.x;
write_in[1] = model_out.y;
cv::Mat out_ray(1, 1, CV_32FC2);
cv::fisheye::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion);
float n_x = out_ray.at<float>(0, 0);
float n_y = out_ray.at<float>(0, 1);
struct xrt_vec3 n = {n_x, n_y, 1.0f};
cv::Matx33f R = htv->rotate_camera_to_stereo_camera;
struct xrt_vec3 o = {
(n.x * R(0, 0)) + (n.y * R(0, 1)) + (n.z * R(0, 2)),
(n.x * R(1, 0)) + (n.y * R(1, 1)) + (n.z * R(1, 2)),
(n.x * R(2, 0)) + (n.y * R(2, 1)) + (n.z * R(2, 2)),
};
math_vec3_scalar_mul(1.0f / o.z, &o);
return o;
}
cv::Scalar
hsv2rgb(float fH, float fS, float fV);
struct xrt_vec3
raycoord(struct ht_view *htv, struct xrt_vec3 model_out);
/*!
* Returns a 2x3 transform matrix that takes you back from the blackbarred image to the original image.
*/
static cv::Matx23f
blackbar(cv::Mat &in, cv::Mat &out, xrt_size out_size)
{
#if 1
// Easy to think about, always right, but pretty slow:
// Get a matrix from the original to the scaled down / blackbar'd image, then get one that goes back.
// Then just warpAffine() it.
// Easy in programmer time - never have to worry about off by one, special cases. We can come back and optimize
// later.
// Do the black bars need to be on top and bottom, or on left and right?
float scale_down_w = (float)out_size.w / (float)in.cols; // 128/1280 = 0.1
float scale_down_h = (float)out_size.h / (float)in.rows; // 128/800 = 0.16
float scale_down = fmin(scale_down_w, scale_down_h); // 0.1
float width_inside = (float)in.cols * scale_down;
float height_inside = (float)in.rows * scale_down;
float translate_x = (out_size.w - width_inside) / 2; // should be 0 for 1280x800
float translate_y = (out_size.h - height_inside) / 2; // should be (128 - 80) / 2 = 24 for 1280x800 in, 128x128 out
cv::Matx23f go;
// clang-format off
go(0,0) = scale_down; go(0,1) = 0.0f; go(0,2) = translate_x;
go(1,0) = 0.0f; go(1,1) = scale_down; go(1,2) = translate_y;
// clang-format on
cv::warpAffine(in, out, go, cv::Size(out_size.w, out_size.h));
cv::Matx23f ret;
// clang-format off
ret(0,0) = 1.0f/scale_down; ret(0,1) = 0.0f; ret(0,2) = -translate_x/scale_down;
ret(1,0) = 0.0f; ret(1,1) = 1.0f/scale_down; ret(1,2) = -translate_y/scale_down;
// clang-format on
return ret;
#else
// Fast, always wrong if the input isn't square. You'd end up using something like this, plus some
// copyMakeBorder if you want to optimize.
if (aspect_ratio_input == aspect_ratio_output) {
cv::resize(in, out, {out_size.w, out_size.h});
cv::Matx23f ret;
float scale_from_out_to_in = (float)in.cols / (float)out_size.w;
// clang-format off
ret(0,0) = scale_from_out_to_in; ret(0,1) = 0.0f; ret(0,2) = 0.0f;
ret(1,0) = 0.0f; ret(1,1) = scale_from_out_to_in; ret(1,2) = 0.0f;
// clang-format on
cv::imshow("hi", out);
cv::waitKey(1);
return ret;
}
assert(!"Uh oh! Unimplemented!");
return {};
#endif
}
cv::Matx23f
blackbar(cv::Mat &in, cv::Mat &out, xrt_size out_size);
/*!
* This is a template so that we can use xrt_vec3 or xrt_vec2.
* Please don't use this for anything other than xrt_vec3 or xrt_vec2!
*/
template <typename T>
T
transformVecBy2x3(T in, cv::Matx23f warp_back)
@@ -178,91 +45,20 @@ transformVecBy2x3(T in, cv::Matx23f warp_back)
}
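
The template body is elided by the hunk boundary above; a sketch of the standard 2x3 affine application it presumably performs:

// out.x = warp_back(0,0) * in.x + warp_back(0,1) * in.y + warp_back(0,2);
// out.y = warp_back(1,0) * in.x + warp_back(1,1) * in.y + warp_back(1,2);
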
//! Draw some dots. Factors out some boilerplate.
static void
handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type)
{
cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, intensity, intensity), type);
}
void
handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type);
static void
void
centerAndRotationFromJoints(struct ht_view *htv,
const xrt_vec2 *wrist,
const xrt_vec2 *index,
const xrt_vec2 *middle,
const xrt_vec2 *little,
xrt_vec2 *out_center,
xrt_vec2 *out_wrist_to_middle)
{
// Close to what Mediapipe does, but slightly different - just uses the middle proximal instead of "estimating"
// it from the pinky and index.
// At the end of the day I should probably do that basis-vector filtering thing to get a nicer middle metacarpal
// from 6 keypoints (not the thumb proximal). Or should I? Distortion might make it not worth it. Hmm.
xrt_vec2 *out_wrist_to_middle);
// Feel free to look at the way MP does it, you can see it's different.
// https://github.com/google/mediapipe/blob/master/mediapipe/modules/holistic_landmark/calculators/hand_detections_from_pose_to_rects_calculator.cc
struct DetectionModelOutput
rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 center, xrt_vec2 wrist_to_middle, DetectionModelOutput *out);
// struct xrt_vec2 hand_center = m_vec2_mul_scalar(middle, 0.5) + m_vec2_mul_scalar(index, 0.5*(2.0f/3.0f)) +
// m_vec2_mul_scalar(little, 0.5f*((1.0f/3.0f))); // Middle proximal, straight-up.
// U_LOG_E("%f %f %f %f %f %f %f %f ", wrist.x, wrist.y, index.x, index.y, middle.x, middle.y, little.x,
// little.y);
*out_center = m_vec2_lerp(*middle, m_vec2_lerp(*index, *little, 1.0f / 3.0f), 0.25f);
*out_wrist_to_middle = *out_center - *wrist;
}
static DetectionModelOutput
rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 center, xrt_vec2 wrist_to_middle, DetectionModelOutput *out)
{
float box_size = m_vec2_len(wrist_to_middle) * 2.0f * 1.73f;
double rot = atan2(wrist_to_middle.x, wrist_to_middle.y) * (-180.0f / M_PI);
out->rotation = rot;
out->size = box_size;
out->center = center;
cv::RotatedRect rrect =
cv::RotatedRect(cv::Point2f(out->center.x, out->center.y), cv::Size2f(out->size, out->size), out->rotation);
cv::Point2f vertices[4];
rrect.points(vertices);
if (htv->htd->debug_scribble && htv->htd->dynamic_config.scribble_bounding_box) {
for (int i = 0; i < 4; i++) {
cv::Scalar b = cv::Scalar(10, 30, 30);
if (i == 3) {
b = cv::Scalar(255, 255, 0);
}
cv::line(htv->debug_out_to_this, vertices[i], vertices[(i + 1) % 4], b, 2);
}
}
// topright is 0. bottomright is 1. bottomleft is 2. topleft is 3.
cv::Point2f src_tri[3] = {vertices[3], vertices[2], vertices[1]}; // top-left, bottom-left, bottom-right
cv::Point2f dest_tri[3] = {cv::Point2f(0, 0), cv::Point2f(0, 224), cv::Point2f(224, 224)};
out->warp_there = getAffineTransform(src_tri, dest_tri);
out->warp_back = getAffineTransform(dest_tri, src_tri);
// out->wrist = wrist;
return *out;
}
static void
planarize(cv::Mat &input, uint8_t *output)
{
// output better be the right size, because we are not doing any bounds checking!
assert(input.isContinuous());
int lix = input.cols;
int liy = input.rows;
cv::Mat planes[3];
cv::split(input, planes);
cv::Mat red = planes[0];
cv::Mat green = planes[1];
cv::Mat blue = planes[2];
memcpy(output, red.data, lix * liy);
memcpy(output + (lix * liy), green.data, lix * liy);
memcpy(output + (lix * liy * 2), blue.data, lix * liy);
}
void
planarize(cv::Mat &input, uint8_t *output);

src/xrt/drivers/meson.build

@@ -92,7 +92,7 @@ lib_drv_ht = static_library(
'ht/ht_interface.h',
'ht/ht_models.hpp',
'ht/ht_hand_math.cpp',
'ht/ht_image_math.hpp',
'ht/ht_image_math.cpp',
'ht/ht_nms.hpp',
'ht/templates/NaivePermutationSort.hpp',
),