t/ht: Add support for camera orientation

Add a way to pass in extra information about camera views; the new
struct is merged with the old image boundary information struct.

Co-authored-by: Moses Turner <moses@collabora.com>
Co-authored-by: Jakob Bornecrantz <jakob@collabora.com>
Jan Schmidt 2022-08-14 01:24:01 +10:00 committed by Jakob Bornecrantz
parent a6c30b4083
commit 1811951dd6
9 changed files with 129 additions and 68 deletions
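
In short: callers that used to fill in a struct t_image_boundary_info now fill in a
struct t_camera_extra_info, which carries the boundary plus the new per-view
orientation. A minimal sketch of the new call shape (all identifiers are taken from
the diff below; the zero-initialization and the choice of CAMERA_ORIENTATION_90 are
this sketch's assumptions, not code from the commit):

	struct t_camera_extra_info extra_camera_info = {0};

	/* Per-view boundary, exactly as before, just under a new field name. */
	extra_camera_info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE;
	extra_camera_info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE;

	/* New: how each camera image is rotated relative to the user's head. */
	extra_camera_info.views[0].camera_orientation = CAMERA_ORIENTATION_90;
	extra_camera_info.views[1].camera_orientation = CAMERA_ORIENTATION_90;

	int ret = ht_device_create(xfctx, calib, HT_ALGORITHM_MERCURY,
	                           extra_camera_info, &sinks, &device);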

View file

@@ -241,7 +241,7 @@ int
 ht_device_create(struct xrt_frame_context *xfctx,
                  struct t_stereo_camera_calibration *calib,
                  enum t_hand_tracking_algorithm algorithm_choice,
-                 struct t_image_boundary_info boundary_info,
+                 struct t_camera_extra_info extra_camera_info,
                  struct xrt_slam_sinks **out_sinks,
                  struct xrt_device **out_device)
 {
@@ -253,7 +253,7 @@ ht_device_create(struct xrt_frame_context *xfctx,
 	switch (algorithm_choice) {
 	case HT_ALGORITHM_MERCURY: {
-		sync = t_hand_tracking_sync_mercury_create(calib, boundary_info);
+		sync = t_hand_tracking_sync_mercury_create(calib, extra_camera_info);
 	} break;
 	case HT_ALGORITHM_OLD_RGB: {
 		//!@todo Either have this deal with the output space correctly, or have everything use LEFT_CAMERA
@@ -291,9 +291,9 @@ ht_device_create_depthai_ov9282()
 	struct t_hand_tracking_sync *sync;

-	struct t_image_boundary_info info;
-	info.views[0].type = HT_IMAGE_BOUNDARY_NONE;
-	info.views[1].type = HT_IMAGE_BOUNDARY_NONE;
+	struct t_camera_extra_info info;
+	info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE;
+	info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE;

 	sync = t_hand_tracking_sync_mercury_create(calib, info);

View file

@@ -55,7 +55,7 @@ int
 ht_device_create(struct xrt_frame_context *xfctx,
                  struct t_stereo_camera_calibration *calib,
                  enum t_hand_tracking_algorithm algorithm_choice,
-                 struct t_image_boundary_info boundary_info,
+                 struct t_camera_extra_info extra_camera_info,
                  struct xrt_slam_sinks **out_sinks,
                  struct xrt_device **out_device);

View file

@@ -1493,14 +1493,14 @@ wmr_hmd_hand_track(struct wmr_hmd *wh,
 #ifdef XRT_BUILD_DRIVER_HANDTRACKING
 	//!@todo Turning it off is okay for now, but we should plug metric_radius (or whatever it's called) in, at some
 	//! point.
-	struct t_image_boundary_info boundary_info;
-	boundary_info.views[0].type = HT_IMAGE_BOUNDARY_NONE;
-	boundary_info.views[1].type = HT_IMAGE_BOUNDARY_NONE;
+	struct t_camera_extra_info extra_camera_info;
+	extra_camera_info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE;
+	extra_camera_info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE;

 	int create_status = ht_device_create(&wh->tracking.xfctx, //
 	                                     stereo_calib,         //
 	                                     HT_ALGORITHM_MERCURY, //
-	                                     boundary_info,        //
+	                                     extra_camera_info,    //
 	                                     &sinks,               //
 	                                     &device);
 	if (create_status != 0) {

View file

@@ -23,6 +23,9 @@ extern "C" {
  *
  * Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
  * of the camera's vignette.
+ *
+ * Feel free to move this out of t_hand_tracking if this becomes more generally applicable.
+ *
  * @ingroup xrt_iface
  */
 enum t_image_boundary_type
@@ -36,6 +39,9 @@ enum t_image_boundary_type
  *
  * Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
  * of the camera's vignette.
+ *
+ * Feel free to move this out of t_hand_tracking if this becomes more generally applicable.
+ *
  * @ingroup xrt_iface
  */
 struct t_image_boundary_circle
@@ -49,31 +55,53 @@ struct t_image_boundary_circle
 };

 /*!
- * @brief Image boundary for one view.
+ * @brief Logical orientation of the camera image, relative to the user's head.
+ * For example, Rift S uses CAMERA_ORIENTATION_90 for the two front cameras.
+ *
+ * Feel free to move this out of t_hand_tracking if this becomes more generally applicable.
+ *
+ */
+enum t_camera_orientation
+{
+	CAMERA_ORIENTATION_0 = 0,     // Normal "horizontal" orientation
+	CAMERA_ORIENTATION_90 = 90,   // Camera rotated 90° to the right
+	CAMERA_ORIENTATION_180 = 180, // Camera rotated 180° upside down
+	CAMERA_ORIENTATION_270 = 270, // Camera rotated 270° to the left
+};
+
+/*!
+ * @brief Information about image boundary and camera orientation for one view.
+ *
+ * Currently used by hand-tracking to determine if parts of the hand are not
+ * visible to the camera, ie. they are outside of the camera's vignette.
  *
- * Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
- * of the camera's vignette.
  * @ingroup xrt_iface
  */
-struct t_image_boundary_info_one_view
+struct t_camera_extra_info_one_view
 {
-	enum t_image_boundary_type type;
+	enum t_image_boundary_type boundary_type;
+
 	union {
 		struct t_image_boundary_circle circle;
 	} boundary;
+
+	enum t_camera_orientation camera_orientation;
 };

 /*!
- * @brief Image boundaries for all the cameras used in a tracking system.
+ * @brief Information about image boundaries and camera orientations for all the
+ * cameras used in a tracking system.
+ *
+ * Currently used by hand-tracking to determine if parts of the hand are not
+ * visible to the camera, ie. they are outside of the camera's vignette.
  *
- * Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
- * of the camera's vignette.
  * @ingroup xrt_iface
  */
-struct t_image_boundary_info
+struct t_camera_extra_info
 {
 	//!@todo Hardcoded to 2 - needs to increase as we support headsets with more cameras.
-	struct t_image_boundary_info_one_view views[2];
+	struct t_camera_extra_info_one_view views[2];
 };

 /*!
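
Note that the enumerators are defined as their rotation in degrees, so orientation
math needs no lookup table. A hedged example of what that buys (this helper is
hypothetical, not part of the commit):

	#include <math.h>

	/* Hypothetical helper: t_camera_orientation values are literally
	 * degrees (0, 90, 180, 270), so converting to radians is one multiply. */
	static inline float
	camera_orientation_to_radians(enum t_camera_orientation rot)
	{
		return (float)rot * (float)(M_PI / 180.0);
	}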

View file

@@ -188,9 +188,9 @@ lighthouse_hand_track(struct u_system_devices *usysd,
 	LH_ASSERT_(stereo_calib != NULL);

-	struct t_image_boundary_info info;
-	info.views[0].type = HT_IMAGE_BOUNDARY_CIRCLE;
-	info.views[1].type = HT_IMAGE_BOUNDARY_CIRCLE;
+	struct t_camera_extra_info info;
+	info.views[0].boundary_type = HT_IMAGE_BOUNDARY_CIRCLE;
+	info.views[1].boundary_type = HT_IMAGE_BOUNDARY_CIRCLE;

 	//!@todo This changes by like 50ish pixels from device to device. For now, the solution is simple: just

View file

@@ -23,7 +23,7 @@ extern "C" {
  */
 struct t_hand_tracking_sync *
 t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
-                                    struct t_image_boundary_info boundary_info);
+                                    struct t_camera_extra_info extra_camera_info);

 #ifdef __cplusplus
 } // extern "C"

View file

@@ -29,63 +29,93 @@ namespace xrt::tracking::hand::mercury {
 	} \
 	} while (0)

 static cv::Matx23f
-blackbar(const cv::Mat &in, cv::Mat &out, xrt_size out_size)
+blackbar(const cv::Mat &in, enum t_camera_orientation rot, cv::Mat &out, xrt_size out_size)
 {
 #if 1
 	// Easy to think about, always right, but pretty slow:
 	// Get a matrix from the original to the scaled down / blackbar'd image, then get one that goes back.
 	// Then just warpAffine() it.
 	// Easy in programmer time - never have to worry about off by one, special cases. We can come back and optimize
 	// later.
+	bool swapped_wh = false;
+	float in_w, in_h;

-	// Do the black bars need to be on top and bottom, or on left and right?
-	float scale_down_w = (float)out_size.w / (float)in.cols; // 128/1280 = 0.1
-	float scale_down_h = (float)out_size.h / (float)in.rows; // 128/800 = 0.16
+	switch (rot) {
+	case CAMERA_ORIENTATION_90:
+	case CAMERA_ORIENTATION_270:
+		// Swap width and height
+		in_w = in.rows;
+		in_h = in.cols;
+		swapped_wh = true;
+		break;
+	default:
+		in_w = in.cols;
+		in_h = in.rows;
+		break;
+	}
+
+	// Figure out from the rotation and frame sizes if the black bars need to be on top and bottom, or on left and
+	// right?
+	float scale_down_w = (float)out_size.w / in_w; // 128/1280 = 0.1
+	float scale_down_h = (float)out_size.h / in_h; // 128/800 = 0.16

 	float scale_down = fmin(scale_down_w, scale_down_h); // 0.1

-	float width_inside = (float)in.cols * scale_down;
-	float height_inside = (float)in.rows * scale_down;
+	float width_inside, height_inside;
+
+	if (swapped_wh) {
+		width_inside = (float)in.rows * scale_down;
+		height_inside = (float)in.cols * scale_down;
+	} else {
+		width_inside = (float)in.cols * scale_down;
+		height_inside = (float)in.rows * scale_down;
+	}

 	float translate_x = (out_size.w - width_inside) / 2;  // should be 0 for 1280x800
 	float translate_y = (out_size.h - height_inside) / 2; // should be (1280-800)/2 = 240

 	cv::Matx23f go;
-	// clang-format off
-	go(0,0) = scale_down;  go(0,1) = 0.0f;        go(0,2) = translate_x;
-	go(1,0) = 0.0f;        go(1,1) = scale_down;  go(1,2) = translate_y;
-	// clang-format on
+	cv::Point2f center(in.rows / 2, in.cols / 2);
+
+	switch (rot) {
+	case CAMERA_ORIENTATION_0:
+		// clang-format off
+		go(0,0) = scale_down;  go(0,1) = 0.0f;        go(0,2) = translate_x;
+		go(1,0) = 0.0f;        go(1,1) = scale_down;  go(1,2) = translate_y;
+		// clang-format on
+		break;
+	case CAMERA_ORIENTATION_90:
+		// clang-format off
+		go(0,0) = 0.0f;        go(0,1) = scale_down;  go(0,2) = translate_x;
+		go(1,0) = -scale_down; go(1,1) = 0.0f;        go(1,2) = translate_y+out_size.h-1;
+		// clang-format on
+		break;
+	case CAMERA_ORIENTATION_180:
+		// clang-format off
+		go(0,0) = -scale_down; go(0,1) = 0.0f;        go(0,2) = translate_x+out_size.w-1;
+		go(1,0) = 0.0f;        go(1,1) = -scale_down; go(1,2) = translate_y+out_size.h-1;
+		// clang-format on
+		break;
+	case CAMERA_ORIENTATION_270:
+		// clang-format off
+		go(0,0) = 0.0f;        go(0,1) = -scale_down; go(0,2) = translate_x+out_size.w-1;
+		go(1,0) = scale_down;  go(1,1) = 0.0f;        go(1,2) = translate_y;
+		// clang-format on
+		break;
+	}

 	cv::warpAffine(in, out, go, cv::Size(out_size.w, out_size.h));

-	cv::Matx23f ret;
+	// Return the inverse affine transform by passing
+	// through a 3x3 rotation matrix
+	cv::Mat e = cv::Mat::eye(3, 3, CV_32F);
+	cv::Mat tmp = e(cv::Rect(0, 0, 3, 2));
+	cv::Mat(go).copyTo(tmp);

-	// clang-format off
-	ret(0,0) = 1.0f/scale_down;  ret(0,1) = 0.0f;             ret(0,2) = -translate_x/scale_down;
-	ret(1,0) = 0.0f;             ret(1,1) = 1.0f/scale_down;  ret(1,2) = -translate_y/scale_down;
-	// clang-format on
+	e = e.inv();
+	cv::Matx23f ret = e(cv::Rect(0, 0, 3, 2));

 	return ret;
 #else
 	// Fast, always wrong if the input isn't square. You'd end up using something like this, plus some
 	// copyMakeBorder if you want to optimize.
 	if (aspect_ratio_input == aspect_ratio_output) {
 		cv::resize(in, out, {out_size.w, out_size.h});
 		cv::Matx23f ret;
 		float scale_from_out_to_in = (float)in.cols / (float)out_size.w;
 		// clang-format off
 		ret(0,0) = scale_from_out_to_in;  ret(0,1) = 0.0f;                  ret(0,2) = 0.0f;
 		ret(1,0) = 0.0f;                  ret(1,1) = scale_from_out_to_in;  ret(1,2) = 0.0f;
 		// clang-format on
 		cv::imshow("hi", out);
 		cv::waitKey(1);
 		return ret;
 	}
 	assert(!"Uh oh! Unimplemented!");
 	return {};
 #endif
 }

 static inline int
@@ -275,7 +305,8 @@ run_hand_detection(void *ptr)
 	desire.h = 240;
 	desire.w = 320;

-	cv::Matx23f go_back = blackbar(data_400x640, _240x320_uint8, desire);
+	cv::Matx23f go_back = blackbar(data_400x640, view->camera_info.camera_orientation, _240x320_uint8, desire);

 	cv::Mat _240x320(cv::Size(320, 240), CV_32FC1, wrap->data, 320 * sizeof(float));
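
For intuition, here is the arithmetic blackbar() now performs for a hypothetical
1280x800 view squeezed into a 128x128 square with CAMERA_ORIENTATION_90; a worked
trace under assumed sizes, not code from the commit:

	/* Trace of blackbar() for in.cols = 1280, in.rows = 800, out 128x128, rot = 90. */
	float in_w = 800.0f;  /* in.rows: width and height swap under a 90 degree rotation */
	float in_h = 1280.0f; /* in.cols */

	float scale_down_w = 128.0f / in_w;                   /* 0.16 */
	float scale_down_h = 128.0f / in_h;                   /* 0.1  */
	float scale_down = fminf(scale_down_w, scale_down_h); /* 0.1  */

	/* swapped_wh is true, so width comes from in.rows and height from in.cols. */
	float width_inside = 800.0f * scale_down;   /* 80: black bars land on the left and right */
	float height_inside = 1280.0f * scale_down; /* 128: the rotated image fills the full height */

	float translate_x = (128.0f - width_inside) / 2;  /* 24 */
	float translate_y = (128.0f - height_inside) / 2; /* 0  */

The returned matrix is the inverse of go, computed by embedding the 2x3 affine into a
3x3 matrix (whose bottom row is 0 0 1, courtesy of cv::Mat::eye), inverting it, and
taking the top two rows back out.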

View file

@@ -165,7 +165,7 @@ getModelsFolder(struct HandTracking *hgt)
 template <typename Vec>
 static inline bool
-check_outside_view(struct HandTracking *hgt, struct t_image_boundary_info_one_view boundary, Vec &keypoint)
+check_outside_view(struct HandTracking *hgt, struct t_camera_extra_info_one_view boundary, Vec &keypoint)
 {
 	// Regular case - the keypoint is literally outside the image
 	if (keypoint.y > hgt->calibration_one_view_size_px.h || //
@@ -175,7 +175,7 @@ check_outside_view(struct HandTracking *hgt, struct t_image_boundary_info_one_vi
 		return true;
 	}

-	switch (boundary.type) {
+	switch (boundary.boundary_type) {
 	// No boundary, and we passed the previous check. Not outside the view.
 	case HT_IMAGE_BOUNDARY_NONE: return false; break;
 	case HT_IMAGE_BOUNDARY_CIRCLE: {
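
The HT_IMAGE_BOUNDARY_CIRCLE case is cut off by the hunk above. For orientation, a
sketch of the kind of test it performs, mirroring how scribble_image_boundary() below
scales the normalized circle by the image size; the normalized_radius field name, the
keypoint and size_px variables, and scaling the radius by the image width are all
assumptions of this sketch:

	/* Sketch only: is the keypoint outside the camera's circular vignette? */
	float cx = size_px.w * boundary.boundary.circle.normalized_center.x;
	float cy = size_px.h * boundary.boundary.circle.normalized_center.y;
	float r = size_px.w * boundary.boundary.circle.normalized_radius; /* assumed field name */

	float dx = keypoint.x - cx;
	float dy = keypoint.y - cy;

	/* Outside if the keypoint is farther from the vignette center than its radius. */
	bool outside = (dx * dx + dy * dy) > (r * r);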
@@ -254,7 +254,7 @@ back_project(struct HandTracking *hgt,
 	xrt_vec2 keypoints_global[26];
 	bool outside_view[26] = {};
 	for (int i = 0; i < 26; i++) {
-		if (check_outside_view(hgt, hgt->image_boundary_info.views[view_idx], out[i]) ||
+		if (check_outside_view(hgt, hgt->views[view_idx].camera_info, out[i]) ||
 		    any_joint_behind_camera) {
 			outside_view[i] = true;
 			if (num_outside != NULL) {
@@ -568,9 +568,9 @@ scribble_image_boundary(struct HandTracking *hgt)
 	struct ht_view *view = &hgt->views[view_idx];
 	cv::Mat &debug_frame = view->debug_out_to_this;

-	t_image_boundary_info_one_view &info = hgt->image_boundary_info.views[view_idx];
+	t_camera_extra_info_one_view &info = hgt->views[view_idx].camera_info;

-	if (info.type == HT_IMAGE_BOUNDARY_CIRCLE) {
+	if (info.boundary_type == HT_IMAGE_BOUNDARY_CIRCLE) {
 		int center_x = hgt->last_frame_one_view_size_px.w * info.boundary.circle.normalized_center.x;
 		int center_y = hgt->last_frame_one_view_size_px.h * info.boundary.circle.normalized_center.y;
 		cv::circle(debug_frame, {center_x, center_y},
@@ -913,7 +913,7 @@ using namespace xrt::tracking::hand::mercury;
 extern "C" t_hand_tracking_sync *
 t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
-                                    struct t_image_boundary_info boundary_info)
+                                    struct t_camera_extra_info extra_camera_info)
 {
 	XRT_TRACE_MARKER();
@@ -964,7 +964,8 @@ t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
 	hgt->views[0].hgt = hgt;
 	hgt->views[1].hgt = hgt; // :)

-	hgt->image_boundary_info = boundary_info;
+	hgt->views[0].camera_info = extra_camera_info.views[0];
+	hgt->views[1].camera_info = extra_camera_info.views[1];

 	init_hand_detection(hgt, &hgt->views[0].detection);
 	init_hand_detection(hgt, &hgt->views[1].detection);

View file

@@ -133,6 +133,8 @@ struct ht_view
 	onnx_wrap keypoint[2];
 	int view;

+	struct t_camera_extra_info_one_view camera_info;
+
 	cv::Mat distortion;
 	cv::Matx<double, 3, 3> cameraMatrix;
 	cv::Matx33d rotate_camera_to_stereo_camera; // R1 or R2
@@ -242,7 +244,6 @@ public:
 	struct xrt_pose left_in_right = {};

-	struct t_image_boundary_info image_boundary_info;

 	u_frame_times_widget ft_widget = {};