mirror of
https://gitlab.freedesktop.org/monado/monado.git
synced 2025-01-29 18:08:29 +00:00
t/ht: Add support for camera orientation
Add a way to pass in extra information about camera views. The new struct absorbs the old image boundary information struct, which it replaces. Co-authored-by: Moses Turner <moses@collabora.com> Co-authored-by: Jakob Bornecrantz <jakob@collabora.com>
This commit is contained in:
parent
a6c30b4083
commit
1811951dd6
|
@ -241,7 +241,7 @@ int
|
|||
ht_device_create(struct xrt_frame_context *xfctx,
|
||||
struct t_stereo_camera_calibration *calib,
|
||||
enum t_hand_tracking_algorithm algorithm_choice,
|
||||
struct t_image_boundary_info boundary_info,
|
||||
struct t_camera_extra_info extra_camera_info,
|
||||
struct xrt_slam_sinks **out_sinks,
|
||||
struct xrt_device **out_device)
|
||||
{
|
||||
|
@ -253,7 +253,7 @@ ht_device_create(struct xrt_frame_context *xfctx,
|
|||
|
||||
switch (algorithm_choice) {
|
||||
case HT_ALGORITHM_MERCURY: {
|
||||
sync = t_hand_tracking_sync_mercury_create(calib, boundary_info);
|
||||
sync = t_hand_tracking_sync_mercury_create(calib, extra_camera_info);
|
||||
} break;
|
||||
case HT_ALGORITHM_OLD_RGB: {
|
||||
//!@todo Either have this deal with the output space correctly, or have everything use LEFT_CAMERA
|
||||
|
@ -291,9 +291,9 @@ ht_device_create_depthai_ov9282()
|
|||
|
||||
struct t_hand_tracking_sync *sync;
|
||||
|
||||
struct t_image_boundary_info info;
|
||||
info.views[0].type = HT_IMAGE_BOUNDARY_NONE;
|
||||
info.views[1].type = HT_IMAGE_BOUNDARY_NONE;
|
||||
struct t_camera_extra_info info;
|
||||
info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE;
|
||||
info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE;
|
||||
|
||||
sync = t_hand_tracking_sync_mercury_create(calib, info);
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ int
|
|||
ht_device_create(struct xrt_frame_context *xfctx,
|
||||
struct t_stereo_camera_calibration *calib,
|
||||
enum t_hand_tracking_algorithm algorithm_choice,
|
||||
struct t_image_boundary_info boundary_info,
|
||||
struct t_camera_extra_info extra_camera_info,
|
||||
struct xrt_slam_sinks **out_sinks,
|
||||
struct xrt_device **out_device);
|
||||
|
||||
|
|
|
@ -1493,14 +1493,14 @@ wmr_hmd_hand_track(struct wmr_hmd *wh,
|
|||
#ifdef XRT_BUILD_DRIVER_HANDTRACKING
|
||||
//!@todo Turning it off is okay for now, but we should plug metric_radius (or whatever it's called) in, at some
|
||||
//! point.
|
||||
struct t_image_boundary_info boundary_info;
|
||||
boundary_info.views[0].type = HT_IMAGE_BOUNDARY_NONE;
|
||||
boundary_info.views[1].type = HT_IMAGE_BOUNDARY_NONE;
|
||||
struct t_camera_extra_info extra_camera_info;
|
||||
extra_camera_info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE;
|
||||
extra_camera_info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE;
|
||||
|
||||
int create_status = ht_device_create(&wh->tracking.xfctx, //
|
||||
stereo_calib, //
|
||||
HT_ALGORITHM_MERCURY, //
|
||||
boundary_info, //
|
||||
extra_camera_info, //
|
||||
&sinks, //
|
||||
&device);
|
||||
if (create_status != 0) {
|
||||
|
|
|
@ -23,6 +23,9 @@ extern "C" {
|
|||
*
|
||||
* Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
|
||||
* of the camera's vignette.
|
||||
*
|
||||
* Feel free to move this out of t_hand_tracking if this becomes more generally applicable.
|
||||
*
|
||||
* @ingroup xrt_iface
|
||||
*/
|
||||
enum t_image_boundary_type
|
||||
|
@ -36,6 +39,9 @@ enum t_image_boundary_type
|
|||
*
|
||||
* Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
|
||||
* of the camera's vignette.
|
||||
*
|
||||
* Feel free to move this out of t_hand_tracking if this becomes more generally applicable.
|
||||
*
|
||||
* @ingroup xrt_iface
|
||||
*/
|
||||
struct t_image_boundary_circle
|
||||
|
@ -49,31 +55,53 @@ struct t_image_boundary_circle
|
|||
};
|
||||
|
||||
/*!
|
||||
* @brief Image boundary for one view.
|
||||
* @brief Logical orientation of the camera image, relative to the user's head.
|
||||
* For example, Rift S uses CAMERA_ORIENTATION_90 for the two front cameras.
|
||||
*
|
||||
* Feel free to move this out of t_hand_tracking if this becomes more generally applicable.
|
||||
*
|
||||
*/
|
||||
enum t_camera_orientation
|
||||
{
|
||||
// Each enumerator's numeric value is the rotation it names, in degrees.
CAMERA_ORIENTATION_0 = 0, //!< Normal "horizontal" orientation
|
||||
CAMERA_ORIENTATION_90 = 90, //!< Camera rotated 90° to the right
|
||||
CAMERA_ORIENTATION_180 = 180, //!< Camera rotated 180° upside down
|
||||
// NOTE(review): "270° to the left" is geometrically the same as "90° to the right" - presumably this
// means 270° to the right (= 90° to the left); confirm the intended direction.
CAMERA_ORIENTATION_270 = 270, //!< Camera rotated 270° to the left
|
||||
};
|
||||
|
||||
|
||||
/*!
|
||||
* @brief Information about image boundary and camera orientation for one view.
|
||||
*
|
||||
* Currently used by hand-tracking to determine if parts of the hand are not
|
||||
* visible to the camera, ie. they are outside of the camera's vignette.
|
||||
*
|
||||
* Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
|
||||
* of the camera's vignette.
|
||||
* @ingroup xrt_iface
|
||||
*/
|
||||
struct t_image_boundary_info_one_view
|
||||
struct t_camera_extra_info_one_view
|
||||
{
|
||||
enum t_image_boundary_type type;
|
||||
//! Kind of image boundary for this view; consumers switch on it before reading @ref boundary.
enum t_image_boundary_type boundary_type;
|
||||
|
||||
//! Boundary parameters; which member is meaningful depends on boundary_type.
union {
|
||||
//! Read when boundary_type is HT_IMAGE_BOUNDARY_CIRCLE.
struct t_image_boundary_circle circle;
|
||||
} boundary;
|
||||
|
||||
//! Orientation of this view's camera image; the Mercury hand detector hands it to blackbar().
//! NOTE(review): the WMR and DepthAI call sites shown in this change declare the struct without an
//! initializer and assign only boundary_type - confirm this field is set before it is read.
enum t_camera_orientation camera_orientation;
|
||||
};
|
||||
|
||||
/*!
|
||||
* @brief Image boundaries for all the cameras used in a tracking system.
|
||||
* @brief Information about image boundaries and camera orientations for all the
|
||||
* cameras used in a tracking system.
|
||||
*
|
||||
* Currently used by hand-tracking to determine if parts of the hand are not
|
||||
* visible to the camera, ie. they are outside of the camera's vignette.
|
||||
*
|
||||
* Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
|
||||
* of the camera's vignette.
|
||||
* @ingroup xrt_iface
|
||||
*/
|
||||
struct t_image_boundary_info
|
||||
struct t_camera_extra_info
|
||||
{
|
||||
//! @todo Hardcoded to 2 - needs to increase as we support headsets with more cameras.
|
||||
struct t_image_boundary_info_one_view views[2];
|
||||
//! One entry per camera view; the Mercury tracker copies views[0] and views[1] into its two ht_view structs.
struct t_camera_extra_info_one_view views[2];
|
||||
};
|
||||
|
||||
/*!
|
||||
|
|
|
@ -188,9 +188,9 @@ lighthouse_hand_track(struct u_system_devices *usysd,
|
|||
|
||||
LH_ASSERT_(stereo_calib != NULL);
|
||||
|
||||
struct t_image_boundary_info info;
|
||||
info.views[0].type = HT_IMAGE_BOUNDARY_CIRCLE;
|
||||
info.views[1].type = HT_IMAGE_BOUNDARY_CIRCLE;
|
||||
struct t_camera_extra_info info;
|
||||
info.views[0].boundary_type = HT_IMAGE_BOUNDARY_CIRCLE;
|
||||
info.views[1].boundary_type = HT_IMAGE_BOUNDARY_CIRCLE;
|
||||
|
||||
|
||||
//!@todo This changes by like 50ish pixels from device to device. For now, the solution is simple: just
|
||||
|
|
|
@ -23,7 +23,7 @@ extern "C" {
|
|||
*/
|
||||
struct t_hand_tracking_sync *
|
||||
t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
|
||||
struct t_image_boundary_info boundary_info);
|
||||
struct t_camera_extra_info extra_camera_info);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
@ -29,63 +29,93 @@ namespace xrt::tracking::hand::mercury {
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
/*!
 * Scale @p in (rotating it according to @p rot) so that it fits inside an image of @p out_size while keeping its
 * aspect ratio; the part of @p out not covered by the image is left to warpAffine()'s border fill (the "black bars").
 *
 * @param      in       Source image.
 * @param      rot      Orientation of the camera that produced @p in. Values other than 90/180/270 are treated as
 *                      CAMERA_ORIENTATION_0, so a bogus value degrades to "no rotation" instead of a zero matrix.
 * @param[out] out      Receives the out_size.w x out_size.h result.
 * @param      out_size Size of the output image, in pixels.
 *
 * @return The 2x3 affine transform that maps coordinates in @p out back to coordinates in @p in.
 */
static cv::Matx23f
blackbar(const cv::Mat &in, enum t_camera_orientation rot, cv::Mat &out, xrt_size out_size)
{
#if 1
	// Easy to think about, always right, but pretty slow:
	// Build the matrix that goes from the original to the scaled down / rotated / blackbar'd image, warpAffine()
	// with it, then return the matrix that goes back.
	// Easy in programmer time - never have to worry about off by one, special cases. We can come back and optimize
	// later.

	// A camera mounted on its side produces an image whose width and height trade places once it is turned
	// upright.
	const bool swapped_wh = (rot == CAMERA_ORIENTATION_90 || rot == CAMERA_ORIENTATION_270);
	const float in_w = swapped_wh ? (float)in.rows : (float)in.cols;
	const float in_h = swapped_wh ? (float)in.cols : (float)in.rows;

	// The smaller scale factor decides whether the black bars end up on top and bottom, or on left and right.
	float scale_down_w = (float)out_size.w / in_w; // 128/1280 = 0.1
	float scale_down_h = (float)out_size.h / in_h; // 128/800 = 0.16

	float scale_down = fmin(scale_down_w, scale_down_h); // 0.1

	// Size of the (rotated) image inside the output, and the offset that centers it.
	float width_inside = in_w * scale_down;
	float height_inside = in_h * scale_down;

	float translate_x = (out_size.w - width_inside) / 2;  // 0 for 1280x800 -> 128x128
	float translate_y = (out_size.h - height_inside) / 2; // (128-80)/2 = 24 for 1280x800 -> 128x128

	// On an axis that gets flipped, input coordinate 0 has to land on the *far* edge of the centered image:
	// translate + inside - 1, which equals out_size - 1 - translate. (Adding the translate instead is only right
	// when the translate is 0, and otherwise pushes the image off-center by twice the bar width.)
	const float flipped_x = (float)out_size.w - 1.0f - translate_x;
	const float flipped_y = (float)out_size.h - 1.0f - translate_y;

	cv::Matx23f go;

	switch (rot) {
	case CAMERA_ORIENTATION_90:
		// clang-format off
		go(0,0) = 0.0f;        go(0,1) = scale_down;  go(0,2) = translate_x;
		go(1,0) = -scale_down; go(1,1) = 0.0f;        go(1,2) = flipped_y;
		// clang-format on
		break;
	case CAMERA_ORIENTATION_180:
		// clang-format off
		go(0,0) = -scale_down; go(0,1) = 0.0f;        go(0,2) = flipped_x;
		go(1,0) = 0.0f;        go(1,1) = -scale_down; go(1,2) = flipped_y;
		// clang-format on
		break;
	case CAMERA_ORIENTATION_270:
		// clang-format off
		go(0,0) = 0.0f;        go(0,1) = -scale_down; go(0,2) = flipped_x;
		go(1,0) = scale_down;  go(1,1) = 0.0f;        go(1,2) = translate_y;
		// clang-format on
		break;
	case CAMERA_ORIENTATION_0:
	default:
		// Unknown orientations fall back to "not rotated", matching how the width/height swap above treats them.
		// clang-format off
		go(0,0) = scale_down;  go(0,1) = 0.0f;        go(0,2) = translate_x;
		go(1,0) = 0.0f;        go(1,1) = scale_down;  go(1,2) = translate_y;
		// clang-format on
		break;
	}

	cv::warpAffine(in, out, go, cv::Size(out_size.w, out_size.h));

	// Return the inverse affine transform: embed the 2x3 matrix in the top two rows of a 3x3 identity, invert
	// that, and take the top two rows of the result.
	cv::Mat e = cv::Mat::eye(3, 3, CV_32F);
	cv::Mat tmp = e(cv::Rect(0, 0, 3, 2)); // View into e, so the copy below writes into e itself.
	cv::Mat(go).copyTo(tmp);

	e = e.inv();
	cv::Matx23f ret = e(cv::Rect(0, 0, 3, 2));

	return ret;
#else
	// Fast, always wrong if the input isn't square. You'd end up using something like this, plus some
	// copyMakeBorder if you want to optimize.
	if (aspect_ratio_input == aspect_ratio_output) {
		cv::resize(in, out, {out_size.w, out_size.h});
		cv::Matx23f ret;
		float scale_from_out_to_in = (float)in.cols / (float)out_size.w;
		// clang-format off
		ret(0,0) = scale_from_out_to_in; ret(0,1) = 0.0f;                 ret(0,2) = 0.0f;
		ret(1,0) = 0.0f;                 ret(1,1) = scale_from_out_to_in; ret(1,2) = 0.0f;
		// clang-format on
		cv::imshow("hi", out);
		cv::waitKey(1);
		return ret;
	}
	assert(!"Uh oh! Unimplemented!");
	return {};
#endif
}
|
||||
|
||||
static inline int
|
||||
|
@ -275,7 +305,8 @@ run_hand_detection(void *ptr)
|
|||
desire.h = 240;
|
||||
desire.w = 320;
|
||||
|
||||
cv::Matx23f go_back = blackbar(data_400x640, _240x320_uint8, desire);
|
||||
cv::Matx23f go_back = blackbar(data_400x640, view->camera_info.camera_orientation, _240x320_uint8, desire);
|
||||
|
||||
|
||||
cv::Mat _240x320(cv::Size(320, 240), CV_32FC1, wrap->data, 320 * sizeof(float));
|
||||
|
||||
|
|
|
@ -165,7 +165,7 @@ getModelsFolder(struct HandTracking *hgt)
|
|||
|
||||
template <typename Vec>
|
||||
static inline bool
|
||||
check_outside_view(struct HandTracking *hgt, struct t_image_boundary_info_one_view boundary, Vec &keypoint)
|
||||
check_outside_view(struct HandTracking *hgt, struct t_camera_extra_info_one_view boundary, Vec &keypoint)
|
||||
{
|
||||
// Regular case - the keypoint is literally outside the image
|
||||
if (keypoint.y > hgt->calibration_one_view_size_px.h || //
|
||||
|
@ -175,7 +175,7 @@ check_outside_view(struct HandTracking *hgt, struct t_image_boundary_info_one_vi
|
|||
return true;
|
||||
}
|
||||
|
||||
switch (boundary.type) {
|
||||
switch (boundary.boundary_type) {
|
||||
// No boundary, and we passed the previous check. Not outside the view.
|
||||
case HT_IMAGE_BOUNDARY_NONE: return false; break;
|
||||
case HT_IMAGE_BOUNDARY_CIRCLE: {
|
||||
|
@ -254,7 +254,7 @@ back_project(struct HandTracking *hgt,
|
|||
xrt_vec2 keypoints_global[26];
|
||||
bool outside_view[26] = {};
|
||||
for (int i = 0; i < 26; i++) {
|
||||
if (check_outside_view(hgt, hgt->image_boundary_info.views[view_idx], out[i]) ||
|
||||
if (check_outside_view(hgt, hgt->views[view_idx].camera_info, out[i]) ||
|
||||
any_joint_behind_camera) {
|
||||
outside_view[i] = true;
|
||||
if (num_outside != NULL) {
|
||||
|
@ -568,9 +568,9 @@ scribble_image_boundary(struct HandTracking *hgt)
|
|||
struct ht_view *view = &hgt->views[view_idx];
|
||||
|
||||
cv::Mat &debug_frame = view->debug_out_to_this;
|
||||
t_image_boundary_info_one_view &info = hgt->image_boundary_info.views[view_idx];
|
||||
t_camera_extra_info_one_view &info = hgt->views[view_idx].camera_info;
|
||||
|
||||
if (info.type == HT_IMAGE_BOUNDARY_CIRCLE) {
|
||||
if (info.boundary_type == HT_IMAGE_BOUNDARY_CIRCLE) {
|
||||
int center_x = hgt->last_frame_one_view_size_px.w * info.boundary.circle.normalized_center.x;
|
||||
int center_y = hgt->last_frame_one_view_size_px.h * info.boundary.circle.normalized_center.y;
|
||||
cv::circle(debug_frame, {center_x, center_y},
|
||||
|
@ -913,7 +913,7 @@ using namespace xrt::tracking::hand::mercury;
|
|||
|
||||
extern "C" t_hand_tracking_sync *
|
||||
t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
|
||||
struct t_image_boundary_info boundary_info)
|
||||
struct t_camera_extra_info extra_camera_info)
|
||||
{
|
||||
XRT_TRACE_MARKER();
|
||||
|
||||
|
@ -964,7 +964,8 @@ t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
|
|||
hgt->views[0].hgt = hgt;
|
||||
hgt->views[1].hgt = hgt; // :)
|
||||
|
||||
hgt->image_boundary_info = boundary_info;
|
||||
hgt->views[0].camera_info = extra_camera_info.views[0];
|
||||
hgt->views[1].camera_info = extra_camera_info.views[1];
|
||||
|
||||
init_hand_detection(hgt, &hgt->views[0].detection);
|
||||
init_hand_detection(hgt, &hgt->views[1].detection);
|
||||
|
|
|
@ -133,6 +133,8 @@ struct ht_view
|
|||
onnx_wrap keypoint[2];
|
||||
int view;
|
||||
|
||||
struct t_camera_extra_info_one_view camera_info;
|
||||
|
||||
cv::Mat distortion;
|
||||
cv::Matx<double, 3, 3> cameraMatrix;
|
||||
cv::Matx33d rotate_camera_to_stereo_camera; // R1 or R2
|
||||
|
@ -242,7 +244,6 @@ public:
|
|||
|
||||
|
||||
struct xrt_pose left_in_right = {};
|
||||
struct t_image_boundary_info image_boundary_info;
|
||||
|
||||
u_frame_times_widget ft_widget = {};
|
||||
|
||||
|
|
Loading…
Reference in a new issue