t/ht: Add support for camera orientation

Add a way to pass in extra information about camera views; the new
struct is merged with the old image boundary information struct.

Co-authored-by: Moses Turner <moses@collabora.com>
Co-authored-by: Jakob Bornecrantz <jakob@collabora.com>
Jan Schmidt 2022-08-14 01:24:01 +10:00 committed by Jakob Bornecrantz
parent a6c30b4083
commit 1811951dd6
9 changed files with 129 additions and 68 deletions
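
In short: callers that used to fill in a struct t_image_boundary_info now fill in a
struct t_camera_extra_info, which carries the boundary plus the new per-view
orientation. A minimal sketch of the new call shape (all identifiers are taken from
the diff below; the zero-initialization and the choice of CAMERA_ORIENTATION_90 are
this sketch's assumptions, not code from the commit):

	struct t_camera_extra_info extra_camera_info = {0};

	/* Per-view boundary, exactly as before, just under a new field name. */
	extra_camera_info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE;
	extra_camera_info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE;

	/* New: how each camera image is rotated relative to the user's head. */
	extra_camera_info.views[0].camera_orientation = CAMERA_ORIENTATION_90;
	extra_camera_info.views[1].camera_orientation = CAMERA_ORIENTATION_90;

	int ret = ht_device_create(xfctx, calib, HT_ALGORITHM_MERCURY,
	                           extra_camera_info, &sinks, &device);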

View file

@@ -241,7 +241,7 @@ int
 ht_device_create(struct xrt_frame_context *xfctx,
                  struct t_stereo_camera_calibration *calib,
                  enum t_hand_tracking_algorithm algorithm_choice,
-                 struct t_image_boundary_info boundary_info,
+                 struct t_camera_extra_info extra_camera_info,
                  struct xrt_slam_sinks **out_sinks,
                  struct xrt_device **out_device)
 {
@@ -253,7 +253,7 @@ ht_device_create(struct xrt_frame_context *xfctx,
 	switch (algorithm_choice) {
 	case HT_ALGORITHM_MERCURY: {
-		sync = t_hand_tracking_sync_mercury_create(calib, boundary_info);
+		sync = t_hand_tracking_sync_mercury_create(calib, extra_camera_info);
 	} break;
 	case HT_ALGORITHM_OLD_RGB: {
 		//!@todo Either have this deal with the output space correctly, or have everything use LEFT_CAMERA
@@ -291,9 +291,9 @@ ht_device_create_depthai_ov9282()
 	struct t_hand_tracking_sync *sync;

-	struct t_image_boundary_info info;
-	info.views[0].type = HT_IMAGE_BOUNDARY_NONE;
-	info.views[1].type = HT_IMAGE_BOUNDARY_NONE;
+	struct t_camera_extra_info info;
+	info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE;
+	info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE;

 	sync = t_hand_tracking_sync_mercury_create(calib, info);

View file

@@ -55,7 +55,7 @@ int
 ht_device_create(struct xrt_frame_context *xfctx,
                  struct t_stereo_camera_calibration *calib,
                  enum t_hand_tracking_algorithm algorithm_choice,
-                 struct t_image_boundary_info boundary_info,
+                 struct t_camera_extra_info extra_camera_info,
                  struct xrt_slam_sinks **out_sinks,
                  struct xrt_device **out_device);

View file

@@ -1493,14 +1493,14 @@ wmr_hmd_hand_track(struct wmr_hmd *wh,
 #ifdef XRT_BUILD_DRIVER_HANDTRACKING
 	//!@todo Turning it off is okay for now, but we should plug metric_radius (or whatever it's called) in, at some
 	//! point.
-	struct t_image_boundary_info boundary_info;
-	boundary_info.views[0].type = HT_IMAGE_BOUNDARY_NONE;
-	boundary_info.views[1].type = HT_IMAGE_BOUNDARY_NONE;
+	struct t_camera_extra_info extra_camera_info;
+	extra_camera_info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE;
+	extra_camera_info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE;

 	int create_status = ht_device_create(&wh->tracking.xfctx, //
 	                                     stereo_calib,         //
 	                                     HT_ALGORITHM_MERCURY, //
-	                                     boundary_info,        //
+	                                     extra_camera_info,    //
 	                                     &sinks,               //
 	                                     &device);
 	if (create_status != 0) {

View file

@@ -23,6 +23,9 @@ extern "C" {
  *
  * Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
  * of the camera's vignette.
+ *
+ * Feel free to move this out of t_hand_tracking if this becomes more generally applicable.
+ *
  * @ingroup xrt_iface
  */
 enum t_image_boundary_type
@@ -36,6 +39,9 @@ enum t_image_boundary_type
  *
  * Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
  * of the camera's vignette.
+ *
+ * Feel free to move this out of t_hand_tracking if this becomes more generally applicable.
+ *
  * @ingroup xrt_iface
  */
 struct t_image_boundary_circle
@@ -49,31 +55,53 @@ struct t_image_boundary_circle
 };

 /*!
- * @brief Image boundary for one view.
+ * @brief Logical orientation of the camera image, relative to the user's head.
+ * For example, Rift S uses CAMERA_ORIENTATION_90 for the two front cameras.
+ *
+ * Feel free to move this out of t_hand_tracking if this becomes more generally applicable.
+ *
+ */
+enum t_camera_orientation
+{
+	CAMERA_ORIENTATION_0 = 0,     // Normal "horizontal" orientation
+	CAMERA_ORIENTATION_90 = 90,   // Camera rotated 90° to the right
+	CAMERA_ORIENTATION_180 = 180, // Camera rotated 180° upside down
+	CAMERA_ORIENTATION_270 = 270, // Camera rotated 270° to the left
+};
+
+/*!
+ * @brief Information about image boundary and camera orientation for one view.
+ *
+ * Currently used by hand-tracking to determine if parts of the hand are not
+ * visible to the camera, ie. they are outside of the camera's vignette.
  *
- * Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
- * of the camera's vignette.
  * @ingroup xrt_iface
  */
-struct t_image_boundary_info_one_view
+struct t_camera_extra_info_one_view
 {
-	enum t_image_boundary_type type;
+	enum t_image_boundary_type boundary_type;
+
 	union {
 		struct t_image_boundary_circle circle;
 	} boundary;
+
+	enum t_camera_orientation camera_orientation;
 };

 /*!
- * @brief Image boundaries for all the cameras used in a tracking system.
+ * @brief Information about image boundaries and camera orientations for all the
+ * cameras used in a tracking system.
+ *
+ * Currently used by hand-tracking to determine if parts of the hand are not
+ * visible to the camera, ie. they are outside of the camera's vignette.
  *
- * Currently used by hand-tracking to determine if parts of the hand are not visible to the camera, ie. they are outside
- * of the camera's vignette.
  * @ingroup xrt_iface
  */
-struct t_image_boundary_info
+struct t_camera_extra_info
 {
 	//!@todo Hardcoded to 2 - needs to increase as we support headsets with more cameras.
-	struct t_image_boundary_info_one_view views[2];
+	struct t_camera_extra_info_one_view views[2];
 };

 /*!
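
Note that the enumerators are defined as their rotation in degrees, so orientation
math needs no lookup table. A hedged example of what that buys (this helper is
hypothetical, not part of the commit):

	#include <math.h>

	/* Hypothetical helper: t_camera_orientation values are literally
	 * degrees (0, 90, 180, 270), so converting to radians is one multiply. */
	static inline float
	camera_orientation_to_radians(enum t_camera_orientation rot)
	{
		return (float)rot * (float)(M_PI / 180.0);
	}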

View file

@@ -188,9 +188,9 @@ lighthouse_hand_track(struct u_system_devices *usysd,
 	LH_ASSERT_(stereo_calib != NULL);

-	struct t_image_boundary_info info;
-	info.views[0].type = HT_IMAGE_BOUNDARY_CIRCLE;
-	info.views[1].type = HT_IMAGE_BOUNDARY_CIRCLE;
+	struct t_camera_extra_info info;
+	info.views[0].boundary_type = HT_IMAGE_BOUNDARY_CIRCLE;
+	info.views[1].boundary_type = HT_IMAGE_BOUNDARY_CIRCLE;

 	//!@todo This changes by like 50ish pixels from device to device. For now, the solution is simple: just

View file

@@ -23,7 +23,7 @@ extern "C" {
  */
 struct t_hand_tracking_sync *
 t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
-                                    struct t_image_boundary_info boundary_info);
+                                    struct t_camera_extra_info extra_camera_info);

 #ifdef __cplusplus
 } // extern "C"

View file

@@ -29,63 +29,93 @@ namespace xrt::tracking::hand::mercury {
 	} \
 	} while (0)

 static cv::Matx23f
-blackbar(const cv::Mat &in, cv::Mat &out, xrt_size out_size)
+blackbar(const cv::Mat &in, enum t_camera_orientation rot, cv::Mat &out, xrt_size out_size)
 {
 #if 1
 	// Easy to think about, always right, but pretty slow:
 	// Get a matrix from the original to the scaled down / blackbar'd image, then get one that goes back.
 	// Then just warpAffine() it.
 	// Easy in programmer time - never have to worry about off by one, special cases. We can come back and optimize
 	// later.
+	bool swapped_wh = false;
+	float in_w, in_h;

-	// Do the black bars need to be on top and bottom, or on left and right?
-	float scale_down_w = (float)out_size.w / (float)in.cols; // 128/1280 = 0.1
-	float scale_down_h = (float)out_size.h / (float)in.rows; // 128/800 = 0.16
+	switch (rot) {
+	case CAMERA_ORIENTATION_90:
+	case CAMERA_ORIENTATION_270:
+		// Swap width and height
+		in_w = in.rows;
+		in_h = in.cols;
+		swapped_wh = true;
+		break;
+	default:
+		in_w = in.cols;
+		in_h = in.rows;
+		break;
+	}
+
+	// Figure out from the rotation and frame sizes if the black bars need to be on top and bottom, or on left and
+	// right?
+	float scale_down_w = (float)out_size.w / in_w; // 128/1280 = 0.1
+	float scale_down_h = (float)out_size.h / in_h; // 128/800 = 0.16

 	float scale_down = fmin(scale_down_w, scale_down_h); // 0.1

-	float width_inside = (float)in.cols * scale_down;
-	float height_inside = (float)in.rows * scale_down;
+	float width_inside, height_inside;
+
+	if (swapped_wh) {
+		width_inside = (float)in.rows * scale_down;
+		height_inside = (float)in.cols * scale_down;
+	} else {
+		width_inside = (float)in.cols * scale_down;
+		height_inside = (float)in.rows * scale_down;
+	}

 	float translate_x = (out_size.w - width_inside) / 2;  // should be 0 for 1280x800
 	float translate_y = (out_size.h - height_inside) / 2; // should be (1280-800)/2 = 240

 	cv::Matx23f go;
-	// clang-format off
-	go(0,0) = scale_down;  go(0,1) = 0.0f;        go(0,2) = translate_x;
-	go(1,0) = 0.0f;        go(1,1) = scale_down;  go(1,2) = translate_y;
-	// clang-format on
+	cv::Point2f center(in.rows / 2, in.cols / 2);
+
+	switch (rot) {
+	case CAMERA_ORIENTATION_0:
+		// clang-format off
+		go(0,0) = scale_down;  go(0,1) = 0.0f;        go(0,2) = translate_x;
+		go(1,0) = 0.0f;        go(1,1) = scale_down;  go(1,2) = translate_y;
+		// clang-format on
+		break;
+	case CAMERA_ORIENTATION_90:
+		// clang-format off
+		go(0,0) = 0.0f;        go(0,1) = scale_down;  go(0,2) = translate_x;
+		go(1,0) = -scale_down; go(1,1) = 0.0f;        go(1,2) = translate_y+out_size.h-1;
+		// clang-format on
+		break;
+	case CAMERA_ORIENTATION_180:
+		// clang-format off
+		go(0,0) = -scale_down; go(0,1) = 0.0f;        go(0,2) = translate_x+out_size.w-1;
+		go(1,0) = 0.0f;        go(1,1) = -scale_down; go(1,2) = translate_y+out_size.h-1;
+		// clang-format on
+		break;
+	case CAMERA_ORIENTATION_270:
+		// clang-format off
+		go(0,0) = 0.0f;        go(0,1) = -scale_down; go(0,2) = translate_x+out_size.w-1;
+		go(1,0) = scale_down;  go(1,1) = 0.0f;        go(1,2) = translate_y;
+		// clang-format on
+		break;
+	}

 	cv::warpAffine(in, out, go, cv::Size(out_size.w, out_size.h));

-	cv::Matx23f ret;
+	// Return the inverse affine transform by passing
+	// through a 3x3 rotation matrix
+	cv::Mat e = cv::Mat::eye(3, 3, CV_32F);
+	cv::Mat tmp = e(cv::Rect(0, 0, 3, 2));
+	cv::Mat(go).copyTo(tmp);

-	// clang-format off
-	ret(0,0) = 1.0f/scale_down;  ret(0,1) = 0.0f;             ret(0,2) = -translate_x/scale_down;
-	ret(1,0) = 0.0f;             ret(1,1) = 1.0f/scale_down;  ret(1,2) = -translate_y/scale_down;
-	// clang-format on
+	e = e.inv();
+	cv::Matx23f ret = e(cv::Rect(0, 0, 3, 2));

 	return ret;
 #else
 	// Fast, always wrong if the input isn't square. You'd end up using something like this, plus some
 	// copyMakeBorder if you want to optimize.
 	if (aspect_ratio_input == aspect_ratio_output) {
 		cv::resize(in, out, {out_size.w, out_size.h});
 		cv::Matx23f ret;
 		float scale_from_out_to_in = (float)in.cols / (float)out_size.w;
 		// clang-format off
 		ret(0,0) = scale_from_out_to_in;  ret(0,1) = 0.0f;                  ret(0,2) = 0.0f;
 		ret(1,0) = 0.0f;                  ret(1,1) = scale_from_out_to_in;  ret(1,2) = 0.0f;
 		// clang-format on
 		cv::imshow("hi", out);
 		cv::waitKey(1);
 		return ret;
 	}
 	assert(!"Uh oh! Unimplemented!");
 	return {};
 #endif
 }

 static inline int
@@ -275,7 +305,8 @@ run_hand_detection(void *ptr)
 	desire.h = 240;
 	desire.w = 320;

-	cv::Matx23f go_back = blackbar(data_400x640, _240x320_uint8, desire);
+	cv::Matx23f go_back = blackbar(data_400x640, view->camera_info.camera_orientation, _240x320_uint8, desire);

 	cv::Mat _240x320(cv::Size(320, 240), CV_32FC1, wrap->data, 320 * sizeof(float));
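
For intuition, here is the arithmetic blackbar() now performs for a hypothetical
1280x800 view squeezed into a 128x128 square with CAMERA_ORIENTATION_90; a worked
trace under assumed sizes, not code from the commit:

	/* Trace of blackbar() for in.cols = 1280, in.rows = 800, out 128x128, rot = 90. */
	float in_w = 800.0f;  /* in.rows: width and height swap under a 90 degree rotation */
	float in_h = 1280.0f; /* in.cols */

	float scale_down_w = 128.0f / in_w;                   /* 0.16 */
	float scale_down_h = 128.0f / in_h;                   /* 0.1  */
	float scale_down = fminf(scale_down_w, scale_down_h); /* 0.1  */

	/* swapped_wh is true, so width comes from in.rows and height from in.cols. */
	float width_inside = 800.0f * scale_down;   /* 80: black bars land on the left and right */
	float height_inside = 1280.0f * scale_down; /* 128: the rotated image fills the full height */

	float translate_x = (128.0f - width_inside) / 2;  /* 24 */
	float translate_y = (128.0f - height_inside) / 2; /* 0  */

The returned matrix is the inverse of go, computed by embedding the 2x3 affine into a
3x3 matrix (whose bottom row is 0 0 1, courtesy of cv::Mat::eye), inverting it, and
taking the top two rows back out.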

View file

@@ -165,7 +165,7 @@ getModelsFolder(struct HandTracking *hgt)
 template <typename Vec>
 static inline bool
-check_outside_view(struct HandTracking *hgt, struct t_image_boundary_info_one_view boundary, Vec &keypoint)
+check_outside_view(struct HandTracking *hgt, struct t_camera_extra_info_one_view boundary, Vec &keypoint)
 {
 	// Regular case - the keypoint is literally outside the image
 	if (keypoint.y > hgt->calibration_one_view_size_px.h || //
@@ -175,7 +175,7 @@ check_outside_view(struct HandTracking *hgt, struct t_image_boundary_info_one_vi
 		return true;
 	}

-	switch (boundary.type) {
+	switch (boundary.boundary_type) {
 	// No boundary, and we passed the previous check. Not outside the view.
 	case HT_IMAGE_BOUNDARY_NONE: return false; break;
 	case HT_IMAGE_BOUNDARY_CIRCLE: {
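
The HT_IMAGE_BOUNDARY_CIRCLE case is cut off by the hunk above. For orientation, a
sketch of the kind of test it performs, mirroring how scribble_image_boundary() below
scales the normalized circle by the image size; the normalized_radius field name, the
keypoint and size_px variables, and scaling the radius by the image width are all
assumptions of this sketch:

	/* Sketch only: is the keypoint outside the camera's circular vignette? */
	float cx = size_px.w * boundary.boundary.circle.normalized_center.x;
	float cy = size_px.h * boundary.boundary.circle.normalized_center.y;
	float r = size_px.w * boundary.boundary.circle.normalized_radius; /* assumed field name */

	float dx = keypoint.x - cx;
	float dy = keypoint.y - cy;

	/* Outside if the keypoint is farther from the vignette center than its radius. */
	bool outside = (dx * dx + dy * dy) > (r * r);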
@@ -254,7 +254,7 @@ back_project(struct HandTracking *hgt,
 	xrt_vec2 keypoints_global[26];
 	bool outside_view[26] = {};
 	for (int i = 0; i < 26; i++) {
-		if (check_outside_view(hgt, hgt->image_boundary_info.views[view_idx], out[i]) ||
+		if (check_outside_view(hgt, hgt->views[view_idx].camera_info, out[i]) ||
 		    any_joint_behind_camera) {
 			outside_view[i] = true;
 			if (num_outside != NULL) {
@@ -568,9 +568,9 @@ scribble_image_boundary(struct HandTracking *hgt)
 	struct ht_view *view = &hgt->views[view_idx];
 	cv::Mat &debug_frame = view->debug_out_to_this;

-	t_image_boundary_info_one_view &info = hgt->image_boundary_info.views[view_idx];
+	t_camera_extra_info_one_view &info = hgt->views[view_idx].camera_info;

-	if (info.type == HT_IMAGE_BOUNDARY_CIRCLE) {
+	if (info.boundary_type == HT_IMAGE_BOUNDARY_CIRCLE) {
 		int center_x = hgt->last_frame_one_view_size_px.w * info.boundary.circle.normalized_center.x;
 		int center_y = hgt->last_frame_one_view_size_px.h * info.boundary.circle.normalized_center.y;
 		cv::circle(debug_frame, {center_x, center_y},
@@ -913,7 +913,7 @@ using namespace xrt::tracking::hand::mercury;
 extern "C" t_hand_tracking_sync *
 t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
-                                    struct t_image_boundary_info boundary_info)
+                                    struct t_camera_extra_info extra_camera_info)
 {
 	XRT_TRACE_MARKER();
@@ -964,7 +964,8 @@ t_hand_tracking_sync_mercury_create(struct t_stereo_camera_calibration *calib,
 	hgt->views[0].hgt = hgt;
 	hgt->views[1].hgt = hgt; // :)

-	hgt->image_boundary_info = boundary_info;
+	hgt->views[0].camera_info = extra_camera_info.views[0];
+	hgt->views[1].camera_info = extra_camera_info.views[1];

 	init_hand_detection(hgt, &hgt->views[0].detection);
 	init_hand_detection(hgt, &hgt->views[1].detection);

View file

@@ -133,6 +133,8 @@ struct ht_view
 	onnx_wrap keypoint[2];
 	int view;

+	struct t_camera_extra_info_one_view camera_info;
+
 	cv::Mat distortion;
 	cv::Matx<double, 3, 3> cameraMatrix;
 	cv::Matx33d rotate_camera_to_stereo_camera; // R1 or R2
@@ -242,7 +244,6 @@ public:
 	struct xrt_pose left_in_right = {};

-	struct t_image_boundary_info image_boundary_info;

 	u_frame_times_widget ft_widget = {};