mirror of
https://gitlab.freedesktop.org/monado/monado.git
synced 2025-01-19 13:18:32 +00:00
tracking/hand: Word choice/clarity
This commit is contained in:
parent
ec3f056199
commit
0c6f342db3
|
@ -64,7 +64,7 @@ refine_center_of_distribution(
|
|||
int max_kern_width = 10;
|
||||
|
||||
|
||||
//!@todo this is stupid and has at least one edge case, make it more readable and link to a jupyter notebook
|
||||
//! @todo this is not good and has at least one edge case, make it more readable and link to a jupyter notebook
|
||||
int kern_width_x = std::max(0, std::min(coarse_x, std::min(max_kern_width, abs(coarse_x - w) - 1)));
|
||||
int kern_width_y = std::max(0, std::min(coarse_y, std::min(max_kern_width, abs(coarse_y - h) - 1)));
|
||||
int min_x = coarse_x - kern_width_x;
|
||||
|
|
|
@ -151,7 +151,7 @@ getCalibration(struct HandTracking *htd, t_stereo_camera_calibration *calibratio
|
|||
|
||||
math_quat_from_matrix_3x3(&s, &tmp);
|
||||
|
||||
// Weird that I have to invert this quat, right? I think at some point - like probably just above this - I must
|
||||
// Weird that I have to invert this quat, right? I think at some point - like probably just before this - I must
|
||||
// have swapped row-major and col-major - remember, if you transpose a rotation matrix, you get its inverse.
|
||||
// Doesn't matter that I don't understand - non-inverted looks definitely wrong, inverted looks definitely
|
||||
// right.
|
||||
|
|
|
@ -151,7 +151,7 @@ struct ht_view
|
|||
cv::Mat debug_out_to_this;
|
||||
|
||||
struct det_output det_outputs[2]; // left, right
|
||||
struct keypoint_estimation_run_info run_info[2]; // Stupid
|
||||
struct keypoint_estimation_run_info run_info[2];
|
||||
|
||||
struct keypoint_output keypoint_outputs[2];
|
||||
};
|
||||
|
@ -198,7 +198,7 @@ public:
|
|||
float baseline = {};
|
||||
struct xrt_quat stereo_camera_to_left_camera = {};
|
||||
|
||||
uint64_t current_frame_timestamp = {}; // SUPER dumb.
|
||||
uint64_t current_frame_timestamp = {};
|
||||
|
||||
// Change this whenever you want
|
||||
volatile bool debug_scribble = true;
|
||||
|
|
|
@ -23,7 +23,7 @@ namespace xrt::tracking::hand::mercury::kine {
|
|||
static void
|
||||
_two_set_ele(Eigen::Matrix<float, 3, 21> &thing, Eigen::Affine3f jt, int idx)
|
||||
{
|
||||
// Stupid and slow
|
||||
// slow
|
||||
thing.col(idx) = jt.translation();
|
||||
}
|
||||
|
||||
|
@ -343,7 +343,7 @@ clamp_proximals(struct kinematic_hand_4f *hand,
|
|||
|
||||
|
||||
if (our_z.z() > 0) {
|
||||
//!@bug We need smarter joint limiting, limiting via tanangles is not acceptable as joints can rotate
|
||||
//!@bug We need smarter joint limiting, limiting using tanangles is not acceptable as joints can rotate
|
||||
//! outside of the 180 degree hemisphere.
|
||||
our_z.z() = -0.000001f;
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ SPDX-License-Identifier: BSL-1.0
|
|||
# What is this?
|
||||
This is a driver to do optical hand tracking. The actual code was mostly written by Moses Turner, with tons of help from Marcus Edel, Jakob Bornecrantz, Ryan Pavlik, and Christoph Haag. Jakob Bornecrantz and Marcus Edel are the main people who gathered training data for the initial Collabora models.
|
||||
|
||||
In `main` it only works with Valve Index, although we've used a lot of Luxonis cameras in development. In the future it should work fine with devices like the T265, or PS4/PS5 cam, should there be enough interest for any of those.
|
||||
In `main` it only works with Valve Index, although we've used a lot of Luxonis cameras in development. With additional work, it should work fine with devices like the T265, or PS4/PS5 cam, should there be enough interest for any of those.
|
||||
|
||||
Under good lighting, I would say it's around as good as Oculus Quest 2's hand tracking. Not that I'm trying to make any claims; that's just what I honestly would tell somebody if they were wondering whether it's worth testing out.
|
||||
|
||||
|
@ -39,9 +39,9 @@ sudo make install
|
|||
Make sure you have git-lfs installed, then run ./scripts/get-ht-models.sh. Should work fine.
|
||||
|
||||
## Building the driver
|
||||
Once onnxruntime is installed, you should be able to build like normal with CMake or Meson.
|
||||
Once onnxruntime is installed, you should be able to build like normal with CMake or Meson.
|
||||
|
||||
If it properly found everything, - CMake should say
|
||||
If it properly found everything, CMake should say
|
||||
|
||||
```
|
||||
-- Found ONNXRUNTIME: /usr/local/include/onnxruntime
|
||||
|
@ -75,16 +75,16 @@ You can see if the driver is working with `openxr-simple-playground`, StereoKit,
|
|||
|
||||
This tracking likes to be in a bright, evenly-lit room with multiple light sources. Turn all the lights on, see if you can find any lamps. If the ML models can see well, the tracking quality can get surprisingly nice.
|
||||
|
||||
Sometimes, the tracking fails when it can see more than one hand. As the tracking gets better (we train better ML models and squash more bugs) this should happen less often or not at all. If it does, put one of your hands down, and it should resume tracking the remaining hand just fine.
|
||||
Sometimes, the tracking fails when it can see more than one hand. As the tracking gets better (we train better ML models and squash more bugs) this should happen less often or not at all. If it does, put one of your hands down, and it should resume tracking the remaining hand just fine.
|
||||
|
||||
# Future improvements
|
||||
|
||||
* Get more training data; train better ML models.
|
||||
* Improve the tracking math
|
||||
* Improve the tracking math
|
||||
* Be smarter about keeping tracking lock on a hand
|
||||
* Try predicting the next bounding box based on the estimated keypoints of the last few frames instead of blindly trusting the detection model, and not run the detection model *every single* frame.
|
||||
* Try predicting the next bounding box based on the estimated keypoints of the last few frames instead of uncritically trusting the detection model, and not run the detection model *every single* frame.
|
||||
* Instead of directly doing disparity on the observed keypoints, use a kinematic model of the hand and fit that to the 2D observations - this should get rid of a *lot* of jitter and make it look better to the end user if the ML models fail
|
||||
* Make something that also works with non-stereo (mono, trinocular, or N cameras) camera setups
|
||||
* Optionally run the ML models on GPU - currently, everything's CPU bound which could be dumb under some circumstances
|
||||
* Optionally run the ML models on GPU - currently, everything's CPU-bound, which could be sub-optimal under some circumstances
|
||||
* Write a lot of generic code so that you can run this on any stereo camera
|
||||
* More advanced prediction/interpolation code that doesn't care at all about the input frame cadence. One-euro filters are pretty good about this, but we can get better!
|
||||
* More advanced prediction/interpolation code that doesn't care at all about the input frame cadence. One-euro filters are pretty good about this, but we can get better!
|
||||
|
|
|
@ -32,7 +32,7 @@ sumOfHandJointDistances(const Hand3D &one, const Hand3D &two)
|
|||
float
|
||||
errHandHistory(const HandHistory3D &history_hand, const Hand3D &present_hand)
|
||||
{
|
||||
// Remember we never have to deal with an empty hand. Can always access the last element.
|
||||
// Remember we never have to deal with an empty hand. Can always read the last element.
|
||||
return sumOfHandJointDistances(history_hand.last_hands_unfiltered.back(), present_hand);
|
||||
}
|
||||
|
||||
|
|
|
@ -480,7 +480,7 @@ getCalibration(struct HandTracking *htd, t_stereo_camera_calibration *calibratio
|
|||
|
||||
math_quat_from_matrix_3x3(&s, &tmp);
|
||||
|
||||
// Weird that I have to invert this quat, right? I think at some point - like probably just above this - I must
|
||||
// Weird that I have to invert this quat, right? I think at some point - like probably just before this - I must
|
||||
// have swapped row-major and col-major - remember, if you transpose a rotation matrix, you get its inverse.
|
||||
// Doesn't matter that I don't understand - non-inverted looks definitely wrong, inverted looks definitely
|
||||
// right.
|
||||
|
@ -1000,7 +1000,7 @@ HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync,
|
|||
htd->histories_3d[past_indices[i]].last_hands_unfiltered.push_back(
|
||||
hands_unfiltered[present_indices[i]]);
|
||||
}
|
||||
// The above may not do anything, because we'll start out with no hand histories! All the numbers of elements
|
||||
// The preceding may not do anything, because we'll start out with no hand histories! All the numbers of elements
|
||||
// should be zero.
|
||||
|
||||
|
||||
|
|
|
@ -258,7 +258,7 @@ public:
|
|||
float baseline = {};
|
||||
struct xrt_quat stereo_camera_to_left_camera = {};
|
||||
|
||||
uint64_t current_frame_timestamp = {}; // SUPER dumb.
|
||||
uint64_t current_frame_timestamp = {};
|
||||
|
||||
std::vector<HandHistory3D> histories_3d = {};
|
||||
|
||||
|
|
Loading…
Reference in a new issue