164 lines
5.1 KiB
Python
Raw Normal View History

2021-08-13 14:47:04 +02:00
import itertools
2021-09-12 14:20:37 +02:00
from numba import jit
2021-08-11 18:10:06 +02:00
import numpy as np
2021-09-06 13:36:14 +02:00
import rospy
2021-08-11 18:10:06 +02:00
2021-09-11 20:49:55 +02:00
from .policy import MultiViewPolicy
2021-08-11 18:10:06 +02:00
2021-09-12 14:20:37 +02:00
from .timer import Timer
@jit(nopython=True)
def get_voxel_at(voxel_size, p):
    """Return the index of the voxel that contains the point ``p``.

    The bounds check is hard-coded to 40 voxels per axis, with voxel
    ``(0, 0, 0)`` starting at the origin of the frame ``p`` is expressed in.

    Args:
        voxel_size: Edge length of one voxel, in the same unit as ``p``.
        p: NumPy float array with the point coordinates (the caller in this
            file passes a 3-vector).

    Returns:
        An ``int64`` array with one voxel index per axis, or ``None`` when
        any index falls outside ``[0, 40)``.
    """
    # Floor rather than truncate: a plain integer cast rounds toward zero, so
    # coordinates in (-voxel_size, 0) would map to index 0 and points lying
    # just outside the lower grid boundary would be reported as inside.
    index = np.floor(p / voxel_size).astype(np.int64)
    return index if (index >= 0).all() and (index < 40).all() else None
2021-09-12 17:11:42 +02:00
# Note: raycast is JIT-compiled on its first call, so that first call is noticeably slower.
2021-09-12 14:20:37 +02:00
@jit(nopython=True)
def raycast(
    voxel_size,
    tsdf_grid,
    ori,
    pos,
    fx,
    fy,
    cx,
    cy,
    u_min,
    u_max,
    v_min,
    v_max,
    t_min,
    t_max,
    t_step,
):
    """March one ray per pixel through the TSDF grid and collect visited voxels.

    For every pixel ``(u, v)`` in ``[u_min, u_max) x [v_min, v_max)`` a unit
    ray is built from the pinhole parameters, rotated by ``ori`` and sampled
    from ``pos`` at distances ``t_min, t_min + t_step, ...`` while the
    distance is below ``t_max``. A ray stops as soon as the TSDF changes sign
    relative to the previously visited voxel, unless that previous value is
    still at the initial marker ``-1``.

    Args:
        voxel_size: Edge length of a voxel, forwarded to ``get_voxel_at``.
        tsdf_grid: 3-D array indexed as ``tsdf_grid[i, j, k]``.
        ori: Matrix applied to the normalized pixel direction.
        pos: Origin of all rays.
        fx, fy, cx, cy: Pinhole intrinsics used to build ray directions.
        u_min, u_max, v_min, v_max: Pixel bounds (upper bounds exclusive).
        t_min, t_max, t_step: First sample distance, distance limit and
            increment along each ray.

    Returns:
        List of voxel index arrays, one entry per sample that fell inside the
        grid before its ray stopped. Duplicates are not removed.
    """
    visited = []
    for u in range(u_min, u_max):
        x_cam = (u - cx) / fx
        for v in range(v_min, v_max):
            y_cam = (v - cy) / fy
            ray = np.asarray([x_cam, y_cam, 1.0])
            ray = ray / np.linalg.norm(ray)
            ray = ori @ ray
            t = t_min
            prev_tsdf = -1.0
            while t < t_max:
                point = pos + t * ray
                t += t_step
                voxel = get_voxel_at(voxel_size, point)
                if voxel is not None:
                    i, j, k = voxel
                    cur_tsdf = tsdf_grid[i, j, k]
                    # Sign change against an already-updated value: crossed a surface
                    if prev_tsdf > -1 and cur_tsdf * prev_tsdf < 0:
                        break
                    visited.append(voxel)
                    prev_tsdf = cur_tsdf
    return visited
2021-08-11 18:10:06 +02:00
2021-08-26 11:43:03 +02:00
class NextBestView(MultiViewPolicy):
    """Multi-view policy that moves to the candidate view with the highest utility.

    On each update the policy integrates the latest observation, generates
    candidate views on the view sphere, scores each one with an information
    gain (``ig_fn``) and a cost (``cost_fn``), and selects the view that
    maximizes normalized gain minus normalized cost.
    """

    def __init__(self):
        """Initialize the base policy and read parameters from the ROS parameter server."""
        super().__init__()
        self.min_z_dist = rospy.get_param("~camera/min_z_dist")
        self.max_views = rospy.get_param("nbv_grasp/max_views")
        self.min_gain = rospy.get_param("nbv_grasp/min_gain")

    def activate(self, bbox, view_sphere):
        """Forward activation to the base policy without additional work."""
        super().activate(bbox, view_sphere)

    def update(self, img, x, q):
        """Integrate a new observation and choose the next view.

        Sets ``self.done`` when the view budget is exceeded, the best grasp
        prediction is stable, no feasible candidate view exists, or the gain
        of the selected view is below ``self.min_gain``. Otherwise (and also
        in the low-gain case) ``self.x_d`` is set to the selected view.

        Args:
            img, x: Forwarded unchanged to ``self.integrate``.
            q: Forwarded to ``self.integrate`` and used for the feasibility
                check of candidate views.
        """
        if len(self.views) > self.max_views or self.best_grasp_prediction_is_stable():
            self.done = True
            return

        with Timer("state_update"):
            self.integrate(img, x, q)
        with Timer("view_generation"):
            views = self.generate_views(q)

        if not views:
            # Without candidates the normalization and argmax below would raise.
            rospy.logwarn("No feasible view candidates, stopping the policy")
            self.done = True
            return

        with Timer("ig_computation"):
            gains = [self.ig_fn(v, 10) for v in views]
        with Timer("cost_computation"):
            costs = [self.cost_fn(v) for v in views]

        utilities = self._normalized(gains) - self._normalized(costs)
        self.vis.views(self.base_frame, self.intrinsic, views, utilities)
        i = np.argmax(utilities)
        nbv, gain = views[i], gains[i]
        if gain < self.min_gain:
            self.done = True
        self.x_d = nbv

    @staticmethod
    def _normalized(values):
        """Return ``values`` divided by their sum, or zeros when the sum is zero."""
        values = np.asarray(values, dtype=float)
        total = values.sum()
        if total == 0:
            # Avoid 0/0 -> NaN utilities (e.g. when no view yields any gain).
            return np.zeros_like(values)
        return values / total

    def best_grasp_prediction_is_stable(self):
        """Return True if the quality history at the best grasp's voxel is stable.

        Stable means: ``self.T`` nonzero entries in the history at that voxel,
        a mean above 0.9 and a standard deviation below 0.05. Returns False
        when there is no best grasp.
        """
        if not self.best_grasp:
            return False
        t = (self.T_task_base * self.best_grasp.pose).translation
        # NOTE(review): no bounds check on the voxel index; assumes the grasp
        # position lies inside the grid -- confirm against the caller.
        i, j, k = (t / self.tsdf.voxel_size).astype(int)
        qs = self.qual_hist[:, i, j, k]
        return bool(
            np.count_nonzero(qs) == self.T
            and np.mean(qs) > 0.9
            and np.std(qs) < 0.05
        )

    def generate_views(self, q):
        """Return the feasible candidate views sampled from the view sphere.

        Candidates are taken at polar angles of 15 and 30 degrees and eight
        azimuth angles spaced 45 degrees apart; a candidate is kept when
        ``self.is_feasible(view, q)`` is truthy.
        """
        thetas = np.deg2rad([15, 30])
        phis = np.arange(8) * np.deg2rad(45)
        view_candidates = []
        for theta, phi in itertools.product(thetas, phis):
            view = self.view_sphere.get_view(theta, phi)
            if self.is_feasible(view, q):
                view_candidates.append(view)
        return view_candidates

    def ig_fn(self, view, downsample):
        """Estimate the information gain of ``view`` by ray casting into the TSDF.

        Args:
            view: Candidate camera pose; it is inverted to project the bounding
                box and composed with ``self.T_task_base`` for ray casting.
            downsample: Factor by which the camera intrinsics are divided to
                reduce the number of rays.

        Returns:
            Number of distinct voxels hit by the rays whose rescaled TSDF value
            lies strictly between -1 and 0; 0 if no ray hits the grid.
        """
        tsdf_grid, voxel_size = self.tsdf.get_grid(), self.tsdf.voxel_size
        tsdf_grid = -1.0 + 2.0 * tsdf_grid  # Open3D maps tsdf to [0,1]

        # Downsample the sensor resolution
        fx = self.intrinsic.fx / downsample
        fy = self.intrinsic.fy / downsample
        cx = self.intrinsic.cx / downsample
        cy = self.intrinsic.cy / downsample

        # Project bbox onto the image plane to get better bounds
        # NOTE(review): assumes all corners have positive depth in the camera
        # frame and ignores the image size -- confirm this is intended.
        T_cam_base = view.inv()
        corners = np.array([T_cam_base.apply(p) for p in self.bbox.corners]).T
        u = (fx * corners[0] / corners[2] + cx).round().astype(int)
        v = (fy * corners[1] / corners[2] + cy).round().astype(int)
        u_min, u_max = u.min(), u.max()
        v_min, v_max = v.min(), v.max()

        t_min = self.min_z_dist
        t_max = corners[2].max()  # TODO This bound might be a bit too short
        t_step = np.sqrt(3) * voxel_size  # TODO replace with line rasterization

        # Cast rays from the camera view (we'll work in the task frame from now on)
        view = self.T_task_base * view
        ori, pos = view.rotation.as_matrix(), view.translation
        voxel_indices = raycast(
            voxel_size,
            tsdf_grid,
            ori,
            pos,
            fx,
            fy,
            cx,
            cy,
            u_min,
            u_max,
            v_min,
            v_max,
            t_min,
            t_max,
            t_step,
        )

        # No ray entered the grid: np.unique on an empty list cannot be
        # unpacked into three index arrays, so report zero gain directly.
        if len(voxel_indices) == 0:
            return 0

        # Count rear side voxels
        i, j, k = np.unique(voxel_indices, axis=0).T
        tsdfs = tsdf_grid[i, j, k]
        ig = np.logical_and(tsdfs > -1.0, tsdfs < 0.0).sum()
        return ig

    def cost_fn(self, view):
        """Return the cost of moving to ``view``; currently a constant 1.0."""
        return 1.0