success
This commit is contained in:
268
baselines/grasping/GSNet/dataset/graspnet_dataset.py
Executable file
268
baselines/grasping/GSNet/dataset/graspnet_dataset.py
Executable file
@@ -0,0 +1,268 @@
|
||||
""" GraspNet dataset processing.
|
||||
Author: chenxi-wang
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import scipy.io as scio
|
||||
from PIL import Image
|
||||
|
||||
import torch
|
||||
import collections.abc as container_abcs
|
||||
from torch.utils.data import Dataset
|
||||
from tqdm import tqdm
|
||||
import MinkowskiEngine as ME
|
||||
from data_utils import CameraInfo, transform_point_cloud, create_point_cloud_from_depth_image, get_workspace_mask
|
||||
|
||||
|
||||
class GraspNetDataset(Dataset):
    """Per-frame GraspNet samples: a sampled point cloud plus optional labels.

    Every scene contributes 256 frames. File paths for depth, segmentation,
    camera meta and graspness files are enumerated eagerly in ``__init__``;
    the image/label data itself is read lazily in ``__getitem__``.

    Args:
        root: Dataset root directory (contains ``scenes``, ``graspness`` and
            ``collision_label`` sub-directories).
        grasp_labels: Mapping ``object id -> (points, widths, scores)`` as
            returned by ``load_grasp_labels``. Only used when ``load_label``
            is true.
        camera: Camera sub-directory name used when building paths.
        split: One of ``'train'``, ``'test'``, ``'test_seen'``,
            ``'test_similar'``, ``'test_novel'``.
        num_points: Number of points sampled from each frame (at most 50000).
        voxel_size: Divisor applied to point coordinates to produce ``coors``.
        remove_outlier: When true, additionally mask the cloud with
            ``get_workspace_mask``.
        augment: When true, apply ``augment_data`` to labelled samples.
        load_label: When true, collision labels are loaded up front and
            ``__getitem__`` returns labelled samples.

    Raises:
        ValueError: If ``split`` is not one of the supported names.
    """

    # Half-open scene index range [start, stop) for every supported split.
    _SPLIT_SCENE_RANGES = {
        'train': (0, 100),
        'test': (100, 190),
        'test_seen': (100, 130),
        'test_similar': (130, 160),
        'test_novel': (160, 190),
    }
    # Number of frames enumerated per scene.
    _FRAMES_PER_SCENE = 256

    def __init__(self, root, grasp_labels=None, camera='kinect', split='train', num_points=20000,
                 voxel_size=0.005, remove_outlier=True, augment=False, load_label=True):
        assert (num_points <= 50000)
        # Fail early with a clear message instead of an AttributeError on
        # ``self.sceneIds`` further down.
        if split not in self._SPLIT_SCENE_RANGES:
            raise ValueError('Unknown split {!r}; expected one of {}'.format(
                split, sorted(self._SPLIT_SCENE_RANGES)))

        self.root = root
        self.split = split
        self.voxel_size = voxel_size
        self.num_points = num_points
        self.remove_outlier = remove_outlier
        self.grasp_labels = grasp_labels
        self.camera = camera
        self.augment = augment
        self.load_label = load_label
        self.collision_labels = {}

        first_scene, last_scene = self._SPLIT_SCENE_RANGES[split]
        self.sceneIds = ['scene_{}'.format(str(x).zfill(4)) for x in range(first_scene, last_scene)]

        self.depthpath = []
        self.labelpath = []
        self.metapath = []
        self.scenename = []
        self.frameid = []
        self.graspnesspath = []
        for x in tqdm(self.sceneIds, desc='Loading data path and collision labels...'):
            scene = x.strip()
            for img_num in range(self._FRAMES_PER_SCENE):
                frame = str(img_num).zfill(4)
                self.depthpath.append(os.path.join(root, 'scenes', x, camera, 'depth', frame + '.png'))
                self.labelpath.append(os.path.join(root, 'scenes', x, camera, 'label', frame + '.png'))
                self.metapath.append(os.path.join(root, 'scenes', x, camera, 'meta', frame + '.mat'))
                self.graspnesspath.append(os.path.join(root, 'graspness', x, camera, frame + '.npy'))
                self.scenename.append(scene)
                self.frameid.append(img_num)
            if self.load_label:
                label_file = os.path.join(root, 'collision_label', scene, 'collision_labels.npz')
                # Read every array while the archive is open, then release
                # the file handle instead of leaving one open per scene.
                with np.load(label_file) as collision_labels:
                    self.collision_labels[scene] = {}
                    for i in range(len(collision_labels)):
                        self.collision_labels[scene][i] = collision_labels['arr_{}'.format(i)]

    def scene_list(self):
        """Return the scene name of every sample, in sample order."""
        return self.scenename

    def __len__(self):
        """Return the number of frames (samples) in the dataset."""
        return len(self.depthpath)

    def augment_data(self, point_clouds, object_poses_list):
        """Apply a random mirror and a random rotation to cloud and poses.

        With probability 0.5 the x coordinate is negated; afterwards a
        rotation by an angle drawn uniformly from [-30, +30) degrees is always
        applied. ``object_poses_list`` is updated in place and also returned.

        Args:
            point_clouds: Points passed to ``transform_point_cloud`` with the
                ``'3x3'`` format.
            object_poses_list: List of pose matrices that are left-multiplied
                by the same 3x3 transforms (presumably 3x4 — TODO confirm).

        Returns:
            Tuple ``(point_clouds, object_poses_list)`` after augmentation.
        """
        # Flipping along the YZ plane
        if np.random.random() > 0.5:
            flip_mat = np.array([[-1, 0, 0],
                                 [0, 1, 0],
                                 [0, 0, 1]])
            point_clouds = transform_point_cloud(point_clouds, flip_mat, '3x3')
            for i, pose in enumerate(object_poses_list):
                object_poses_list[i] = np.dot(flip_mat, pose).astype(np.float32)

        # Random rotation in -30 ~ +30 degree.
        # NOTE(review): the original comment said "up-axis/Z-axis", but the
        # matrix below leaves x untouched, i.e. it rotates about the X axis.
        # Behavior is kept as-is; confirm which axis is intended.
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6
        c, s = np.cos(rot_angle), np.sin(rot_angle)
        rot_mat = np.array([[1, 0, 0],
                            [0, c, -s],
                            [0, s, c]])
        point_clouds = transform_point_cloud(point_clouds, rot_mat, '3x3')
        for i, pose in enumerate(object_poses_list):
            object_poses_list[i] = np.dot(rot_mat, pose).astype(np.float32)

        return point_clouds, object_poses_list

    def __getitem__(self, index):
        """Return the labelled sample if ``load_label`` is set, else the plain one."""
        if self.load_label:
            return self.get_data_label(index)
        else:
            return self.get_data(index)

    def _cloud_and_mask(self, index, depth, seg, intrinsic, factor_depth):
        """Build the organized cloud of a frame and the mask of points to keep."""
        scene = self.scenename[index]
        # Arguments presumably are width, height, fx, fy, cx, cy, depth scale
        # — verify against data_utils.CameraInfo.
        camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2],
                            factor_depth)

        # generate cloud
        cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)

        # get valid points
        depth_mask = (depth > 0)
        if not self.remove_outlier:
            return cloud, depth_mask

        camera_dir = os.path.join(self.root, 'scenes', scene, self.camera)
        camera_poses = np.load(os.path.join(camera_dir, 'camera_poses.npy'))
        align_mat = np.load(os.path.join(camera_dir, 'cam0_wrt_table.npy'))
        trans = np.dot(align_mat, camera_poses[self.frameid[index]])
        workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02)
        return cloud, (depth_mask & workspace_mask)

    def _sample_indices(self, num_available):
        """Pick ``self.num_points`` indices out of ``num_available`` points.

        Samples without replacement when enough points exist; otherwise keeps
        every point once and pads with indices drawn with replacement.
        """
        if num_available >= self.num_points:
            return np.random.choice(num_available, self.num_points, replace=False)
        keep_all = np.arange(num_available)
        padding = np.random.choice(num_available, self.num_points - num_available, replace=True)
        return np.concatenate([keep_all, padding], axis=0)

    def get_data(self, index, return_raw_cloud=False):
        """Load one frame without labels.

        Args:
            index: Sample index.
            return_raw_cloud: When true, return the masked cloud before
                sampling instead of the sample dictionary.

        Returns:
            The masked cloud array if ``return_raw_cloud`` is true, otherwise
            a dict with ``point_clouds`` (float32 sampled points), ``coors``
            (points divided by ``voxel_size``) and ``feats`` (all-ones array
            of the same shape as the points).

        Raises:
            Exception: Re-raises any error hit while reading the camera
                fields from the meta file, after printing it and the scene.
        """
        depth = np.array(Image.open(self.depthpath[index]))
        seg = np.array(Image.open(self.labelpath[index]))
        meta = scio.loadmat(self.metapath[index])
        scene = self.scenename[index]
        try:
            intrinsic = meta['intrinsic_matrix']
            factor_depth = meta['factor_depth']
        except Exception as e:
            print(repr(e))
            print(scene)
            # Re-raise: continuing would only fail later with an unrelated
            # UnboundLocalError on ``intrinsic``.
            raise

        cloud, mask = self._cloud_and_mask(index, depth, seg, intrinsic, factor_depth)
        cloud_masked = cloud[mask]

        if return_raw_cloud:
            return cloud_masked

        # sample points random
        idxs = self._sample_indices(len(cloud_masked))
        cloud_sampled = cloud_masked[idxs]

        ret_dict = {'point_clouds': cloud_sampled.astype(np.float32),
                    'coors': cloud_sampled.astype(np.float32) / self.voxel_size,
                    'feats': np.ones_like(cloud_sampled).astype(np.float32),
                    }
        return ret_dict

    def get_data_label(self, index):
        """Load one frame together with graspness, objectness and grasp labels.

        Objects with fewer than 50 sampled points are skipped. For every kept
        object a random subset of its grasp points is selected, and scores of
        grasps flagged in the collision labels are set to 0.

        Args:
            index: Sample index.

        Returns:
            Dict with ``point_clouds``, ``coors``, ``feats``,
            ``graspness_label``, ``objectness_label`` and the per-object lists
            ``object_poses_list``, ``grasp_points_list``,
            ``grasp_widths_list``, ``grasp_scores_list``.

        Raises:
            Exception: Re-raises any error hit while reading fields from the
                meta file, after printing it and the scene.
        """
        depth = np.array(Image.open(self.depthpath[index]))
        seg = np.array(Image.open(self.labelpath[index]))
        meta = scio.loadmat(self.metapath[index])
        graspness = np.load(self.graspnesspath[index])  # for each point in workspace masked point cloud
        scene = self.scenename[index]
        try:
            obj_idxs = meta['cls_indexes'].flatten().astype(np.int32)
            poses = meta['poses']
            intrinsic = meta['intrinsic_matrix']
            factor_depth = meta['factor_depth']
        except Exception as e:
            print(repr(e))
            print(scene)
            # Re-raise: continuing would only fail later with an unrelated
            # UnboundLocalError.
            raise

        cloud, mask = self._cloud_and_mask(index, depth, seg, intrinsic, factor_depth)
        cloud_masked = cloud[mask]
        seg_masked = seg[mask]

        # sample points
        idxs = self._sample_indices(len(cloud_masked))
        cloud_sampled = cloud_masked[idxs]
        seg_sampled = seg_masked[idxs]
        # assumes graspness is aligned one-to-one with cloud_masked — TODO confirm
        graspness_sampled = graspness[idxs]

        # Collapse every segmentation id above 1 to 1.
        objectness_label = seg_sampled.copy()
        objectness_label[objectness_label > 1] = 1

        object_poses_list = []
        grasp_points_list = []
        grasp_widths_list = []
        grasp_scores_list = []
        for i, obj_idx in enumerate(obj_idxs):
            if (seg_sampled == obj_idx).sum() < 50:
                continue
            object_poses_list.append(poses[:, :, i])
            points, widths, scores = self.grasp_labels[obj_idx]
            collision = self.collision_labels[scene][i]  # (Np, V, A, D)

            # Keep a quarter of the grasp points, but at least 300 (or all of
            # them when fewer than 300 exist).
            num_grasp_points = min(max(int(len(points) / 4), 300), len(points))
            grasp_idxs = np.random.choice(len(points), num_grasp_points, replace=False)
            grasp_points_list.append(points[grasp_idxs])
            grasp_widths_list.append(widths[grasp_idxs])
            collision = collision[grasp_idxs].copy()
            scores = scores[grasp_idxs].copy()
            scores[collision] = 0
            grasp_scores_list.append(scores)

        if self.augment:
            cloud_sampled, object_poses_list = self.augment_data(cloud_sampled, object_poses_list)

        ret_dict = {'point_clouds': cloud_sampled.astype(np.float32),
                    'coors': cloud_sampled.astype(np.float32) / self.voxel_size,
                    'feats': np.ones_like(cloud_sampled).astype(np.float32),
                    'graspness_label': graspness_sampled.astype(np.float32),
                    'objectness_label': objectness_label.astype(np.int64),
                    'object_poses_list': object_poses_list,
                    'grasp_points_list': grasp_points_list,
                    'grasp_widths_list': grasp_widths_list,
                    'grasp_scores_list': grasp_scores_list}
        return ret_dict
|
||||
|
||||
|
||||
def load_grasp_labels(root):
    """Load the simplified grasp labels of objects 1..88.

    Label files live in ``<root>/grasp_label_simplified`` and are numbered
    from zero (``000_labels.npz`` ...), while the returned dictionary is keyed
    by the one-based object id.

    Args:
        root: Dataset root directory.

    Returns:
        Dict mapping object id to a tuple ``(points, width, scores)`` of
        float32 arrays.
    """
    obj_names = list(range(1, 89))
    grasp_labels = {}
    for obj_name in tqdm(obj_names, desc='Loading grasping labels...'):
        label_path = os.path.join(root, 'grasp_label_simplified',
                                  '{}_labels.npz'.format(str(obj_name - 1).zfill(3)))
        # The arrays are copied by astype(), so the archive can be closed as
        # soon as they are read instead of leaving the handle open.
        with np.load(label_path) as label:
            grasp_labels[obj_name] = (label['points'].astype(np.float32),
                                      label['width'].astype(np.float32),
                                      label['scores'].astype(np.float32))

    return grasp_labels
|
||||
|
||||
|
||||
def minkowski_collate_fn(list_data):
    """Collate dataset samples into a batch for MinkowskiEngine.

    ``coors`` / ``feats`` of all samples are merged with
    ``ME.utils.sparse_collate`` and de-duplicated with
    ``ME.utils.sparse_quantize``. The metadata keys ``obj_pcl_dict``,
    ``frame_path`` and ``object_name`` are passed through as plain Python
    lists when the samples provide them. They are optional so that samples
    without them (such as those built by ``GraspNetDataset``) can be collated
    too. Every other key is collated recursively into tensors.

    Args:
        list_data: List of sample dicts; each must contain ``coors`` and
            ``feats``.

    Returns:
        Dict with ``coors``, ``feats``, ``quantize2original``, the available
        pass-through keys, and the collated remaining keys.
    """
    # Metadata that is forwarded untouched (one list entry per sample).
    passthrough_keys = ("obj_pcl_dict", "frame_path", "object_name")
    # Keys that are handled here and must not go through collate_fn_.
    handled_keys = ("coors", "feats") + passthrough_keys

    coordinates_batch, features_batch = ME.utils.sparse_collate(
        [d["coors"] for d in list_data], [d["feats"] for d in list_data])
    coordinates_batch = np.ascontiguousarray(coordinates_batch, dtype=np.int32)
    coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(
        coordinates_batch, features_batch, return_index=True, return_inverse=True)
    res = {
        "coors": coordinates_batch,
        "feats": features_batch,
        "quantize2original": quantize2original,
    }
    for key in passthrough_keys:
        # Presence is decided by the first sample; a key that is present there
        # but missing from a later sample still raises KeyError.
        if key in list_data[0]:
            res[key] = [d[key] for d in list_data]

    def collate_fn_(batch):
        """Recursively turn numpy data into tensors; dict entries land in ``res``."""
        if type(batch[0]).__module__ == 'numpy':
            return torch.stack([torch.from_numpy(b) for b in batch], 0)
        elif isinstance(batch[0], container_abcs.Sequence):
            return [[torch.from_numpy(sample) for sample in b] for b in batch]
        elif isinstance(batch[0], container_abcs.Mapping):
            for key in batch[0]:
                if key in handled_keys:
                    continue
                res[key] = collate_fn_([d[key] for d in batch])
            return res

    res = collate_fn_(list_data)
    return res
|
Reference in New Issue
Block a user