commit 0ea3f048dc
2024-10-09 16:13:22 +00:00
437 changed files with 44406 additions and 0 deletions

0
utils/__init__.py Executable file

19
utils/cache_util.py Executable file

@@ -0,0 +1,19 @@
from collections import OrderedDict
class LRUCache:
def __init__(self, capacity: int):
self.cache = OrderedDict()
self.capacity = capacity
def get(self, key):
if key not in self.cache:
return None
self.cache.move_to_end(key)
return self.cache[key]
def put(self, key, value):
if key in self.cache:
self.cache.move_to_end(key)
elif len(self.cache) >= self.capacity:
self.cache.popitem(last=False)
self.cache[key] = value
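A minimal usage sketch for the cache above (keys and values are illustrative, not part of the commit):

# Usage sketch: exercise the LRU eviction order.
cache = LRUCache(capacity=2)
cache.put("a", 1)
cache.put("b", 2)
cache.get("a")       # touching "a" makes "b" the least recently used
cache.put("c", 3)    # capacity exceeded: "b" is evicted
assert cache.get("b") is None
assert cache.get("a") == 1 and cache.get("c") == 3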

83
utils/file_util.py Executable file

@@ -0,0 +1,83 @@
import os
import pickle
import json
import numpy as np
class FileUtil:
@staticmethod
def get_path(file_name, target_dir=None):
if target_dir is None:
file_path = file_name
else:
file_path = os.path.join(target_dir, file_name)
return file_path
@staticmethod
def load_pickle(file_name, target_dir=None):
file_path = FileUtil.get_path(file_name, target_dir)
with open(file_path, "rb") as f:
return pickle.load(f)
@staticmethod
def save_pickle(data, file_name, target_dir=None):
file_path = FileUtil.get_path(file_name, target_dir)
with open(file_path, "wb") as f:
pickle.dump(data, f)
return True
@staticmethod
def load_json(file_name, target_dir=None):
file_path = FileUtil.get_path(file_name, target_dir)
with open(file_path, "r") as f:
return json.load(f)
@staticmethod
def save_json(data, file_name, target_dir=None):
file_path = FileUtil.get_path(file_name, target_dir)
with open(file_path, "w") as f:
json.dump(data, f)
return True
@staticmethod
def save_np_txt(np_data, file_name, target_dir=None):
if len(np_data.shape) > 2:
raise ValueError("Only 2D arrays are supported.")
file_path = FileUtil.get_path(file_name, target_dir)
np.savetxt(file_path, np_data)
@staticmethod
def load_np_txt(file_name, target_dir=None, shuffle=False):
file_path = FileUtil.get_path(file_name, target_dir)
np_data = np.loadtxt(file_path)
if shuffle:
indices = np.arange(np_data.shape[0])
np.random.shuffle(indices)
np_data_shuffled = np_data[indices]
return np_data_shuffled
else:
return np_data
@staticmethod
def find_object_models(path):
obj_files = {}
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".obj"):
full_path = os.path.join(root, file)
                    # rstrip(".obj") strips characters, not the suffix; trim it explicitly
                    modified_name = full_path.replace(path, "").replace(os.sep, "_")
                    if modified_name.endswith(".obj"):
                        modified_name = modified_name[: -len(".obj")]
                    if modified_name.startswith("_"):
                        modified_name = modified_name[1:]
obj_files[modified_name] = full_path
return obj_files
''' ------------ Debug ------------ '''
if __name__ == "__main__":
arr2d = np.random.random((4, 3))
print(arr2d)
np.savetxt("test.txt", arr2d)
loaded_arr2d = FileUtil.load_np_txt("test.txt")
print()
print(loaded_arr2d)
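A round-trip sketch for the JSON helpers above; the file name and payload are hypothetical:

# Sketch: save_json followed by load_json returns the same payload.
payload = {"epoch": 3, "lr": 1e-4}
FileUtil.save_json(payload, "run_meta.json", target_dir=".")
assert FileUtil.load_json("run_meta.json", target_dir=".") == payload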

124
utils/metric_util.py Executable file

@@ -0,0 +1,124 @@
import numpy as np
class MetricUtil:
@staticmethod
def rotate_around(axis, angle_deg):
angle = angle_deg * np.pi / 180
if axis == "x":
return np.array([[1, 0, 0],
[0, np.cos(angle), -np.sin(angle)],
[0, np.sin(angle), np.cos(angle)]])
elif axis == "y":
return np.array([[np.cos(angle), 0, np.sin(angle)],
[0, 1, 0],
[-np.sin(angle), 0, np.cos(angle)]])
elif axis == "z":
return np.array([[np.cos(angle), -np.sin(angle), 0],
[np.sin(angle), np.cos(angle), 0],
[0, 0, 1]])
else:
raise ValueError("Invalid axis")
@staticmethod
def basic_rot_diff(r0, r1):
mat_diff = np.matmul(r0, r1.swapaxes(-1, -2))
        diff = np.trace(mat_diff, axis1=-2, axis2=-1) - 1  # trace over the matrix axes so batched inputs also work
return np.arccos(np.clip(diff / 2.0, a_min=-1.0, a_max=1.0))
@staticmethod
def axis_rot_diff(r0, r1, axis):
axis1, axis2 = r0[..., axis], r1[..., axis]
diff = np.sum(axis1 * axis2, axis=-1)
return np.arccos(np.clip(diff, a_min=-1.0, a_max=1.0))
@staticmethod
def turn_rot_diff(r0, r1, axis, turn_degrees):
diffs = []
for i in turn_degrees:
rotation_matrix = MetricUtil.rotate_around(axis, i)
diffs.append(MetricUtil.basic_rot_diff(np.matmul(r0, rotation_matrix), r1))
return np.min(diffs, axis=0)
@staticmethod
def rot_diff_rad(r0, r1, sym):
axis_map = {0: "x", 1: "y", 2: "z"}
if sym is None or sym == 0: # no symmetry
return MetricUtil.basic_rot_diff(r0, r1)
elif sym in [1, 2, 3]: # free rotation around axis
return MetricUtil.axis_rot_diff(r0, r1, sym - 1)
        else: # discrete symmetry: finite turns around an axis
            if sym in [4, 5, 6]: # half turn
                axis_idx = sym - 4
                turns = 2
            elif sym in [7, 8, 9]: # quarter turn
                axis_idx = sym - 7
                turns = 4
            else:
                raise ValueError(f"Invalid symmetry label: {sym}")
            turn_degrees = np.arange(0, 360, 360 / turns)
            return MetricUtil.turn_rot_diff(r0, r1, axis_map[axis_idx], turn_degrees)
@staticmethod
def collect_metric(pred_pose_mat, gt_pose_mat, sym):
pred_rot_mat = pred_pose_mat[:, :3, :3]
gt_rot_mat = gt_pose_mat[:, :3, :3]
pred_trans = pred_pose_mat[:, :3, 3]
gt_trans = gt_pose_mat[:, :3, 3]
trans_error = []
rot_error = []
for i in range(pred_rot_mat.shape[0]):
tdiff = np.linalg.norm(pred_trans[i] - gt_trans[i], ord=2) * 100
rdiff = MetricUtil.rot_diff_rad(pred_rot_mat[i], gt_rot_mat[i], sym[i]) / np.pi * 180.0
trans_error.append(tdiff)
rot_error.append(rdiff)
rot_error = {
'mean': np.mean(rot_error),
'median': np.median(rot_error),
'item': rot_error,
}
trans_error = {
'mean': np.mean(trans_error),
'median': np.median(trans_error),
'item': trans_error,
}
error = {'rot_error': rot_error,
'trans_error': trans_error}
return error
# -------------- Debug ---------------
def test_MetricUtil():
    identity = np.eye(3)
    print("test case 0: no rotation")
    print(MetricUtil.rot_diff_rad(identity, identity, 0) * 180 / np.pi)
    print("test case 1: 29 degree rotation around x-axis")
    rotation_matrix = MetricUtil.rotate_around("x", 29)
    for sym in (0, 1, 8):
        print(MetricUtil.rot_diff_rad(identity, rotation_matrix, sym) * 180 / np.pi)
    print("test case 2: 90 degree rotation around y-axis")
    rotation_matrix = MetricUtil.rotate_around("y", 90)
    for sym in (0, 2, 8):
        print(MetricUtil.rot_diff_rad(identity, rotation_matrix, sym) * 180 / np.pi)
    print("test case 3: 60 degree rotation around y-axis")
    rotation_matrix = MetricUtil.rotate_around("y", 60)
    for sym in (0, 2, 8):
        print(MetricUtil.rot_diff_rad(identity, rotation_matrix, sym) * 180 / np.pi)
    print("test case 4: 78 degree rotation around z-axis and 60 degree rotation around x-axis")
    rotation_matrix = MetricUtil.rotate_around("z", 78) @ MetricUtil.rotate_around("x", 60)
    for sym in (0, 2, 8):
        print(MetricUtil.rot_diff_rad(identity, rotation_matrix, sym) * 180 / np.pi)
if __name__ == "__main__":
    test_MetricUtil()
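A small sketch of collect_metric on a trivial batch (shapes assumed from the code: N×4×4 pose matrices plus a per-item symmetry label); identical predictions and ground truth should give zero error:

# Sketch: identical pred/gt poses yield ~0 rotation and translation error.
poses = np.tile(np.eye(4), (2, 1, 1))
err = MetricUtil.collect_metric(poses, poses, sym=[0, 0])
print(err["rot_error"]["mean"], err["trans_error"]["mean"])  # ~0.0 0.0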

439
utils/omni_util.py Executable file

@@ -0,0 +1,439 @@
import numpy as np
import pickle
import json
import cv2
import os
import re
from scipy.spatial.transform import Rotation as R
class DepthToPCL:
def __new__(cls, *args, **kwargs):
raise RuntimeError(
"Use init_from_disk or init_from_memory to create an instance"
)
@classmethod
def _initialize(
cls,
distance_to_camera_path=None,
rgb_path=None,
camera_params_path=None,
seg_path=None,
seg_label_path=None,
depth=None,
rgb=None,
seg=None,
seg_label=None,
camera_params=None,
):
instance = super().__new__(cls)
instance._distance_to_camera_path = distance_to_camera_path
instance._rgb_path = rgb_path
instance._camera_params_path = camera_params_path
instance._seg_path = seg_path
instance._seg_label_path = seg_label_path
instance._depth = depth
instance._rgb = rgb
instance._seg = seg
instance._seg_label = seg_label
instance._camera_params = camera_params
if any(
path is not None
for path in [
distance_to_camera_path,
rgb_path,
camera_params_path,
seg_path,
seg_label_path,
]
):
instance._load_from_disk()
instance._setup()
return instance
@classmethod
def init_from_disk(
cls,
distance_to_camera_path,
rgb_path,
camera_params_path,
seg_path,
seg_label_path,
):
return cls._initialize(
distance_to_camera_path=distance_to_camera_path,
rgb_path=rgb_path,
camera_params_path=camera_params_path,
seg_path=seg_path,
seg_label_path=seg_label_path,
)
@classmethod
def init_from_memory(cls, depth, rgb, seg, seg_label, camera_params):
return cls._initialize(
depth=depth,
rgb=rgb,
seg=seg,
seg_label=seg_label,
camera_params=camera_params,
)
def _load_from_disk(self):
self._depth = np.load(self._distance_to_camera_path)
self._seg = cv2.imread(self._seg_path, cv2.IMREAD_UNCHANGED)
with open(self._seg_label_path, "r") as f:
self._seg_label = json.load(f)
with open(self._camera_params_path) as f:
self._camera_params = json.load(f)
def _setup(self):
self._read_camera_params()
self._get_intrinsic_matrix()
def _read_camera_params(self):
self._h_aperture = self._camera_params["cameraAperture"][0]
self._v_aperture = self._camera_params["cameraAperture"][1]
self._h_aperture_offset = self._camera_params["cameraApertureOffset"][0]
self._v_aperture_offset = self._camera_params["cameraApertureOffset"][1]
self._focal_length = self._camera_params["cameraFocalLength"]
self._h_resolution = self._camera_params["renderProductResolution"][0]
self._v_resolution = self._camera_params["renderProductResolution"][1]
self._cam_t = self._camera_params["cameraViewTransform"]
def _get_intrinsic_matrix(self):
self._focal_x = self._h_resolution * self._focal_length / self._h_aperture
self._focal_y = self._v_resolution * self._focal_length / self._v_aperture
self._center_x = self._h_resolution / 2
self._center_y = self._v_resolution / 2
self.intrinsic_matrix = np.array(
[
[self._focal_x, 0, self._center_x],
[0, self._focal_y, self._center_y],
[0, 0, 1],
]
)
return self.intrinsic_matrix
def _get_extrinsic_matrix(self):
        # the third row of the flip matrix was missing its fourth element
        self._cam_pose = np.linalg.inv(np.resize(self._cam_t, (4, 4))).T.dot(
            np.mat([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
        )
return self._cam_pose
def get_pcd(self, target_name=None):
u_indices, v_indices = np.meshgrid(
np.arange(self._h_resolution), np.arange(self._v_resolution)
)
x_factors = (u_indices - self._center_x) / self._focal_x
y_factors = (v_indices - self._center_y) / self._focal_y
if target_name is not None:
if target_name == OmniUtil.FOREGROUND:
unlabelled_mask = self.get_mask_rgba(
self._seg_label, OmniUtil.UNLABELLED
)
background_mask = self.get_mask_rgba(
self._seg_label, OmniUtil.BACKGROUND
)
if unlabelled_mask is None:
target_mask = (self._seg != background_mask).any(axis=2)
else:
target_mask = (self._seg != unlabelled_mask).any(axis=2) & (
self._seg != background_mask
).any(axis=2)
else:
target_mask = (
self._seg == self.get_mask_rgba(self._seg_label, target_name)
).all(axis=2)
else:
target_mask = np.ones((self._v_resolution, self._h_resolution), dtype=bool)
        valid_x_factors = x_factors[target_mask]
        valid_y_factors = y_factors[target_mask]
        valid_z = self._depth[target_mask]
        # back-project through the depth: x = (u - cx) / fx * z, y = (v - cy) / fy * z
        points = np.stack([valid_x_factors * valid_z, valid_y_factors * valid_z, valid_z], axis=1)
        return points
@staticmethod
def get_mask_rgba(mask_labels, mask_name):
name_list = [name_dict["class"] for name_dict in list(mask_labels.values())]
if mask_name not in name_list:
return None
rgba_list = list(mask_labels.keys())
mask_rgba_str = rgba_list[name_list.index(mask_name)]
        r, g, b, a = re.findall(r"\d+", mask_rgba_str)
        r, g, b, a = int(b), int(g), int(r), int(a)  # swap R/B: cv2 loads the mask as BGRA
return r, g, b, a
def get_segmented_pcd(self, target_list, N=15000):
u_indices, v_indices = np.meshgrid(
np.arange(self._h_resolution), np.arange(self._v_resolution)
)
x_factors = (u_indices - self._center_x) / self._focal_x
y_factors = (v_indices - self._center_y) / self._focal_y
points_dict = {}
total_points_with_label = []
for target_idx in range(len(target_list)):
target_name = target_list[target_idx]
target_mask = (
self._seg == self.get_mask_rgba(self._seg_label, target_name)
).all(axis=2)
            valid_x_factors = x_factors[target_mask]
            valid_y_factors = y_factors[target_mask]
            valid_z = self._depth[target_mask]
            label = np.ones_like(valid_x_factors) * target_idx
            # back-project through the depth, carrying the segment label in column 3
            target_points_with_label = np.stack(
                [valid_x_factors * valid_z, valid_y_factors * valid_z, valid_z, label], axis=1
            )
total_points_with_label.append(target_points_with_label)
total_points_with_label = np.concatenate(total_points_with_label, axis=0)
total_points_with_label = self.sample_pcl(total_points_with_label, N)
total_points = total_points_with_label[:, :3]
for target_idx in range(len(target_list)):
target_name = target_list[target_idx]
pts_seg = total_points_with_label[:, 3] == target_idx
points_dict[target_name] = total_points_with_label[pts_seg, :3]
return total_points, points_dict
def get_rgb(self):
return self._rgb
@staticmethod
def sample_pcl(pcl, n_pts=1024):
indices = np.random.choice(pcl.shape[0], n_pts, replace=pcl.shape[0] < n_pts)
return pcl[indices, :]
class OmniUtil:
FOREGROUND = "FOREGROUND"
BACKGROUND = "BACKGROUND"
UNLABELLED = "UNLABELLED"
NON_OBJECT_LIST = ['chair_028', 'chair_029', 'chair_026', 'chair_027', 'table_025', 'table_027', 'table_026', 'table_028', 'sofa_014', 'sofa_013', 'picnic_basket_010', 'picnic_basket_011', 'cabinet_009', 'flower_pot_023', 'flower_pot_022', 'flower_pot_021', 'chair_017', 'chair_020', 'chair_012', 'chair_010', 'chair_018', 'chair_025', 'chair_024', 'chair_011', 'chair_001', 'chair_013', 'chair_004', 'chair_021', 'chair_023', 'chair_006', 'chair_014', 'chair_007', 'chair_003', 'chair_009', 'chair_022', 'chair_015', 'chair_016', 'chair_008', 'chair_005', 'chair_019', 'chair_002', 'table_004', 'table_023', 'table_014', 'table_024', 'table_019', 'table_022', 'table_007', 'table_017', 'table_013', 'table_002', 'table_016', 'table_009', 'table_008', 'table_003', 'table_015', 'table_001', 'table_018', 'table_005', 'table_020', 'table_021', 'sofa_001', 'sofa_005', 'sofa_012', 'sofa_009', 'sofa_006', 'sofa_008', 'sofa_011', 'sofa_004', 'sofa_003', 'sofa_002', 'sofa_007', 'sofa_010', 'picnic_basket_005', 'picnic_basket_004', 'picnic_basket_001', 'picnic_basket_008', 'picnic_basket_002', 'picnic_basket_009', 'picnic_basket_006', 'picnic_basket_003', 'picnic_basket_007', 'cabinet_006', 'cabinet_008', 'cabinet_002', 'cabinet_001', 'cabinet_005', 'cabinet_007', 'flower_pot_013', 'flower_pot_005', 'flower_pot_008', 'flower_pot_001', 'flower_pot_003', 'flower_pot_020', 'flower_pot_006', 'flower_pot_012', 'flower_pot_018', 'flower_pot_007', 'flower_pot_002', 'flower_pot_011', 'flower_pot_010', 'flower_pot_016', 'flower_pot_004', 'flower_pot_014', 'flower_pot_017', 'flower_pot_019']
CAMERA_PARAMS_TEMPLATE = "camera_params_{}.json"
DISTANCE_TEMPLATE = "distance_to_image_plane_{}.npy"
RGB_TEMPLATE = "rgb_{}.png"
MASK_TEMPLATE = "semantic_segmentation_{}.png"
MASK_LABELS_TEMPLATE = "semantic_segmentation_labels_{}.json"
SCORE_LABEL_TEMPLATE = "label_{}.json"
RGB_FEAT_TEMPLATE = "rgb_feat_{}.npy"
@staticmethod
def get_depth_to_pointcloud_instance(path):
root, idx = path[:-4], path[-4:]
distance2plane_path = os.path.join(root, OmniUtil.DISTANCE_TEMPLATE.format(idx))
rgb_path = os.path.join(root, OmniUtil.RGB_TEMPLATE.format(idx))
cam_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
seg_path = os.path.join(root, OmniUtil.MASK_TEMPLATE.format(idx))
seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
depth_to_pcd = DepthToPCL.init_from_disk(
distance2plane_path, rgb_path, cam_params_path, seg_path, seg_labels_path
)
return depth_to_pcd
@staticmethod
def get_points(path, object_name=None):
depth_to_pcd = OmniUtil.get_depth_to_pointcloud_instance(path)
pcd = depth_to_pcd.get_pcd(object_name)
points = np.asarray(pcd, dtype=np.float32)
return points
@staticmethod
def get_segmented_points(path, target_list):
depth_to_pcd = OmniUtil.get_depth_to_pointcloud_instance(path)
total_points, target_points_dict = depth_to_pcd.get_segmented_pcd(target_list)
return total_points, target_points_dict
@staticmethod
def get_object_list(path, contains_non_obj=False):
root, idx = path[:-4], path[-4:]
seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
with open(seg_labels_path, "r") as f:
seg_labels = json.load(f)
object_list = [v["class"] for v in seg_labels.values()]
        if OmniUtil.BACKGROUND in object_list:
            object_list.remove(OmniUtil.BACKGROUND)
        if OmniUtil.UNLABELLED in object_list:
            object_list.remove(OmniUtil.UNLABELLED)
        with open(os.path.join(root, "occluder.pickle"), "rb") as f:
            occluder_list = pickle.load(f)
        with open(os.path.join(root, "fall_objects.pickle"), "rb") as f:
            fall_objects_list = pickle.load(f)
non_obj_list = occluder_list + fall_objects_list
if not contains_non_obj:
for non_obj in non_obj_list:
if non_obj in object_list:
object_list.remove(non_obj)
return object_list
@staticmethod
def get_rotation_mat(path):
root, idx = os.path.split(path)
camera_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
with open(camera_params_path, "r") as f:
raw_camera_params = json.load(f)
cam_transform = np.asarray(raw_camera_params["cameraViewTransform"]).reshape(
(4, 4)
)
cam_rot_mat = cam_transform[:3, :3].dot(
np.mat([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
)
return cam_rot_mat
@staticmethod
def get_rgb(path):
root, idx = os.path.split(path)
rgb_path = os.path.join(root, OmniUtil.RGB_TEMPLATE.format(idx))
rgb = cv2.imread(rgb_path)
return cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
@staticmethod
def get_depth(path):
root, idx = os.path.split(path)
depth_path = os.path.join(root, OmniUtil.DISTANCE_TEMPLATE.format(idx))
depth = np.load(depth_path)
return depth
@staticmethod
def get_seg_data(path):
root, idx = os.path.split(path)
seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
with open(seg_labels_path, "r") as f:
seg_labels = json.load(f)
seg_path = os.path.join(root, OmniUtil.MASK_TEMPLATE.format(idx))
seg = cv2.imread(seg_path, cv2.IMREAD_UNCHANGED)
return seg, seg_labels
@staticmethod
def get_single_seg(path, object_name):
root, idx = os.path.split(path)
seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
with open(seg_labels_path, "r") as f:
seg_labels = json.load(f)
seg_path = os.path.join(root, OmniUtil.MASK_TEMPLATE.format(idx))
seg = cv2.imread(seg_path, cv2.IMREAD_UNCHANGED)
object_mask = (
seg == OmniUtil.get_mask_rgba(seg_labels, object_name)
).all(axis=2)
return object_mask
@staticmethod
def get_mask_rgba(mask_labels, mask_name):
name_list = [name_dict["class"] for name_dict in list(mask_labels.values())]
if mask_name not in name_list:
return None
rgba_list = list(mask_labels.keys())
mask_rgba_str = rgba_list[name_list.index(mask_name)]
        r, g, b, a = re.findall(r"\d+", mask_rgba_str)
        r, g, b, a = int(b), int(g), int(r), int(a)  # swap R/B: cv2 loads the mask as BGRA
return r, g, b, a
@staticmethod
def get_rgb_feat(path):
root, idx = os.path.split(path)
rgb_feat_path = os.path.join(root, OmniUtil.RGB_FEAT_TEMPLATE.format(idx))
rgb_feat = np.load(rgb_feat_path)
return rgb_feat
@staticmethod
def get_target_object_list(path):
return OmniUtil.get_object_list(path, contains_non_obj=False) # TODO: generalize this
@staticmethod
def get_transform_mat(path):
root, idx = os.path.split(path)
camera_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
with open(camera_params_path, "r") as f:
raw_camera_params = json.load(f)
cam_transform = np.asarray(raw_camera_params["cameraViewTransform"]).reshape(
(4, 4)
)
real_cam_transform = np.linalg.inv(cam_transform).T
real_cam_transform = real_cam_transform.dot(
np.mat([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
)
return real_cam_transform
@staticmethod
def get_intrinsic_matrix(path):
root, idx = os.path.split(path)
camera_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
with open(camera_params_path, "r") as f:
raw_camera_params = json.load(f)
h_aperture = raw_camera_params["cameraAperture"][0]
v_aperture = raw_camera_params["cameraAperture"][1]
focal_length = raw_camera_params["cameraFocalLength"]
h_resolution = raw_camera_params["renderProductResolution"][0]
v_resolution = raw_camera_params["renderProductResolution"][1]
focal_x = h_resolution * focal_length / h_aperture
focal_y = v_resolution * focal_length / v_aperture
center_x = h_resolution / 2
center_y = v_resolution / 2
intrinsic_matrix = np.array(
[
[focal_x, 0, center_x],
[0, focal_y, center_y],
[0, 0, 1],
]
)
return intrinsic_matrix
@staticmethod
def get_extrinsic_matrix(path):
root, idx = os.path.split(path)
camera_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
with open(camera_params_path, "r") as f:
raw_camera_params = json.load(f)
cam_transform = np.asarray(raw_camera_params["cameraViewTransform"]).reshape(
(4, 4)
)
real_cam_transform = np.linalg.inv(cam_transform).T
real_cam_transform = real_cam_transform.dot(
np.mat([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
)
return real_cam_transform
@staticmethod
def get_scene_data(path):
root, _ = os.path.split(path)
scene_data_path = os.path.join(
root, "scene.pickle"
)
with open(scene_data_path, "rb") as f:
scene_data = pickle.load(f)
return scene_data
@staticmethod
def get_o2c_pose(path, object_name):
scene_data = OmniUtil.get_scene_data(path)
cam_pose = OmniUtil.get_extrinsic_matrix(path)
pos = scene_data[object_name]["position"]
quat = scene_data[object_name]["rotation"]
rot = R.from_quat(quat).as_matrix()
obj_pose = np.eye(4)
obj_pose[:3, :3] = rot
obj_pose[:3, 3] = pos
obj_cam_pose = np.linalg.inv(cam_pose) @ obj_pose
return np.asarray(obj_cam_pose)
if __name__ == "__main__":
test_path = r"/mnt/h/AI/Datasets/nbv1/sample_one/scene_0/0050"
obj_list = OmniUtil.get_object_list(test_path, contains_non_obj=True)
print(obj_list)
    total_points, _points_by_object = OmniUtil.get_segmented_points(test_path, target_list=obj_list)
    np.savetxt("pts1.txt", total_points)

78
utils/pcl_util.py Executable file

@@ -0,0 +1,78 @@
import numpy as np
import torch
from scipy.spatial.distance import cdist
class PclUtil:
CHAMFER = 1
@staticmethod
def transform(pts, pose=np.eye(4), scale=np.ones(3), inverse=False):
aug_scale = np.ones(4)
aug_scale[:3] = scale
aug_scale_mat = np.diag(aug_scale)
scale_pose = pose @ aug_scale_mat
aug_pts = np.hstack((pts, np.ones((pts.shape[0], 1))))
if inverse:
scale_pose = np.linalg.inv(scale_pose)
transformed_pts = scale_pose @ aug_pts.T
return transformed_pts.T[:, :3]
@staticmethod
def cam2canonical(cam_pts, cam2canonical_pose):
aug_pts = np.hstack((cam_pts, np.ones((cam_pts.shape[0], 1))))
transformed_pts = cam2canonical_pose @ aug_pts.T
return transformed_pts.T[:, :3]
@staticmethod
def transform_batch(pts, pose, scale, inverse=False):
batch_size = pts.shape[0]
aug_scale_mat = torch.eye(4).unsqueeze(0).repeat(batch_size, 1, 1)
for i in range(3):
aug_scale_mat[..., i, i] = scale[..., i]
scale_pose = pose @ aug_scale_mat
aug_pts = torch.cat((pts, torch.ones_like(pts[..., :1])), dim=-1)
if inverse:
scale_pose = torch.inverse(scale_pose)
        transformed_pts = scale_pose @ aug_pts.transpose(1, 2)
        return transformed_pts.transpose(1, 2)[..., :3]
@staticmethod
def transform_n_batch(pts, pose, scale=None, inverse=False):
transformed_pts_shape = (pts.shape[0], pose.shape[1], pts.shape[1], pts.shape[2])
transformed_pts = np.zeros(transformed_pts_shape)
batch_size = pose.shape[0]
n = pose.shape[1]
if scale is None:
scale = np.ones((batch_size, n, 3))
for batch_i in range(batch_size):
for i in range(n):
transformed_pts[batch_i, i, :, :] = PclUtil.transform(pts[batch_i], pose[batch_i, i],
scale[batch_i, i], inverse)
return transformed_pts
@staticmethod
def chamfer_distance(pts1, pts2):
dist_matrix1 = cdist(pts1, pts2, 'euclidean')
dist_matrix2 = cdist(pts2, pts1, 'euclidean')
chamfer_dist = np.mean(np.min(dist_matrix1, axis=1)) + np.mean(np.min(dist_matrix2, axis=1))
return chamfer_dist
@staticmethod
def distance(pts1, pts2, eval_type=1):
if eval_type == PclUtil.CHAMFER:
return PclUtil.chamfer_distance(pts1, pts2)
else:
            raise ValueError(f'Unknown evaluation type: {eval_type}')
@staticmethod
def sample_pcl(pcl, n_pts=1024):
indices = np.random.choice(pcl.shape[0], n_pts, replace=pcl.shape[0] < n_pts)
return pcl[indices, :]
if __name__ == '__main__':
batch_pts = np.random.random((2, 16, 3))
batch_n_pose = np.random.random((2, 3, 4, 4))
batch_n_scale = np.random.random((2, 3, 3))
    transformed = PclUtil.transform_n_batch(batch_pts, batch_n_pose, batch_n_scale)
    print(transformed.shape)  # (batch, n, num_pts, 3)
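A quick sanity sketch for the Chamfer helper above: the distance from a cloud to itself is zero, and the symmetric form is order-independent:

# Sketch: Chamfer distance sanity checks on random clouds.
pts_a = np.random.random((128, 3))
pts_b = np.random.random((64, 3))
assert PclUtil.chamfer_distance(pts_a, pts_a) == 0.0
assert np.isclose(PclUtil.distance(pts_a, pts_b, eval_type=PclUtil.CHAMFER),
                  PclUtil.distance(pts_b, pts_a, eval_type=PclUtil.CHAMFER))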

188
utils/pose_util.py Executable file

@@ -0,0 +1,188 @@
import numpy as np
import torch
import torch.nn.functional as F
class PoseUtil:
ROTATION = 1
TRANSLATION = 2
SCALE = 3
@staticmethod
def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False):
if isinstance(trans_m_min, list):
x_min, y_min, z_min = trans_m_min
x_max, y_max, z_max = trans_m_max
else:
x_min, y_min, z_min = trans_m_min, trans_m_min, trans_m_min
x_max, y_max, z_max = trans_m_max, trans_m_max, trans_m_max
x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max)
translation = np.array([x, y, z])
if trans_unit == "cm":
translation = translation / 100
if debug:
print("uniform translation:", translation)
return translation
@staticmethod
def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False):
axis = np.random.randn(3)
axis /= np.linalg.norm(axis)
theta = np.random.uniform(rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi)
        # Rodrigues' rotation formula: R = I + sin(theta) * K + (1 - cos(theta)) * K^2
        K = np.array([[0, -axis[2], axis[1]],
                      [axis[2], 0, -axis[0]],
                      [-axis[1], axis[0], 0]])
        R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K)
if debug:
print("uniform rotation:", theta * 180 / np.pi)
return R
@staticmethod
def get_uniform_pose(trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False):
translation = PoseUtil.get_uniform_translation(trans_min, trans_max, trans_unit, debug)
rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
pose = np.eye(4)
pose[:3, :3] = rotation
pose[:3, 3] = translation
return pose
@staticmethod
def get_n_uniform_pose(trans_min, trans_max, rot_min=0, rot_max=180, n=1,
trans_unit="cm", fix=None, contain_canonical=True, debug=False):
if fix == PoseUtil.ROTATION:
translations = np.zeros((n, 3))
for i in range(n):
translations[i] = PoseUtil.get_uniform_translation(trans_min, trans_max, trans_unit, debug)
if contain_canonical:
translations[0] = np.zeros(3)
rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
elif fix == PoseUtil.TRANSLATION:
rotations = np.zeros((n, 3, 3))
for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical:
rotations[0] = np.eye(3)
translations = PoseUtil.get_uniform_translation(trans_min, trans_max, trans_unit, debug)
else:
translations = np.zeros((n, 3))
rotations = np.zeros((n, 3, 3))
for i in range(n):
translations[i] = PoseUtil.get_uniform_translation(trans_min, trans_max, trans_unit, debug)
for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical:
translations[0] = np.zeros(3)
rotations[0] = np.eye(3)
pose = np.eye(4, 4, k=0)[np.newaxis, :].repeat(n, axis=0)
pose[:, :3, :3] = rotations
pose[:, :3, 3] = translations
return pose
@staticmethod
def get_n_uniform_pose_batch(trans_min, trans_max, rot_min=0, rot_max=180, n=1, batch_size=1,
trans_unit="cm", fix=None, contain_canonical=False, debug=False):
batch_poses = []
for i in range(batch_size):
pose = PoseUtil.get_n_uniform_pose(trans_min, trans_max, rot_min, rot_max, n,
trans_unit, fix, contain_canonical, debug)
batch_poses.append(pose)
pose_batch = np.stack(batch_poses, axis=0)
return pose_batch
@staticmethod
def get_uniform_scale(scale_min, scale_max, debug=False):
if isinstance(scale_min, list):
x_min, y_min, z_min = scale_min
x_max, y_max, z_max = scale_max
else:
x_min, y_min, z_min = scale_min, scale_min, scale_min
x_max, y_max, z_max = scale_max, scale_max, scale_max
x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max)
scale = np.array([x, y, z])
if debug:
print("uniform scale:", scale)
return scale
@staticmethod
def normalize_rotation(rotation, rotation_mode):
if rotation_mode == 'quat_wxyz' or rotation_mode == 'quat_xyzw':
rotation /= torch.norm(rotation, dim=-1, keepdim=True)
elif rotation_mode == 'rot_matrix':
rot_matrix = PoseUtil.rotation_6d_to_matrix_tensor_batch(rotation)
rotation[:, :3] = rot_matrix[:, 0, :]
rotation[:, 3:6] = rot_matrix[:, 1, :]
elif rotation_mode == 'euler_xyz_sx_cx':
rot_sin_theta = rotation[:, :3]
rot_cos_theta = rotation[:, 3:6]
theta = torch.atan2(rot_sin_theta, rot_cos_theta)
rotation[:, :3] = torch.sin(theta)
rotation[:, 3:6] = torch.cos(theta)
elif rotation_mode == 'euler_xyz':
pass
else:
raise NotImplementedError
return rotation
@staticmethod
def get_pose_dim(rot_mode):
assert rot_mode in ['quat_wxyz', 'quat_xyzw', 'euler_xyz', 'euler_xyz_sx_cx', 'rot_matrix'], \
f"the rotation mode {rot_mode} is not supported!"
if rot_mode == 'quat_wxyz' or rot_mode == 'quat_xyzw':
pose_dim = 4
elif rot_mode == 'euler_xyz':
pose_dim = 3
elif rot_mode == 'euler_xyz_sx_cx' or rot_mode == 'rot_matrix':
pose_dim = 6
else:
raise NotImplementedError
return pose_dim
@staticmethod
def rotation_6d_to_matrix_tensor_batch(d6: torch.Tensor) -> torch.Tensor:
a1, a2 = d6[..., :3], d6[..., 3:]
b1 = F.normalize(a1, dim=-1)
b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1
b2 = F.normalize(b2, dim=-1)
b3 = torch.cross(b1, b2, dim=-1)
return torch.stack((b1, b2, b3), dim=-2)
@staticmethod
def matrix_to_rotation_6d_tensor_batch(matrix: torch.Tensor) -> torch.Tensor:
batch_dim = matrix.size()[:-2]
return matrix[..., :2, :].clone().reshape(batch_dim + (6,))
@staticmethod
def rotation_6d_to_matrix_numpy(d6):
a1, a2 = d6[:3], d6[3:]
b1 = a1 / np.linalg.norm(a1)
b2 = a2 - np.dot(b1, a2) * b1
b2 = b2 / np.linalg.norm(b2)
b3 = np.cross(b1, b2)
return np.stack((b1, b2, b3),axis=-2)
@staticmethod
def matrix_to_rotation_6d_numpy(matrix):
return np.copy(matrix[:2, :]).reshape((6,))
''' ------------ Debug ------------ '''
if __name__ == '__main__':
for _ in range(1):
PoseUtil.get_uniform_pose(trans_min=[-25, -25, 10], trans_max=[25, 25, 60],
rot_min=0, rot_max=10, debug=True)
PoseUtil.get_uniform_scale(scale_min=0.25, scale_max=0.30, debug=True)
PoseUtil.get_n_uniform_pose_batch(trans_min=[-25, -25, 10], trans_max=[25, 25, 60],
rot_min=0, rot_max=10, batch_size=2, n=2, fix=PoseUtil.TRANSLATION, debug=True)
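A round-trip sketch for the 6D rotation representation used above: the first two rows of a rotation matrix are flattened to six numbers, and Gram-Schmidt plus a cross product recovers the full matrix:

# Sketch: the 6D representation round-trips through the numpy helpers.
R_mat = PoseUtil.get_uniform_rotation()
d6 = PoseUtil.matrix_to_rotation_6d_numpy(R_mat)
R_back = PoseUtil.rotation_6d_to_matrix_numpy(d6)
assert np.allclose(R_mat, R_back, atol=1e-6)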

47
utils/tensorboard_util.py Executable file

@@ -0,0 +1,47 @@
import torch
class TensorboardWriter:
@staticmethod
def write_tensorboard(writer, panel, data_dict, step):
complex_dict = False
if "scalars" in data_dict:
scalar_data_dict = data_dict["scalars"]
TensorboardWriter.write_scalar_tensorboard(writer, panel, scalar_data_dict, step)
complex_dict = True
if "images" in data_dict:
image_data_dict = data_dict["images"]
TensorboardWriter.write_image_tensorboard(writer, panel, image_data_dict, step)
complex_dict = True
if "points" in data_dict:
point_data_dict = data_dict["points"]
TensorboardWriter.write_points_tensorboard(writer, panel, point_data_dict, step)
complex_dict = True
if not complex_dict:
TensorboardWriter.write_scalar_tensorboard(writer, panel, data_dict, step)
@staticmethod
def write_scalar_tensorboard(writer, panel, data_dict, step):
for key, value in data_dict.items():
if isinstance(value, dict):
writer.add_scalars(f'{panel}/{key}', value, step)
else:
writer.add_scalar(f'{panel}/{key}', value, step)
    @staticmethod
    def write_image_tensorboard(writer, panel, data_dict, step):
        pass  # image logging is not implemented yet
    @staticmethod
    def write_points_tensorboard(writer, panel, data_dict, step):
        for key, value in data_dict.items():
            if value.shape[-1] == 3:
                vertices = value
                colors = torch.zeros_like(value)
            elif value.shape[-1] == 6:
                vertices = value[..., :3]
                colors = value[..., 3:]
            else:
                raise ValueError(f'Unexpected value shape: {value.shape}')
            # add_mesh takes vertices (B, N, 3) and per-vertex colors as separate tensors
            writer.add_mesh(f'{panel}/{key}', vertices=vertices, colors=colors, faces=None, global_step=step)
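A usage sketch (log directory, panel name, and tags are illustrative; requires the tensorboard package):

# Sketch: log a scalar dict and a random point cloud in one call.
import torch
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter("runs/debug")
TensorboardWriter.write_tensorboard(
    writer, "train",
    {"scalars": {"loss": 0.42}, "points": {"pred_pcl": torch.rand(1, 1024, 3)}},
    step=0,
)
writer.close()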

239
utils/view_util.py Executable file

@@ -0,0 +1,239 @@
import json
import numpy as np
import requests
import torch
from PIL import Image
from utils.cache_util import LRUCache
class ViewUtil:
view_cache = LRUCache(1024)
    @staticmethod
    def load_camera_pose_from_frame(camera_params_path):
with open(camera_params_path, "r") as f:
camera_params = json.load(f)
view_transform = camera_params["cameraViewTransform"]
view_transform = np.resize(view_transform, (4,4))
view_transform = np.linalg.inv(view_transform).T
offset = np.mat([[1,0,0,0],[0,-1,0,0],[0,0,-1,0],[0,0,0,1]])
view_transform = view_transform.dot(offset)
return view_transform
    @staticmethod
    def save_image(rgb, filename):
if rgb.dtype != np.uint8:
rgb = rgb.astype(np.uint8)
img = Image.fromarray(rgb, 'RGB')
img.save(filename)
    @staticmethod
    def save_depth(depth, filename):
if depth.dtype != np.uint16:
depth = depth.astype(np.uint16)
depth_img = Image.fromarray(depth)
depth_img.save(filename)
    @staticmethod
    def save_segmentation(seg, filename):
if seg.dtype != np.uint8:
seg = seg.astype(np.uint8)
seg_img = Image.fromarray(seg)
seg_img.save(filename)
@staticmethod
    def get_view(camera_pose, source, data_type, scene, port):
camera_pose_tuple = tuple(map(tuple, camera_pose.tolist()))
cache_key = (camera_pose_tuple, source, data_type, scene, port)
cached_result = ViewUtil.view_cache.get(cache_key)
        if cached_result is not None:
print("Cache hit")
return cached_result
url = f"http://127.0.0.1:{port}/get_images"
headers = {
'Content-Type': 'application/json'
}
data = {
'camera_pose': camera_pose.tolist(),
'data_type': data_type,
'source': source,
'scene': scene
}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
results = response.json()
            rgb = np.asarray(results['rgb'], dtype=np.uint8)
            depth = np.asarray(results['depth']) / 1000  # server sends depth scaled by 1000
            seg = np.asarray(results['segmentation'])
seg_labels = results['segmentation_labels']
camera_params = results['camera_params']
ViewUtil.view_cache.put(cache_key, (rgb, depth, seg, seg_labels, camera_params))
return rgb, depth, seg, seg_labels, camera_params
else:
return None
@staticmethod
    def get_object_pose_batch(K, mesh, rgb_batch, depth_batch, mask_batch, gt_pose_batch, port):
url = f"http://127.0.0.1:{port}/predict_estimation_batch"
headers = {
'Content-Type': 'application/json'
}
mesh_data = {
'vertices': mesh.vertices.tolist(),
'faces': mesh.faces.tolist()
}
data = {
'K': K.tolist(),
'rgb_batch': rgb_batch.tolist(),
'depth_batch': depth_batch.tolist(),
'mask_batch': mask_batch.tolist(),
'mesh': mesh_data,
'gt_pose_batch': gt_pose_batch.tolist()
}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
results = response.json()
pose_batch = np.array(results['pose_batch'])
results_batch = results["eval_result_batch"]
return pose_batch, results_batch
else:
return None
@staticmethod
    def get_visualized_result(K, mesh, rgb, pose, port):
url = f"http://127.0.0.1:{port}/get_visualized_result"
headers = {
'Content-Type': 'application/json'
}
mesh_data = {
'vertices': mesh.vertices.tolist(),
'faces': mesh.faces.tolist()
}
data = {
'K': K.tolist(),
'rgb': rgb.tolist(),
'mesh': mesh_data,
'pose': pose.tolist()
}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
results = response.json()
vis_rgb = np.array(results['vis_rgb'])
return vis_rgb
else:
return None
@staticmethod
    def get_object_pose(K, mesh, rgb, depth, mask, gt_pose, port):
url = f"http://127.0.0.1:{port}/predict_estimation"
headers = {
'Content-Type': 'application/json'
}
mesh_data = {
'vertices': mesh.vertices.tolist(),
'faces': mesh.faces.tolist()
}
data = {
'K': K.tolist(),
'rgb': rgb.tolist(),
'depth': depth.tolist(),
'mask': mask.tolist(),
'mesh': mesh_data,
'gt_pose': gt_pose.tolist()
}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
results = response.json()
pose_batch = np.array(results['pose_batch'])
results_batch = results["eval_result_batch"]
return pose_batch, results_batch
else:
return None
    @staticmethod
    def get_pts_dict(depth, seg, seg_labels, camera_params):
cx = camera_params['cx']
cy = camera_params['cy']
fx = camera_params['fx']
fy = camera_params['fy']
width = camera_params['width']
height = camera_params['height']
pts_dict = {name: [] for name in seg_labels.values()}
u = np.arange(width)
v = np.arange(height)
u, v = np.meshgrid(u, v)
Z = depth
X = (u - cx) * Z / fx
Y = (v - cy) * Z / fy
points = np.stack((X, Y, Z), axis=-1).reshape(-1, 3)
labels = seg.reshape(-1)
for label, name in seg_labels.items():
mask = labels == int(label)
pts_dict[name] = points[mask]
return pts_dict
    @staticmethod
    def get_object_center_from_pts_dict(obj, pts_dict):
if obj is None:
for _, pts in pts_dict.items():
if pts.size != 0:
obj_pts = pts
break
else:
obj_pts = pts_dict[obj]
if obj_pts.size == 0:
for _, pts in pts_dict.items():
if pts.size != 0:
obj_pts = pts
break
obj_center = obj_pts.mean(axis=0)
return obj_center
    @staticmethod
    def get_pts_center(pts):
pts_center = pts.mean(axis=0)
return pts_center
    @staticmethod
    def get_scene_pts(pts_dict):
if any(isinstance(pts, torch.Tensor) for pts in pts_dict.values()):
scene_pts = torch.cat([pts for _, pts in pts_dict.items()], dim=0)
return scene_pts
else:
scene_pts = np.concatenate([pts for _, pts in pts_dict.items()])
return scene_pts
    @staticmethod
    def crop_pts(scene_pts, crop_center, radius=0.2):
if isinstance(scene_pts, torch.Tensor):
crop_mask = torch.norm(scene_pts - crop_center, dim=1) < radius
return scene_pts[crop_mask]
else:
crop_mask = np.linalg.norm(scene_pts - crop_center, axis=1) < radius
return scene_pts[crop_mask]
    @staticmethod
    def crop_pts_dict(pts_dict, crop_center, radius=0.2, min_pts_num=5000):
        crop_dict = {}
        max_loop = 100
        loop = 0
        while loop <= max_loop:
            cropped_length = 0
            for obj, pts in pts_dict.items():
                if isinstance(pts, torch.Tensor):
                    crop_mask = torch.norm(pts - crop_center, dim=1) < radius
                else:
                    crop_mask = np.linalg.norm(pts - crop_center, axis=1) < radius
                crop_dict[obj] = pts[crop_mask]
                cropped_length += crop_dict[obj].shape[0]
            if cropped_length >= min_pts_num:
                break
            radius += 0.02  # grow the crop sphere until enough points are captured
            loop += 1
        return crop_dict
    @staticmethod
    def get_cam_pose_focused_on_point(point_w, cam_pose_w, old_camera_center_w):
        distance = np.linalg.norm(point_w - old_camera_center_w)
        z_axis_camera = cam_pose_w[:3, 2].reshape(-1)
        new_camera_position_w = point_w - distance * z_axis_camera
        new_camera_pose_w = cam_pose_w.copy()
        new_camera_pose_w[:3, 3] = new_camera_position_w.reshape(3)  # the slice is (3,), not (3, 1)
        return new_camera_pose_w
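A sketch of how these pieces compose (the source, data_type, scene name, and port are assumptions; this requires a running render service on localhost):

# Sketch: fetch a view, back-project it, and crop around an object of interest.
import numpy as np
result = ViewUtil.get_view(np.eye(4), source="sim", data_type="rgb", scene="scene_0", port=8888)
if result is not None:
    rgb, depth, seg, seg_labels, camera_params = result
    pts_dict = ViewUtil.get_pts_dict(depth, seg, seg_labels, camera_params)
    center = ViewUtil.get_object_center_from_pts_dict(None, pts_dict)
    cropped = ViewUtil.crop_pts_dict(pts_dict, center, radius=0.2, min_pts_num=5000)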