first commit

2025-05-13 09:03:38 +08:00
commit b98753bfbb
121 changed files with 8665 additions and 0 deletions

7 binary files not shown.
utils/control.py (new file, 59 lines)

@@ -0,0 +1,59 @@
import numpy as np
class ControlUtil:
curr_rotation = 0
@staticmethod
def check_limit(new_cam_to_world):
        if new_cam_to_world[0, 3] < 0 or new_cam_to_world[1, 3] > 0:
            return False
        x = abs(new_cam_to_world[0, 3])
        y = abs(new_cam_to_world[1, 3])
        # compare the azimuth directly; arctan2 avoids division by zero when x == 0
        angle = np.arctan2(y, x)
        min_angle = np.radians(0)
        max_angle = np.radians(90)
        if angle < min_angle or angle > max_angle:
            return False
return True
@staticmethod
def solve_display_table_rot_and_cam_to_world(cam_to_world: np.ndarray) -> tuple:
if ControlUtil.check_limit(cam_to_world):
return 0, cam_to_world
else:
            min_display_table_rot = 180
            min_new_cam_to_world = None
            for display_table_rot in np.linspace(0.1, 360, 1800):
                new_world_to_world = ControlUtil.get_z_axis_rot_mat(display_table_rot)
                new_cam_to_world = new_world_to_world @ cam_to_world
                if ControlUtil.check_limit(new_cam_to_world):
                    # keep the candidate with the smallest absolute rotation;
                    # rotations near 360 deg count as small negative rotations
                    if display_table_rot < abs(min_display_table_rot):
                        min_display_table_rot, min_new_cam_to_world = display_table_rot, new_cam_to_world
                    if abs(display_table_rot - 360) < abs(min_display_table_rot):
                        min_display_table_rot, min_new_cam_to_world = display_table_rot - 360, new_cam_to_world
if min_new_cam_to_world is None:
raise ValueError("No valid display table rotation found")
delta_degree = min_display_table_rot - ControlUtil.curr_rotation
ControlUtil.curr_rotation = min_display_table_rot
return delta_degree, min_new_cam_to_world
@staticmethod
def get_z_axis_rot_mat(degree):
radian = np.radians(degree)
return np.array([
[np.cos(radian), -np.sin(radian), 0, 0],
[np.sin(radian), np.cos(radian), 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]
])
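Note: a minimal usage sketch for the solver above, with a hypothetical camera pose (not from this commit). When the pose violates the workspace limit, the solver searches table rotations and returns the signed rotation delta together with the corrected pose:

import numpy as np
from utils.control import ControlUtil

# hypothetical camera-to-world pose that violates the limit (x < 0)
cam_to_world = np.eye(4)
cam_to_world[:3, 3] = [-0.3, -0.2, 0.5]

delta_degree, fixed_pose = ControlUtil.solve_display_table_rot_and_cam_to_world(cam_to_world)
print(delta_degree)       # signed table rotation to apply, in degrees
print(fixed_pose[:3, 3])  # camera position expressed in the rotated world frame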

utils/data_load.py (new file, 391 lines)

@@ -0,0 +1,391 @@
import os
import numpy as np
import json
import cv2
import trimesh
import torch
import OpenEXR
from utils.pts import PtsUtil
class DataLoadUtil:
TABLE_POSITION = np.asarray([0, 0, 0.8215])
@staticmethod
def load_exr_image(file_path):
exr_file = OpenEXR.InputFile(file_path)
header = exr_file.header()
dw = header['dataWindow']
width = dw.max.x - dw.min.x + 1
height = dw.max.y - dw.min.y + 1
float_channels = ['R', 'G', 'B']
img_data = []
for channel in float_channels:
channel_data = exr_file.channel(channel)
img_data.append(np.frombuffer(channel_data, dtype=np.float16).reshape((height, width)))
img = np.stack(img_data, axis=-1)
return img
@staticmethod
def get_display_table_info(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
display_table_info = scene_info["display_table"]
return display_table_info
@staticmethod
def get_display_table_top(root, scene_name):
display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
"height"
]
display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
[0, 0, display_table_height]
)
return display_table_top
@staticmethod
def get_path(root, scene_name, frame_idx):
path = os.path.join(root, scene_name, f"{frame_idx}")
return path
@staticmethod
def get_label_num(root, scene_name):
label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir):
return 0
return len(os.listdir(label_dir))
@staticmethod
def get_label_path(root, scene_name, seq_idx):
label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir):
os.makedirs(label_dir)
path = os.path.join(label_dir, f"{seq_idx}.json")
return path
@staticmethod
def get_scene_seq_length(root, scene_name):
camera_params_path = os.path.join(root, scene_name, "camera_params")
return len(os.listdir(camera_params_path))
@staticmethod
def load_mesh_at(model_dir, object_name, world_object_pose):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
mesh.apply_transform(world_object_pose)
return mesh
@staticmethod
def get_bbox_diag(model_dir, object_name):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
bbox = mesh.bounding_box.extents
diagonal_length = np.linalg.norm(bbox)
return diagonal_length
@staticmethod
def load_scene_info(root, scene_name):
scene_info_path = os.path.join(root, scene_name, "scene_info.json")
with open(scene_info_path, "r") as f:
scene_info = json.load(f)
return scene_info
@staticmethod
def load_target_pts_num_dict(root, scene_name):
target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
with open(target_pts_num_path, "r") as f:
target_pts_num_dict = json.load(f)
return target_pts_num_dict
@staticmethod
def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
        def load_depth_from_real_path(real_path, min_depth, max_depth):
            depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
            # 16-bit depth PNGs store normalized values; map [0, 65535] back to meters
            depth = depth.astype(np.float32) / 65535.0
            depth_meters = min_depth + (max_depth - min_depth) * depth
            return depth_meters
if binocular:
depth_path_L = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
)
depth_path_R = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
)
depth_meters_L = load_depth_from_real_path(
depth_path_L, min_depth, max_depth
)
depth_meters_R = load_depth_from_real_path(
depth_path_R, min_depth, max_depth
)
return depth_meters_L, depth_meters_R
else:
depth_path = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + ".png"
)
depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
return depth_meters
@staticmethod
def load_seg(path, binocular=False, left_only=False):
if binocular and not left_only:
            def clean_mask(mask_image):
                green = [0, 255, 0]
                red = [255, 0, 0]
                threshold = 2
                # snap pixels within the threshold of a label color back to the exact color
                mask_image = np.where(
                    np.abs(mask_image - green) <= threshold, green, mask_image
                )
                mask_image = np.where(
                    np.abs(mask_image - red) <= threshold, red, mask_image
                )
                return mask_image
mask_path_L = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
mask_path_R = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
)
mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
return mask_image_L, mask_image_R
else:
if binocular and left_only:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
else:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + ".png"
)
mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
return mask_image
@staticmethod
def load_normal(path, binocular=False, left_only=False, file_type="exr"):
if binocular and not left_only:
normal_path_L = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
)
normal_image_L = DataLoadUtil.load_exr_image(normal_path_L)
normal_path_R = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f"_R.{file_type}"
)
normal_image_R = DataLoadUtil.load_exr_image(normal_path_R)
normalized_normal_image_L = normal_image_L * 2.0 - 1.0
normalized_normal_image_R = normal_image_R * 2.0 - 1.0
return normalized_normal_image_L, normalized_normal_image_R
else:
if binocular and left_only:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
)
else:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f".{file_type}"
)
normal_image = DataLoadUtil.load_exr_image(normal_path)
normalized_normal_image = normal_image * 2.0 - 1.0
return normalized_normal_image
@staticmethod
def load_label(path):
with open(path, "r") as f:
label_data = json.load(f)
return label_data
@staticmethod
def load_from_preprocessed_pts(path, file_type="npy"):
npy_path = os.path.join(
os.path.dirname(path), "pts", os.path.basename(path) + "." + file_type
)
if file_type == "txt":
pts = np.loadtxt(npy_path)
else:
pts = np.load(npy_path)
return pts
@staticmethod
def load_from_preprocessed_nrm(path, file_type="npy"):
npy_path = os.path.join(
os.path.dirname(path), "nrm", os.path.basename(path) + "." + file_type
)
if file_type == "txt":
nrm = np.loadtxt(npy_path)
else:
nrm = np.load(npy_path)
return nrm
@staticmethod
    def cam_pose_transformation(cam_pose_before):
        # diag(1, -1, -1) axis flip: converts between OpenGL/Blender and OpenCV camera conventions
        offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
        cam_pose_after = cam_pose_before @ offset
        return cam_pose_after
@staticmethod
def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
scene_dir = os.path.dirname(path)
root_dir = os.path.dirname(scene_dir)
scene_name = os.path.basename(scene_dir)
camera_params_path = os.path.join(
os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
)
with open(camera_params_path, "r") as f:
label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
if display_table_as_world_space_origin:
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"])
cam_info = {
"cam_to_world": cam_to_world,
"cam_intrinsic": cam_intrinsic,
"far_plane": label_data["far_plane"],
"near_plane": label_data["near_plane"],
}
if binocular:
cam_to_world_R = np.asarray(label_data["extrinsic_R"])
cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
if display_table_as_world_space_origin:
cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
cam_info["cam_to_world_O"] = cam_to_world_O
cam_info["cam_to_world_R"] = cam_to_world_R
return cam_info
@staticmethod
def get_real_cam_O_from_cam_L(
cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
):
root_dir = os.path.dirname(scene_path)
scene_name = os.path.basename(scene_path)
if isinstance(cam_L, torch.Tensor):
cam_L = cam_L.cpu().numpy()
nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
if display_table_as_world_space_origin:
display_table_to_world = np.eye(4)
display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
return nO_to_world_pose
@staticmethod
def get_target_point_cloud(
depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
mask = mask.reshape(-1, 4)
target_mask = (mask == target_mask_label).all(axis=-1)
target_points_camera = points_camera[target_mask]
target_points_camera_aug = np.concatenate(
[target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
)
target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
data = {
"points_world": target_points_world,
"points_camera": target_points_camera,
}
return data
@staticmethod
def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate(
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
)
points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return {"points_world": points_world, "points_camera": points_camera}
@staticmethod
def get_target_point_cloud_world_from_path(
path,
binocular=False,
random_downsample_N=65536,
voxel_size=0.005,
target_mask_label=(0, 255, 0, 255),
display_table_mask_label=(0, 0, 255, 255),
get_display_table_pts=False,
require_normal=False,
):
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
if binocular:
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
point_cloud_L = DataLoadUtil.get_target_point_cloud(
depth_L,
cam_info["cam_intrinsic"],
cam_info["cam_to_world"],
mask_L,
target_mask_label,
)["points_world"]
point_cloud_R = DataLoadUtil.get_target_point_cloud(
depth_R,
cam_info["cam_intrinsic"],
cam_info["cam_to_world_R"],
mask_R,
target_mask_label,
)["points_world"]
point_cloud_L = PtsUtil.random_downsample_point_cloud(
point_cloud_L, random_downsample_N
)
point_cloud_R = PtsUtil.random_downsample_point_cloud(
point_cloud_R, random_downsample_N
)
overlap_points = PtsUtil.get_overlapping_points(
point_cloud_L, point_cloud_R, voxel_size
)
return overlap_points
else:
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"]
)
mask = DataLoadUtil.load_seg(path)
point_cloud = DataLoadUtil.get_target_point_cloud(
depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
)["points_world"]
return point_cloud
@staticmethod
def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
points_path = os.path.join(root, scene_name, "points_and_normals.txt")
points_normals = np.loadtxt(points_path)
if display_table_as_world_space_origin:
points_normals[:, :3] = points_normals[
:, :3
] - DataLoadUtil.get_display_table_top(root, scene_name)
return points_normals
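Note: a sketch of the intended loading flow, assuming a dataset laid out the way these helpers expect (<root>/<scene>/depth/<frame>.png, mask/, camera_params/<frame>.json, scene_info.json); the paths here are hypothetical:

from utils.data_load import DataLoadUtil

root, scene = "/data/nbv_dataset", "scene_0001"  # hypothetical locations
path = DataLoadUtil.get_path(root, scene, 0)

cam_info = DataLoadUtil.load_cam_info(path, binocular=False)
depth = DataLoadUtil.load_depth(path, cam_info["near_plane"], cam_info["far_plane"])
mask = DataLoadUtil.load_seg(path)
# back-project masked depth pixels through the intrinsics, then transform to world space
pts = DataLoadUtil.get_target_point_cloud(
    depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
)["points_world"]
print(pts.shape)  # (N, 3), in display-table-centered world coordinates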

utils/pose.py (new file, 253 lines)

@@ -0,0 +1,253 @@
import numpy as np
import torch
import torch.nn.functional as F
class PoseUtil:
ROTATION = 1
TRANSLATION = 2
SCALE = 3
@staticmethod
def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False):
if isinstance(trans_m_min, list):
x_min, y_min, z_min = trans_m_min
x_max, y_max, z_max = trans_m_max
else:
x_min, y_min, z_min = trans_m_min, trans_m_min, trans_m_min
x_max, y_max, z_max = trans_m_max, trans_m_max, trans_m_max
x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max)
translation = np.array([x, y, z])
if trans_unit == "cm":
translation = translation / 100
if debug:
print("uniform translation:", translation)
return translation
@staticmethod
def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False):
axis = np.random.randn(3)
axis /= np.linalg.norm(axis)
theta = np.random.uniform(
rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi
)
K = np.array(
[[0, -axis[2], axis[1]], [axis[2], 0, -axis[0]], [-axis[1], axis[0], 0]]
)
R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K)
if debug:
print("uniform rotation:", theta * 180 / np.pi)
return R
@staticmethod
def get_uniform_pose(
trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False
):
translation = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug
)
rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
pose = np.eye(4)
pose[:3, :3] = rotation
pose[:3, 3] = translation
return pose
@staticmethod
def get_n_uniform_pose(
trans_min,
trans_max,
rot_min=0,
rot_max=180,
n=1,
trans_unit="cm",
fix=None,
contain_canonical=True,
debug=False,
):
if fix == PoseUtil.ROTATION:
translations = np.zeros((n, 3))
for i in range(n):
translations[i] = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug
)
if contain_canonical:
translations[0] = np.zeros(3)
rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
elif fix == PoseUtil.TRANSLATION:
rotations = np.zeros((n, 3, 3))
for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical:
rotations[0] = np.eye(3)
translations = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug
)
else:
translations = np.zeros((n, 3))
rotations = np.zeros((n, 3, 3))
for i in range(n):
translations[i] = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug
)
for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical:
translations[0] = np.zeros(3)
rotations[0] = np.eye(3)
        pose = np.tile(np.eye(4), (n, 1, 1))
pose[:, :3, :3] = rotations
pose[:, :3, 3] = translations
return pose
@staticmethod
def get_n_uniform_pose_batch(
trans_min,
trans_max,
rot_min=0,
rot_max=180,
n=1,
batch_size=1,
trans_unit="cm",
fix=None,
contain_canonical=False,
debug=False,
):
batch_poses = []
for i in range(batch_size):
pose = PoseUtil.get_n_uniform_pose(
trans_min,
trans_max,
rot_min,
rot_max,
n,
trans_unit,
fix,
contain_canonical,
debug,
)
batch_poses.append(pose)
pose_batch = np.stack(batch_poses, axis=0)
return pose_batch
@staticmethod
def get_uniform_scale(scale_min, scale_max, debug=False):
if isinstance(scale_min, list):
x_min, y_min, z_min = scale_min
x_max, y_max, z_max = scale_max
else:
x_min, y_min, z_min = scale_min, scale_min, scale_min
x_max, y_max, z_max = scale_max, scale_max, scale_max
x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max)
scale = np.array([x, y, z])
if debug:
print("uniform scale:", scale)
return scale
@staticmethod
def normalize_rotation(rotation, rotation_mode):
if rotation_mode == "quat_wxyz" or rotation_mode == "quat_xyzw":
rotation /= torch.norm(rotation, dim=-1, keepdim=True)
elif rotation_mode == "rot_matrix":
rot_matrix = PoseUtil.rotation_6d_to_matrix_tensor_batch(rotation)
rotation[:, :3] = rot_matrix[:, 0, :]
rotation[:, 3:6] = rot_matrix[:, 1, :]
elif rotation_mode == "euler_xyz_sx_cx":
rot_sin_theta = rotation[:, :3]
rot_cos_theta = rotation[:, 3:6]
theta = torch.atan2(rot_sin_theta, rot_cos_theta)
rotation[:, :3] = torch.sin(theta)
rotation[:, 3:6] = torch.cos(theta)
elif rotation_mode == "euler_xyz":
pass
else:
raise NotImplementedError
return rotation
@staticmethod
def get_pose_dim(rot_mode):
assert rot_mode in [
"quat_wxyz",
"quat_xyzw",
"euler_xyz",
"euler_xyz_sx_cx",
"rot_matrix",
], f"the rotation mode {rot_mode} is not supported!"
if rot_mode == "quat_wxyz" or rot_mode == "quat_xyzw":
pose_dim = 7
elif rot_mode == "euler_xyz":
pose_dim = 6
elif rot_mode == "euler_xyz_sx_cx" or rot_mode == "rot_matrix":
pose_dim = 9
else:
raise NotImplementedError
return pose_dim
@staticmethod
def rotation_6d_to_matrix_tensor_batch(d6: torch.Tensor) -> torch.Tensor:
a1, a2 = d6[..., :3], d6[..., 3:]
b1 = F.normalize(a1, dim=-1)
b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1
b2 = F.normalize(b2, dim=-1)
b3 = torch.cross(b1, b2, dim=-1)
return torch.stack((b1, b2, b3), dim=-2)
@staticmethod
def matrix_to_rotation_6d_tensor_batch(matrix: torch.Tensor) -> torch.Tensor:
batch_dim = matrix.size()[:-2]
return matrix[..., :2, :].clone().reshape(batch_dim + (6,))
@staticmethod
def rotation_6d_to_matrix_numpy(d6):
a1, a2 = d6[:3], d6[3:]
b1 = a1 / np.linalg.norm(a1)
b2 = a2 - np.dot(b1, a2) * b1
b2 = b2 / np.linalg.norm(b2)
b3 = np.cross(b1, b2)
return np.stack((b1, b2, b3), axis=-2)
@staticmethod
def matrix_to_rotation_6d_numpy(matrix):
return np.copy(matrix[:2, :]).reshape((6,))
@staticmethod
def rotation_angle_distance(R1, R2):
R = torch.matmul(R1, R2.transpose(1, 2))
trace = torch.diagonal(R, dim1=1, dim2=2).sum(-1)
        angle = torch.acos(torch.clamp((trace - 1) / 2, -1.0, 1.0)) / torch.pi * 180
return angle
""" ------------ Debug ------------ """
if __name__ == "__main__":
for _ in range(1):
PoseUtil.get_uniform_pose(
trans_min=[-25, -25, 10],
trans_max=[25, 25, 60],
rot_min=0,
rot_max=10,
debug=True,
)
PoseUtil.get_uniform_scale(scale_min=0.25, scale_max=0.30, debug=True)
PoseUtil.get_n_uniform_pose_batch(
trans_min=[-25, -25, 10],
trans_max=[25, 25, 60],
rot_min=0,
rot_max=10,
batch_size=2,
n=2,
fix=PoseUtil.TRANSLATION,
debug=True,
)
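Note: the 6D rotation representation used above stores the first two rows of the rotation matrix; rotation_6d_to_matrix re-orthonormalizes them by Gram-Schmidt and recovers the third row with a cross product, so the round trip is exact for a valid rotation. A quick sanity check:

import numpy as np
from utils.pose import PoseUtil

R = PoseUtil.get_uniform_rotation(0, 180)         # random rotation matrix
d6 = PoseUtil.matrix_to_rotation_6d_numpy(R)      # first two rows, flattened to 6 values
R_rec = PoseUtil.rotation_6d_to_matrix_numpy(d6)  # Gram-Schmidt reconstruction
print(np.allclose(R, R_rec, atol=1e-6))           # True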

utils/pts.py (new file, 117 lines)

@@ -0,0 +1,117 @@
import numpy as np
import open3d as o3d
import torch
class PtsUtil:
@staticmethod
def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005, require_idx=False):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
if require_idx:
_, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
return downsampled_points, idx_unique
        else:
            unique_voxels = np.unique(voxel_indices, axis=0)
            # return voxel-grid coordinates (one representative point per occupied voxel)
            return unique_voxels * voxel_size
@staticmethod
def voxel_downsample_point_cloud_random(point_cloud, voxel_size=0.005, require_idx=False):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
        if require_idx:
            # note: returns the per-point voxel assignment (inverse mapping), not the kept indices
            return downsampled_points, inverse
        return downsampled_points
@staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False):
        if point_cloud.shape[0] == 0:
            if require_idx:
                return point_cloud, np.array([], dtype=int)
            return point_cloud
        # sample with replacement only when fewer points than requested are available
        idx = np.random.choice(len(point_cloud), num_points, replace=len(point_cloud) < num_points)
if require_idx:
return point_cloud[idx], idx
return point_cloud[idx]
@staticmethod
def fps_downsample_point_cloud(point_cloud, num_points, require_idx=False):
N = point_cloud.shape[0]
mask = np.zeros(N, dtype=bool)
sampled_indices = np.zeros(num_points, dtype=int)
sampled_indices[0] = np.random.randint(0, N)
distances = np.linalg.norm(point_cloud - point_cloud[sampled_indices[0]], axis=1)
for i in range(1, num_points):
farthest_index = np.argmax(distances)
sampled_indices[i] = farthest_index
mask[farthest_index] = True
new_distances = np.linalg.norm(point_cloud - point_cloud[farthest_index], axis=1)
distances = np.minimum(distances, new_distances)
sampled_points = point_cloud[sampled_indices]
if require_idx:
return sampled_points, sampled_indices
return sampled_points
@staticmethod
def random_downsample_point_cloud_tensor(point_cloud, num_points):
idx = torch.randint(0, len(point_cloud), (num_points,))
return point_cloud[idx]
@staticmethod
def voxelize_points(points, voxel_size):
voxel_indices = np.floor(points / voxel_size).astype(np.int32)
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
return unique_voxels
@staticmethod
def transform_point_cloud(points, pose_mat):
points_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1)
points_h = np.dot(pose_mat, points_h.T).T
return points_h[:, :3]
@staticmethod
    def get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005, require_idx=False):
        voxels_L, indices_L = PtsUtil.voxelize_points(point_cloud_L, voxel_size)
        voxels_R, _ = PtsUtil.voxelize_points(point_cloud_R, voxel_size)
        # view each (x, y, z) voxel index as one structured record so that
        # np.intersect1d matches whole voxels rather than individual coordinates
        voxel_indices_L = voxels_L.view([("", voxels_L.dtype)] * 3)
        voxel_indices_R = voxels_R.view([("", voxels_R.dtype)] * 3)
        overlapping_voxels = np.intersect1d(voxel_indices_L, voxel_indices_R)
mask_L = np.isin(
indices_L, np.where(np.isin(voxel_indices_L, overlapping_voxels))[0]
)
overlapping_points = point_cloud_L[mask_L]
if require_idx:
return overlapping_points, mask_L
return overlapping_points
@staticmethod
def filter_points(points, normals, cam_pose, theta_limit=45, z_range=(0.2, 0.45)):
""" filter with normal """
normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
cos_theta = np.dot(normals_normalized, np.array([0, 0, 1]))
theta = np.arccos(cos_theta) * 180 / np.pi
idx = theta < theta_limit
filtered_sampled_points = points[idx]
filtered_normals = normals[idx]
""" filter with z range """
points_cam = PtsUtil.transform_point_cloud(filtered_sampled_points, np.linalg.inv(cam_pose))
idx = (points_cam[:, 2] > z_range[0]) & (points_cam[:, 2] < z_range[1])
z_filtered_points = filtered_sampled_points[idx]
z_filtered_normals = filtered_normals[idx]
return z_filtered_points[:, :3], z_filtered_normals
@staticmethod
def point_to_hash(point, voxel_size):
return tuple(np.floor(point / voxel_size).astype(int))
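Note: get_overlapping_points keeps the points of the left cloud whose voxels are also occupied by the right cloud. A self-contained sketch on synthetic data:

import numpy as np
from utils.pts import PtsUtil

shared = np.random.rand(500, 3)                       # points seen by both clouds
left = np.vstack([shared, np.random.rand(500, 3) + 5.0])
right = np.vstack([shared, np.random.rand(500, 3) - 5.0])

overlap = PtsUtil.get_overlapping_points(left, right, voxel_size=0.01)
print(overlap.shape)  # roughly the 500 shared points (plus voxel-boundary neighbors)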

utils/reconstruction.py (new file, 267 lines)

@@ -0,0 +1,267 @@
import numpy as np
from scipy.spatial import cKDTree
from utils.pts import PtsUtil
class ReconstructionUtil:
@staticmethod
def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
kdtree = cKDTree(combined_point_cloud)
distances, _ = kdtree.query(target_point_cloud)
covered_points_num = np.sum(distances < threshold*2)
coverage_rate = covered_points_num / target_point_cloud.shape[0]
return coverage_rate, covered_points_num
@staticmethod
def compute_coverage_rate_with_normal(target_point_cloud, combined_point_cloud, target_normal, combined_normal, threshold=0.01, normal_threshold=0.1):
kdtree = cKDTree(combined_point_cloud)
distances, indices = kdtree.query(target_point_cloud)
is_covered_by_distance = distances < threshold*2
normal_dots = np.einsum('ij,ij->i', target_normal, combined_normal[indices])
is_covered_by_normal = normal_dots > normal_threshold
covered_points_num = np.sum(is_covered_by_distance & is_covered_by_normal)
coverage_rate = covered_points_num / target_point_cloud.shape[0]
return coverage_rate, covered_points_num
@staticmethod
def check_overlap(new_point_cloud, combined_point_cloud, overlap_area_threshold=25, voxel_size=0.01, require_new_added_pts_num=False):
kdtree = cKDTree(combined_point_cloud)
distances, _ = kdtree.query(new_point_cloud)
        overlapping_points_num = np.sum(distances < voxel_size*2)
        # approximate overlap area in cm^2: each overlapping point contributes one voxel footprint
        voxel_size_cm = voxel_size / 0.01
        overlap_area = overlapping_points_num * voxel_size_cm * voxel_size_cm
if require_new_added_pts_num:
return overlap_area > overlap_area_threshold, len(new_point_cloud)-np.sum(distances < voxel_size*1.2)
return overlap_area > overlap_area_threshold
@staticmethod
def get_new_added_points(old_combined_pts, new_pts, threshold=0.005):
if old_combined_pts.size == 0:
return new_pts
if new_pts.size == 0:
return np.array([])
tree = cKDTree(old_combined_pts)
distances, _ = tree.query(new_pts, k=1)
new_added_points = new_pts[distances > threshold]
return new_added_points
@staticmethod
def compute_next_best_view_sequence(target_point_cloud, point_cloud_list, scan_points_indices_list, threshold=0.01, overlap_area_threshold=25, init_view = 0, scan_points_threshold=5, status_info=None):
selected_views = [init_view]
combined_point_cloud = point_cloud_list[init_view]
history_indices = [scan_points_indices_list[init_view]]
max_rec_pts = np.vstack(point_cloud_list)
downsampled_max_rec_pts = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold)
combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud, threshold)
max_rec_pts_num = downsampled_max_rec_pts.shape[0]
max_real_rec_pts_coverage, _ = ReconstructionUtil.compute_coverage_rate(target_point_cloud, downsampled_max_rec_pts, threshold)
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, combined_point_cloud, threshold)
current_coverage = new_coverage
current_covered_num = new_covered_num
remaining_views = list(range(len(point_cloud_list)))
view_sequence = [(init_view, current_coverage)]
cnt_processed_view = 0
remaining_views.remove(init_view)
curr_rec_pts_num = combined_point_cloud.shape[0]
        # randomly skip a fraction of candidate views each round to speed up the greedy search
        drop_output_ratio = 0.4
while remaining_views:
best_view = None
best_coverage_increase = -1
best_combined_point_cloud = None
best_covered_num = 0
for view_index in remaining_views:
if np.random.rand() < drop_output_ratio:
continue
if point_cloud_list[view_index].shape[0] == 0:
continue
if selected_views:
new_scan_points_indices = scan_points_indices_list[view_index]
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
if not ReconstructionUtil.check_overlap(point_cloud_list[view_index], combined_point_cloud, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=threshold):
continue
new_combined_point_cloud = np.vstack([combined_point_cloud, point_cloud_list[view_index]])
new_downsampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold)
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, threshold)
coverage_increase = new_coverage - current_coverage
if coverage_increase > best_coverage_increase:
best_coverage_increase = coverage_increase
best_view = view_index
best_covered_num = new_covered_num
best_combined_point_cloud = new_downsampled_combined_point_cloud
if best_view is not None:
if best_coverage_increase <=1e-3 or best_covered_num - current_covered_num <= 5:
break
selected_views.append(best_view)
best_rec_pts_num = best_combined_point_cloud.shape[0]
print(f"Current rec pts num: {curr_rec_pts_num}, Best rec pts num: {best_rec_pts_num}, Best cover pts: {best_covered_num}, Max rec pts num: {max_rec_pts_num}")
print(f"Current coverage: {current_coverage+best_coverage_increase}, Best coverage increase: {best_coverage_increase}, Max Real coverage: {max_real_rec_pts_coverage}")
current_covered_num = best_covered_num
curr_rec_pts_num = best_rec_pts_num
combined_point_cloud = best_combined_point_cloud
remaining_views.remove(best_view)
history_indices.append(scan_points_indices_list[best_view])
current_coverage += best_coverage_increase
cnt_processed_view += 1
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_status(app_name, runner_name, "current coverage", current_coverage)
sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list))
view_sequence.append((best_view, current_coverage))
else:
break
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list))
return view_sequence, remaining_views, combined_point_cloud
@staticmethod
def compute_next_best_view_sequence_with_normal(target_point_cloud, target_normal, point_cloud_list, normal_list, scan_points_indices_list, threshold=0.01, overlap_area_threshold=25, init_view = 0, scan_points_threshold=5, status_info=None):
selected_views = [init_view]
combined_point_cloud = point_cloud_list[init_view]
combined_normal = normal_list[init_view]
history_indices = [scan_points_indices_list[init_view]]
max_rec_pts = np.vstack(point_cloud_list)
max_rec_nrm = np.vstack(normal_list)
downsampled_max_rec_pts, idx = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold, require_idx=True)
downsampled_max_rec_nrm = max_rec_nrm[idx]
max_rec_pts_num = downsampled_max_rec_pts.shape[0]
        max_real_rec_pts_coverage, _ = ReconstructionUtil.compute_coverage_rate_with_normal(target_point_cloud, downsampled_max_rec_pts, target_normal, downsampled_max_rec_nrm, threshold)
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate_with_normal(downsampled_max_rec_pts, combined_point_cloud, downsampled_max_rec_nrm, combined_normal, threshold)
current_coverage = new_coverage
current_covered_num = new_covered_num
remaining_views = list(range(len(point_cloud_list)))
view_sequence = [(init_view, current_coverage)]
cnt_processed_view = 0
remaining_views.remove(init_view)
curr_rec_pts_num = combined_point_cloud.shape[0]
while remaining_views:
best_view = None
best_coverage_increase = -1
best_combined_point_cloud = None
best_combined_normal = None
best_covered_num = 0
for view_index in remaining_views:
if point_cloud_list[view_index].shape[0] == 0:
continue
if selected_views:
new_scan_points_indices = scan_points_indices_list[view_index]
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
if not ReconstructionUtil.check_overlap(point_cloud_list[view_index], combined_point_cloud, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=threshold):
continue
new_combined_point_cloud = np.vstack([combined_point_cloud, point_cloud_list[view_index]])
new_combined_normal = np.vstack([combined_normal, normal_list[view_index]])
new_downsampled_combined_point_cloud, idx = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold, require_idx=True)
new_downsampled_combined_normal = new_combined_normal[idx]
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate_with_normal(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, downsampled_max_rec_nrm, new_downsampled_combined_normal, threshold)
coverage_increase = new_coverage - current_coverage
if coverage_increase > best_coverage_increase:
best_coverage_increase = coverage_increase
best_view = view_index
best_covered_num = new_covered_num
best_combined_point_cloud = new_downsampled_combined_point_cloud
best_combined_normal = new_downsampled_combined_normal
if best_view is not None:
if best_coverage_increase <=1e-3 or best_covered_num - current_covered_num <= 5:
break
selected_views.append(best_view)
best_rec_pts_num = best_combined_point_cloud.shape[0]
print(f"Current rec pts num: {curr_rec_pts_num}, Best rec pts num: {best_rec_pts_num}, Best cover pts: {best_covered_num}, Max rec pts num: {max_rec_pts_num}")
print(f"Current coverage: {current_coverage}, Best coverage increase: {best_coverage_increase}, Max Real coverage: {max_real_rec_pts_coverage}")
current_covered_num = best_covered_num
curr_rec_pts_num = best_rec_pts_num
combined_point_cloud = best_combined_point_cloud
combined_normal = best_combined_normal
remaining_views.remove(best_view)
history_indices.append(scan_points_indices_list[best_view])
current_coverage += best_coverage_increase
cnt_processed_view += 1
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_status(app_name, runner_name, "current coverage", current_coverage)
sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list))
view_sequence.append((best_view, current_coverage))
else:
break
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list))
return view_sequence, remaining_views, combined_point_cloud
@staticmethod
def generate_scan_points(display_table_top, display_table_radius, min_distance=0.03, max_points_num = 500, max_attempts = 1000):
points = []
attempts = 0
while len(points) < max_points_num and attempts < max_attempts:
angle = np.random.uniform(0, 2 * np.pi)
r = np.random.uniform(0, display_table_radius)
x = r * np.cos(angle)
y = r * np.sin(angle)
z = display_table_top
new_point = (x, y, z)
if all(np.linalg.norm(np.array(new_point) - np.array(existing_point)) >= min_distance for existing_point in points):
points.append(new_point)
attempts += 1
return points
@staticmethod
def check_scan_points_overlap(history_indices, indices2, threshold=5):
for indices1 in history_indices:
if len(set(indices1).intersection(set(indices2))) >= threshold:
return True
return False
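Note: a toy run of the greedy next-best-view selection above, with two synthetic overlapping views and hypothetical scan-point index sets. Because candidate views are randomly subsampled each round (drop_output_ratio), the returned sequence is stochastic:

import numpy as np
from utils.reconstruction import ReconstructionUtil

target = np.random.rand(1000, 3) * 0.2   # toy target cloud in a 20 cm cube
views = [target[:600], target[400:]]     # two views with a 200-point overlap
scan_indices = [[0], [1]]                # hypothetical scan-point index sets

seq, remaining, combined = ReconstructionUtil.compute_next_best_view_sequence(
    target, views, scan_indices, threshold=0.01, overlap_area_threshold=25
)
print(seq)  # [(view_index, coverage_after_adding_it), ...]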

utils/render.py (new file, 136 lines)

@@ -0,0 +1,136 @@
import os
import json
import time
import subprocess
import tempfile
import shutil
import numpy as np
from utils.data_load import DataLoadUtil
from utils.reconstruction import ReconstructionUtil
from utils.pts import PtsUtil
class RenderUtil:
target_mask_label = (0, 255, 0)
display_table_mask_label = (0, 0, 255)
random_downsample_N = 32768
min_z = 0.2
max_z = 0.5
@staticmethod
def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[mask].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
return np.zeros((0, 3)), np.zeros((0, 3))
sampled_normal_camera = normal_camera[idx]
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world, sampled_normal_camera
@staticmethod
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
sampled_target_points = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N
)
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world
@staticmethod
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
points_image_homogeneous /= points_image_homogeneous[:, 2:]
pixel_x = points_image_homogeneous[:, 0].astype(int)
pixel_y = points_image_homogeneous[:, 1].astype(int)
h, w = mask.shape[:2]
valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
return selected_points_indices
@staticmethod
def render_pts(cam_pose, scene_path, script_path, scan_points, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
with tempfile.TemporaryDirectory() as temp_dir:
params = {
"cam_pose": nO_to_world_pose.tolist(),
"scene_path": scene_path
}
scene_info_path = os.path.join(scene_path, "scene_info.json")
shutil.copy(scene_info_path, os.path.join(temp_dir, "scene_info.json"))
params_data_path = os.path.join(temp_dir, "params.json")
with open(params_data_path, 'w') as f:
json.dump(params, f)
result = subprocess.run([
'/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True)
            if result.returncode != 0:
                print("blender rendering failed:", result.stderr)
path = os.path.join(temp_dir, "tmp")
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
            # target points
            target_mask_img_L = (mask_L == RenderUtil.target_mask_label).all(axis=-1)
            target_mask_img_R = (mask_R == RenderUtil.target_mask_label).all(axis=-1)
sampled_target_points_L, sampled_target_normal_L = RenderUtil.get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], RenderUtil.random_downsample_N)
sampled_target_points_R = RenderUtil.get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], RenderUtil.random_downsample_N )
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
            if has_points:
                target_points, overlap_idx = PtsUtil.get_overlapping_points(
                    sampled_target_points_L, sampled_target_points_R, voxel_threshold, require_idx=True
                )
                sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
                has_points = target_points.shape[0] > 0
            if has_points:
                target_points, target_normals = PtsUtil.filter_points(
                    target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit=filter_degree, z_range=(RenderUtil.min_z, RenderUtil.max_z)
                )
            scan_points_indices_L = RenderUtil.get_scan_points_indices(scan_points, mask_L, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
            scan_points_indices_R = RenderUtil.get_scan_points_indices(scan_points, mask_R, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
if not has_points:
target_points = np.zeros((0, 3))
target_normals = np.zeros((0, 3))
return target_points, target_normals, scan_points_indices
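Note: render_pts shells out to the hard-coded Blender binary above and expects a scene directory plus a rendering script; the sketch below shows the intended call shape, with hypothetical paths and poses, and is not a runnable configuration:

import numpy as np
from utils.render import RenderUtil
from utils.reconstruction import ReconstructionUtil

scene_path = "/data/nbv_dataset/scene_0001"   # hypothetical scene directory
script_path = "blender/data_renderer.py"      # hypothetical Blender rendering script
scan_points = np.array(ReconstructionUtil.generate_scan_points(0.0, 0.25))

cam_pose = np.eye(4)  # hypothetical left-camera pose in world space
target_points, target_normals, scan_points_indices = RenderUtil.render_pts(
    cam_pose, scene_path, script_path, scan_points, nO_to_nL_pose=np.eye(4)
)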

utils/vis.py (new file, 208 lines)

@@ -0,0 +1,208 @@
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import trimesh
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.data_load import DataLoadUtil
from utils.pts import PtsUtil
from utils.pose import PoseUtil
class visualizeUtil:
@staticmethod
def save_all_cam_pos_and_cam_axis(root, scene, output_dir):
length = DataLoadUtil.get_scene_seq_length(root, scene)
all_cam_pos = []
all_cam_axis = []
for i in range(length):
path = DataLoadUtil.get_path(root, scene, i)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
cam_pose = cam_info["cam_to_world"]
cam_pos = cam_pose[:3, 3]
cam_axis = cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
all_cam_pos.append(cam_pos)
all_cam_axis.append(sample_points)
all_cam_pos = np.array(all_cam_pos)
all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "all_cam_pos.txt"), all_cam_pos)
np.savetxt(os.path.join(output_dir, "all_cam_axis.txt"), all_cam_axis)
@staticmethod
def get_cam_pose_and_cam_axis(cam_pose, is_6d_pose):
if is_6d_pose:
matrix_cam_pose = np.eye(4)
matrix_cam_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(cam_pose[:6])
matrix_cam_pose[:3, 3] = cam_pose[6:]
else:
matrix_cam_pose = cam_pose
cam_pos = matrix_cam_pose[:3, 3]
cam_axis = matrix_cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
return cam_pos, sample_points
@staticmethod
def save_all_combined_pts(root, scene, output_dir):
length = DataLoadUtil.get_scene_seq_length(root, scene)
all_combined_pts = []
for i in range(length):
path = DataLoadUtil.get_path(root, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
np.savetxt(os.path.join(output_dir, "all_combined_pts.txt"), downsampled_all_pts)
@staticmethod
def save_seq_cam_pos_and_cam_axis(root, scene, frame_idx_list, output_dir):
all_cam_pos = []
all_cam_axis = []
for i in frame_idx_list:
path = DataLoadUtil.get_path(root, scene, i)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
cam_pose = cam_info["cam_to_world"]
cam_pos = cam_pose[:3, 3]
cam_axis = cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
all_cam_pos.append(cam_pos)
all_cam_axis.append(sample_points)
all_cam_pos = np.array(all_cam_pos)
all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "seq_cam_pos.txt"), all_cam_pos)
np.savetxt(os.path.join(output_dir, "seq_cam_axis.txt"), all_cam_axis)
@staticmethod
def save_seq_combined_pts(root, scene, frame_idx_list, output_dir):
all_combined_pts = []
for i in frame_idx_list:
path = DataLoadUtil.get_path(root, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
np.savetxt(os.path.join(output_dir, "seq_combined_pts.txt"), downsampled_all_pts)
@staticmethod
def save_target_mesh_at_world_space(
root, model_dir, scene_name, display_table_as_world_space_origin=True
):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
if display_table_as_world_space_origin:
location = transformation["location"] - DataLoadUtil.get_display_table_top(
root, scene_name
)
else:
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
mesh_dir = os.path.join(root, scene_name, "mesh")
if not os.path.exists(mesh_dir):
os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path)
@staticmethod
def save_points_and_normals(root, scene, frame_idx, output_dir, binocular=False):
target_mask_label = (0, 255, 0, 255)
path = DataLoadUtil.get_path(root, scene, frame_idx)
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular, display_table_as_world_space_origin=False)
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=binocular,
)
if isinstance(depth, tuple):
depth = depth[0]
mask = DataLoadUtil.load_seg(path, binocular=binocular, left_only=True)
normal = DataLoadUtil.load_normal(path, binocular=binocular, left_only=True)
''' target points '''
if mask is None:
target_mask_img = np.ones_like(depth, dtype=bool)
else:
target_mask_img = (mask == target_mask_label).all(axis=-1)
cam_intrinsic = cam_info["cam_intrinsic"]
z = depth[target_mask_img]
i, j = np.nonzero(target_mask_img)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
random_downsample_N = 1000
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[target_mask_img].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
        if len(sampled_target_points) == 0:
            print("No target points")
            return
        sampled_normal_camera = normal_camera[idx]
        sampled_visualized_normal = []
        # flip Y and Z back to the world-space convention (cf. cam_pose_transformation)
        sampled_normal_camera[:, 2] = -sampled_normal_camera[:, 2]
        sampled_normal_camera[:, 1] = -sampled_normal_camera[:, 1]
num_samples = 10
for i in range(len(sampled_target_points)):
sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
sampled_visualized_normal = np.array(sampled_visualized_normal).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "target_pts.txt"), sampled_target_points)
np.savetxt(os.path.join(output_dir, "target_normal.txt"), sampled_visualized_normal)
@staticmethod
def save_pts_nrm(root, scene, frame_idx, output_dir, binocular=False):
path = DataLoadUtil.get_path(root, scene, frame_idx)
pts_world = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
nrm_camera = DataLoadUtil.load_from_preprocessed_nrm(path, "npy")
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
cam_to_world = cam_info["cam_to_world"]
nrm_world = nrm_camera @ cam_to_world[:3, :3].T
visualized_nrm = []
num_samples = 10
for i in range(len(pts_world)):
for t in range(num_samples):
visualized_nrm.append(pts_world[i] - 0.02 * t * nrm_world[i])
visualized_nrm = np.array(visualized_nrm)
np.savetxt(os.path.join(output_dir, "nrm.txt"), visualized_nrm)
np.savetxt(os.path.join(output_dir, "pts.txt"), pts_world)
# ------ Debug ------
if __name__ == "__main__":
root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
scene = "box"
output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test"
#visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
# visualizeUtil.save_all_combined_pts(root, scene, output_dir)
# visualizeUtil.save_seq_combined_pts(root, scene, [0, 121, 286, 175, 111,366,45,230,232,225,255,17,199,78,60], output_dir)
# visualizeUtil.save_seq_cam_pos_and_cam_axis(root, scene, [0, 121, 286, 175, 111,366,45,230,232,225,255,17,199,78,60], output_dir)
# visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
#visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)
visualizeUtil.save_pts_nrm(root, scene, "116", output_dir, binocular=True)