inference

This commit is contained in:
2024-11-03 02:18:59 +08:00
parent 7b28cf9e91
commit 7cd8bfebd3
4 changed files with 314 additions and 123 deletions

13
app.py
View File

@@ -140,6 +140,10 @@ def get_frame_data():
camera_params_path = os.path.join(scene_path, 'camera_params') camera_params_path = os.path.join(scene_path, 'camera_params')
depth_path = os.path.join(scene_path, 'depth') depth_path = os.path.join(scene_path, 'depth')
mask_path = os.path.join(scene_path, 'mask') mask_path = os.path.join(scene_path, 'mask')
voxel_threshold = 0.005
# model_points_normals = DataLoadUtil.load_points_normals(ROOT, scene_name)
# model_pts = model_points_normals[:,:3]
# down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, )
model_points_normals = DataLoadUtil.load_points_normals(root, scene_name) model_points_normals = DataLoadUtil.load_points_normals(root, scene_name)
model_points = model_points_normals[:, :3] model_points = model_points_normals[:, :3]
@@ -172,17 +176,18 @@ def get_frame_data():
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True) point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
sampled_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_params['cam_to_world'], theta=75) sampled_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_params['cam_to_world'], voxel_size=voxel_threshold, theta=75)
sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(sampled_point_cloud, 0.01) #sampled_point_cloud = point_cloud
sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(sampled_point_cloud, voxel_threshold)
frame_data['new_point_cloud'] = sampled_point_cloud.tolist() frame_data['new_point_cloud'] = sampled_point_cloud.tolist()
frame_data['combined_point_cloud'] = combined_point_cloud.tolist() frame_data['combined_point_cloud'] = combined_point_cloud.tolist()
new_added_pts = ReconstructionUtil.get_new_added_points(combined_point_cloud, sampled_point_cloud) new_added_pts = ReconstructionUtil.get_new_added_points(combined_point_cloud, sampled_point_cloud, threshold=voxel_threshold)
frame_data["new_added_pts"] = new_added_pts.tolist() frame_data["new_added_pts"] = new_added_pts.tolist()
combined_point_cloud = np.concatenate([combined_point_cloud, sampled_point_cloud], axis=0) combined_point_cloud = np.concatenate([combined_point_cloud, sampled_point_cloud], axis=0)
combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud, 0.01) combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud, voxel_threshold)
frame_data["coverage_rate"] = frame_info.get('coverage_rate') frame_data["coverage_rate"] = frame_info.get('coverage_rate')
delta_CR = frame_data["coverage_rate"] - last_CR delta_CR = frame_data["coverage_rate"] - last_CR

View File

@@ -6,43 +6,61 @@ import trimesh
import torch import torch
from pts import PtsUtil from pts import PtsUtil
class DataLoadUtil: class DataLoadUtil:
DISPLAY_TABLE_POSITION = np.asarray([0,0,0.895]) TABLE_POSITION = np.asarray([0, 0, 0.8215])
@staticmethod
def get_display_table_info(root, scene_name):
    """Return the ``"display_table"`` entry of a scene's info dictionary.

    Args:
        root: Dataset root directory.
        scene_name: Name of the scene directory under ``root``.

    Returns:
        Whatever is stored under the ``"display_table"`` key of the scene
        info loaded by ``DataLoadUtil.load_scene_info``.
    """
    return DataLoadUtil.load_scene_info(root, scene_name)["display_table"]
@staticmethod
def get_display_table_top(root, scene_name):
    """Return ``TABLE_POSITION`` raised along z by the display table height.

    Args:
        root: Dataset root directory.
        scene_name: Name of the scene directory under ``root``.

    Returns:
        A 3-element array: ``DataLoadUtil.TABLE_POSITION`` plus
        ``[0, 0, height]``, where ``height`` is the ``"height"`` entry of the
        scene's display table info.
    """
    table_info = DataLoadUtil.get_display_table_info(root, scene_name)
    height_offset = np.asarray([0, 0, table_info["height"]])
    return DataLoadUtil.TABLE_POSITION + height_offset
@staticmethod @staticmethod
def get_path(root, scene_name, frame_idx): def get_path(root, scene_name, frame_idx):
path = os.path.join(root, scene_name, f"{frame_idx}") path = os.path.join(root, scene_name, f"{frame_idx}")
return path return path
@staticmethod @staticmethod
def get_label_num(root, scene_name): def get_label_num(root, scene_name):
label_dir = os.path.join(root,scene_name,"label") label_dir = os.path.join(root, scene_name, "label")
return len(os.listdir(label_dir)) return len(os.listdir(label_dir))
@staticmethod @staticmethod
def get_label_path(root, scene_name, seq_idx): def get_label_path(root, scene_name, seq_idx):
label_dir = os.path.join(root,scene_name,"label") label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir): if not os.path.exists(label_dir):
os.makedirs(label_dir) os.makedirs(label_dir)
path = os.path.join(label_dir,f"{seq_idx}.json") path = os.path.join(label_dir, f"{seq_idx}.json")
return path return path
@staticmethod @staticmethod
def get_label_path_old(root, scene_name): def get_label_path_old(root, scene_name):
path = os.path.join(root,scene_name,"label.json") path = os.path.join(root, scene_name, "label.json")
return path return path
@staticmethod @staticmethod
def get_scene_seq_length(root, scene_name): def get_scene_seq_length(root, scene_name):
camera_params_path = os.path.join(root, scene_name, "camera_params") camera_params_path = os.path.join(root, scene_name, "camera_params")
return len(os.listdir(camera_params_path)) return len(os.listdir(camera_params_path))
@staticmethod
def load_mesh_at(model_dir, object_name, world_object_pose):
    """Load ``<model_dir>/<object_name>/mesh.obj`` and apply a pose to it.

    Args:
        model_dir: Directory containing one sub-directory per object.
        object_name: Name of the object sub-directory.
        world_object_pose: Transform passed to ``mesh.apply_transform``
            (presumably a 4x4 object-to-world matrix; confirm with callers).

    Returns:
        The loaded mesh after the transform has been applied.
    """
    mesh = trimesh.load(os.path.join(model_dir, object_name, "mesh.obj"))
    mesh.apply_transform(world_object_pose)
    return mesh
@staticmethod @staticmethod
def get_bbox_diag(model_dir, object_name): def get_bbox_diag(model_dir, object_name):
model_path = os.path.join(model_dir, object_name, "mesh.obj") model_path = os.path.join(model_dir, object_name, "mesh.obj")
@@ -50,8 +68,7 @@ class DataLoadUtil:
bbox = mesh.bounding_box.extents bbox = mesh.bounding_box.extents
diagonal_length = np.linalg.norm(bbox) diagonal_length = np.linalg.norm(bbox)
return diagonal_length return diagonal_length
@staticmethod @staticmethod
def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose): def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose) mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
@@ -59,12 +76,16 @@ class DataLoadUtil:
mesh.export(model_path) mesh.export(model_path)
@staticmethod @staticmethod
def save_target_mesh_at_world_space(root, model_dir, scene_name, display_table_as_world_space_origin=True): def save_target_mesh_at_world_space(
root, model_dir, scene_name, display_table_as_world_space_origin=True
):
scene_info = DataLoadUtil.load_scene_info(root, scene_name) scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"] target_name = scene_info["target_name"]
transformation = scene_info[target_name] transformation = scene_info[target_name]
if display_table_as_world_space_origin: if display_table_as_world_space_origin:
location = transformation["location"] - DataLoadUtil.DISPLAY_TABLE_POSITION location = transformation["location"] - DataLoadUtil.get_display_table_top(
root, scene_name
)
else: else:
location = transformation["location"] location = transformation["location"]
rotation_euler = transformation["rotation_euler"] rotation_euler = transformation["rotation_euler"]
@@ -77,14 +98,21 @@ class DataLoadUtil:
os.makedirs(mesh_dir) os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj") model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path) mesh.export(model_path)
@staticmethod @staticmethod
def load_scene_info(root, scene_name): def load_scene_info(root, scene_name):
scene_info_path = os.path.join(root, scene_name, "scene_info.json") scene_info_path = os.path.join(root, scene_name, "scene_info.json")
with open(scene_info_path, "r") as f: with open(scene_info_path, "r") as f:
scene_info = json.load(f) scene_info = json.load(f)
return scene_info return scene_info
@staticmethod
def load_target_pts_num_dict(root, scene_name):
target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
with open(target_pts_num_path, "r") as f:
target_pts_num_dict = json.load(f)
return target_pts_num_dict
@staticmethod @staticmethod
def load_target_object_pose(root, scene_name): def load_target_object_pose(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name) scene_info = DataLoadUtil.load_scene_info(root, scene_name)
@@ -95,10 +123,10 @@ class DataLoadUtil:
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler) pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location pose_mat[:3, 3] = location
return pose_mat return pose_mat
@staticmethod @staticmethod
def load_depth(path, min_depth=0.01,max_depth=5.0,binocular=False): def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
def load_depth_from_real_path(real_path, min_depth, max_depth): def load_depth_from_real_path(real_path, min_depth, max_depth):
depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED) depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
depth = depth.astype(np.float32) / 65535.0 depth = depth.astype(np.float32) / 65535.0
@@ -106,75 +134,136 @@ class DataLoadUtil:
max_depth = max_depth max_depth = max_depth
depth_meters = min_depth + (max_depth - min_depth) * depth depth_meters = min_depth + (max_depth - min_depth) * depth
return depth_meters return depth_meters
if binocular: if binocular:
depth_path_L = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_L.png") depth_path_L = os.path.join(
depth_path_R = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_R.png") os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
depth_meters_L = load_depth_from_real_path(depth_path_L, min_depth, max_depth) )
depth_meters_R = load_depth_from_real_path(depth_path_R, min_depth, max_depth) depth_path_R = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
)
depth_meters_L = load_depth_from_real_path(
depth_path_L, min_depth, max_depth
)
depth_meters_R = load_depth_from_real_path(
depth_path_R, min_depth, max_depth
)
return depth_meters_L, depth_meters_R return depth_meters_L, depth_meters_R
else: else:
depth_path = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + ".png") depth_path = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + ".png"
)
depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth) depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
return depth_meters return depth_meters
@staticmethod
def load_seg(path, binocular=False, left_only=False):
    """Load the segmentation mask image(s) stored next to a frame path.

    Masks are read from the ``mask`` directory beside ``path`` with
    ``cv2.IMREAD_UNCHANGED``.

    Args:
        path: Frame base path (``<scene_dir>/<frame_idx>``).
        binocular: Use the ``_L`` / ``_R`` mask files instead of the single
            ``.png`` file.
        left_only: With ``binocular``, load only the ``_L`` mask. In that
            case the mask is returned exactly as read, without the colour
            snapping applied to the left/right pair.

    Returns:
        ``(mask_L, mask_R)`` when ``binocular`` and not ``left_only``,
        otherwise a single mask image.
    """
    mask_dir = os.path.join(os.path.dirname(path), "mask")
    frame_name = os.path.basename(path)

    if not binocular:
        return cv2.imread(
            os.path.join(mask_dir, frame_name + ".png"), cv2.IMREAD_UNCHANGED
        )
    if left_only:
        return cv2.imread(
            os.path.join(mask_dir, frame_name + "_L.png"), cv2.IMREAD_UNCHANGED
        )

    def clean_mask(mask_image):
        # Snap values within `threshold` of the green / red label colours to
        # the exact label value; the comparison is element-wise per channel.
        green = [0, 255, 0, 255]
        red = [255, 0, 0, 255]
        threshold = 2
        for label in (green, red):
            mask_image = np.where(
                np.abs(mask_image - label) <= threshold, label, mask_image
            )
        return mask_image

    mask_image_L = clean_mask(
        cv2.imread(os.path.join(mask_dir, frame_name + "_L.png"), cv2.IMREAD_UNCHANGED)
    )
    mask_image_R = clean_mask(
        cv2.imread(os.path.join(mask_dir, frame_name + "_R.png"), cv2.IMREAD_UNCHANGED)
    )
    return mask_image_L, mask_image_R
@staticmethod
def load_normal(path, binocular=False, left_only=False):
    """Load normal map image(s) for a frame and rescale them to [-1, 1].

    Images are read from the ``normal`` directory beside ``path`` with
    ``cv2.IMREAD_COLOR`` and mapped with ``value / 255 * 2 - 1``.

    Args:
        path: Frame base path (``<scene_dir>/<frame_idx>``).
        binocular: Use the ``_L`` / ``_R`` files instead of the single
            ``.png`` file.
        left_only: With ``binocular``, load only the ``_L`` image.

    Returns:
        ``(normal_L, normal_R)`` when ``binocular`` and not ``left_only``,
        otherwise a single rescaled image.
    """
    normal_dir = os.path.join(os.path.dirname(path), "normal")
    frame_name = os.path.basename(path)

    def read_scaled(suffix):
        # Read one image and map its 0..255 channel values onto -1..1.
        image = cv2.imread(
            os.path.join(normal_dir, frame_name + suffix), cv2.IMREAD_COLOR
        )
        return image / 255.0 * 2.0 - 1.0

    if not binocular:
        return read_scaled(".png")
    if left_only:
        return read_scaled("_L.png")
    return read_scaled("_L.png"), read_scaled("_R.png")
@staticmethod @staticmethod
def load_label(path): def load_label(path):
with open(path, 'r') as f: with open(path, "r") as f:
label_data = json.load(f) label_data = json.load(f)
return label_data return label_data
@staticmethod
def load_rgb(path):
    """Read ``<scene_dir>/rgb/<frame_idx>.png`` with ``cv2.IMREAD_COLOR``.

    ``path`` is the frame base path; the image is looked up in the ``rgb``
    directory beside it. The value returned by ``cv2.imread`` is passed
    through unchanged.
    """
    scene_dir, frame_name = os.path.dirname(path), os.path.basename(path)
    return cv2.imread(os.path.join(scene_dir, "rgb", frame_name + ".png"), cv2.IMREAD_COLOR)
@staticmethod @staticmethod
def load_from_preprocessed_pts(path): def load_from_preprocessed_pts(path):
npy_path = os.path.join(os.path.dirname(path), "points", os.path.basename(path) + ".npy") npy_path = os.path.join(
os.path.dirname(path), "pts", os.path.basename(path) + ".npy"
)
pts = np.load(npy_path) pts = np.load(npy_path)
return pts return pts
@staticmethod @staticmethod
def cam_pose_transformation(cam_pose_before): def cam_pose_transformation(cam_pose_before):
offset = np.asarray([ offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
[1, 0, 0, 0], cam_pose_after = cam_pose_before @ offset
[0, -1, 0, 0],
[0, 0, -1, 0],
[0, 0, 0, 1]])
cam_pose_after = cam_pose_before @ offset
return cam_pose_after return cam_pose_after
@staticmethod @staticmethod
def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True): def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json") scene_dir = os.path.dirname(path)
with open(camera_params_path, 'r') as f: root_dir = os.path.dirname(scene_dir)
scene_name = os.path.basename(scene_dir)
camera_params_path = os.path.join(
os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
)
with open(camera_params_path, "r") as f:
label_data = json.load(f) label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"]) cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world) cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
world_to_display_table = np.eye(4) world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = - DataLoadUtil.DISPLAY_TABLE_POSITION world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
if display_table_as_world_space_origin: if display_table_as_world_space_origin:
cam_to_world = np.dot(world_to_display_table, cam_to_world) cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"]) cam_intrinsic = np.asarray(label_data["intrinsic"])
@@ -182,7 +271,7 @@ class DataLoadUtil:
"cam_to_world": cam_to_world, "cam_to_world": cam_to_world,
"cam_intrinsic": cam_intrinsic, "cam_intrinsic": cam_intrinsic,
"far_plane": label_data["far_plane"], "far_plane": label_data["far_plane"],
"near_plane": label_data["near_plane"] "near_plane": label_data["near_plane"],
} }
if binocular: if binocular:
cam_to_world_R = np.asarray(label_data["extrinsic_R"]) cam_to_world_R = np.asarray(label_data["extrinsic_R"])
@@ -195,102 +284,127 @@ class DataLoadUtil:
cam_info["cam_to_world_O"] = cam_to_world_O cam_info["cam_to_world_O"] = cam_to_world_O
cam_info["cam_to_world_R"] = cam_to_world_R cam_info["cam_to_world_R"] = cam_to_world_R
return cam_info return cam_info
@staticmethod
def get_real_cam_O_from_cam_L(
    cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
):
    """Compute the pose of camera O from the pose of the left camera.

    Args:
        cam_L: Pose of the left camera, as a NumPy array or a torch tensor
            (tensors are moved to CPU and converted to NumPy).
        cam_O_to_cam_L: Transform composed on the right of ``cam_L``.
        scene_path: Scene directory ``<root>/<scene_name>``. It is only
            used to look up the display table top, so it is not touched when
            ``display_table_as_world_space_origin`` is False.
        display_table_as_world_space_origin: When True, the composed pose is
            shifted by the display table top position (presumably ``cam_L``
            is then expressed relative to the table top; confirm with callers).

    Returns:
        The composed pose after ``DataLoadUtil.cam_pose_transformation``.
    """
    if isinstance(cam_L, torch.Tensor):
        cam_L = cam_L.cpu().numpy()
    # Start from the composed pose so the result is defined even when no
    # table offset is applied; previously that path read an unassigned local.
    nO_to_world_pose = cam_L @ cam_O_to_cam_L
    if display_table_as_world_space_origin:
        root_dir = os.path.dirname(scene_path)
        scene_name = os.path.basename(scene_path)
        display_table_to_world = np.eye(4)
        display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
            root_dir, scene_name
        )
        nO_to_world_pose = np.dot(display_table_to_world, nO_to_world_pose)
    return DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
@staticmethod @staticmethod
def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0,255,0,255)): def get_target_point_cloud(
depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
):
h, w = depth.shape h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy') i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
mask = mask.reshape(-1,4)
target_mask = (mask == target_mask_label).all(axis=-1) points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
mask = mask.reshape(-1, 4)
target_mask = (mask == target_mask_label).all(axis=-1)
target_points_camera = points_camera[target_mask] target_points_camera = points_camera[target_mask]
target_points_camera_aug = np.concatenate([target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1) target_points_camera_aug = np.concatenate(
[target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
)
target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3] target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
return { data = {
"points_world": target_points_world, "points_world": target_points_world,
"points_camera": target_points_camera "points_camera": target_points_camera,
} }
return data
@staticmethod @staticmethod
def get_point_cloud(depth, cam_intrinsic, cam_extrinsic): def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
h, w = depth.shape h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy') i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1) points_camera_aug = np.concatenate(
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
)
points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3] points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return { return {"points_world": points_world, "points_camera": points_camera}
"points_world": points_world,
"points_camera": points_camera
}
@staticmethod
def get_target_point_cloud_world_from_path(
    path,
    binocular=False,
    random_downsample_N=65536,
    voxel_size=0.005,
    target_mask_label=(0, 255, 0, 255),
    display_table_mask_label=(0, 0, 255, 255),
    get_display_table_pts=False,
    require_normal=False,
):
    """Load a frame from disk and return its target points in world space.

    Args:
        path: Frame base path (``<scene_dir>/<frame_idx>``).
        binocular: When True, back-project the left and right views,
            randomly resample each to ``random_downsample_N`` points and
            keep the left-view points whose voxel also holds right-view points.
        random_downsample_N: Sample count per view (binocular mode only).
        voxel_size: Voxel edge length for the overlap test (binocular only).
        target_mask_label: 4-channel mask value selecting the target.
        display_table_mask_label: Accepted but not used by this implementation.
        get_display_table_pts: Accepted but not used by this implementation.
        require_normal: Accepted but not used by this implementation.

    Returns:
        ``(n, 3)`` array of world-space target points.
    """
    cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
    near_plane, far_plane = cam_info["near_plane"], cam_info["far_plane"]

    if not binocular:
        depth = DataLoadUtil.load_depth(path, near_plane, far_plane)
        mask = DataLoadUtil.load_seg(path)
        # Forward the caller's label; it used to be dropped here, so the
        # monocular path always fell back to the callee's default label.
        return DataLoadUtil.get_target_point_cloud(
            depth,
            cam_info["cam_intrinsic"],
            cam_info["cam_to_world"],
            mask,
            target_mask_label,
        )["points_world"]

    depth_L, depth_R = DataLoadUtil.load_depth(
        path, near_plane, far_plane, binocular=True
    )
    mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
    point_cloud_L = DataLoadUtil.get_target_point_cloud(
        depth_L,
        cam_info["cam_intrinsic"],
        cam_info["cam_to_world"],
        mask_L,
        target_mask_label,
    )["points_world"]
    point_cloud_R = DataLoadUtil.get_target_point_cloud(
        depth_R,
        cam_info["cam_intrinsic"],
        cam_info["cam_to_world_R"],
        mask_R,
        target_mask_label,
    )["points_world"]
    point_cloud_L = PtsUtil.random_downsample_point_cloud(
        point_cloud_L, random_downsample_N
    )
    point_cloud_R = PtsUtil.random_downsample_point_cloud(
        point_cloud_R, random_downsample_N
    )
    return PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size)
@staticmethod
def voxelize_points(points, voxel_size):
voxel_indices = np.floor(points / voxel_size).astype(np.int32)
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
return unique_voxels
@staticmethod
def get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005):
    """Return the left-cloud points whose voxel also contains right-cloud points.

    Both clouds are voxelised with ``voxel_size``; a left point is kept when
    its voxel coordinate appears among the right cloud's voxels.
    """
    voxels_L, indices_L = DataLoadUtil.voxelize_points(point_cloud_L, voxel_size)
    voxels_R, _ = DataLoadUtil.voxelize_points(point_cloud_R, voxel_size)

    # View each (x, y, z) voxel row as one structured element so whole rows
    # can be compared by the set routines.
    packed_L = voxels_L.view([('', voxels_L.dtype)]*3)
    packed_R = voxels_R.view([('', voxels_R.dtype)]*3)
    shared_voxels = np.intersect1d(packed_L, packed_R)

    shared_rows = np.where(np.isin(packed_L, shared_voxels))[0]
    keep_L = np.isin(indices_L, shared_rows)
    return point_cloud_L[keep_L]
@staticmethod @staticmethod
def load_points_normals(root, scene_name, display_table_as_world_space_origin=True): def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
points_path = os.path.join(root, scene_name, "points_and_normals.txt") points_path = os.path.join(root, scene_name, "points_and_normals.txt")
points_normals = np.loadtxt(points_path) points_normals = np.loadtxt(points_path)
if display_table_as_world_space_origin: if display_table_as_world_space_origin:
points_normals[:,:3] = points_normals[:,:3] - DataLoadUtil.DISPLAY_TABLE_POSITION points_normals[:, :3] = points_normals[
return points_normals :, :3
] - DataLoadUtil.get_display_table_top(root, scene_name)
return points_normals

87
pts.py
View File

@@ -1,6 +1,7 @@
import numpy as np import numpy as np
import open3d as o3d import open3d as o3d
import torch import torch
from scipy.spatial import cKDTree
class PtsUtil: class PtsUtil:
@@ -11,6 +12,49 @@ class PtsUtil:
downsampled_pc = o3d_pc.voxel_down_sample(voxel_size) downsampled_pc = o3d_pc.voxel_down_sample(voxel_size)
return np.asarray(downsampled_pc.points) return np.asarray(downsampled_pc.points)
@staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False):
if point_cloud.shape[0] == 0:
if require_idx:
return point_cloud, np.array([])
return point_cloud
idx = np.random.choice(len(point_cloud), num_points, replace=True)
if require_idx:
return point_cloud[idx], idx
return point_cloud[idx]
@staticmethod
def fps_downsample_point_cloud(point_cloud, num_points, require_idx=False):
N = point_cloud.shape[0]
mask = np.zeros(N, dtype=bool)
sampled_indices = np.zeros(num_points, dtype=int)
sampled_indices[0] = np.random.randint(0, N)
distances = np.linalg.norm(point_cloud - point_cloud[sampled_indices[0]], axis=1)
for i in range(1, num_points):
farthest_index = np.argmax(distances)
sampled_indices[i] = farthest_index
mask[farthest_index] = True
new_distances = np.linalg.norm(point_cloud - point_cloud[farthest_index], axis=1)
distances = np.minimum(distances, new_distances)
sampled_points = point_cloud[sampled_indices]
if require_idx:
return sampled_points, sampled_indices
return sampled_points
@staticmethod
def random_downsample_point_cloud_tensor(point_cloud, num_points):
idx = torch.randint(0, len(point_cloud), (num_points,))
return point_cloud[idx]
@staticmethod
def voxelize_points(points, voxel_size):
voxel_indices = np.floor(points / voxel_size).astype(np.int32)
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
return unique_voxels
@staticmethod @staticmethod
def transform_point_cloud(points, pose_mat): def transform_point_cloud(points, pose_mat):
points_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1) points_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1)
@@ -18,13 +62,40 @@ class PtsUtil:
return points_h[:, :3] return points_h[:, :3]
@staticmethod
def get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005, require_idx=False):
    """Return the left-cloud points whose voxel also contains right-cloud points.

    Args:
        point_cloud_L: ``(N, 3)`` points of the left view.
        point_cloud_R: ``(M, 3)`` points of the right view.
        voxel_size: Voxel edge length used to match the two clouds.
        require_idx: Also return the boolean mask over ``point_cloud_L``.

    Returns:
        The overlapping left points, or ``(points, mask_L)`` when
        ``require_idx`` is set.
    """
    voxels_L, indices_L = PtsUtil.voxelize_points(point_cloud_L, voxel_size)
    voxels_R, _ = PtsUtil.voxelize_points(point_cloud_R, voxel_size)

    # View each (x, y, z) voxel row as one structured element so whole rows
    # can be compared by the set routines.
    packed_L = voxels_L.view([("", voxels_L.dtype)] * 3)
    packed_R = voxels_R.view([("", voxels_R.dtype)] * 3)
    shared_voxels = np.intersect1d(packed_L, packed_R)

    # Rows of the left voxel table that are shared, then the points that
    # fall into those rows.
    shared_rows = np.where(np.isin(packed_L, shared_voxels))[0]
    mask_L = np.isin(indices_L, shared_rows)
    overlapping_points = point_cloud_L[mask_L]
    return (overlapping_points, mask_L) if require_idx else overlapping_points
@staticmethod
def filter_points(points, points_normals, cam_pose, voxel_size=0.002, theta=45, z_range=(0.2, 0.45)):
    """Filter points by camera-space depth and by surface-normal angle.

    Args:
        points: ``(N, 3)`` points in the frame that ``cam_pose`` maps into.
        points_normals: Reference array whose first three columns are
            positions and whose remaining columns are used as normals.
        cam_pose: 4x4 camera pose; its inverse maps ``points`` into the
            camera frame.
        voxel_size: Voxel size for downsampling the depth-filtered points.
        theta: Maximum angle in degrees between a point's normal and the
            negated third column of ``cam_pose``.
        z_range: Exclusive ``(min, max)`` bounds on camera-space z.

    Returns:
        ``(K, 3)`` array of the downsampled points that pass both filters.
    """
    # Depth filter: keep points whose camera-space z lies inside z_range.
    world_to_cam = np.linalg.inv(cam_pose)
    cam_z = PtsUtil.transform_point_cloud(points, world_to_cam)[:, 2]
    in_range = (cam_z > z_range[0]) & (cam_z < z_range[1])
    candidates = PtsUtil.voxel_downsample_point_cloud(points[in_range], voxel_size)

    # Normal filter: take each candidate's normal from its nearest
    # reference point and compare it with the camera axis.
    _, nearest_idx = cKDTree(points_normals[:,:3]).query(candidates)
    normals = points_normals[nearest_idx][:, 3:]
    camera_axis = -cam_pose[:3, 2]
    unit_normals = normals / np.linalg.norm(normals, axis=1, keepdims=True)
    cos_theta = np.dot(unit_normals, camera_axis)
    keep = cos_theta > np.cos(np.deg2rad(theta))
    return candidates[keep][:, :3]

View File

@@ -18,6 +18,7 @@ class ReconstructionUtil:
@staticmethod @staticmethod
def filter_points(points, points_normals, cam_pose, voxel_size=0.005, theta=45): def filter_points(points, points_normals, cam_pose, voxel_size=0.005, theta=45):
sampled_points = PtsUtil.voxel_downsample_point_cloud(points, voxel_size) sampled_points = PtsUtil.voxel_downsample_point_cloud(points, voxel_size)
#sampled_points = points
kdtree = cKDTree(points_normals[:,:3]) kdtree = cKDTree(points_normals[:,:3])
_, indices = kdtree.query(sampled_points) _, indices = kdtree.query(sampled_points)
nearest_points = points_normals[indices] nearest_points = points_normals[indices]