first commit

2025-05-13 09:03:38 +08:00
commit b98753bfbb
121 changed files with 8665 additions and 0 deletions

7 binary files not shown.
utils/control.py (new file, 59 lines)

@@ -0,0 +1,59 @@
import numpy as np
class ControlUtil:
curr_rotation = 0
@staticmethod
def check_limit(new_cam_to_world):
        if new_cam_to_world[0, 3] < 0 or new_cam_to_world[1, 3] > 0:
            return False
        x = abs(new_cam_to_world[0, 3])
        y = abs(new_cam_to_world[1, 3])
        # compare the azimuth directly; arctan2 avoids division by zero when x == 0
        angle = np.arctan2(y, x)
        min_angle = np.radians(0)
        max_angle = np.radians(90)
        if angle < min_angle or angle > max_angle:
            return False
return True
@staticmethod
def solve_display_table_rot_and_cam_to_world(cam_to_world: np.ndarray) -> tuple:
if ControlUtil.check_limit(cam_to_world):
return 0, cam_to_world
else:
            min_display_table_rot = 180
            min_new_cam_to_world = None
            for display_table_rot in np.linspace(0.1, 360, 1800):
                new_world_to_world = ControlUtil.get_z_axis_rot_mat(display_table_rot)
                new_cam_to_world = new_world_to_world @ cam_to_world
                if ControlUtil.check_limit(new_cam_to_world):
                    # keep the candidate with the smallest absolute rotation;
                    # rotations near 360 deg count as small negative rotations
                    if display_table_rot < abs(min_display_table_rot):
                        min_display_table_rot, min_new_cam_to_world = display_table_rot, new_cam_to_world
                    if abs(display_table_rot - 360) < abs(min_display_table_rot):
                        min_display_table_rot, min_new_cam_to_world = display_table_rot - 360, new_cam_to_world
if min_new_cam_to_world is None:
raise ValueError("No valid display table rotation found")
delta_degree = min_display_table_rot - ControlUtil.curr_rotation
ControlUtil.curr_rotation = min_display_table_rot
return delta_degree, min_new_cam_to_world
@staticmethod
def get_z_axis_rot_mat(degree):
radian = np.radians(degree)
return np.array([
[np.cos(radian), -np.sin(radian), 0, 0],
[np.sin(radian), np.cos(radian), 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]
])
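Note: a minimal usage sketch for the solver above, with a hypothetical camera pose (not from this commit). When the pose violates the workspace limit, the solver searches table rotations and returns the signed rotation delta together with the corrected pose:

import numpy as np
from utils.control import ControlUtil

# hypothetical camera-to-world pose that violates the limit (x < 0)
cam_to_world = np.eye(4)
cam_to_world[:3, 3] = [-0.3, -0.2, 0.5]

delta_degree, fixed_pose = ControlUtil.solve_display_table_rot_and_cam_to_world(cam_to_world)
print(delta_degree)       # signed table rotation to apply, in degrees
print(fixed_pose[:3, 3])  # camera position expressed in the rotated world frame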

utils/data_load.py (new file, 391 lines)

@@ -0,0 +1,391 @@
import os
import numpy as np
import json
import cv2
import trimesh
import torch
import OpenEXR
from utils.pts import PtsUtil
class DataLoadUtil:
TABLE_POSITION = np.asarray([0, 0, 0.8215])
@staticmethod
def load_exr_image(file_path):
exr_file = OpenEXR.InputFile(file_path)
header = exr_file.header()
dw = header['dataWindow']
width = dw.max.x - dw.min.x + 1
height = dw.max.y - dw.min.y + 1
float_channels = ['R', 'G', 'B']
img_data = []
for channel in float_channels:
channel_data = exr_file.channel(channel)
img_data.append(np.frombuffer(channel_data, dtype=np.float16).reshape((height, width)))
img = np.stack(img_data, axis=-1)
return img
@staticmethod
def get_display_table_info(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
display_table_info = scene_info["display_table"]
return display_table_info
@staticmethod
def get_display_table_top(root, scene_name):
display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
"height"
]
display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
[0, 0, display_table_height]
)
return display_table_top
@staticmethod
def get_path(root, scene_name, frame_idx):
path = os.path.join(root, scene_name, f"{frame_idx}")
return path
@staticmethod
def get_label_num(root, scene_name):
label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir):
return 0
return len(os.listdir(label_dir))
@staticmethod
def get_label_path(root, scene_name, seq_idx):
label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir):
os.makedirs(label_dir)
path = os.path.join(label_dir, f"{seq_idx}.json")
return path
@staticmethod
def get_scene_seq_length(root, scene_name):
camera_params_path = os.path.join(root, scene_name, "camera_params")
return len(os.listdir(camera_params_path))
@staticmethod
def load_mesh_at(model_dir, object_name, world_object_pose):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
mesh.apply_transform(world_object_pose)
return mesh
@staticmethod
def get_bbox_diag(model_dir, object_name):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
mesh = trimesh.load(model_path)
bbox = mesh.bounding_box.extents
diagonal_length = np.linalg.norm(bbox)
return diagonal_length
@staticmethod
def load_scene_info(root, scene_name):
scene_info_path = os.path.join(root, scene_name, "scene_info.json")
with open(scene_info_path, "r") as f:
scene_info = json.load(f)
return scene_info
@staticmethod
def load_target_pts_num_dict(root, scene_name):
target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
with open(target_pts_num_path, "r") as f:
target_pts_num_dict = json.load(f)
return target_pts_num_dict
@staticmethod
def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
        def load_depth_from_real_path(real_path, min_depth, max_depth):
            depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
            # 16-bit depth PNGs store normalized values; map [0, 65535] back to meters
            depth = depth.astype(np.float32) / 65535.0
            depth_meters = min_depth + (max_depth - min_depth) * depth
            return depth_meters
if binocular:
depth_path_L = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
)
depth_path_R = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
)
depth_meters_L = load_depth_from_real_path(
depth_path_L, min_depth, max_depth
)
depth_meters_R = load_depth_from_real_path(
depth_path_R, min_depth, max_depth
)
return depth_meters_L, depth_meters_R
else:
depth_path = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + ".png"
)
depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
return depth_meters
@staticmethod
def load_seg(path, binocular=False, left_only=False):
if binocular and not left_only:
            def clean_mask(mask_image):
                green = [0, 255, 0]
                red = [255, 0, 0]
                threshold = 2
                # snap pixels within the threshold of a label color back to the exact color
                mask_image = np.where(
                    np.abs(mask_image - green) <= threshold, green, mask_image
                )
                mask_image = np.where(
                    np.abs(mask_image - red) <= threshold, red, mask_image
                )
                return mask_image
mask_path_L = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
mask_path_R = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
)
mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
return mask_image_L, mask_image_R
else:
if binocular and left_only:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
else:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + ".png"
)
mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
return mask_image
@staticmethod
def load_normal(path, binocular=False, left_only=False, file_type="exr"):
if binocular and not left_only:
normal_path_L = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
)
normal_image_L = DataLoadUtil.load_exr_image(normal_path_L)
normal_path_R = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f"_R.{file_type}"
)
normal_image_R = DataLoadUtil.load_exr_image(normal_path_R)
normalized_normal_image_L = normal_image_L * 2.0 - 1.0
normalized_normal_image_R = normal_image_R * 2.0 - 1.0
return normalized_normal_image_L, normalized_normal_image_R
else:
if binocular and left_only:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
)
else:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f".{file_type}"
)
normal_image = DataLoadUtil.load_exr_image(normal_path)
normalized_normal_image = normal_image * 2.0 - 1.0
return normalized_normal_image
@staticmethod
def load_label(path):
with open(path, "r") as f:
label_data = json.load(f)
return label_data
@staticmethod
def load_from_preprocessed_pts(path, file_type="npy"):
npy_path = os.path.join(
os.path.dirname(path), "pts", os.path.basename(path) + "." + file_type
)
if file_type == "txt":
pts = np.loadtxt(npy_path)
else:
pts = np.load(npy_path)
return pts
@staticmethod
def load_from_preprocessed_nrm(path, file_type="npy"):
npy_path = os.path.join(
os.path.dirname(path), "nrm", os.path.basename(path) + "." + file_type
)
if file_type == "txt":
nrm = np.loadtxt(npy_path)
else:
nrm = np.load(npy_path)
return nrm
@staticmethod
    def cam_pose_transformation(cam_pose_before):
        # diag(1, -1, -1) axis flip: converts between OpenGL/Blender and OpenCV camera conventions
        offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
        cam_pose_after = cam_pose_before @ offset
        return cam_pose_after
@staticmethod
def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
scene_dir = os.path.dirname(path)
root_dir = os.path.dirname(scene_dir)
scene_name = os.path.basename(scene_dir)
camera_params_path = os.path.join(
os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
)
with open(camera_params_path, "r") as f:
label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
if display_table_as_world_space_origin:
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"])
cam_info = {
"cam_to_world": cam_to_world,
"cam_intrinsic": cam_intrinsic,
"far_plane": label_data["far_plane"],
"near_plane": label_data["near_plane"],
}
if binocular:
cam_to_world_R = np.asarray(label_data["extrinsic_R"])
cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
if display_table_as_world_space_origin:
cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
cam_info["cam_to_world_O"] = cam_to_world_O
cam_info["cam_to_world_R"] = cam_to_world_R
return cam_info
@staticmethod
def get_real_cam_O_from_cam_L(
cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
):
root_dir = os.path.dirname(scene_path)
scene_name = os.path.basename(scene_path)
if isinstance(cam_L, torch.Tensor):
cam_L = cam_L.cpu().numpy()
nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
if display_table_as_world_space_origin:
display_table_to_world = np.eye(4)
display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
return nO_to_world_pose
@staticmethod
def get_target_point_cloud(
depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
mask = mask.reshape(-1, 4)
target_mask = (mask == target_mask_label).all(axis=-1)
target_points_camera = points_camera[target_mask]
target_points_camera_aug = np.concatenate(
[target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
)
target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
data = {
"points_world": target_points_world,
"points_camera": target_points_camera,
}
return data
@staticmethod
def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate(
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
)
points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return {"points_world": points_world, "points_camera": points_camera}
@staticmethod
def get_target_point_cloud_world_from_path(
path,
binocular=False,
random_downsample_N=65536,
voxel_size=0.005,
target_mask_label=(0, 255, 0, 255),
display_table_mask_label=(0, 0, 255, 255),
get_display_table_pts=False,
require_normal=False,
):
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
if binocular:
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
point_cloud_L = DataLoadUtil.get_target_point_cloud(
depth_L,
cam_info["cam_intrinsic"],
cam_info["cam_to_world"],
mask_L,
target_mask_label,
)["points_world"]
point_cloud_R = DataLoadUtil.get_target_point_cloud(
depth_R,
cam_info["cam_intrinsic"],
cam_info["cam_to_world_R"],
mask_R,
target_mask_label,
)["points_world"]
point_cloud_L = PtsUtil.random_downsample_point_cloud(
point_cloud_L, random_downsample_N
)
point_cloud_R = PtsUtil.random_downsample_point_cloud(
point_cloud_R, random_downsample_N
)
overlap_points = PtsUtil.get_overlapping_points(
point_cloud_L, point_cloud_R, voxel_size
)
return overlap_points
else:
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"]
)
mask = DataLoadUtil.load_seg(path)
point_cloud = DataLoadUtil.get_target_point_cloud(
depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
)["points_world"]
return point_cloud
@staticmethod
def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
points_path = os.path.join(root, scene_name, "points_and_normals.txt")
points_normals = np.loadtxt(points_path)
if display_table_as_world_space_origin:
points_normals[:, :3] = points_normals[
:, :3
] - DataLoadUtil.get_display_table_top(root, scene_name)
return points_normals
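Note: a sketch of the intended loading flow, assuming a dataset laid out the way these helpers expect (<root>/<scene>/depth/<frame>.png, mask/, camera_params/<frame>.json, scene_info.json); the paths here are hypothetical:

from utils.data_load import DataLoadUtil

root, scene = "/data/nbv_dataset", "scene_0001"  # hypothetical locations
path = DataLoadUtil.get_path(root, scene, 0)

cam_info = DataLoadUtil.load_cam_info(path, binocular=False)
depth = DataLoadUtil.load_depth(path, cam_info["near_plane"], cam_info["far_plane"])
mask = DataLoadUtil.load_seg(path)
# back-project masked depth pixels through the intrinsics, then transform to world space
pts = DataLoadUtil.get_target_point_cloud(
    depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
)["points_world"]
print(pts.shape)  # (N, 3), in display-table-centered world coordinates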

utils/pose.py (new file, 253 lines)

@@ -0,0 +1,253 @@
import numpy as np
import torch
import torch.nn.functional as F
class PoseUtil:
ROTATION = 1
TRANSLATION = 2
SCALE = 3
@staticmethod
def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False):
if isinstance(trans_m_min, list):
x_min, y_min, z_min = trans_m_min
x_max, y_max, z_max = trans_m_max
else:
x_min, y_min, z_min = trans_m_min, trans_m_min, trans_m_min
x_max, y_max, z_max = trans_m_max, trans_m_max, trans_m_max
x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max)
translation = np.array([x, y, z])
if trans_unit == "cm":
translation = translation / 100
if debug:
print("uniform translation:", translation)
return translation
@staticmethod
def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False):
axis = np.random.randn(3)
axis /= np.linalg.norm(axis)
theta = np.random.uniform(
rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi
)
K = np.array(
[[0, -axis[2], axis[1]], [axis[2], 0, -axis[0]], [-axis[1], axis[0], 0]]
)
R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K)
if debug:
print("uniform rotation:", theta * 180 / np.pi)
return R
@staticmethod
def get_uniform_pose(
trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False
):
translation = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug
)
rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
pose = np.eye(4)
pose[:3, :3] = rotation
pose[:3, 3] = translation
return pose
@staticmethod
def get_n_uniform_pose(
trans_min,
trans_max,
rot_min=0,
rot_max=180,
n=1,
trans_unit="cm",
fix=None,
contain_canonical=True,
debug=False,
):
if fix == PoseUtil.ROTATION:
translations = np.zeros((n, 3))
for i in range(n):
translations[i] = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug
)
if contain_canonical:
translations[0] = np.zeros(3)
rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
elif fix == PoseUtil.TRANSLATION:
rotations = np.zeros((n, 3, 3))
for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical:
rotations[0] = np.eye(3)
translations = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug
)
else:
translations = np.zeros((n, 3))
rotations = np.zeros((n, 3, 3))
for i in range(n):
translations[i] = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug
)
for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical:
translations[0] = np.zeros(3)
rotations[0] = np.eye(3)
        pose = np.tile(np.eye(4), (n, 1, 1))
pose[:, :3, :3] = rotations
pose[:, :3, 3] = translations
return pose
@staticmethod
def get_n_uniform_pose_batch(
trans_min,
trans_max,
rot_min=0,
rot_max=180,
n=1,
batch_size=1,
trans_unit="cm",
fix=None,
contain_canonical=False,
debug=False,
):
batch_poses = []
for i in range(batch_size):
pose = PoseUtil.get_n_uniform_pose(
trans_min,
trans_max,
rot_min,
rot_max,
n,
trans_unit,
fix,
contain_canonical,
debug,
)
batch_poses.append(pose)
pose_batch = np.stack(batch_poses, axis=0)
return pose_batch
@staticmethod
def get_uniform_scale(scale_min, scale_max, debug=False):
if isinstance(scale_min, list):
x_min, y_min, z_min = scale_min
x_max, y_max, z_max = scale_max
else:
x_min, y_min, z_min = scale_min, scale_min, scale_min
x_max, y_max, z_max = scale_max, scale_max, scale_max
x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max)
scale = np.array([x, y, z])
if debug:
print("uniform scale:", scale)
return scale
@staticmethod
def normalize_rotation(rotation, rotation_mode):
if rotation_mode == "quat_wxyz" or rotation_mode == "quat_xyzw":
rotation /= torch.norm(rotation, dim=-1, keepdim=True)
elif rotation_mode == "rot_matrix":
rot_matrix = PoseUtil.rotation_6d_to_matrix_tensor_batch(rotation)
rotation[:, :3] = rot_matrix[:, 0, :]
rotation[:, 3:6] = rot_matrix[:, 1, :]
elif rotation_mode == "euler_xyz_sx_cx":
rot_sin_theta = rotation[:, :3]
rot_cos_theta = rotation[:, 3:6]
theta = torch.atan2(rot_sin_theta, rot_cos_theta)
rotation[:, :3] = torch.sin(theta)
rotation[:, 3:6] = torch.cos(theta)
elif rotation_mode == "euler_xyz":
pass
else:
raise NotImplementedError
return rotation
@staticmethod
def get_pose_dim(rot_mode):
assert rot_mode in [
"quat_wxyz",
"quat_xyzw",
"euler_xyz",
"euler_xyz_sx_cx",
"rot_matrix",
], f"the rotation mode {rot_mode} is not supported!"
if rot_mode == "quat_wxyz" or rot_mode == "quat_xyzw":
pose_dim = 7
elif rot_mode == "euler_xyz":
pose_dim = 6
elif rot_mode == "euler_xyz_sx_cx" or rot_mode == "rot_matrix":
pose_dim = 9
else:
raise NotImplementedError
return pose_dim
@staticmethod
def rotation_6d_to_matrix_tensor_batch(d6: torch.Tensor) -> torch.Tensor:
a1, a2 = d6[..., :3], d6[..., 3:]
b1 = F.normalize(a1, dim=-1)
b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1
b2 = F.normalize(b2, dim=-1)
b3 = torch.cross(b1, b2, dim=-1)
return torch.stack((b1, b2, b3), dim=-2)
@staticmethod
def matrix_to_rotation_6d_tensor_batch(matrix: torch.Tensor) -> torch.Tensor:
batch_dim = matrix.size()[:-2]
return matrix[..., :2, :].clone().reshape(batch_dim + (6,))
@staticmethod
def rotation_6d_to_matrix_numpy(d6):
a1, a2 = d6[:3], d6[3:]
b1 = a1 / np.linalg.norm(a1)
b2 = a2 - np.dot(b1, a2) * b1
b2 = b2 / np.linalg.norm(b2)
b3 = np.cross(b1, b2)
return np.stack((b1, b2, b3), axis=-2)
@staticmethod
def matrix_to_rotation_6d_numpy(matrix):
return np.copy(matrix[:2, :]).reshape((6,))
@staticmethod
def rotation_angle_distance(R1, R2):
R = torch.matmul(R1, R2.transpose(1, 2))
trace = torch.diagonal(R, dim1=1, dim2=2).sum(-1)
        angle = torch.acos(torch.clamp((trace - 1) / 2, -1.0, 1.0)) / torch.pi * 180
return angle
""" ------------ Debug ------------ """
if __name__ == "__main__":
for _ in range(1):
PoseUtil.get_uniform_pose(
trans_min=[-25, -25, 10],
trans_max=[25, 25, 60],
rot_min=0,
rot_max=10,
debug=True,
)
PoseUtil.get_uniform_scale(scale_min=0.25, scale_max=0.30, debug=True)
PoseUtil.get_n_uniform_pose_batch(
trans_min=[-25, -25, 10],
trans_max=[25, 25, 60],
rot_min=0,
rot_max=10,
batch_size=2,
n=2,
fix=PoseUtil.TRANSLATION,
debug=True,
)
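Note: the 6D rotation representation used above stores the first two rows of the rotation matrix; rotation_6d_to_matrix re-orthonormalizes them by Gram-Schmidt and recovers the third row with a cross product, so the round trip is exact for a valid rotation. A quick sanity check:

import numpy as np
from utils.pose import PoseUtil

R = PoseUtil.get_uniform_rotation(0, 180)         # random rotation matrix
d6 = PoseUtil.matrix_to_rotation_6d_numpy(R)      # first two rows, flattened to 6 values
R_rec = PoseUtil.rotation_6d_to_matrix_numpy(d6)  # Gram-Schmidt reconstruction
print(np.allclose(R, R_rec, atol=1e-6))           # True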

utils/pts.py (new file, 117 lines)

@@ -0,0 +1,117 @@
import numpy as np
import open3d as o3d
import torch
class PtsUtil:
@staticmethod
def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005, require_idx=False):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
if require_idx:
_, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
return downsampled_points, idx_unique
        else:
            unique_voxels = np.unique(voxel_indices, axis=0)
            # return voxel-grid coordinates (one representative point per occupied voxel)
            return unique_voxels * voxel_size
@staticmethod
def voxel_downsample_point_cloud_random(point_cloud, voxel_size=0.005, require_idx=False):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
        if require_idx:
            # note: returns the per-point voxel assignment (inverse mapping), not the kept indices
            return downsampled_points, inverse
        return downsampled_points
@staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False):
        if point_cloud.shape[0] == 0:
            if require_idx:
                return point_cloud, np.array([], dtype=int)
            return point_cloud
        # sample with replacement only when fewer points than requested are available
        idx = np.random.choice(len(point_cloud), num_points, replace=len(point_cloud) < num_points)
if require_idx:
return point_cloud[idx], idx
return point_cloud[idx]
@staticmethod
def fps_downsample_point_cloud(point_cloud, num_points, require_idx=False):
N = point_cloud.shape[0]
mask = np.zeros(N, dtype=bool)
sampled_indices = np.zeros(num_points, dtype=int)
sampled_indices[0] = np.random.randint(0, N)
distances = np.linalg.norm(point_cloud - point_cloud[sampled_indices[0]], axis=1)
for i in range(1, num_points):
farthest_index = np.argmax(distances)
sampled_indices[i] = farthest_index
mask[farthest_index] = True
new_distances = np.linalg.norm(point_cloud - point_cloud[farthest_index], axis=1)
distances = np.minimum(distances, new_distances)
sampled_points = point_cloud[sampled_indices]
if require_idx:
return sampled_points, sampled_indices
return sampled_points
@staticmethod
def random_downsample_point_cloud_tensor(point_cloud, num_points):
idx = torch.randint(0, len(point_cloud), (num_points,))
return point_cloud[idx]
@staticmethod
def voxelize_points(points, voxel_size):
voxel_indices = np.floor(points / voxel_size).astype(np.int32)
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
return unique_voxels
@staticmethod
def transform_point_cloud(points, pose_mat):
points_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1)
points_h = np.dot(pose_mat, points_h.T).T
return points_h[:, :3]
@staticmethod
    def get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005, require_idx=False):
        voxels_L, indices_L = PtsUtil.voxelize_points(point_cloud_L, voxel_size)
        voxels_R, _ = PtsUtil.voxelize_points(point_cloud_R, voxel_size)
        # view each (x, y, z) voxel index as one structured record so that
        # np.intersect1d matches whole voxels rather than individual coordinates
        voxel_indices_L = voxels_L.view([("", voxels_L.dtype)] * 3)
        voxel_indices_R = voxels_R.view([("", voxels_R.dtype)] * 3)
        overlapping_voxels = np.intersect1d(voxel_indices_L, voxel_indices_R)
mask_L = np.isin(
indices_L, np.where(np.isin(voxel_indices_L, overlapping_voxels))[0]
)
overlapping_points = point_cloud_L[mask_L]
if require_idx:
return overlapping_points, mask_L
return overlapping_points
@staticmethod
def filter_points(points, normals, cam_pose, theta_limit=45, z_range=(0.2, 0.45)):
""" filter with normal """
normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
cos_theta = np.dot(normals_normalized, np.array([0, 0, 1]))
theta = np.arccos(cos_theta) * 180 / np.pi
idx = theta < theta_limit
filtered_sampled_points = points[idx]
filtered_normals = normals[idx]
""" filter with z range """
points_cam = PtsUtil.transform_point_cloud(filtered_sampled_points, np.linalg.inv(cam_pose))
idx = (points_cam[:, 2] > z_range[0]) & (points_cam[:, 2] < z_range[1])
z_filtered_points = filtered_sampled_points[idx]
z_filtered_normals = filtered_normals[idx]
return z_filtered_points[:, :3], z_filtered_normals
@staticmethod
def point_to_hash(point, voxel_size):
return tuple(np.floor(point / voxel_size).astype(int))
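Note: get_overlapping_points keeps the points of the left cloud whose voxels are also occupied by the right cloud. A self-contained sketch on synthetic data:

import numpy as np
from utils.pts import PtsUtil

shared = np.random.rand(500, 3)                       # points seen by both clouds
left = np.vstack([shared, np.random.rand(500, 3) + 5.0])
right = np.vstack([shared, np.random.rand(500, 3) - 5.0])

overlap = PtsUtil.get_overlapping_points(left, right, voxel_size=0.01)
print(overlap.shape)  # roughly the 500 shared points (plus voxel-boundary neighbors)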

utils/reconstruction.py (new file, 267 lines)

@@ -0,0 +1,267 @@
import numpy as np
from scipy.spatial import cKDTree
from utils.pts import PtsUtil
class ReconstructionUtil:
@staticmethod
def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
kdtree = cKDTree(combined_point_cloud)
distances, _ = kdtree.query(target_point_cloud)
covered_points_num = np.sum(distances < threshold*2)
coverage_rate = covered_points_num / target_point_cloud.shape[0]
return coverage_rate, covered_points_num
@staticmethod
def compute_coverage_rate_with_normal(target_point_cloud, combined_point_cloud, target_normal, combined_normal, threshold=0.01, normal_threshold=0.1):
kdtree = cKDTree(combined_point_cloud)
distances, indices = kdtree.query(target_point_cloud)
is_covered_by_distance = distances < threshold*2
normal_dots = np.einsum('ij,ij->i', target_normal, combined_normal[indices])
is_covered_by_normal = normal_dots > normal_threshold
covered_points_num = np.sum(is_covered_by_distance & is_covered_by_normal)
coverage_rate = covered_points_num / target_point_cloud.shape[0]
return coverage_rate, covered_points_num
@staticmethod
def check_overlap(new_point_cloud, combined_point_cloud, overlap_area_threshold=25, voxel_size=0.01, require_new_added_pts_num=False):
kdtree = cKDTree(combined_point_cloud)
distances, _ = kdtree.query(new_point_cloud)
        overlapping_points_num = np.sum(distances < voxel_size*2)
        # approximate overlap area in cm^2: each overlapping point contributes one voxel footprint
        voxel_size_cm = voxel_size / 0.01
        overlap_area = overlapping_points_num * voxel_size_cm * voxel_size_cm
if require_new_added_pts_num:
return overlap_area > overlap_area_threshold, len(new_point_cloud)-np.sum(distances < voxel_size*1.2)
return overlap_area > overlap_area_threshold
@staticmethod
def get_new_added_points(old_combined_pts, new_pts, threshold=0.005):
if old_combined_pts.size == 0:
return new_pts
if new_pts.size == 0:
return np.array([])
tree = cKDTree(old_combined_pts)
distances, _ = tree.query(new_pts, k=1)
new_added_points = new_pts[distances > threshold]
return new_added_points
@staticmethod
def compute_next_best_view_sequence(target_point_cloud, point_cloud_list, scan_points_indices_list, threshold=0.01, overlap_area_threshold=25, init_view = 0, scan_points_threshold=5, status_info=None):
selected_views = [init_view]
combined_point_cloud = point_cloud_list[init_view]
history_indices = [scan_points_indices_list[init_view]]
max_rec_pts = np.vstack(point_cloud_list)
downsampled_max_rec_pts = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold)
combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud, threshold)
max_rec_pts_num = downsampled_max_rec_pts.shape[0]
max_real_rec_pts_coverage, _ = ReconstructionUtil.compute_coverage_rate(target_point_cloud, downsampled_max_rec_pts, threshold)
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, combined_point_cloud, threshold)
current_coverage = new_coverage
current_covered_num = new_covered_num
remaining_views = list(range(len(point_cloud_list)))
view_sequence = [(init_view, current_coverage)]
cnt_processed_view = 0
remaining_views.remove(init_view)
curr_rec_pts_num = combined_point_cloud.shape[0]
        # randomly skip a fraction of candidate views each round to speed up the greedy search
        drop_output_ratio = 0.4
while remaining_views:
best_view = None
best_coverage_increase = -1
best_combined_point_cloud = None
best_covered_num = 0
for view_index in remaining_views:
if np.random.rand() < drop_output_ratio:
continue
if point_cloud_list[view_index].shape[0] == 0:
continue
if selected_views:
new_scan_points_indices = scan_points_indices_list[view_index]
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
if not ReconstructionUtil.check_overlap(point_cloud_list[view_index], combined_point_cloud, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=threshold):
continue
new_combined_point_cloud = np.vstack([combined_point_cloud, point_cloud_list[view_index]])
new_downsampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold)
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, threshold)
coverage_increase = new_coverage - current_coverage
if coverage_increase > best_coverage_increase:
best_coverage_increase = coverage_increase
best_view = view_index
best_covered_num = new_covered_num
best_combined_point_cloud = new_downsampled_combined_point_cloud
if best_view is not None:
if best_coverage_increase <=1e-3 or best_covered_num - current_covered_num <= 5:
break
selected_views.append(best_view)
best_rec_pts_num = best_combined_point_cloud.shape[0]
print(f"Current rec pts num: {curr_rec_pts_num}, Best rec pts num: {best_rec_pts_num}, Best cover pts: {best_covered_num}, Max rec pts num: {max_rec_pts_num}")
print(f"Current coverage: {current_coverage+best_coverage_increase}, Best coverage increase: {best_coverage_increase}, Max Real coverage: {max_real_rec_pts_coverage}")
current_covered_num = best_covered_num
curr_rec_pts_num = best_rec_pts_num
combined_point_cloud = best_combined_point_cloud
remaining_views.remove(best_view)
history_indices.append(scan_points_indices_list[best_view])
current_coverage += best_coverage_increase
cnt_processed_view += 1
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_status(app_name, runner_name, "current coverage", current_coverage)
sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list))
view_sequence.append((best_view, current_coverage))
else:
break
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list))
return view_sequence, remaining_views, combined_point_cloud
@staticmethod
def compute_next_best_view_sequence_with_normal(target_point_cloud, target_normal, point_cloud_list, normal_list, scan_points_indices_list, threshold=0.01, overlap_area_threshold=25, init_view = 0, scan_points_threshold=5, status_info=None):
selected_views = [init_view]
combined_point_cloud = point_cloud_list[init_view]
combined_normal = normal_list[init_view]
history_indices = [scan_points_indices_list[init_view]]
max_rec_pts = np.vstack(point_cloud_list)
max_rec_nrm = np.vstack(normal_list)
downsampled_max_rec_pts, idx = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold, require_idx=True)
downsampled_max_rec_nrm = max_rec_nrm[idx]
max_rec_pts_num = downsampled_max_rec_pts.shape[0]
        max_real_rec_pts_coverage, _ = ReconstructionUtil.compute_coverage_rate_with_normal(target_point_cloud, downsampled_max_rec_pts, target_normal, downsampled_max_rec_nrm, threshold)
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate_with_normal(downsampled_max_rec_pts, combined_point_cloud, downsampled_max_rec_nrm, combined_normal, threshold)
current_coverage = new_coverage
current_covered_num = new_covered_num
remaining_views = list(range(len(point_cloud_list)))
view_sequence = [(init_view, current_coverage)]
cnt_processed_view = 0
remaining_views.remove(init_view)
curr_rec_pts_num = combined_point_cloud.shape[0]
while remaining_views:
best_view = None
best_coverage_increase = -1
best_combined_point_cloud = None
best_combined_normal = None
best_covered_num = 0
for view_index in remaining_views:
if point_cloud_list[view_index].shape[0] == 0:
continue
if selected_views:
new_scan_points_indices = scan_points_indices_list[view_index]
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
if not ReconstructionUtil.check_overlap(point_cloud_list[view_index], combined_point_cloud, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=threshold):
continue
new_combined_point_cloud = np.vstack([combined_point_cloud, point_cloud_list[view_index]])
new_combined_normal = np.vstack([combined_normal, normal_list[view_index]])
new_downsampled_combined_point_cloud, idx = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold, require_idx=True)
new_downsampled_combined_normal = new_combined_normal[idx]
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate_with_normal(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, downsampled_max_rec_nrm, new_downsampled_combined_normal, threshold)
coverage_increase = new_coverage - current_coverage
if coverage_increase > best_coverage_increase:
best_coverage_increase = coverage_increase
best_view = view_index
best_covered_num = new_covered_num
best_combined_point_cloud = new_downsampled_combined_point_cloud
best_combined_normal = new_downsampled_combined_normal
if best_view is not None:
if best_coverage_increase <=1e-3 or best_covered_num - current_covered_num <= 5:
break
selected_views.append(best_view)
best_rec_pts_num = best_combined_point_cloud.shape[0]
print(f"Current rec pts num: {curr_rec_pts_num}, Best rec pts num: {best_rec_pts_num}, Best cover pts: {best_covered_num}, Max rec pts num: {max_rec_pts_num}")
print(f"Current coverage: {current_coverage}, Best coverage increase: {best_coverage_increase}, Max Real coverage: {max_real_rec_pts_coverage}")
current_covered_num = best_covered_num
curr_rec_pts_num = best_rec_pts_num
combined_point_cloud = best_combined_point_cloud
combined_normal = best_combined_normal
remaining_views.remove(best_view)
history_indices.append(scan_points_indices_list[best_view])
current_coverage += best_coverage_increase
cnt_processed_view += 1
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_status(app_name, runner_name, "current coverage", current_coverage)
sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list))
view_sequence.append((best_view, current_coverage))
else:
break
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list))
return view_sequence, remaining_views, combined_point_cloud
@staticmethod
def generate_scan_points(display_table_top, display_table_radius, min_distance=0.03, max_points_num = 500, max_attempts = 1000):
points = []
attempts = 0
while len(points) < max_points_num and attempts < max_attempts:
angle = np.random.uniform(0, 2 * np.pi)
r = np.random.uniform(0, display_table_radius)
x = r * np.cos(angle)
y = r * np.sin(angle)
z = display_table_top
new_point = (x, y, z)
if all(np.linalg.norm(np.array(new_point) - np.array(existing_point)) >= min_distance for existing_point in points):
points.append(new_point)
attempts += 1
return points
@staticmethod
def check_scan_points_overlap(history_indices, indices2, threshold=5):
for indices1 in history_indices:
if len(set(indices1).intersection(set(indices2))) >= threshold:
return True
return False
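Note: a toy run of the greedy next-best-view selection above, with two synthetic overlapping views and hypothetical scan-point index sets. Because candidate views are randomly subsampled each round (drop_output_ratio), the returned sequence is stochastic:

import numpy as np
from utils.reconstruction import ReconstructionUtil

target = np.random.rand(1000, 3) * 0.2   # toy target cloud in a 20 cm cube
views = [target[:600], target[400:]]     # two views with a 200-point overlap
scan_indices = [[0], [1]]                # hypothetical scan-point index sets

seq, remaining, combined = ReconstructionUtil.compute_next_best_view_sequence(
    target, views, scan_indices, threshold=0.01, overlap_area_threshold=25
)
print(seq)  # [(view_index, coverage_after_adding_it), ...]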

utils/render.py (new file, 136 lines)

@@ -0,0 +1,136 @@
import os
import json
import time
import subprocess
import tempfile
import shutil
import numpy as np
from utils.data_load import DataLoadUtil
from utils.reconstruction import ReconstructionUtil
from utils.pts import PtsUtil
class RenderUtil:
target_mask_label = (0, 255, 0)
display_table_mask_label = (0, 0, 255)
random_downsample_N = 32768
min_z = 0.2
max_z = 0.5
@staticmethod
def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[mask].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
return np.zeros((0, 3)), np.zeros((0, 3))
sampled_normal_camera = normal_camera[idx]
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world, sampled_normal_camera
@staticmethod
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
sampled_target_points = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N
)
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world
@staticmethod
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
points_image_homogeneous /= points_image_homogeneous[:, 2:]
pixel_x = points_image_homogeneous[:, 0].astype(int)
pixel_y = points_image_homogeneous[:, 1].astype(int)
h, w = mask.shape[:2]
valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
return selected_points_indices
@staticmethod
def render_pts(cam_pose, scene_path, script_path, scan_points, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
with tempfile.TemporaryDirectory() as temp_dir:
params = {
"cam_pose": nO_to_world_pose.tolist(),
"scene_path": scene_path
}
scene_info_path = os.path.join(scene_path, "scene_info.json")
shutil.copy(scene_info_path, os.path.join(temp_dir, "scene_info.json"))
params_data_path = os.path.join(temp_dir, "params.json")
with open(params_data_path, 'w') as f:
json.dump(params, f)
result = subprocess.run([
'/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True)
            if result.returncode != 0:
                print("blender rendering failed:", result.stderr)
path = os.path.join(temp_dir, "tmp")
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
            # target points
            target_mask_img_L = (mask_L == RenderUtil.target_mask_label).all(axis=-1)
            target_mask_img_R = (mask_R == RenderUtil.target_mask_label).all(axis=-1)
sampled_target_points_L, sampled_target_normal_L = RenderUtil.get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], RenderUtil.random_downsample_N)
sampled_target_points_R = RenderUtil.get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], RenderUtil.random_downsample_N )
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
            if has_points:
                target_points, overlap_idx = PtsUtil.get_overlapping_points(
                    sampled_target_points_L, sampled_target_points_R, voxel_threshold, require_idx=True
                )
                sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
                has_points = target_points.shape[0] > 0
            if has_points:
                target_points, target_normals = PtsUtil.filter_points(
                    target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit=filter_degree, z_range=(RenderUtil.min_z, RenderUtil.max_z)
                )
            scan_points_indices_L = RenderUtil.get_scan_points_indices(scan_points, mask_L, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
            scan_points_indices_R = RenderUtil.get_scan_points_indices(scan_points, mask_R, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
if not has_points:
target_points = np.zeros((0, 3))
target_normals = np.zeros((0, 3))
return target_points, target_normals, scan_points_indices
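Note: render_pts shells out to the hard-coded Blender binary above and expects a scene directory plus a rendering script; the sketch below shows the intended call shape, with hypothetical paths and poses, and is not a runnable configuration:

import numpy as np
from utils.render import RenderUtil
from utils.reconstruction import ReconstructionUtil

scene_path = "/data/nbv_dataset/scene_0001"   # hypothetical scene directory
script_path = "blender/data_renderer.py"      # hypothetical Blender rendering script
scan_points = np.array(ReconstructionUtil.generate_scan_points(0.0, 0.25))

cam_pose = np.eye(4)  # hypothetical left-camera pose in world space
target_points, target_normals, scan_points_indices = RenderUtil.render_pts(
    cam_pose, scene_path, script_path, scan_points, nO_to_nL_pose=np.eye(4)
)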

utils/vis.py (new file, 208 lines)

@@ -0,0 +1,208 @@
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import trimesh
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.data_load import DataLoadUtil
from utils.pts import PtsUtil
from utils.pose import PoseUtil
class visualizeUtil:
@staticmethod
def save_all_cam_pos_and_cam_axis(root, scene, output_dir):
length = DataLoadUtil.get_scene_seq_length(root, scene)
all_cam_pos = []
all_cam_axis = []
for i in range(length):
path = DataLoadUtil.get_path(root, scene, i)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
cam_pose = cam_info["cam_to_world"]
cam_pos = cam_pose[:3, 3]
cam_axis = cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
all_cam_pos.append(cam_pos)
all_cam_axis.append(sample_points)
all_cam_pos = np.array(all_cam_pos)
all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "all_cam_pos.txt"), all_cam_pos)
np.savetxt(os.path.join(output_dir, "all_cam_axis.txt"), all_cam_axis)
@staticmethod
def get_cam_pose_and_cam_axis(cam_pose, is_6d_pose):
if is_6d_pose:
matrix_cam_pose = np.eye(4)
matrix_cam_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(cam_pose[:6])
matrix_cam_pose[:3, 3] = cam_pose[6:]
else:
matrix_cam_pose = cam_pose
cam_pos = matrix_cam_pose[:3, 3]
cam_axis = matrix_cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
return cam_pos, sample_points
@staticmethod
def save_all_combined_pts(root, scene, output_dir):
length = DataLoadUtil.get_scene_seq_length(root, scene)
all_combined_pts = []
for i in range(length):
path = DataLoadUtil.get_path(root, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
np.savetxt(os.path.join(output_dir, "all_combined_pts.txt"), downsampled_all_pts)
@staticmethod
def save_seq_cam_pos_and_cam_axis(root, scene, frame_idx_list, output_dir):
all_cam_pos = []
all_cam_axis = []
for i in frame_idx_list:
path = DataLoadUtil.get_path(root, scene, i)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
cam_pose = cam_info["cam_to_world"]
cam_pos = cam_pose[:3, 3]
cam_axis = cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
all_cam_pos.append(cam_pos)
all_cam_axis.append(sample_points)
all_cam_pos = np.array(all_cam_pos)
all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "seq_cam_pos.txt"), all_cam_pos)
np.savetxt(os.path.join(output_dir, "seq_cam_axis.txt"), all_cam_axis)
@staticmethod
def save_seq_combined_pts(root, scene, frame_idx_list, output_dir):
all_combined_pts = []
for i in frame_idx_list:
path = DataLoadUtil.get_path(root, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
np.savetxt(os.path.join(output_dir, "seq_combined_pts.txt"), downsampled_all_pts)
@staticmethod
def save_target_mesh_at_world_space(
root, model_dir, scene_name, display_table_as_world_space_origin=True
):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
if display_table_as_world_space_origin:
location = transformation["location"] - DataLoadUtil.get_display_table_top(
root, scene_name
)
else:
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
mesh_dir = os.path.join(root, scene_name, "mesh")
if not os.path.exists(mesh_dir):
os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path)
@staticmethod
def save_points_and_normals(root, scene, frame_idx, output_dir, binocular=False):
target_mask_label = (0, 255, 0, 255)
path = DataLoadUtil.get_path(root, scene, frame_idx)
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular, display_table_as_world_space_origin=False)
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=binocular,
)
if isinstance(depth, tuple):
depth = depth[0]
mask = DataLoadUtil.load_seg(path, binocular=binocular, left_only=True)
normal = DataLoadUtil.load_normal(path, binocular=binocular, left_only=True)
''' target points '''
if mask is None:
target_mask_img = np.ones_like(depth, dtype=bool)
else:
target_mask_img = (mask == target_mask_label).all(axis=-1)
cam_intrinsic = cam_info["cam_intrinsic"]
z = depth[target_mask_img]
i, j = np.nonzero(target_mask_img)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
random_downsample_N = 1000
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[target_mask_img].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
        if len(sampled_target_points) == 0:
            print("No target points")
            return
        sampled_normal_camera = normal_camera[idx]
        sampled_visualized_normal = []
        # flip Y and Z back to the world-space convention (cf. cam_pose_transformation)
        sampled_normal_camera[:, 2] = -sampled_normal_camera[:, 2]
        sampled_normal_camera[:, 1] = -sampled_normal_camera[:, 1]
num_samples = 10
for i in range(len(sampled_target_points)):
sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
sampled_visualized_normal = np.array(sampled_visualized_normal).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "target_pts.txt"), sampled_target_points)
np.savetxt(os.path.join(output_dir, "target_normal.txt"), sampled_visualized_normal)
@staticmethod
def save_pts_nrm(root, scene, frame_idx, output_dir, binocular=False):
path = DataLoadUtil.get_path(root, scene, frame_idx)
pts_world = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
nrm_camera = DataLoadUtil.load_from_preprocessed_nrm(path, "npy")
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
cam_to_world = cam_info["cam_to_world"]
nrm_world = nrm_camera @ cam_to_world[:3, :3].T
visualized_nrm = []
num_samples = 10
for i in range(len(pts_world)):
for t in range(num_samples):
visualized_nrm.append(pts_world[i] - 0.02 * t * nrm_world[i])
visualized_nrm = np.array(visualized_nrm)
np.savetxt(os.path.join(output_dir, "nrm.txt"), visualized_nrm)
np.savetxt(os.path.join(output_dir, "pts.txt"), pts_world)
# ------ Debug ------
if __name__ == "__main__":
root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
scene = "box"
output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test"
#visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
# visualizeUtil.save_all_combined_pts(root, scene, output_dir)
# visualizeUtil.save_seq_combined_pts(root, scene, [0, 121, 286, 175, 111,366,45,230,232,225,255,17,199,78,60], output_dir)
# visualizeUtil.save_seq_cam_pos_and_cam_axis(root, scene, [0, 121, 286, 175, 111,366,45,230,232,225,255,17,199,78,60], output_dir)
# visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
#visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)
visualizeUtil.save_pts_nrm(root, scene, "116", output_dir, binocular=True)