commit 0ea3f048dc
2024-10-09 16:13:22 +00:00
437 changed files with 44406 additions and 0 deletions

0
utils/__init__.py Executable file

19
utils/cache_util.py Executable file

@@ -0,0 +1,19 @@
from collections import OrderedDict
class LRUCache:
def __init__(self, capacity: int):
self.cache = OrderedDict()
self.capacity = capacity
def get(self, key):
if key not in self.cache:
return None
self.cache.move_to_end(key)
return self.cache[key]
def put(self, key, value):
if key in self.cache:
self.cache.move_to_end(key)
elif len(self.cache) >= self.capacity:
self.cache.popitem(last=False)
self.cache[key] = value
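A minimal usage sketch for the cache above (keys and values are illustrative, not part of the commit):

# Usage sketch: exercise the LRU eviction order.
cache = LRUCache(capacity=2)
cache.put("a", 1)
cache.put("b", 2)
cache.get("a")       # touching "a" makes "b" the least recently used
cache.put("c", 3)    # capacity exceeded: "b" is evicted
assert cache.get("b") is None
assert cache.get("a") == 1 and cache.get("c") == 3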

83
utils/file_util.py Executable file

@@ -0,0 +1,83 @@
import os
import pickle
import json
import numpy as np
class FileUtil:
@staticmethod
def get_path(file_name, target_dir=None):
if target_dir is None:
file_path = file_name
else:
file_path = os.path.join(target_dir, file_name)
return file_path
@staticmethod
def load_pickle(file_name, target_dir=None):
file_path = FileUtil.get_path(file_name, target_dir)
with open(file_path, "rb") as f:
return pickle.load(f)
@staticmethod
def save_pickle(data, file_name, target_dir=None):
file_path = FileUtil.get_path(file_name, target_dir)
with open(file_path, "wb") as f:
pickle.dump(data, f)
return True
@staticmethod
def load_json(file_name, target_dir=None):
file_path = FileUtil.get_path(file_name, target_dir)
with open(file_path, "r") as f:
return json.load(f)
@staticmethod
def save_json(data, file_name, target_dir=None):
file_path = FileUtil.get_path(file_name, target_dir)
with open(file_path, "w") as f:
json.dump(data, f)
return True
@staticmethod
def save_np_txt(np_data, file_name, target_dir=None):
if len(np_data.shape) > 2:
raise ValueError("Only 2D arrays are supported.")
file_path = FileUtil.get_path(file_name, target_dir)
np.savetxt(file_path, np_data)
@staticmethod
def load_np_txt(file_name, target_dir=None, shuffle=False):
file_path = FileUtil.get_path(file_name, target_dir)
np_data = np.loadtxt(file_path)
if shuffle:
indices = np.arange(np_data.shape[0])
np.random.shuffle(indices)
np_data_shuffled = np_data[indices]
return np_data_shuffled
else:
return np_data
@staticmethod
def find_object_models(path):
obj_files = {}
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".obj"):
full_path = os.path.join(root, file)
                    # rstrip(".obj") strips characters, not the suffix; trim it explicitly
                    modified_name = full_path.replace(path, "").replace(os.sep, "_")
                    if modified_name.endswith(".obj"):
                        modified_name = modified_name[: -len(".obj")]
                    if modified_name.startswith("_"):
                        modified_name = modified_name[1:]
obj_files[modified_name] = full_path
return obj_files
''' ------------ Debug ------------ '''
if __name__ == "__main__":
arr2d = np.random.random((4, 3))
print(arr2d)
np.savetxt("test.txt", arr2d)
loaded_arr2d = FileUtil.load_np_txt("test.txt")
print()
print(loaded_arr2d)
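A round-trip sketch for the JSON helpers above; the file name and payload are hypothetical:

# Sketch: save_json followed by load_json returns the same payload.
payload = {"epoch": 3, "lr": 1e-4}
FileUtil.save_json(payload, "run_meta.json", target_dir=".")
assert FileUtil.load_json("run_meta.json", target_dir=".") == payload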

124
utils/metric_util.py Executable file

@@ -0,0 +1,124 @@
import numpy as np
class MetricUtil:
@staticmethod
def rotate_around(axis, angle_deg):
angle = angle_deg * np.pi / 180
if axis == "x":
return np.array([[1, 0, 0],
[0, np.cos(angle), -np.sin(angle)],
[0, np.sin(angle), np.cos(angle)]])
elif axis == "y":
return np.array([[np.cos(angle), 0, np.sin(angle)],
[0, 1, 0],
[-np.sin(angle), 0, np.cos(angle)]])
elif axis == "z":
return np.array([[np.cos(angle), -np.sin(angle), 0],
[np.sin(angle), np.cos(angle), 0],
[0, 0, 1]])
else:
raise ValueError("Invalid axis")
@staticmethod
def basic_rot_diff(r0, r1):
mat_diff = np.matmul(r0, r1.swapaxes(-1, -2))
        diff = np.trace(mat_diff, axis1=-2, axis2=-1) - 1  # trace over the matrix axes so batched inputs also work
return np.arccos(np.clip(diff / 2.0, a_min=-1.0, a_max=1.0))
@staticmethod
def axis_rot_diff(r0, r1, axis):
axis1, axis2 = r0[..., axis], r1[..., axis]
diff = np.sum(axis1 * axis2, axis=-1)
return np.arccos(np.clip(diff, a_min=-1.0, a_max=1.0))
@staticmethod
def turn_rot_diff(r0, r1, axis, turn_degrees):
diffs = []
for i in turn_degrees:
rotation_matrix = MetricUtil.rotate_around(axis, i)
diffs.append(MetricUtil.basic_rot_diff(np.matmul(r0, rotation_matrix), r1))
return np.min(diffs, axis=0)
@staticmethod
def rot_diff_rad(r0, r1, sym):
axis_map = {0: "x", 1: "y", 2: "z"}
if sym is None or sym == 0: # no symmetry
return MetricUtil.basic_rot_diff(r0, r1)
elif sym in [1, 2, 3]: # free rotation around axis
return MetricUtil.axis_rot_diff(r0, r1, sym - 1)
        else: # discrete symmetry: finite turns around an axis
            if sym in [4, 5, 6]: # half turn
                axis_idx = sym - 4
                turns = 2
            elif sym in [7, 8, 9]: # quarter turn
                axis_idx = sym - 7
                turns = 4
            else:
                raise ValueError(f"Invalid symmetry label: {sym}")
            turn_degrees = np.arange(0, 360, 360 / turns)
            return MetricUtil.turn_rot_diff(r0, r1, axis_map[axis_idx], turn_degrees)
@staticmethod
def collect_metric(pred_pose_mat, gt_pose_mat, sym):
pred_rot_mat = pred_pose_mat[:, :3, :3]
gt_rot_mat = gt_pose_mat[:, :3, :3]
pred_trans = pred_pose_mat[:, :3, 3]
gt_trans = gt_pose_mat[:, :3, 3]
trans_error = []
rot_error = []
for i in range(pred_rot_mat.shape[0]):
tdiff = np.linalg.norm(pred_trans[i] - gt_trans[i], ord=2) * 100
rdiff = MetricUtil.rot_diff_rad(pred_rot_mat[i], gt_rot_mat[i], sym[i]) / np.pi * 180.0
trans_error.append(tdiff)
rot_error.append(rdiff)
rot_error = {
'mean': np.mean(rot_error),
'median': np.median(rot_error),
'item': rot_error,
}
trans_error = {
'mean': np.mean(trans_error),
'median': np.median(trans_error),
'item': trans_error,
}
error = {'rot_error': rot_error,
'trans_error': trans_error}
return error
# -------------- Debug ---------------
def test_MetricUtil():
    identity = np.eye(3)
    print("test case 0: no rotation")
    print(MetricUtil.rot_diff_rad(identity, identity, 0) * 180 / np.pi)
    print("test case 1: 29 degree rotation around x-axis")
    rotation_matrix = MetricUtil.rotate_around("x", 29)
    for sym in (0, 1, 8):
        print(MetricUtil.rot_diff_rad(identity, rotation_matrix, sym) * 180 / np.pi)
    print("test case 2: 90 degree rotation around y-axis")
    rotation_matrix = MetricUtil.rotate_around("y", 90)
    for sym in (0, 2, 8):
        print(MetricUtil.rot_diff_rad(identity, rotation_matrix, sym) * 180 / np.pi)
    print("test case 3: 60 degree rotation around y-axis")
    rotation_matrix = MetricUtil.rotate_around("y", 60)
    for sym in (0, 2, 8):
        print(MetricUtil.rot_diff_rad(identity, rotation_matrix, sym) * 180 / np.pi)
    print("test case 4: 78 degree rotation around z-axis and 60 degree rotation around x-axis")
    rotation_matrix = MetricUtil.rotate_around("z", 78) @ MetricUtil.rotate_around("x", 60)
    for sym in (0, 2, 8):
        print(MetricUtil.rot_diff_rad(identity, rotation_matrix, sym) * 180 / np.pi)
if __name__ == "__main__":
    test_MetricUtil()
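A small sketch of collect_metric on a trivial batch (shapes assumed from the code: N×4×4 pose matrices plus a per-item symmetry label); identical predictions and ground truth should give zero error:

# Sketch: identical pred/gt poses yield ~0 rotation and translation error.
poses = np.tile(np.eye(4), (2, 1, 1))
err = MetricUtil.collect_metric(poses, poses, sym=[0, 0])
print(err["rot_error"]["mean"], err["trans_error"]["mean"])  # ~0.0 0.0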

439
utils/omni_util.py Executable file

@@ -0,0 +1,439 @@
import numpy as np
import pickle
import json
import cv2
import os
import re
from scipy.spatial.transform import Rotation as R
class DepthToPCL:
def __new__(cls, *args, **kwargs):
raise RuntimeError(
"Use init_from_disk or init_from_memory to create an instance"
)
@classmethod
def _initialize(
cls,
distance_to_camera_path=None,
rgb_path=None,
camera_params_path=None,
seg_path=None,
seg_label_path=None,
depth=None,
rgb=None,
seg=None,
seg_label=None,
camera_params=None,
):
instance = super().__new__(cls)
instance._distance_to_camera_path = distance_to_camera_path
instance._rgb_path = rgb_path
instance._camera_params_path = camera_params_path
instance._seg_path = seg_path
instance._seg_label_path = seg_label_path
instance._depth = depth
instance._rgb = rgb
instance._seg = seg
instance._seg_label = seg_label
instance._camera_params = camera_params
if any(
path is not None
for path in [
distance_to_camera_path,
rgb_path,
camera_params_path,
seg_path,
seg_label_path,
]
):
instance._load_from_disk()
instance._setup()
return instance
@classmethod
def init_from_disk(
cls,
distance_to_camera_path,
rgb_path,
camera_params_path,
seg_path,
seg_label_path,
):
return cls._initialize(
distance_to_camera_path=distance_to_camera_path,
rgb_path=rgb_path,
camera_params_path=camera_params_path,
seg_path=seg_path,
seg_label_path=seg_label_path,
)
@classmethod
def init_from_memory(cls, depth, rgb, seg, seg_label, camera_params):
return cls._initialize(
depth=depth,
rgb=rgb,
seg=seg,
seg_label=seg_label,
camera_params=camera_params,
)
def _load_from_disk(self):
self._depth = np.load(self._distance_to_camera_path)
self._seg = cv2.imread(self._seg_path, cv2.IMREAD_UNCHANGED)
with open(self._seg_label_path, "r") as f:
self._seg_label = json.load(f)
with open(self._camera_params_path) as f:
self._camera_params = json.load(f)
def _setup(self):
self._read_camera_params()
self._get_intrinsic_matrix()
def _read_camera_params(self):
self._h_aperture = self._camera_params["cameraAperture"][0]
self._v_aperture = self._camera_params["cameraAperture"][1]
self._h_aperture_offset = self._camera_params["cameraApertureOffset"][0]
self._v_aperture_offset = self._camera_params["cameraApertureOffset"][1]
self._focal_length = self._camera_params["cameraFocalLength"]
self._h_resolution = self._camera_params["renderProductResolution"][0]
self._v_resolution = self._camera_params["renderProductResolution"][1]
self._cam_t = self._camera_params["cameraViewTransform"]
def _get_intrinsic_matrix(self):
self._focal_x = self._h_resolution * self._focal_length / self._h_aperture
self._focal_y = self._v_resolution * self._focal_length / self._v_aperture
self._center_x = self._h_resolution / 2
self._center_y = self._v_resolution / 2
self.intrinsic_matrix = np.array(
[
[self._focal_x, 0, self._center_x],
[0, self._focal_y, self._center_y],
[0, 0, 1],
]
)
return self.intrinsic_matrix
def _get_extrinsic_matrix(self):
        # the third row of the flip matrix was missing its fourth element
        self._cam_pose = np.linalg.inv(np.resize(self._cam_t, (4, 4))).T.dot(
            np.mat([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
        )
return self._cam_pose
def get_pcd(self, target_name=None):
u_indices, v_indices = np.meshgrid(
np.arange(self._h_resolution), np.arange(self._v_resolution)
)
x_factors = (u_indices - self._center_x) / self._focal_x
y_factors = (v_indices - self._center_y) / self._focal_y
if target_name is not None:
if target_name == OmniUtil.FOREGROUND:
unlabelled_mask = self.get_mask_rgba(
self._seg_label, OmniUtil.UNLABELLED
)
background_mask = self.get_mask_rgba(
self._seg_label, OmniUtil.BACKGROUND
)
if unlabelled_mask is None:
target_mask = (self._seg != background_mask).any(axis=2)
else:
target_mask = (self._seg != unlabelled_mask).any(axis=2) & (
self._seg != background_mask
).any(axis=2)
else:
target_mask = (
self._seg == self.get_mask_rgba(self._seg_label, target_name)
).all(axis=2)
else:
target_mask = np.ones((self._v_resolution, self._h_resolution), dtype=bool)
        valid_x_factors = x_factors[target_mask]
        valid_y_factors = y_factors[target_mask]
        valid_z = self._depth[target_mask]
        # back-project through the depth: x = (u - cx) / fx * z, y = (v - cy) / fy * z
        points = np.stack([valid_x_factors * valid_z, valid_y_factors * valid_z, valid_z], axis=1)
        return points
@staticmethod
def get_mask_rgba(mask_labels, mask_name):
name_list = [name_dict["class"] for name_dict in list(mask_labels.values())]
if mask_name not in name_list:
return None
rgba_list = list(mask_labels.keys())
mask_rgba_str = rgba_list[name_list.index(mask_name)]
        r, g, b, a = re.findall(r"\d+", mask_rgba_str)
        r, g, b, a = int(b), int(g), int(r), int(a)  # swap R/B: cv2 loads the mask as BGRA
return r, g, b, a
def get_segmented_pcd(self, target_list, N=15000):
u_indices, v_indices = np.meshgrid(
np.arange(self._h_resolution), np.arange(self._v_resolution)
)
x_factors = (u_indices - self._center_x) / self._focal_x
y_factors = (v_indices - self._center_y) / self._focal_y
points_dict = {}
total_points_with_label = []
for target_idx in range(len(target_list)):
target_name = target_list[target_idx]
target_mask = (
self._seg == self.get_mask_rgba(self._seg_label, target_name)
).all(axis=2)
            valid_x_factors = x_factors[target_mask]
            valid_y_factors = y_factors[target_mask]
            valid_z = self._depth[target_mask]
            label = np.ones_like(valid_x_factors) * target_idx
            # back-project through the depth, carrying the segment label in column 3
            target_points_with_label = np.stack(
                [valid_x_factors * valid_z, valid_y_factors * valid_z, valid_z, label], axis=1
            )
total_points_with_label.append(target_points_with_label)
total_points_with_label = np.concatenate(total_points_with_label, axis=0)
total_points_with_label = self.sample_pcl(total_points_with_label, N)
total_points = total_points_with_label[:, :3]
for target_idx in range(len(target_list)):
target_name = target_list[target_idx]
pts_seg = total_points_with_label[:, 3] == target_idx
points_dict[target_name] = total_points_with_label[pts_seg, :3]
return total_points, points_dict
def get_rgb(self):
return self._rgb
@staticmethod
def sample_pcl(pcl, n_pts=1024):
indices = np.random.choice(pcl.shape[0], n_pts, replace=pcl.shape[0] < n_pts)
return pcl[indices, :]
class OmniUtil:
FOREGROUND = "FOREGROUND"
BACKGROUND = "BACKGROUND"
UNLABELLED = "UNLABELLED"
NON_OBJECT_LIST = ['chair_028', 'chair_029', 'chair_026', 'chair_027', 'table_025', 'table_027', 'table_026', 'table_028', 'sofa_014', 'sofa_013', 'picnic_basket_010', 'picnic_basket_011', 'cabinet_009', 'flower_pot_023', 'flower_pot_022', 'flower_pot_021', 'chair_017', 'chair_020', 'chair_012', 'chair_010', 'chair_018', 'chair_025', 'chair_024', 'chair_011', 'chair_001', 'chair_013', 'chair_004', 'chair_021', 'chair_023', 'chair_006', 'chair_014', 'chair_007', 'chair_003', 'chair_009', 'chair_022', 'chair_015', 'chair_016', 'chair_008', 'chair_005', 'chair_019', 'chair_002', 'table_004', 'table_023', 'table_014', 'table_024', 'table_019', 'table_022', 'table_007', 'table_017', 'table_013', 'table_002', 'table_016', 'table_009', 'table_008', 'table_003', 'table_015', 'table_001', 'table_018', 'table_005', 'table_020', 'table_021', 'sofa_001', 'sofa_005', 'sofa_012', 'sofa_009', 'sofa_006', 'sofa_008', 'sofa_011', 'sofa_004', 'sofa_003', 'sofa_002', 'sofa_007', 'sofa_010', 'picnic_basket_005', 'picnic_basket_004', 'picnic_basket_001', 'picnic_basket_008', 'picnic_basket_002', 'picnic_basket_009', 'picnic_basket_006', 'picnic_basket_003', 'picnic_basket_007', 'cabinet_006', 'cabinet_008', 'cabinet_002', 'cabinet_001', 'cabinet_005', 'cabinet_007', 'flower_pot_013', 'flower_pot_005', 'flower_pot_008', 'flower_pot_001', 'flower_pot_003', 'flower_pot_020', 'flower_pot_006', 'flower_pot_012', 'flower_pot_018', 'flower_pot_007', 'flower_pot_002', 'flower_pot_011', 'flower_pot_010', 'flower_pot_016', 'flower_pot_004', 'flower_pot_014', 'flower_pot_017', 'flower_pot_019']
CAMERA_PARAMS_TEMPLATE = "camera_params_{}.json"
DISTANCE_TEMPLATE = "distance_to_image_plane_{}.npy"
RGB_TEMPLATE = "rgb_{}.png"
MASK_TEMPLATE = "semantic_segmentation_{}.png"
MASK_LABELS_TEMPLATE = "semantic_segmentation_labels_{}.json"
SCORE_LABEL_TEMPLATE = "label_{}.json"
RGB_FEAT_TEMPLATE = "rgb_feat_{}.npy"
@staticmethod
def get_depth_to_pointcloud_instance(path):
root, idx = path[:-4], path[-4:]
distance2plane_path = os.path.join(root, OmniUtil.DISTANCE_TEMPLATE.format(idx))
rgb_path = os.path.join(root, OmniUtil.RGB_TEMPLATE.format(idx))
cam_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
seg_path = os.path.join(root, OmniUtil.MASK_TEMPLATE.format(idx))
seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
depth_to_pcd = DepthToPCL.init_from_disk(
distance2plane_path, rgb_path, cam_params_path, seg_path, seg_labels_path
)
return depth_to_pcd
@staticmethod
def get_points(path, object_name=None):
depth_to_pcd = OmniUtil.get_depth_to_pointcloud_instance(path)
pcd = depth_to_pcd.get_pcd(object_name)
points = np.asarray(pcd, dtype=np.float32)
return points
@staticmethod
def get_segmented_points(path, target_list):
depth_to_pcd = OmniUtil.get_depth_to_pointcloud_instance(path)
total_points, target_points_dict = depth_to_pcd.get_segmented_pcd(target_list)
return total_points, target_points_dict
@staticmethod
def get_object_list(path, contains_non_obj=False):
root, idx = path[:-4], path[-4:]
seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
with open(seg_labels_path, "r") as f:
seg_labels = json.load(f)
object_list = [v["class"] for v in seg_labels.values()]
        if OmniUtil.BACKGROUND in object_list:
            object_list.remove(OmniUtil.BACKGROUND)
        if OmniUtil.UNLABELLED in object_list:
            object_list.remove(OmniUtil.UNLABELLED)
        with open(os.path.join(root, "occluder.pickle"), "rb") as f:
            occluder_list = pickle.load(f)
        with open(os.path.join(root, "fall_objects.pickle"), "rb") as f:
            fall_objects_list = pickle.load(f)
non_obj_list = occluder_list + fall_objects_list
if not contains_non_obj:
for non_obj in non_obj_list:
if non_obj in object_list:
object_list.remove(non_obj)
return object_list
@staticmethod
def get_rotation_mat(path):
root, idx = os.path.split(path)
camera_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
with open(camera_params_path, "r") as f:
raw_camera_params = json.load(f)
cam_transform = np.asarray(raw_camera_params["cameraViewTransform"]).reshape(
(4, 4)
)
cam_rot_mat = cam_transform[:3, :3].dot(
np.mat([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
)
return cam_rot_mat
@staticmethod
def get_rgb(path):
root, idx = os.path.split(path)
rgb_path = os.path.join(root, OmniUtil.RGB_TEMPLATE.format(idx))
rgb = cv2.imread(rgb_path)
return cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
@staticmethod
def get_depth(path):
root, idx = os.path.split(path)
depth_path = os.path.join(root, OmniUtil.DISTANCE_TEMPLATE.format(idx))
depth = np.load(depth_path)
return depth
@staticmethod
def get_seg_data(path):
root, idx = os.path.split(path)
seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
with open(seg_labels_path, "r") as f:
seg_labels = json.load(f)
seg_path = os.path.join(root, OmniUtil.MASK_TEMPLATE.format(idx))
seg = cv2.imread(seg_path, cv2.IMREAD_UNCHANGED)
return seg, seg_labels
@staticmethod
def get_single_seg(path, object_name):
root, idx = os.path.split(path)
seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
with open(seg_labels_path, "r") as f:
seg_labels = json.load(f)
seg_path = os.path.join(root, OmniUtil.MASK_TEMPLATE.format(idx))
seg = cv2.imread(seg_path, cv2.IMREAD_UNCHANGED)
object_mask = (
seg == OmniUtil.get_mask_rgba(seg_labels, object_name)
).all(axis=2)
return object_mask
@staticmethod
def get_mask_rgba(mask_labels, mask_name):
name_list = [name_dict["class"] for name_dict in list(mask_labels.values())]
if mask_name not in name_list:
return None
rgba_list = list(mask_labels.keys())
mask_rgba_str = rgba_list[name_list.index(mask_name)]
        r, g, b, a = re.findall(r"\d+", mask_rgba_str)
        r, g, b, a = int(b), int(g), int(r), int(a)  # swap R/B: cv2 loads the mask as BGRA
return r, g, b, a
@staticmethod
def get_rgb_feat(path):
root, idx = os.path.split(path)
rgb_feat_path = os.path.join(root, OmniUtil.RGB_FEAT_TEMPLATE.format(idx))
rgb_feat = np.load(rgb_feat_path)
return rgb_feat
@staticmethod
def get_target_object_list(path):
return OmniUtil.get_object_list(path, contains_non_obj=False) # TODO: generalize this
@staticmethod
def get_transform_mat(path):
root, idx = os.path.split(path)
camera_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
with open(camera_params_path, "r") as f:
raw_camera_params = json.load(f)
cam_transform = np.asarray(raw_camera_params["cameraViewTransform"]).reshape(
(4, 4)
)
real_cam_transform = np.linalg.inv(cam_transform).T
real_cam_transform = real_cam_transform.dot(
np.mat([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
)
return real_cam_transform
@staticmethod
def get_intrinsic_matrix(path):
root, idx = os.path.split(path)
camera_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
with open(camera_params_path, "r") as f:
raw_camera_params = json.load(f)
h_aperture = raw_camera_params["cameraAperture"][0]
v_aperture = raw_camera_params["cameraAperture"][1]
focal_length = raw_camera_params["cameraFocalLength"]
h_resolution = raw_camera_params["renderProductResolution"][0]
v_resolution = raw_camera_params["renderProductResolution"][1]
focal_x = h_resolution * focal_length / h_aperture
focal_y = v_resolution * focal_length / v_aperture
center_x = h_resolution / 2
center_y = v_resolution / 2
intrinsic_matrix = np.array(
[
[focal_x, 0, center_x],
[0, focal_y, center_y],
[0, 0, 1],
]
)
return intrinsic_matrix
@staticmethod
def get_extrinsic_matrix(path):
root, idx = os.path.split(path)
camera_params_path = os.path.join(
root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
)
with open(camera_params_path, "r") as f:
raw_camera_params = json.load(f)
cam_transform = np.asarray(raw_camera_params["cameraViewTransform"]).reshape(
(4, 4)
)
real_cam_transform = np.linalg.inv(cam_transform).T
real_cam_transform = real_cam_transform.dot(
np.mat([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
)
return real_cam_transform
@staticmethod
def get_scene_data(path):
root, _ = os.path.split(path)
scene_data_path = os.path.join(
root, "scene.pickle"
)
with open(scene_data_path, "rb") as f:
scene_data = pickle.load(f)
return scene_data
@staticmethod
def get_o2c_pose(path, object_name):
scene_data = OmniUtil.get_scene_data(path)
cam_pose = OmniUtil.get_extrinsic_matrix(path)
pos = scene_data[object_name]["position"]
quat = scene_data[object_name]["rotation"]
rot = R.from_quat(quat).as_matrix()
obj_pose = np.eye(4)
obj_pose[:3, :3] = rot
obj_pose[:3, 3] = pos
obj_cam_pose = np.linalg.inv(cam_pose) @ obj_pose
return np.asarray(obj_cam_pose)
if __name__ == "__main__":
test_path = r"/mnt/h/AI/Datasets/nbv1/sample_one/scene_0/0050"
obj_list = OmniUtil.get_object_list(test_path, contains_non_obj=True)
print(obj_list)
    total_points, _points_by_object = OmniUtil.get_segmented_points(test_path, target_list=obj_list)
    np.savetxt("pts1.txt", total_points)

78
utils/pcl_util.py Executable file

@@ -0,0 +1,78 @@
import numpy as np
import torch
from scipy.spatial.distance import cdist
class PclUtil:
CHAMFER = 1
@staticmethod
def transform(pts, pose=np.eye(4), scale=np.ones(3), inverse=False):
aug_scale = np.ones(4)
aug_scale[:3] = scale
aug_scale_mat = np.diag(aug_scale)
scale_pose = pose @ aug_scale_mat
aug_pts = np.hstack((pts, np.ones((pts.shape[0], 1))))
if inverse:
scale_pose = np.linalg.inv(scale_pose)
transformed_pts = scale_pose @ aug_pts.T
return transformed_pts.T[:, :3]
@staticmethod
def cam2canonical(cam_pts, cam2canonical_pose):
aug_pts = np.hstack((cam_pts, np.ones((cam_pts.shape[0], 1))))
transformed_pts = cam2canonical_pose @ aug_pts.T
return transformed_pts.T[:, :3]
@staticmethod
def transform_batch(pts, pose, scale, inverse=False):
batch_size = pts.shape[0]
aug_scale_mat = torch.eye(4).unsqueeze(0).repeat(batch_size, 1, 1)
for i in range(3):
aug_scale_mat[..., i, i] = scale[..., i]
scale_pose = pose @ aug_scale_mat
aug_pts = torch.cat((pts, torch.ones_like(pts[..., :1])), dim=-1)
if inverse:
scale_pose = torch.inverse(scale_pose)
        transformed_pts = scale_pose @ aug_pts.transpose(1, 2)
        return transformed_pts.transpose(1, 2)[..., :3]
@staticmethod
def transform_n_batch(pts, pose, scale=None, inverse=False):
transformed_pts_shape = (pts.shape[0], pose.shape[1], pts.shape[1], pts.shape[2])
transformed_pts = np.zeros(transformed_pts_shape)
batch_size = pose.shape[0]
n = pose.shape[1]
if scale is None:
scale = np.ones((batch_size, n, 3))
for batch_i in range(batch_size):
for i in range(n):
transformed_pts[batch_i, i, :, :] = PclUtil.transform(pts[batch_i], pose[batch_i, i],
scale[batch_i, i], inverse)
return transformed_pts
@staticmethod
def chamfer_distance(pts1, pts2):
dist_matrix1 = cdist(pts1, pts2, 'euclidean')
dist_matrix2 = cdist(pts2, pts1, 'euclidean')
chamfer_dist = np.mean(np.min(dist_matrix1, axis=1)) + np.mean(np.min(dist_matrix2, axis=1))
return chamfer_dist
@staticmethod
def distance(pts1, pts2, eval_type=1):
if eval_type == PclUtil.CHAMFER:
return PclUtil.chamfer_distance(pts1, pts2)
else:
            raise ValueError(f'Unknown evaluation type: {eval_type}')
@staticmethod
def sample_pcl(pcl, n_pts=1024):
indices = np.random.choice(pcl.shape[0], n_pts, replace=pcl.shape[0] < n_pts)
return pcl[indices, :]
if __name__ == '__main__':
batch_pts = np.random.random((2, 16, 3))
batch_n_pose = np.random.random((2, 3, 4, 4))
batch_n_scale = np.random.random((2, 3, 3))
    transformed = PclUtil.transform_n_batch(batch_pts, batch_n_pose, batch_n_scale)
    print(transformed.shape)  # (batch, n, num_pts, 3)
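A quick sanity sketch for the Chamfer helper above: the distance from a cloud to itself is zero, and the symmetric form is order-independent:

# Sketch: Chamfer distance sanity checks on random clouds.
pts_a = np.random.random((128, 3))
pts_b = np.random.random((64, 3))
assert PclUtil.chamfer_distance(pts_a, pts_a) == 0.0
assert np.isclose(PclUtil.distance(pts_a, pts_b, eval_type=PclUtil.CHAMFER),
                  PclUtil.distance(pts_b, pts_a, eval_type=PclUtil.CHAMFER))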

188
utils/pose_util.py Executable file

@@ -0,0 +1,188 @@
import numpy as np
import torch
import torch.nn.functional as F
class PoseUtil:
ROTATION = 1
TRANSLATION = 2
SCALE = 3
@staticmethod
def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False):
if isinstance(trans_m_min, list):
x_min, y_min, z_min = trans_m_min
x_max, y_max, z_max = trans_m_max
else:
x_min, y_min, z_min = trans_m_min, trans_m_min, trans_m_min
x_max, y_max, z_max = trans_m_max, trans_m_max, trans_m_max
x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max)
translation = np.array([x, y, z])
if trans_unit == "cm":
translation = translation / 100
if debug:
print("uniform translation:", translation)
return translation
@staticmethod
def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False):
axis = np.random.randn(3)
axis /= np.linalg.norm(axis)
theta = np.random.uniform(rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi)
        # Rodrigues' rotation formula: R = I + sin(theta) * K + (1 - cos(theta)) * K^2
        K = np.array([[0, -axis[2], axis[1]],
                      [axis[2], 0, -axis[0]],
                      [-axis[1], axis[0], 0]])
        R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K)
if debug:
print("uniform rotation:", theta * 180 / np.pi)
return R
@staticmethod
def get_uniform_pose(trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False):
translation = PoseUtil.get_uniform_translation(trans_min, trans_max, trans_unit, debug)
rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
pose = np.eye(4)
pose[:3, :3] = rotation
pose[:3, 3] = translation
return pose
@staticmethod
def get_n_uniform_pose(trans_min, trans_max, rot_min=0, rot_max=180, n=1,
trans_unit="cm", fix=None, contain_canonical=True, debug=False):
if fix == PoseUtil.ROTATION:
translations = np.zeros((n, 3))
for i in range(n):
translations[i] = PoseUtil.get_uniform_translation(trans_min, trans_max, trans_unit, debug)
if contain_canonical:
translations[0] = np.zeros(3)
rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
elif fix == PoseUtil.TRANSLATION:
rotations = np.zeros((n, 3, 3))
for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical:
rotations[0] = np.eye(3)
translations = PoseUtil.get_uniform_translation(trans_min, trans_max, trans_unit, debug)
else:
translations = np.zeros((n, 3))
rotations = np.zeros((n, 3, 3))
for i in range(n):
translations[i] = PoseUtil.get_uniform_translation(trans_min, trans_max, trans_unit, debug)
for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical:
translations[0] = np.zeros(3)
rotations[0] = np.eye(3)
pose = np.eye(4, 4, k=0)[np.newaxis, :].repeat(n, axis=0)
pose[:, :3, :3] = rotations
pose[:, :3, 3] = translations
return pose
@staticmethod
def get_n_uniform_pose_batch(trans_min, trans_max, rot_min=0, rot_max=180, n=1, batch_size=1,
trans_unit="cm", fix=None, contain_canonical=False, debug=False):
batch_poses = []
for i in range(batch_size):
pose = PoseUtil.get_n_uniform_pose(trans_min, trans_max, rot_min, rot_max, n,
trans_unit, fix, contain_canonical, debug)
batch_poses.append(pose)
pose_batch = np.stack(batch_poses, axis=0)
return pose_batch
@staticmethod
def get_uniform_scale(scale_min, scale_max, debug=False):
if isinstance(scale_min, list):
x_min, y_min, z_min = scale_min
x_max, y_max, z_max = scale_max
else:
x_min, y_min, z_min = scale_min, scale_min, scale_min
x_max, y_max, z_max = scale_max, scale_max, scale_max
x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max)
scale = np.array([x, y, z])
if debug:
print("uniform scale:", scale)
return scale
@staticmethod
def normalize_rotation(rotation, rotation_mode):
if rotation_mode == 'quat_wxyz' or rotation_mode == 'quat_xyzw':
rotation /= torch.norm(rotation, dim=-1, keepdim=True)
elif rotation_mode == 'rot_matrix':
rot_matrix = PoseUtil.rotation_6d_to_matrix_tensor_batch(rotation)
rotation[:, :3] = rot_matrix[:, 0, :]
rotation[:, 3:6] = rot_matrix[:, 1, :]
elif rotation_mode == 'euler_xyz_sx_cx':
rot_sin_theta = rotation[:, :3]
rot_cos_theta = rotation[:, 3:6]
theta = torch.atan2(rot_sin_theta, rot_cos_theta)
rotation[:, :3] = torch.sin(theta)
rotation[:, 3:6] = torch.cos(theta)
elif rotation_mode == 'euler_xyz':
pass
else:
raise NotImplementedError
return rotation
@staticmethod
def get_pose_dim(rot_mode):
assert rot_mode in ['quat_wxyz', 'quat_xyzw', 'euler_xyz', 'euler_xyz_sx_cx', 'rot_matrix'], \
f"the rotation mode {rot_mode} is not supported!"
if rot_mode == 'quat_wxyz' or rot_mode == 'quat_xyzw':
pose_dim = 4
elif rot_mode == 'euler_xyz':
pose_dim = 3
elif rot_mode == 'euler_xyz_sx_cx' or rot_mode == 'rot_matrix':
pose_dim = 6
else:
raise NotImplementedError
return pose_dim
@staticmethod
def rotation_6d_to_matrix_tensor_batch(d6: torch.Tensor) -> torch.Tensor:
a1, a2 = d6[..., :3], d6[..., 3:]
b1 = F.normalize(a1, dim=-1)
b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1
b2 = F.normalize(b2, dim=-1)
b3 = torch.cross(b1, b2, dim=-1)
return torch.stack((b1, b2, b3), dim=-2)
@staticmethod
def matrix_to_rotation_6d_tensor_batch(matrix: torch.Tensor) -> torch.Tensor:
batch_dim = matrix.size()[:-2]
return matrix[..., :2, :].clone().reshape(batch_dim + (6,))
@staticmethod
def rotation_6d_to_matrix_numpy(d6):
a1, a2 = d6[:3], d6[3:]
b1 = a1 / np.linalg.norm(a1)
b2 = a2 - np.dot(b1, a2) * b1
b2 = b2 / np.linalg.norm(b2)
b3 = np.cross(b1, b2)
return np.stack((b1, b2, b3),axis=-2)
@staticmethod
def matrix_to_rotation_6d_numpy(matrix):
return np.copy(matrix[:2, :]).reshape((6,))
''' ------------ Debug ------------ '''
if __name__ == '__main__':
for _ in range(1):
PoseUtil.get_uniform_pose(trans_min=[-25, -25, 10], trans_max=[25, 25, 60],
rot_min=0, rot_max=10, debug=True)
PoseUtil.get_uniform_scale(scale_min=0.25, scale_max=0.30, debug=True)
PoseUtil.get_n_uniform_pose_batch(trans_min=[-25, -25, 10], trans_max=[25, 25, 60],
rot_min=0, rot_max=10, batch_size=2, n=2, fix=PoseUtil.TRANSLATION, debug=True)
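A round-trip sketch for the 6D rotation representation used above: the first two rows of a rotation matrix are flattened to six numbers, and Gram-Schmidt plus a cross product recovers the full matrix:

# Sketch: the 6D representation round-trips through the numpy helpers.
R_mat = PoseUtil.get_uniform_rotation()
d6 = PoseUtil.matrix_to_rotation_6d_numpy(R_mat)
R_back = PoseUtil.rotation_6d_to_matrix_numpy(d6)
assert np.allclose(R_mat, R_back, atol=1e-6)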

47
utils/tensorboard_util.py Executable file

@@ -0,0 +1,47 @@
import torch
class TensorboardWriter:
@staticmethod
def write_tensorboard(writer, panel, data_dict, step):
complex_dict = False
if "scalars" in data_dict:
scalar_data_dict = data_dict["scalars"]
TensorboardWriter.write_scalar_tensorboard(writer, panel, scalar_data_dict, step)
complex_dict = True
if "images" in data_dict:
image_data_dict = data_dict["images"]
TensorboardWriter.write_image_tensorboard(writer, panel, image_data_dict, step)
complex_dict = True
if "points" in data_dict:
point_data_dict = data_dict["points"]
TensorboardWriter.write_points_tensorboard(writer, panel, point_data_dict, step)
complex_dict = True
if not complex_dict:
TensorboardWriter.write_scalar_tensorboard(writer, panel, data_dict, step)
@staticmethod
def write_scalar_tensorboard(writer, panel, data_dict, step):
for key, value in data_dict.items():
if isinstance(value, dict):
writer.add_scalars(f'{panel}/{key}', value, step)
else:
writer.add_scalar(f'{panel}/{key}', value, step)
    @staticmethod
    def write_image_tensorboard(writer, panel, data_dict, step):
        pass  # image logging is not implemented yet
    @staticmethod
    def write_points_tensorboard(writer, panel, data_dict, step):
        for key, value in data_dict.items():
            if value.shape[-1] == 3:
                vertices = value
                colors = torch.zeros_like(value)
            elif value.shape[-1] == 6:
                vertices = value[..., :3]
                colors = value[..., 3:]
            else:
                raise ValueError(f'Unexpected value shape: {value.shape}')
            # add_mesh takes vertices (B, N, 3) and per-vertex colors as separate tensors
            writer.add_mesh(f'{panel}/{key}', vertices=vertices, colors=colors, faces=None, global_step=step)
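A usage sketch (log directory, panel name, and tags are illustrative; requires the tensorboard package):

# Sketch: log a scalar dict and a random point cloud in one call.
import torch
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter("runs/debug")
TensorboardWriter.write_tensorboard(
    writer, "train",
    {"scalars": {"loss": 0.42}, "points": {"pred_pcl": torch.rand(1, 1024, 3)}},
    step=0,
)
writer.close()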

239
utils/view_util.py Executable file

@@ -0,0 +1,239 @@
import json
import numpy as np
import requests
import torch
from PIL import Image
from utils.cache_util import LRUCache
class ViewUtil:
view_cache = LRUCache(1024)
    @staticmethod
    def load_camera_pose_from_frame(camera_params_path):
with open(camera_params_path, "r") as f:
camera_params = json.load(f)
view_transform = camera_params["cameraViewTransform"]
view_transform = np.resize(view_transform, (4,4))
view_transform = np.linalg.inv(view_transform).T
offset = np.mat([[1,0,0,0],[0,-1,0,0],[0,0,-1,0],[0,0,0,1]])
view_transform = view_transform.dot(offset)
return view_transform
    @staticmethod
    def save_image(rgb, filename):
if rgb.dtype != np.uint8:
rgb = rgb.astype(np.uint8)
img = Image.fromarray(rgb, 'RGB')
img.save(filename)
    @staticmethod
    def save_depth(depth, filename):
if depth.dtype != np.uint16:
depth = depth.astype(np.uint16)
depth_img = Image.fromarray(depth)
depth_img.save(filename)
    @staticmethod
    def save_segmentation(seg, filename):
if seg.dtype != np.uint8:
seg = seg.astype(np.uint8)
seg_img = Image.fromarray(seg)
seg_img.save(filename)
@staticmethod
    def get_view(camera_pose, source, data_type, scene, port):
camera_pose_tuple = tuple(map(tuple, camera_pose.tolist()))
cache_key = (camera_pose_tuple, source, data_type, scene, port)
cached_result = ViewUtil.view_cache.get(cache_key)
        if cached_result is not None:
print("Cache hit")
return cached_result
url = f"http://127.0.0.1:{port}/get_images"
headers = {
'Content-Type': 'application/json'
}
data = {
'camera_pose': camera_pose.tolist(),
'data_type': data_type,
'source': source,
'scene': scene
}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
results = response.json()
            rgb = np.asarray(results['rgb'], dtype=np.uint8)
            depth = np.asarray(results['depth']) / 1000  # server sends depth scaled by 1000
            seg = np.asarray(results['segmentation'])
seg_labels = results['segmentation_labels']
camera_params = results['camera_params']
ViewUtil.view_cache.put(cache_key, (rgb, depth, seg, seg_labels, camera_params))
return rgb, depth, seg, seg_labels, camera_params
else:
return None
@staticmethod
    def get_object_pose_batch(K, mesh, rgb_batch, depth_batch, mask_batch, gt_pose_batch, port):
url = f"http://127.0.0.1:{port}/predict_estimation_batch"
headers = {
'Content-Type': 'application/json'
}
mesh_data = {
'vertices': mesh.vertices.tolist(),
'faces': mesh.faces.tolist()
}
data = {
'K': K.tolist(),
'rgb_batch': rgb_batch.tolist(),
'depth_batch': depth_batch.tolist(),
'mask_batch': mask_batch.tolist(),
'mesh': mesh_data,
'gt_pose_batch': gt_pose_batch.tolist()
}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
results = response.json()
pose_batch = np.array(results['pose_batch'])
results_batch = results["eval_result_batch"]
return pose_batch, results_batch
else:
return None
@staticmethod
    def get_visualized_result(K, mesh, rgb, pose, port):
url = f"http://127.0.0.1:{port}/get_visualized_result"
headers = {
'Content-Type': 'application/json'
}
mesh_data = {
'vertices': mesh.vertices.tolist(),
'faces': mesh.faces.tolist()
}
data = {
'K': K.tolist(),
'rgb': rgb.tolist(),
'mesh': mesh_data,
'pose': pose.tolist()
}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
results = response.json()
vis_rgb = np.array(results['vis_rgb'])
return vis_rgb
else:
return None
@staticmethod
    def get_object_pose(K, mesh, rgb, depth, mask, gt_pose, port):
url = f"http://127.0.0.1:{port}/predict_estimation"
headers = {
'Content-Type': 'application/json'
}
mesh_data = {
'vertices': mesh.vertices.tolist(),
'faces': mesh.faces.tolist()
}
data = {
'K': K.tolist(),
'rgb': rgb.tolist(),
'depth': depth.tolist(),
'mask': mask.tolist(),
'mesh': mesh_data,
'gt_pose': gt_pose.tolist()
}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
results = response.json()
pose_batch = np.array(results['pose_batch'])
results_batch = results["eval_result_batch"]
return pose_batch, results_batch
else:
return None
    @staticmethod
    def get_pts_dict(depth, seg, seg_labels, camera_params):
cx = camera_params['cx']
cy = camera_params['cy']
fx = camera_params['fx']
fy = camera_params['fy']
width = camera_params['width']
height = camera_params['height']
pts_dict = {name: [] for name in seg_labels.values()}
u = np.arange(width)
v = np.arange(height)
u, v = np.meshgrid(u, v)
Z = depth
X = (u - cx) * Z / fx
Y = (v - cy) * Z / fy
points = np.stack((X, Y, Z), axis=-1).reshape(-1, 3)
labels = seg.reshape(-1)
for label, name in seg_labels.items():
mask = labels == int(label)
pts_dict[name] = points[mask]
return pts_dict
    @staticmethod
    def get_object_center_from_pts_dict(obj, pts_dict):
if obj is None:
for _, pts in pts_dict.items():
if pts.size != 0:
obj_pts = pts
break
else:
obj_pts = pts_dict[obj]
if obj_pts.size == 0:
for _, pts in pts_dict.items():
if pts.size != 0:
obj_pts = pts
break
obj_center = obj_pts.mean(axis=0)
return obj_center
    @staticmethod
    def get_pts_center(pts):
pts_center = pts.mean(axis=0)
return pts_center
    @staticmethod
    def get_scene_pts(pts_dict):
if any(isinstance(pts, torch.Tensor) for pts in pts_dict.values()):
scene_pts = torch.cat([pts for _, pts in pts_dict.items()], dim=0)
return scene_pts
else:
scene_pts = np.concatenate([pts for _, pts in pts_dict.items()])
return scene_pts
    @staticmethod
    def crop_pts(scene_pts, crop_center, radius=0.2):
if isinstance(scene_pts, torch.Tensor):
crop_mask = torch.norm(scene_pts - crop_center, dim=1) < radius
return scene_pts[crop_mask]
else:
crop_mask = np.linalg.norm(scene_pts - crop_center, axis=1) < radius
return scene_pts[crop_mask]
    @staticmethod
    def crop_pts_dict(pts_dict, crop_center, radius=0.2, min_pts_num=5000):
        crop_dict = {}
        max_loop = 100
        loop = 0
        while loop <= max_loop:
            cropped_length = 0
            for obj, pts in pts_dict.items():
                if isinstance(pts, torch.Tensor):
                    crop_mask = torch.norm(pts - crop_center, dim=1) < radius
                else:
                    crop_mask = np.linalg.norm(pts - crop_center, axis=1) < radius
                crop_dict[obj] = pts[crop_mask]
                cropped_length += crop_dict[obj].shape[0]
            if cropped_length >= min_pts_num:
                break
            radius += 0.02  # grow the crop sphere until enough points are captured
            loop += 1
        return crop_dict
    @staticmethod
    def get_cam_pose_focused_on_point(point_w, cam_pose_w, old_camera_center_w):
        distance = np.linalg.norm(point_w - old_camera_center_w)
        z_axis_camera = cam_pose_w[:3, 2].reshape(-1)
        new_camera_position_w = point_w - distance * z_axis_camera
        new_camera_pose_w = cam_pose_w.copy()
        new_camera_pose_w[:3, 3] = new_camera_position_w.reshape(3)  # the slice is (3,), not (3, 1)
        return new_camera_pose_w
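A sketch of how these pieces compose (the source, data_type, scene name, and port are assumptions; this requires a running render service on localhost):

# Sketch: fetch a view, back-project it, and crop around an object of interest.
import numpy as np
result = ViewUtil.get_view(np.eye(4), source="sim", data_type="rgb", scene="scene_0", port=8888)
if result is not None:
    rgb, depth, seg, seg_labels, camera_params = result
    pts_dict = ViewUtil.get_pts_dict(depth, seg, seg_labels, camera_params)
    center = ViewUtil.get_object_center_from_pts_dict(None, pts_dict)
    cropped = ViewUtil.crop_pts_dict(pts_dict, center, radius=0.2, min_pts_num=5000)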