1 Commit

Commit a40712af22: "fix overlap bug" (2024-12-02 19:09:56 +08:00)
30 changed files with 658 additions and 1969 deletions

app_heuristic.py (new file, +8 lines)
View File

@@ -0,0 +1,8 @@
from PytorchBoot.application import PytorchBootApplication
from runners.heuristic import Heuristic


@PytorchBootApplication("exp_heuristic")
class ExpHeuristic:
    @staticmethod
    def start():
        Heuristic("configs/local/heuristic_exp_config.yaml").run()

View File

@@ -1,11 +0,0 @@
from PytorchBoot.application import PytorchBootApplication
from runners.simulator import Simulator


@PytorchBootApplication("sim")
class SimulateApp:
    @staticmethod
    def start():
        simulator = Simulator("configs/local/simulation_config.yaml")
        simulator.run("create")
        simulator.run("simulate")

View File

@@ -1,162 +0,0 @@
import numpy as np
from sklearn.cluster import DBSCAN


class PredictResult:
    def __init__(self, raw_predict_result, input_pts=None, cluster_params=dict(eps=0.5, min_samples=2)):
        self.input_pts = input_pts
        self.cluster_params = cluster_params
        self.sampled_9d_pose = raw_predict_result
        self.sampled_matrix_pose = self.get_sampled_matrix_pose()
        self.distance_matrix = self.calculate_distance_matrix()
        self.clusters = self.get_cluster_result()
        self.candidate_matrix_poses = self.get_candidate_poses()
        self.candidate_9d_poses = [
            np.concatenate((self.matrix_to_rotation_6d_numpy(matrix[:3, :3]), matrix[:3, 3].reshape(-1,)), axis=-1)
            for matrix in self.candidate_matrix_poses
        ]
        self.cluster_num = len(self.clusters)
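
    # The 9D pose used throughout is a 6D rotation representation (the first two rows
    # of the rotation matrix, as in Zhou et al., CVPR 2019) concatenated with a 3D
    # translation; the two static helpers below convert between the representations.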
    @staticmethod
    def rotation_6d_to_matrix_numpy(d6):
        a1, a2 = d6[:3], d6[3:]
        b1 = a1 / np.linalg.norm(a1)
        b2 = a2 - np.dot(b1, a2) * b1
        b2 = b2 / np.linalg.norm(b2)
        b3 = np.cross(b1, b2)
        return np.stack((b1, b2, b3), axis=-2)

    @staticmethod
    def matrix_to_rotation_6d_numpy(matrix):
        return np.copy(matrix[:2, :]).reshape((6,))

    def __str__(self):
        info = "Predict Result:\n"
        info += f"  Predicted pose number: {len(self.sampled_9d_pose)}\n"
        info += f"  Cluster number: {self.cluster_num}\n"
        for i, cluster in enumerate(self.clusters):
            info += f"  - Cluster {i} size: {len(cluster)}\n"
        max_distance = np.max(self.distance_matrix[self.distance_matrix != 0])
        min_distance = np.min(self.distance_matrix[self.distance_matrix != 0])
        info += f"  Max distance: {max_distance}\n"
        info += f"  Min distance: {min_distance}\n"
        return info

    def get_sampled_matrix_pose(self):
        sampled_matrix_pose = []
        for pose in self.sampled_9d_pose:
            rotation = pose[:6]
            translation = pose[6:]
            pose = self.rotation_6d_to_matrix_numpy(rotation)
            pose = np.concatenate((pose, translation.reshape(-1, 1)), axis=-1)
            pose = np.concatenate((pose, np.array([[0, 0, 0, 1]])), axis=-2)
            sampled_matrix_pose.append(pose)
        return np.array(sampled_matrix_pose)
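
    # Geodesic distance on SO(3): the rotation angle of the relative rotation R1^T R2,
    # clipped into arccos's domain for numerical safety; the result lies in [0, pi].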
    def rotation_distance(self, R1, R2):
        R = np.dot(R1.T, R2)
        trace = np.trace(R)
        angle = np.arccos(np.clip((trace - 1) / 2, -1, 1))
        return angle

    def calculate_distance_matrix(self):
        n = len(self.sampled_matrix_pose)
        dist_matrix = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                dist_matrix[i, j] = self.rotation_distance(self.sampled_matrix_pose[i][:3, :3], self.sampled_matrix_pose[j][:3, :3])
        return dist_matrix
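
    # DBSCAN over the precomputed pairwise rotation distances; eps/min_samples come
    # from cluster_params, and label -1 marks poses treated as noise (no cluster).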
    def cluster_rotations(self):
        clustering = DBSCAN(eps=self.cluster_params['eps'], min_samples=self.cluster_params['min_samples'], metric='precomputed')
        labels = clustering.fit_predict(self.distance_matrix)
        return labels

    def get_cluster_result(self):
        labels = self.cluster_rotations()
        cluster_num = len(set(labels)) - (1 if -1 in labels else 0)
        clusters = []
        for _ in range(cluster_num):
            clusters.append([])
        for matrix_pose, label in zip(self.sampled_matrix_pose, labels):
            if label != -1:
                clusters[label].append(matrix_pose)
        clusters.sort(key=len, reverse=True)
        return clusters

    def get_center_matrix_pose_from_cluster(self, cluster):
        min_total_distance = float('inf')
        center_matrix_pose = None
        for matrix_pose in cluster:
            total_distance = 0
            for other_matrix_pose in cluster:
                rot_distance = self.rotation_distance(matrix_pose[:3, :3], other_matrix_pose[:3, :3])
                total_distance += rot_distance
            if total_distance < min_total_distance:
                min_total_distance = total_distance
                center_matrix_pose = matrix_pose
        return center_matrix_pose

    def get_candidate_poses(self):
        candidate_poses = []
        for cluster in self.clusters:
            candidate_poses.append(self.get_center_matrix_pose_from_cluster(cluster))
        return candidate_poses

    def visualize(self):
        import plotly.graph_objects as go
        fig = go.Figure()
        if self.input_pts is not None:
            fig.add_trace(go.Scatter3d(
                x=self.input_pts[:, 0], y=self.input_pts[:, 1], z=self.input_pts[:, 2],
                mode='markers', marker=dict(size=1, color='gray', opacity=0.5), name='Input Points'
            ))
        colors = ['aggrnyl', 'agsunset', 'algae', 'amp', 'armyrose', 'balance',
                  'blackbody', 'bluered', 'blues', 'blugrn', 'bluyl', 'brbg']
        for i, cluster in enumerate(self.clusters):
            color = colors[i]
            candidate_pose = self.candidate_matrix_poses[i]
            origin_candidate = candidate_pose[:3, 3]
            z_axis_candidate = candidate_pose[:3, 2]
            for pose in cluster:
                origin = pose[:3, 3]
                z_axis = pose[:3, 2]
                fig.add_trace(go.Cone(
                    x=[origin[0]], y=[origin[1]], z=[origin[2]],
                    u=[z_axis[0]], v=[z_axis[1]], w=[z_axis[2]],
                    colorscale=color,
                    sizemode="absolute", sizeref=0.05, anchor="tail", showscale=False
                ))
            fig.add_trace(go.Cone(
                x=[origin_candidate[0]], y=[origin_candidate[1]], z=[origin_candidate[2]],
                u=[z_axis_candidate[0]], v=[z_axis_candidate[1]], w=[z_axis_candidate[2]],
                colorscale=color,
                sizemode="absolute", sizeref=0.1, anchor="tail", showscale=False
            ))
        fig.update_layout(
            title="Clustered Poses and Input Points",
            scene=dict(
                xaxis_title='X',
                yaxis_title='Y',
                zaxis_title='Z'
            ),
            margin=dict(l=0, r=0, b=0, t=40),
            scene_camera=dict(eye=dict(x=1.25, y=1.25, z=1.25))
        )
        fig.show()


if __name__ == "__main__":
    step = 0
    raw_predict_result = np.load(f"inference_result_pack/inference_result_pack/{step}/all_pred_pose_9d.npy")
    input_pts = np.loadtxt(f"inference_result_pack/inference_result_pack/{step}/input_pts.txt")
    print(raw_predict_result.shape)
    predict_result = PredictResult(raw_predict_result, input_pts, cluster_params=dict(eps=0.25, min_samples=3))
    print(predict_result)
    print(len(predict_result.candidate_matrix_poses))
    print(predict_result.distance_matrix)
    #import ipdb; ipdb.set_trace()
    predict_result.visualize()

View File

@@ -0,0 +1,71 @@
runner:
  general:
    seed: 0
    device: cuda
    cuda_visible_devices: "0,1,2,3,4,5,6,7"

  experiment:
    name: exp_hemisphere_circle_trajectory
    root_dir: "experiments"
    epoch: -1 # -1 stands for last epoch

  test:
    dataset_list:
      - OmniObject3d_test
    blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
    output_dir: "/media/hofee/data/results/nbv_rec_inference/hemisphere_random_241202"
    voxel_size: 0.003
    min_new_area: 1.0
    heuristic_method: hemisphere_random

dataset:
  # OmniObject3d_train:
  #   root_dir: "C:\\Document\\Datasets\\inference_test1"
  #   model_dir: "C:\\Document\\Datasets\\scaled_object_meshes"
  #   source: seq_reconstruction_dataset_preprocessed
  #   split_file: "C:\\Document\\Datasets\\data_list\\sample.txt"
  #   type: test
  #   filter_degree: 75
  #   ratio: 1
  #   batch_size: 1
  #   num_workers: 12
  #   pts_num: 8192
  #   load_from_preprocess: True

  OmniObject3d_test:
    root_dir: "/media/hofee/data/data/new_testset_output"
    model_dir: "/media/hofee/data/data/scaled_object_meshes"
    source: seq_reconstruction_dataset_preprocessed
    # split_file: "C:\\Document\\Datasets\\data_list\\OmniObject3d_test.txt"
    type: test
    filter_degree: 75
    eval_list:
      - pose_diff
      - coverage_rate_increase
    ratio: 0.1
    batch_size: 1
    num_workers: 12
    pts_num: 8192
    load_from_preprocess: True

heuristic_methods:
  hemisphere_random:
    center: [0, 0, 0]
    radius_fixed: True
    fixed_radius: 0.6
    min_radius: 0.4
    max_radius: 0.8

  hemisphere_circle_trajectory:
    center: [0, 0, 0]
    radius_fixed: False
    fixed_radius: 0.6
    min_radius: 0.4
    max_radius: 0.8
    phi_list: [15, 45, 75]
    circle_times: 12
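
For orientation, a minimal sketch of how the hemisphere_random parameters above could
drive view sampling (the helper name and the exact distribution are assumptions; the
actual logic lives in runners/heuristic.py):

import numpy as np

def sample_hemisphere_view(center, radius_fixed=True, fixed_radius=0.6,
                           min_radius=0.4, max_radius=0.8, rng=None):
    # Hypothetical illustration: pick a camera position on the upper hemisphere
    # around `center`, at a fixed or uniformly sampled radius.
    rng = np.random.default_rng() if rng is None else rng
    r = fixed_radius if radius_fixed else rng.uniform(min_radius, max_radius)
    theta = rng.uniform(0.0, 2.0 * np.pi)   # azimuth
    phi = rng.uniform(0.0, np.pi / 2.0)     # polar angle, upper hemisphere only
    offset = r * np.array([np.sin(phi) * np.cos(theta),
                           np.sin(phi) * np.sin(theta),
                           np.cos(phi)])
    return np.asarray(center, dtype=float) + offset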

View File

@@ -6,16 +6,16 @@ runner:
     cuda_visible_devices: "0,1,2,3,4,5,6,7"
   experiment:
-    name: train_ab_global_only_p++_wp
+    name: train_ab_partial
     root_dir: "experiments"
-    epoch: 922 # -1 stands for last epoch
+    epoch: -1 # -1 stands for last epoch
   test:
     dataset_list:
       - OmniObject3d_test
     blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
-    output_dir: "/media/hofee/data/data/p++_wp"
+    output_dir: "/media/hofee/data/results/nbv_rec_inference/partial_241202"
     pipeline: nbv_reconstruction_pipeline
     voxel_size: 0.003
     min_new_area: 1.0
@@ -52,7 +52,7 @@ dataset:
 pipeline:
   nbv_reconstruction_pipeline:
     modules:
-      pts_encoder: pointnet++_encoder
+      pts_encoder: pointnet_encoder
       seq_encoder: transformer_seq_encoder
       pose_encoder: pose_encoder
       view_finder: gf_view_finder
@@ -60,10 +60,6 @@ pipeline:
     global_scanned_feat: True
 module:
-  pointnet++_encoder:
-    in_dim: 3
-    params_name: light
   pointnet_encoder:
     in_dim: 3
     out_dim: 1024

View File

@@ -1,36 +0,0 @@
runner:
  general:
    seed: 0
    device: cuda
    cuda_visible_devices: "0,1,2,3,4,5,6,7"

  experiment:
    name: simulation_debug
    root_dir: "experiments"

  simulation:
    robot:
      urdf_path: "assets/franka_panda/panda.urdf"
      initial_position: [0, 0, 0] # base position of the robot arm
      initial_orientation: [0, 0, 0] # base orientation of the robot arm (Euler angles)

    turntable:
      radius: 0.3 # turntable radius (meters)
      height: 0.1 # turntable height
      center_position: [0.8, 0, 0.4]

    target:
      obj_dir: /media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/assets/object_meshes
      obj_name: "google_scan-box_0185"
      scale: 1.0 # scale factor
      mass: 0.1 # mass (kg)
      rgba_color: [0.8, 0.8, 0.8, 1.0] # target object color

    camera:
      width: 640
      height: 480
      fov: 40
      near: 0.01
      far: 5.0

    displaytable:

View File

@@ -22,6 +22,6 @@ runner:
   datasets:
     OmniObject3d:
-      root_dir: /media/hofee/data/data/test_bottle/view
-      from: 0
-      to: -1 # -1 means end
+      root_dir: /data/hofee/nbv_rec_part2_preprocessed
+      from: 155
+      to: 165 # -1 means end

View File

@@ -8,16 +8,16 @@ runner:
   root_dir: experiments
   generate:
     port: 5002
-    from: 0
+    from: 1
     to: 50 # -1 means all
-    object_dir: /media/hofee/data/data/test_bottle/bottle_mesh
-    table_model_path: /media/hofee/data/data/others/table.obj
-    output_dir: /media/hofee/data/data/test_bottle/view
+    object_dir: C:\\Document\\Datasets\\scaled_object_meshes
+    table_model_path: C:\\Document\\Datasets\\table.obj
+    output_dir: C:\\Document\\Datasets\\debug_generate_view
     binocular_vision: true
     plane_size: 10
     max_views: 512
     min_views: 128
-    random_view_ratio: 0.002
+    random_view_ratio: 0.02
     min_cam_table_included_degree: 20
     max_diag: 0.7
     min_diag: 0.01
@@ -34,7 +34,7 @@ runner:
       max_y: 0.05
       min_z: 0.01
      max_z: 0.01
-      random_rotation_ratio: 0.0
+      random_rotation_ratio: 0.3
     random_objects:
       num: 4
       cluster: 0.9

View File

@@ -3,17 +3,17 @@ runner:
   general:
     seed: 0
     device: cuda
-    cuda_visible_devices: "2"
+    cuda_visible_devices: "0"
     parallel: False

   experiment:
-    name: newtrain_real_global_only
+    name: train_ab_global_only
     root_dir: "experiments"
-    use_checkpoint: False
+    use_checkpoint: True
     epoch: -1 # -1 stands for last epoch
     max_epochs: 5000
     save_checkpoint_interval: 1
-    test_first: False
+    test_first: True

   train:
     optimizer:
@@ -28,18 +28,18 @@ runner:
       - OmniObject3d_test
       - OmniObject3d_val
-    pipeline: nbv_reconstruction_pipeline_global_only
+    pipeline: nbv_reconstruction_pipeline

 dataset:
   OmniObject3d_train:
     root_dir: "/data/hofee/data/new_full_data"
     model_dir: "../data/scaled_object_meshes"
     source: nbv_reconstruction_dataset
-    split_file: "/data/hofee/data/new_full_data_list/new_OmniObject3d_train.txt"
+    split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
     type: train
     cache: True
     ratio: 1
-    batch_size: 24
+    batch_size: 80
     num_workers: 128
     pts_num: 8192
     load_from_preprocess: True
@@ -48,14 +48,14 @@ dataset:
     root_dir: "/data/hofee/data/new_full_data"
     model_dir: "../data/scaled_object_meshes"
     source: nbv_reconstruction_dataset
-    split_file: "/data/hofee/data/new_full_data_list/new_OmniObject3d_test.txt"
+    split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_test.txt"
     type: test
     cache: True
     filter_degree: 75
     eval_list:
       - pose_diff
     ratio: 1
-    batch_size: 32
+    batch_size: 80
     num_workers: 12
     pts_num: 8192
     load_from_preprocess: True
@@ -64,39 +64,23 @@ dataset:
     root_dir: "/data/hofee/data/new_full_data"
     model_dir: "../data/scaled_object_meshes"
     source: nbv_reconstruction_dataset
-    split_file: "/data/hofee/data/new_full_data_list/new_OmniObject3d_train.txt"
+    split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
     type: test
     cache: True
     filter_degree: 75
     eval_list:
       - pose_diff
     ratio: 0.1
-    batch_size: 32
+    batch_size: 80
     num_workers: 12
     pts_num: 8192
     load_from_preprocess: True

 pipeline:
-  nbv_reconstruction_pipeline_local:
+  nbv_reconstruction_pipeline:
     modules:
-      pts_encoder: pointnet++_encoder
+      pts_encoder: pointnet_encoder
-      seq_encoder: transformer_seq_encoder
-      pose_encoder: pose_encoder
-      view_finder: gf_view_finder
-    eps: 1e-5
-    global_scanned_feat: True
-  nbv_reconstruction_pipeline_global:
-    modules:
-      pts_encoder: pointnet++_encoder
-      seq_encoder: transformer_seq_encoder
-      pose_encoder: pose_encoder
-      view_finder: gf_view_finder
-    eps: 1e-5
-    global_scanned_feat: True
-  nbv_reconstruction_pipeline_local_only:
-    modules:
-      pts_encoder: pointnet++_encoder
       seq_encoder: transformer_seq_encoder
       pose_encoder: pose_encoder
       view_finder: gf_view_finder
@@ -112,11 +96,8 @@ module:
     global_feat: True
     feature_transform: False
-  pointnet++_encoder:
-    in_dim: 3
   transformer_seq_encoder:
-    embed_dim: 1280
+    embed_dim: 256
     num_heads: 4
     ffn_dim: 256
     num_layers: 3
@@ -125,7 +106,7 @@ module:
   gf_view_finder:
     t_feat_dim: 128
     pose_feat_dim: 256
-    main_feat_dim: 1024
+    main_feat_dim: 2048
     regression_head: Rx_Ry_and_T
     pose_mode: rot_matrix
     per_point_feature: False

View File

@@ -1,81 +0,0 @@
import torch
from torch import nn
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory.component_factory import ComponentFactory
from PytorchBoot.utils import Log


@stereotype.pipeline("nbv_reconstruction_pipeline_global_only")
class NBVReconstructionGlobalPointsOnlyPipeline(nn.Module):
    def __init__(self, config):
        super(NBVReconstructionGlobalPointsOnlyPipeline, self).__init__()
        self.config = config
        self.module_config = config["modules"]
        self.pts_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pts_encoder"])
        self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pose_encoder"])
        self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["view_finder"])
        self.eps = float(self.config["eps"])
        self.enable_global_scanned_feat = self.config["global_scanned_feat"]

    def forward(self, data):
        mode = data["mode"]
        if mode == namespace.Mode.TRAIN:
            return self.forward_train(data)
        elif mode == namespace.Mode.TEST:
            return self.forward_test(data)
        else:
            Log.error("Unknown mode: {}".format(mode), True)
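
    # Score-matching perturbation ("pertube" is kept as spelled in the original code):
    # sample t ~ U(eps, 1), diffuse the ground-truth 9D pose with the score SDE's
    # marginal (mu, std), and regress the denoising score target -z / std.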
    def pertube_data(self, gt_delta_9d):
        bs = gt_delta_9d.shape[0]
        random_t = torch.rand(bs, device=gt_delta_9d.device) * (1. - self.eps) + self.eps
        random_t = random_t.unsqueeze(-1)
        mu, std = self.view_finder.marginal_prob(gt_delta_9d, random_t)
        std = std.view(-1, 1)
        z = torch.randn_like(gt_delta_9d)
        perturbed_x = mu + z * std
        target_score = - z * std / (std ** 2)
        return perturbed_x, random_t, target_score, std

    def forward_train(self, data):
        main_feat = self.get_main_feat(data)
        ''' get std '''
        best_to_world_pose_9d_batch = data["best_to_world_pose_9d"]
        perturbed_x, random_t, target_score, std = self.pertube_data(best_to_world_pose_9d_batch)
        input_data = {
            "sampled_pose": perturbed_x,
            "t": random_t,
            "main_feat": main_feat,
        }
        estimated_score = self.view_finder(input_data)
        output = {
            "estimated_score": estimated_score,
            "target_score": target_score,
            "std": std
        }
        return output

    def forward_test(self, data):
        main_feat = self.get_main_feat(data)
        estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(main_feat)
        result = {
            "pred_pose_9d": estimated_delta_rot_9d,
            "in_process_sample": in_process_sample
        }
        return result

    def get_main_feat(self, data):
        combined_scanned_pts_batch = data['combined_scanned_pts']
        global_scanned_feat = self.pts_encoder.encode_points(combined_scanned_pts_batch)
        main_feat = global_scanned_feat
        if torch.isnan(main_feat).any():
            Log.error("nan in main_feat", True)
        return main_feat

View File

@@ -1,91 +0,0 @@
import torch
from torch import nn
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory.component_factory import ComponentFactory
from PytorchBoot.utils import Log


@stereotype.pipeline("nbv_reconstruction_pipeline_local_only")
class NBVReconstructionLocalPointsOnlyPipeline(nn.Module):
    def __init__(self, config):
        super(NBVReconstructionLocalPointsOnlyPipeline, self).__init__()
        self.config = config
        self.module_config = config["modules"]
        self.pts_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pts_encoder"])
        self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pose_encoder"])
        self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["seq_encoder"])
        self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["view_finder"])
        self.eps = float(self.config["eps"])
        self.enable_global_scanned_feat = self.config["global_scanned_feat"]

    def forward(self, data):
        mode = data["mode"]
        if mode == namespace.Mode.TRAIN:
            return self.forward_train(data)
        elif mode == namespace.Mode.TEST:
            return self.forward_test(data)
        else:
            Log.error("Unknown mode: {}".format(mode), True)

    def pertube_data(self, gt_delta_9d):
        bs = gt_delta_9d.shape[0]
        random_t = torch.rand(bs, device=gt_delta_9d.device) * (1. - self.eps) + self.eps
        random_t = random_t.unsqueeze(-1)
        mu, std = self.view_finder.marginal_prob(gt_delta_9d, random_t)
        std = std.view(-1, 1)
        z = torch.randn_like(gt_delta_9d)
        perturbed_x = mu + z * std
        target_score = - z * std / (std ** 2)
        return perturbed_x, random_t, target_score, std

    def forward_train(self, data):
        main_feat = self.get_main_feat(data)
        ''' get std '''
        best_to_world_pose_9d_batch = data["best_to_world_pose_9d"]
        perturbed_x, random_t, target_score, std = self.pertube_data(best_to_world_pose_9d_batch)
        input_data = {
            "sampled_pose": perturbed_x,
            "t": random_t,
            "main_feat": main_feat,
        }
        estimated_score = self.view_finder(input_data)
        output = {
            "estimated_score": estimated_score,
            "target_score": target_score,
            "std": std
        }
        return output

    def forward_test(self, data):
        main_feat = self.get_main_feat(data)
        estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(main_feat)
        result = {
            "pred_pose_9d": estimated_delta_rot_9d,
            "in_process_sample": in_process_sample
        }
        return result

    def get_main_feat(self, data):
        scanned_pts_batch = data['scanned_pts']
        scanned_n_to_world_pose_9d_batch = data['scanned_n_to_world_pose_9d']
        device = next(self.parameters()).device
        feat_seq_list = []
        for scanned_pts, scanned_n_to_world_pose_9d in zip(scanned_pts_batch, scanned_n_to_world_pose_9d_batch):
            scanned_pts = scanned_pts.to(device)
            scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device)
            pts_feat = self.pts_encoder.encode_points(scanned_pts)
            pose_feat = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d)
            seq_feat = torch.cat([pts_feat, pose_feat], dim=-1)
            feat_seq_list.append(seq_feat)
        main_feat = self.seq_encoder.encode_sequence(feat_seq_list)
        if torch.isnan(main_feat).any():
            Log.error("nan in main_feat", True)
        return main_feat

View File

@@ -6,7 +6,7 @@ from PytorchBoot.factory.component_factory import ComponentFactory
 from PytorchBoot.utils import Log

-@stereotype.pipeline("nbv_reconstruction_pipeline_global")
+@stereotype.pipeline("nbv_reconstruction_global_pts_pipeline")
 class NBVReconstructionGlobalPointsPipeline(nn.Module):
     def __init__(self, config):
         super(NBVReconstructionGlobalPointsPipeline, self).__init__()
@@ -14,7 +14,7 @@ class NBVReconstructionGlobalPointsPipeline(nn.Module):
         self.module_config = config["modules"]
         self.pts_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pts_encoder"])
         self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pose_encoder"])
-        self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["seq_encoder"])
+        self.pose_seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pose_seq_encoder"])
         self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["view_finder"])
         self.eps = float(self.config["eps"])
         self.enable_global_scanned_feat = self.config["global_scanned_feat"]
@@ -73,13 +73,13 @@ class NBVReconstructionGlobalPointsPipeline(nn.Module):
         device = next(self.parameters()).device

-        feat_seq_list = []
+        pose_feat_seq_list = []
         for scanned_n_to_world_pose_9d in scanned_n_to_world_pose_9d_batch:
             scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device)
-            feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d))
-        main_feat = self.seq_encoder.encode_sequence(feat_seq_list)
+            pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d))
+        main_feat = self.pose_seq_encoder.encode_sequence(pose_feat_seq_list)

         combined_scanned_pts_batch = data['combined_scanned_pts']

View File

@@ -5,7 +5,7 @@ import PytorchBoot.stereotype as stereotype
 from PytorchBoot.factory.component_factory import ComponentFactory
 from PytorchBoot.utils import Log

-@stereotype.pipeline("nbv_reconstruction_pipeline_local")
+@stereotype.pipeline("nbv_reconstruction_local_pts_pipeline")
 class NBVReconstructionLocalPointsPipeline(nn.Module):
     def __init__(self, config):
         super(NBVReconstructionLocalPointsPipeline, self).__init__()
@@ -70,18 +70,23 @@ class NBVReconstructionLocalPointsPipeline(nn.Module):
     def get_main_feat(self, data):
         scanned_pts_batch = data['scanned_pts']
         scanned_n_to_world_pose_9d_batch = data['scanned_n_to_world_pose_9d']
         device = next(self.parameters()).device

-        feat_seq_list = []
+        pts_feat_seq_list = []
+        pose_feat_seq_list = []
         for scanned_pts, scanned_n_to_world_pose_9d in zip(scanned_pts_batch, scanned_n_to_world_pose_9d_batch):
             scanned_pts = scanned_pts.to(device)
             scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device)
-            pts_feat = self.pts_encoder.encode_points(scanned_pts)
-            pose_feat = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d)
-            seq_feat = torch.cat([pts_feat, pose_feat], dim=-1)
-            feat_seq_list.append(seq_feat)
-        main_feat = self.seq_encoder.encode_sequence(feat_seq_list)
+            pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts))
+            pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d))
+        main_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list)

         if self.enable_global_scanned_feat:
             combined_scanned_pts_batch = data['combined_scanned_pts']

View File

@@ -7,7 +7,6 @@ from PytorchBoot.utils.log_util import Log
 import torch
 import os
 import sys
-import time

 sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction")
@@ -115,13 +114,8 @@ class NBVReconstructionDataset(BaseDataset):
         except Exception as e:
             Log.error(f"Save cache failed: {e}")

-    def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003):
-        voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
-        unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
-        idx_sort = np.argsort(inverse)
-        idx_unique = idx_sort[np.cumsum(counts)-counts]
-        downsampled_points = point_cloud[idx_unique]
-        return downsampled_points, inverse
+    def voxel_downsample_with_mask(self, pts, voxel_size):
+        pass

     def __getitem__(self, index):
@@ -135,9 +129,6 @@ class NBVReconstructionDataset(BaseDataset):
             scanned_coverages_rate,
             scanned_n_to_world_pose,
         ) = ([], [], [])
-        #start_time = time.time()
-        start_indices = [0]
-        total_points = 0
         for view in scanned_views:
             frame_idx = view[0]
             coverage_rate = view[1]
@@ -159,12 +150,8 @@ class NBVReconstructionDataset(BaseDataset):
             n_to_world_trans = n_to_world_pose[:3, 3]
             n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
             scanned_n_to_world_pose.append(n_to_world_9d)
-            total_points += len(downsampled_target_point_cloud)
-            start_indices.append(total_points)
-        #end_time = time.time()
-        #Log.info(f"load data time: {end_time - start_time}")

         nbv_idx, nbv_coverage_rate = nbv[0], nbv[1]
         nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx)
         cam_info = DataLoadUtil.load_cam_info(nbv_path)
@@ -179,25 +166,12 @@ class NBVReconstructionDataset(BaseDataset):
         )

         combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0)
-        voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_views_pts, 0.003)
-        random_downsampled_combined_scanned_pts_np, random_downsample_idx = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num, require_idx=True)
-        # all_idx_unique = np.arange(len(voxel_downsampled_combined_scanned_pts_np))
-        # all_random_downsample_idx = all_idx_unique[random_downsample_idx]
-        # scanned_pts_mask = []
-        # for idx, start_idx in enumerate(start_indices):
-        #     if idx == len(start_indices) - 1:
-        #         break
-        #     end_idx = start_indices[idx+1]
-        #     view_inverse = inverse[start_idx:end_idx]
-        #     view_unique_downsampled_idx = np.unique(view_inverse)
-        #     view_unique_downsampled_idx_set = set(view_unique_downsampled_idx)
-        #     mask = np.array([idx in view_unique_downsampled_idx_set for idx in all_random_downsample_idx])
-        #     #scanned_pts_mask.append(mask)
+        voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
+        random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num)

         data_item = {
             "scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32),  # Ndarray(S x Nv x 3)
             "combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32),  # Ndarray(N x 3)
-            #"scanned_pts_mask": np.asarray(scanned_pts_mask, dtype=np.bool),  # Ndarray(N)
             "scanned_coverage_rate": scanned_coverages_rate,  # List(S): Float, range(0, 1)
             "scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32),  # Ndarray(S x 9)
             "best_coverage_rate": nbv_coverage_rate,  # Float, range(0, 1)
@@ -223,9 +197,7 @@ class NBVReconstructionDataset(BaseDataset):
         collate_data["scanned_n_to_world_pose_9d"] = [
             torch.tensor(item["scanned_n_to_world_pose_9d"]) for item in batch
         ]
-        # collate_data["scanned_pts_mask"] = [
-        #     torch.tensor(item["scanned_pts_mask"]) for item in batch
-        # ]

         ''' ------ Fixed Length ------ '''
         collate_data["best_to_world_pose_9d"] = torch.stack(
@@ -234,14 +206,12 @@ class NBVReconstructionDataset(BaseDataset):
         collate_data["combined_scanned_pts"] = torch.stack(
             [torch.tensor(item["combined_scanned_pts"]) for item in batch]
         )

         for key in batch[0].keys():
             if key not in [
                 "scanned_pts",
                 "scanned_n_to_world_pose_9d",
                 "best_to_world_pose_9d",
                 "combined_scanned_pts",
-                "scanned_pts_mask",
             ]:
                 collate_data[key] = [item[key] for item in batch]
         return collate_data
@@ -257,9 +227,9 @@ if __name__ == "__main__":
     torch.manual_seed(seed)
     np.random.seed(seed)
     config = {
-        "root_dir": "/data/hofee/nbv_rec_part2_preprocessed",
+        "root_dir": "/data/hofee/data/packed_preprocessed_data",
         "source": "nbv_reconstruction_dataset",
-        "split_file": "/data/hofee/data/sample.txt",
+        "split_file": "/data/hofee/data/OmniObject3d_train.txt",
         "load_from_preprocess": True,
         "ratio": 0.5,
         "batch_size": 2,

View File

@@ -75,8 +75,6 @@ class NBVReconstructionPipeline(nn.Module):
     def forward_test(self, data):
         main_feat = self.get_main_feat(data)
-        repeat_num = data.get("repeat_num", 1)
-        main_feat = main_feat.repeat(repeat_num, 1)
         estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(
             main_feat
         )
@@ -92,8 +90,6 @@ class NBVReconstructionPipeline(nn.Module):
         ]  # List(B): Tensor(S x 9)
-        scanned_pts_mask_batch = data["scanned_pts_mask"]  # List(B): Tensor(S x N)
+        scanned_pts_mask_batch = data["scanned_pts_mask"]  # List(B): Tensor(N)
         device = next(self.parameters()).device
         embedding_list_batch = []

View File

@@ -55,7 +55,6 @@ class SeqReconstructionDataset(BaseDataset):
     def get_scene_name_list(self):
         return self.scene_name_list

     def get_datalist(self):
         datalist = []
         total = len(self.scene_name_list)
@@ -64,15 +63,11 @@ class SeqReconstructionDataset(BaseDataset):
             scene_max_cr_idx = 0
             frame_len = DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)

-            for i in range(10, frame_len):
+            for i in range(frame_len):
                 path = DataLoadUtil.get_path(self.root_dir, scene_name, i)
                 pts = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
-                print(pts.shape)
                 if pts.shape[0] == 0:
                     continue
-                else:
-                    break
-            print(i)
             datalist.append({
                 "scene_name": scene_name,
                 "first_frame": i,
@@ -184,9 +179,9 @@ if __name__ == "__main__":
     np.random.seed(seed)
     config = {
-        "root_dir": "/media/hofee/data/data/test_bottle/view",
+        "root_dir": "/media/hofee/data/data/new_testset",
         "source": "seq_reconstruction_dataset",
-        "split_file": "/media/hofee/data/data/test_bottle/test_bottle.txt",
+        "split_file": "/media/hofee/data/data/OmniObject3d_test.txt",
         "load_from_preprocess": True,
         "filter_degree": 75,
         "num_workers": 0,
@@ -194,7 +189,7 @@ if __name__ == "__main__":
         "type": namespace.Mode.TEST,
     }
-    output_dir = "/media/hofee/data/data/test_bottle/preprocessed_dataset"
+    output_dir = "/media/hofee/data/data/new_testset_output"
     os.makedirs(output_dir, exist_ok=True)
     ds = SeqReconstructionDataset(config)

View File

@@ -66,7 +66,7 @@ if __name__ == "__main__":
     load_from_preprocess: True
     '''
     config = {
-        "root_dir": "/media/hofee/data/data/test_bottle/preprocessed_dataset",
+        "root_dir": "H:\\AI\\Datasets\\packed_test_data",
         "source": "seq_reconstruction_dataset",
         "split_file": "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt",
         "load_from_preprocess": True,

View File

@@ -1,162 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from . import pointnet2_utils
from . import pytorch_utils as pt_utils
from typing import List, Tuple


class _PointnetSAModuleBase(nn.Module):

    def __init__(self):
        super().__init__()
        self.npoint = None
        self.groupers = None
        self.mlps = None
        self.pool_method = 'max_pool'

    def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, new_xyz=None) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        :param xyz: (B, N, 3) tensor of the xyz coordinates of the features
        :param features: (B, N, C) tensor of the descriptors of the features
        :param new_xyz:
        :return:
            new_xyz: (B, npoint, 3) tensor of the new features' xyz
            new_features: (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_features descriptors
        """
        new_features_list = []

        xyz_flipped = xyz.transpose(1, 2).contiguous()
        if new_xyz is None:
            new_xyz = pointnet2_utils.gather_operation(
                xyz_flipped,
                pointnet2_utils.furthest_point_sample(xyz, self.npoint)
            ).transpose(1, 2).contiguous() if self.npoint is not None else None

        for i in range(len(self.groupers)):
            new_features = self.groupers[i](xyz, new_xyz, features)  # (B, C, npoint, nsample)
            new_features = self.mlps[i](new_features)  # (B, mlp[-1], npoint, nsample)
            if self.pool_method == 'max_pool':
                new_features = F.max_pool2d(
                    new_features, kernel_size=[1, new_features.size(3)]
                )  # (B, mlp[-1], npoint, 1)
            elif self.pool_method == 'avg_pool':
                new_features = F.avg_pool2d(
                    new_features, kernel_size=[1, new_features.size(3)]
                )  # (B, mlp[-1], npoint, 1)
            else:
                raise NotImplementedError

            new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)
            new_features_list.append(new_features)

        return new_xyz, torch.cat(new_features_list, dim=1)


class PointnetSAModuleMSG(_PointnetSAModuleBase):
    """Pointnet set abstraction layer with multiscale grouping"""

    def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True,
                 use_xyz: bool = True, pool_method='max_pool', instance_norm=False):
        """
        :param npoint: int
        :param radii: list of float, list of radii to group with
        :param nsamples: list of int, number of samples in each ball query
        :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale
        :param bn: whether to use batchnorm
        :param use_xyz:
        :param pool_method: max_pool / avg_pool
        :param instance_norm: whether to use instance_norm
        """
        super().__init__()

        assert len(radii) == len(nsamples) == len(mlps)

        self.npoint = npoint
        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for i in range(len(radii)):
            radius = radii[i]
            nsample = nsamples[i]
            self.groupers.append(
                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz)
                if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
            )
            mlp_spec = mlps[i]
            if use_xyz:
                mlp_spec[0] += 3
            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn, instance_norm=instance_norm))
        self.pool_method = pool_method


class PointnetSAModule(PointnetSAModuleMSG):
    """Pointnet set abstraction layer"""

    def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None,
                 bn: bool = True, use_xyz: bool = True, pool_method='max_pool', instance_norm=False):
        """
        :param mlp: list of int, spec of the pointnet before the global max_pool
        :param npoint: int, number of features
        :param radius: float, radius of ball
        :param nsample: int, number of samples in the ball query
        :param bn: whether to use batchnorm
        :param use_xyz:
        :param pool_method: max_pool / avg_pool
        :param instance_norm: whether to use instance_norm
        """
        super().__init__(
            mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz,
            pool_method=pool_method, instance_norm=instance_norm
        )


class PointnetFPModule(nn.Module):
    r"""Propagates the features of one set to another"""

    def __init__(self, *, mlp: List[int], bn: bool = True):
        """
        :param mlp: list of int
        :param bn: whether to use batchnorm
        """
        super().__init__()
        self.mlp = pt_utils.SharedMLP(mlp, bn=bn)

    def forward(
            self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor
    ) -> torch.Tensor:
        """
        :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features
        :param known: (B, m, 3) tensor of the xyz positions of the known features
        :param unknow_feats: (B, C1, n) tensor of the features to be propagated to
        :param known_feats: (B, C2, m) tensor of features to be propagated
        :return:
            new_features: (B, mlp[-1], n) tensor of the features of the unknown features
        """
        if known is not None:
            dist, idx = pointnet2_utils.three_nn(unknown, known)
            dist_recip = 1.0 / (dist + 1e-8)
            norm = torch.sum(dist_recip, dim=2, keepdim=True)
            weight = dist_recip / norm

            interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight)
        else:
            interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1))

        if unknow_feats is not None:
            new_features = torch.cat([interpolated_feats, unknow_feats], dim=1)  # (B, C2 + C1, n)
        else:
            new_features = interpolated_feats

        new_features = new_features.unsqueeze(-1)
        new_features = self.mlp(new_features)

        return new_features.squeeze(-1)


if __name__ == "__main__":
    pass

View File

@@ -1,291 +0,0 @@
import torch
from torch.autograd import Variable
from torch.autograd import Function
import torch.nn as nn
from typing import Tuple
import sys

import pointnet2_cuda as pointnet2


class FurthestPointSampling(Function):
    @staticmethod
    def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
        """
        Uses iterative furthest point sampling to select a set of npoint features that have the largest
        minimum distance
        :param ctx:
        :param xyz: (B, N, 3) where N > npoint
        :param npoint: int, number of features in the sampled set
        :return:
            output: (B, npoint) tensor containing the set
        """
        assert xyz.is_contiguous()

        B, N, _ = xyz.size()
        output = torch.cuda.IntTensor(B, npoint)
        temp = torch.cuda.FloatTensor(B, N).fill_(1e10)

        pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output)
        return output

    @staticmethod
    def backward(xyz, a=None):
        return None, None


furthest_point_sample = FurthestPointSampling.apply


class GatherOperation(Function):
    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
        """
        :param ctx:
        :param features: (B, C, N)
        :param idx: (B, npoint) index tensor of the features to gather
        :return:
            output: (B, C, npoint)
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()

        B, npoint = idx.size()
        _, C, N = features.size()
        output = torch.cuda.FloatTensor(B, C, npoint)

        pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output)

        ctx.for_backwards = (idx, C, N)
        return output

    @staticmethod
    def backward(ctx, grad_out):
        idx, C, N = ctx.for_backwards
        B, npoint = idx.size()

        grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
        grad_out_data = grad_out.data.contiguous()
        pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data)
        return grad_features, None


gather_operation = GatherOperation.apply


class ThreeNN(Function):
    @staticmethod
    def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Find the three nearest neighbors of unknown in known
        :param ctx:
        :param unknown: (B, N, 3)
        :param known: (B, M, 3)
        :return:
            dist: (B, N, 3) l2 distance to the three nearest neighbors
            idx: (B, N, 3) index of 3 nearest neighbors
        """
        assert unknown.is_contiguous()
        assert known.is_contiguous()

        B, N, _ = unknown.size()
        m = known.size(1)
        dist2 = torch.cuda.FloatTensor(B, N, 3)
        idx = torch.cuda.IntTensor(B, N, 3)

        pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx)
        return torch.sqrt(dist2), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        return None, None


three_nn = ThreeNN.apply


class ThreeInterpolate(Function):
    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
        """
        Performs weighted linear interpolation on 3 features
        :param ctx:
        :param features: (B, C, M) feature descriptors to be interpolated from
        :param idx: (B, n, 3) three nearest neighbors of the target features in features
        :param weight: (B, n, 3) weights
        :return:
            output: (B, C, N) tensor of the interpolated features
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()
        assert weight.is_contiguous()

        B, c, m = features.size()
        n = idx.size(1)
        ctx.three_interpolate_for_backward = (idx, weight, m)
        output = torch.cuda.FloatTensor(B, c, n)

        pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output)
        return output

    @staticmethod
    def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        :param ctx:
        :param grad_out: (B, C, N) tensor with gradients of outputs
        :return:
            grad_features: (B, C, M) tensor with gradients of features
            None:
            None:
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        B, c, n = grad_out.size()

        grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_())
        grad_out_data = grad_out.data.contiguous()

        pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features.data)
        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply


class GroupingOperation(Function):
    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
        """
        :param ctx:
        :param features: (B, C, N) tensor of features to group
        :param idx: (B, npoint, nsample) tensor containing the indices of features to group with
        :return:
            output: (B, C, npoint, nsample) tensor
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()

        B, nfeatures, nsample = idx.size()
        _, C, N = features.size()
        output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)

        pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output)

        ctx.for_backwards = (idx, N)
        return output

    @staticmethod
    def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        :param ctx:
        :param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward
        :return:
            grad_features: (B, C, N) gradient of the features
        """
        idx, N = ctx.for_backwards

        B, C, npoint, nsample = grad_out.size()
        grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())

        grad_out_data = grad_out.data.contiguous()
        pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data)
        return grad_features, None


grouping_operation = GroupingOperation.apply


class BallQuery(Function):
    @staticmethod
    def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor:
        """
        :param ctx:
        :param radius: float, radius of the balls
        :param nsample: int, maximum number of features in the balls
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: (B, npoint, 3) centers of the ball query
        :return:
            idx: (B, npoint, nsample) tensor with the indices of the features that form the query balls
        """
        assert new_xyz.is_contiguous()
        assert xyz.is_contiguous()

        B, N, _ = xyz.size()
        npoint = new_xyz.size(1)
        idx = torch.cuda.IntTensor(B, npoint, nsample).zero_()

        pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx)
        return idx

    @staticmethod
    def backward(ctx, a=None):
        return None, None, None, None


ball_query = BallQuery.apply


class QueryAndGroup(nn.Module):
    def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
        """
        :param radius: float, radius of ball
        :param nsample: int, maximum number of features to gather in the ball
        :param use_xyz:
        """
        super().__init__()
        self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz

    def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> Tuple[torch.Tensor]:
        """
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: (B, npoint, 3) centroids
        :param features: (B, C, N) descriptors of the features
        :return:
            new_features: (B, 3 + C, npoint, nsample)
        """
        idx = ball_query(self.radius, self.nsample, xyz, new_xyz)
        xyz_trans = xyz.transpose(1, 2).contiguous()
        grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)
        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)

        if features is not None:
            grouped_features = grouping_operation(features, idx)
            if self.use_xyz:
                new_features = torch.cat([grouped_xyz, grouped_features], dim=1)  # (B, C + 3, npoint, nsample)
            else:
                new_features = grouped_features
        else:
            assert self.use_xyz, "Cannot have no features and not use xyz as a feature!"
            new_features = grouped_xyz

        return new_features


class GroupAll(nn.Module):
    def __init__(self, use_xyz: bool = True):
        super().__init__()
        self.use_xyz = use_xyz

    def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None):
        """
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: ignored
        :param features: (B, C, N) descriptors of the features
        :return:
            new_features: (B, C + 3, 1, N)
        """
        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
        if features is not None:
            grouped_features = features.unsqueeze(2)
            if self.use_xyz:
                new_features = torch.cat([grouped_xyz, grouped_features], dim=1)  # (B, 3 + C, 1, N)
            else:
                new_features = grouped_features
        else:
            new_features = grouped_xyz

        return new_features

View File

@@ -1,236 +0,0 @@
import torch.nn as nn
from typing import List, Tuple


class SharedMLP(nn.Sequential):

    def __init__(
            self,
            args: List[int],
            *,
            bn: bool = False,
            activation=nn.ReLU(inplace=True),
            preact: bool = False,
            first: bool = False,
            name: str = "",
            instance_norm: bool = False,
    ):
        super().__init__()

        for i in range(len(args) - 1):
            self.add_module(
                name + 'layer{}'.format(i),
                Conv2d(
                    args[i],
                    args[i + 1],
                    bn=(not first or not preact or (i != 0)) and bn,
                    activation=activation
                    if (not first or not preact or (i != 0)) else None,
                    preact=preact,
                    instance_norm=instance_norm
                )
            )


class _ConvBase(nn.Sequential):

    def __init__(
            self,
            in_size,
            out_size,
            kernel_size,
            stride,
            padding,
            activation,
            bn,
            init,
            conv=None,
            batch_norm=None,
            bias=True,
            preact=False,
            name="",
            instance_norm=False,
            instance_norm_func=None
    ):
        super().__init__()

        bias = bias and (not bn)
        conv_unit = conv(
            in_size,
            out_size,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias
        )
        init(conv_unit.weight)
        if bias:
            nn.init.constant_(conv_unit.bias, 0)

        if bn:
            if not preact:
                bn_unit = batch_norm(out_size)
            else:
                bn_unit = batch_norm(in_size)
        if instance_norm:
            if not preact:
                in_unit = instance_norm_func(out_size, affine=False, track_running_stats=False)
            else:
                in_unit = instance_norm_func(in_size, affine=False, track_running_stats=False)

        if preact:
            if bn:
                self.add_module(name + 'bn', bn_unit)
            if activation is not None:
                self.add_module(name + 'activation', activation)
            if not bn and instance_norm:
                self.add_module(name + 'in', in_unit)

        self.add_module(name + 'conv', conv_unit)

        if not preact:
            if bn:
                self.add_module(name + 'bn', bn_unit)
            if activation is not None:
                self.add_module(name + 'activation', activation)
            if not bn and instance_norm:
                self.add_module(name + 'in', in_unit)


class _BNBase(nn.Sequential):

    def __init__(self, in_size, batch_norm=None, name=""):
        super().__init__()
        self.add_module(name + "bn", batch_norm(in_size))

        nn.init.constant_(self[0].weight, 1.0)
        nn.init.constant_(self[0].bias, 0)


class BatchNorm1d(_BNBase):
    def __init__(self, in_size: int, *, name: str = ""):
        super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name)


class BatchNorm2d(_BNBase):
    def __init__(self, in_size: int, name: str = ""):
        super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name)


class Conv1d(_ConvBase):
    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            kernel_size: int = 1,
            stride: int = 1,
            padding: int = 0,
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=nn.init.kaiming_normal_,
            bias: bool = True,
            preact: bool = False,
            name: str = "",
            instance_norm=False
    ):
        super().__init__(
            in_size,
            out_size,
            kernel_size,
            stride,
            padding,
            activation,
            bn,
            init,
            conv=nn.Conv1d,
            batch_norm=BatchNorm1d,
            bias=bias,
            preact=preact,
            name=name,
            instance_norm=instance_norm,
            instance_norm_func=nn.InstanceNorm1d
        )


class Conv2d(_ConvBase):
    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            kernel_size: Tuple[int, int] = (1, 1),
            stride: Tuple[int, int] = (1, 1),
            padding: Tuple[int, int] = (0, 0),
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=nn.init.kaiming_normal_,
            bias: bool = True,
            preact: bool = False,
            name: str = "",
            instance_norm=False
    ):
        super().__init__(
            in_size,
            out_size,
            kernel_size,
            stride,
            padding,
            activation,
            bn,
            init,
            conv=nn.Conv2d,
            batch_norm=BatchNorm2d,
            bias=bias,
            preact=preact,
            name=name,
            instance_norm=instance_norm,
            instance_norm_func=nn.InstanceNorm2d
        )


class FC(nn.Sequential):

    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=None,
            preact: bool = False,
            name: str = ""
    ):
        super().__init__()

        fc = nn.Linear(in_size, out_size, bias=not bn)
        if init is not None:
            init(fc.weight)
        if not bn:
            nn.init.constant_(fc.bias, 0)

        if preact:
            if bn:
                self.add_module(name + 'bn', BatchNorm1d(in_size))
            if activation is not None:
                self.add_module(name + 'activation', activation)

        self.add_module(name + 'fc', fc)

        if not preact:
            if bn:
                self.add_module(name + 'bn', BatchNorm1d(out_size))
            if activation is not None:
                self.add_module(name + 'activation', activation)
View File

@@ -1,149 +0,0 @@
import torch
import torch.nn as nn
import os
import sys

path = os.path.abspath(__file__)
for i in range(2):
    path = os.path.dirname(path)
PROJECT_ROOT = path
sys.path.append(PROJECT_ROOT)

import PytorchBoot.stereotype as stereotype
from modules.module_lib.pointnet2_modules import PointnetSAModuleMSG
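
# Each ClsMSG config below specifies, per set-abstraction level: the number of sampled
# centroids (NPOINTS), the ball-query radii and per-scale sample counts (RADIUS /
# NSAMPLE), and the MLP widths for each scale (MLPS). None in the last level means
# "group all", i.e. a single global feature vector.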
ClsMSG_CFG_Dense = {
    'NPOINTS': [512, 256, 128, None],
    'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]],
    'NSAMPLE': [[32, 64], [16, 32], [8, 16], [None, None]],
    'MLPS': [[[16, 16, 32], [32, 32, 64]],
             [[64, 64, 128], [64, 96, 128]],
             [[128, 196, 256], [128, 196, 256]],
             [[256, 256, 512], [256, 384, 512]]],
    'DP_RATIO': 0.5,
}

ClsMSG_CFG_Light = {
    'NPOINTS': [512, 256, 128, None],
    'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]],
    'NSAMPLE': [[16, 32], [16, 32], [16, 32], [None, None]],
    'MLPS': [[[16, 16, 32], [32, 32, 64]],
             [[64, 64, 128], [64, 96, 128]],
             [[128, 196, 256], [128, 196, 256]],
             [[256, 256, 512], [256, 384, 512]]],
    'DP_RATIO': 0.5,
}

ClsMSG_CFG_Light_2048 = {
    'NPOINTS': [512, 256, 128, None],
    'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]],
    'NSAMPLE': [[16, 32], [16, 32], [16, 32], [None, None]],
    'MLPS': [[[16, 16, 32], [32, 32, 64]],
             [[64, 64, 128], [64, 96, 128]],
             [[128, 196, 256], [128, 196, 256]],
             [[256, 256, 1024], [256, 512, 1024]]],
    'DP_RATIO': 0.5,
}

ClsMSG_CFG_Strong = {
    'NPOINTS': [512, 256, 128, 64, None],
    'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [0.16, 0.32], [None, None]],
    'NSAMPLE': [[16, 32], [16, 32], [16, 32], [16, 32], [None, None]],
    'MLPS': [[[16, 16, 32], [32, 32, 64]],
             [[64, 64, 128], [64, 96, 128]],
             [[128, 196, 256], [128, 196, 256]],
             [[256, 256, 512], [256, 512, 512]],
             [[512, 512, 2048], [512, 1024, 2048]]],
    'DP_RATIO': 0.5,
}

ClsMSG_CFG_Lighter = {
    'NPOINTS': [512, 256, 128, 64, None],
    'RADIUS': [[0.01], [0.02], [0.04], [0.08], [None]],
    'NSAMPLE': [[64], [32], [16], [8], [None]],
    'MLPS': [[[32, 32, 64]],
             [[64, 64, 128]],
             [[128, 196, 256]],
             [[256, 256, 512]],
             [[512, 512, 1024]]],
    'DP_RATIO': 0.5,
}


def select_params(name):
    if name == 'light':
        return ClsMSG_CFG_Light
    elif name == 'lighter':
        return ClsMSG_CFG_Lighter
    elif name == 'dense':
        return ClsMSG_CFG_Dense
    elif name == 'light_2048':
        return ClsMSG_CFG_Light_2048
    elif name == 'strong':
        return ClsMSG_CFG_Strong
    else:
        raise NotImplementedError


def break_up_pc(pc):
    xyz = pc[..., 0:3].contiguous()
    features = (
        pc[..., 3:].transpose(1, 2).contiguous()
        if pc.size(-1) > 3 else None
    )
    return xyz, features


@stereotype.module("pointnet++_encoder")
class PointNet2Encoder(nn.Module):

    def encode_points(self, pts, require_per_point_feat=False):
        return self.forward(pts)

    def __init__(self, config: dict):
        super().__init__()

        channel_in = config.get("in_dim", 3) - 3
        params_name = config.get("params_name", "light")

        self.SA_modules = nn.ModuleList()
        selected_params = select_params(params_name)
        for k in range(len(selected_params['NPOINTS'])):
            mlps = selected_params['MLPS'][k].copy()
            channel_out = 0
            for idx in range(len(mlps)):
                mlps[idx] = [channel_in] + mlps[idx]
                channel_out += mlps[idx][-1]

            self.SA_modules.append(
                PointnetSAModuleMSG(
                    npoint=selected_params['NPOINTS'][k],
                    radii=selected_params['RADIUS'][k],
                    nsamples=selected_params['NSAMPLE'][k],
                    mlps=mlps,
                    use_xyz=True,
                    bn=True
                )
            )
            channel_in = channel_out

    def forward(self, point_cloud: torch.cuda.FloatTensor):
        xyz, features = break_up_pc(point_cloud)

        l_xyz, l_features = [xyz], [features]
        for i in range(len(self.SA_modules)):
            li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i])
            l_xyz.append(li_xyz)
            l_features.append(li_features)
        return l_features[-1].squeeze(-1)


if __name__ == '__main__':
    seed = 100
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    net = PointNet2Encoder(config={"in_dim": 3, "params_name": "strong"}).cuda()
    pts = torch.randn(2, 2444, 3).cuda()
    print(torch.mean(pts, dim=1))
    pre = net.encode_points(pts)
    print(pre.shape)

View File

@@ -164,10 +164,10 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
if __name__ == "__main__":
#root = "/media/hofee/repository/new_data_with_normal"
-root = r"/media/hofee/data/data/test_bottle/view"
+root = r"H:\AI\Datasets\nbv_rec_part2"
scene_list = os.listdir(root)
from_idx = 0 # 1000
-to_idx = len(scene_list) # 1500
+to_idx = 600 # 1500
cnt = 0

425
runners/heuristic.py Normal file
View File

@@ -0,0 +1,425 @@
import os
import json
from utils.render import RenderUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
from utils.reconstruction import ReconstructionUtil
import torch
from tqdm import tqdm
import numpy as np
import pickle
from PytorchBoot.config import ConfigManager
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory import ComponentFactory
from PytorchBoot.dataset import BaseDataset
from PytorchBoot.runners.runner import Runner
from PytorchBoot.utils import Log
from PytorchBoot.status import status_manager
from utils.data_load import DataLoadUtil
@stereotype.runner("heuristic")
class Heuristic(Runner):
def __init__(self, config_path):
super().__init__(config_path)
self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path")
self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir")
self.voxel_size = ConfigManager.get(namespace.Stereotype.RUNNER, "voxel_size")
self.min_new_area = ConfigManager.get(namespace.Stereotype.RUNNER, "min_new_area")
self.heuristic_method = ConfigManager.get(namespace.Stereotype.RUNNER, "heuristic_method")
self.heuristic_method_config = ConfigManager.get("heuristic_methods", self.heuristic_method)
CM = 0.01
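# editor's note: min_new_area appears to be given in cm^2 while voxel_size is in
# meters, so (CM / voxel_size)^2 is the number of voxel cells per cm^2 and the
# product below is a point-count threshold.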
self.min_new_pts_num = self.min_new_area * (CM / self.voxel_size) **2
''' Experiment '''
self.load_experiment("nbv_evaluator")
self.stat_result_path = os.path.join(self.output_dir, "stat.json")
if os.path.exists(self.stat_result_path):
with open(self.stat_result_path, "r") as f:
self.stat_result = json.load(f)
else:
self.stat_result = {}
''' Test '''
self.test_config = ConfigManager.get(namespace.Stereotype.RUNNER, namespace.Mode.TEST)
self.test_dataset_name_list = self.test_config["dataset_list"]
self.test_set_list = []
self.test_writer_list = []
seen_name = set()
for test_dataset_name in self.test_dataset_name_list:
if test_dataset_name not in seen_name:
seen_name.add(test_dataset_name)
else:
raise ValueError("Duplicate test dataset name: {}".format(test_dataset_name))
test_set: BaseDataset = ComponentFactory.create(namespace.Stereotype.DATASET, test_dataset_name)
self.test_set_list.append(test_set)
self.print_info()
def run(self):
Log.info("Loading from epoch {}.".format(self.current_epoch))
self.run_heuristic()
Log.success("Inference finished.")
def run_heuristic(self):
test_set: BaseDataset
for dataset_idx, test_set in enumerate(self.test_set_list):
status_manager.set_progress("heuristic", "heuristic", f"dataset", dataset_idx, len(self.test_set_list))
test_set_name = test_set.get_name()
total=int(len(test_set))
for i in tqdm(range(total), desc=f"Processing {test_set_name}", ncols=100):
try:
data = test_set.__getitem__(i)
scene_name = data["scene_name"]
inference_result_path = os.path.join(self.output_dir, test_set_name, f"{scene_name}.pkl")
if os.path.exists(inference_result_path):
Log.info(f"Inference result already exists for scene: {scene_name}")
continue
status_manager.set_progress("heuristic", "heuristic", f"Batch[{test_set_name}]", i+1, total)
output = self.predict_sequence(data)
self.save_inference_result(test_set_name, data["scene_name"], output)
except Exception as e:
print(e)
Log.error(f"Error, {e}")
continue
status_manager.set_progress("heuristic", "heuristic", f"dataset", len(self.test_set_list), len(self.test_set_list))
def predict_sequence(self, data, cr_increase_threshold=0, overlap_area_threshold=25, scan_points_threshold=10, max_iter=5000, max_retry=5000, max_success=5000):
scene_name = data["scene_name"]
Log.info(f"Processing scene: {scene_name}")
status_manager.set_status("heuristic", "heuristic", "scene", scene_name)
''' data for rendering '''
scene_path = data["scene_path"]
O_to_L_pose = data["O_to_L_pose"]
voxel_threshold = self.voxel_size
filter_degree = 75
down_sampled_model_pts = data["gt_pts"]
first_frame_to_world_9d = data["first_scanned_n_to_world_pose_9d"][0]
first_frame_to_world = np.eye(4)
first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(first_frame_to_world_9d[:6])
first_frame_to_world[:3,3] = first_frame_to_world_9d[6:]
# get the scan points on the display table
root = os.path.dirname(scene_path)
display_table_info = DataLoadUtil.get_display_table_info(root, scene_name)
radius = display_table_info["radius"]
scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=radius))
# generate the candidate pose sequence
if self.heuristic_method == "hemisphere_random":
pose_sequence = self.generate_hemisphere_random_sequence(
max_iter,
self.heuristic_method_config
)
elif self.heuristic_method == "hemisphere_circle_trajectory":
pose_sequence = self.generate_hemisphere_circle_sequence(
self.heuristic_method_config
)
else:
raise ValueError(f"Unknown heuristic method: {self.heuristic_method}")
# render the first frame
first_frame_target_pts, _, first_frame_scan_points_indices = RenderUtil.render_pts(
first_frame_to_world, scene_path, self.script_path, scan_points,
voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose
)
# initialize result storage
scanned_view_pts = [first_frame_target_pts]
history_indices = [first_frame_scan_points_indices]
pred_cr_seq = []
retry_duplication_pose = []
retry_no_pts_pose = []
retry_overlap_pose = []
pose_9d_seq = [first_frame_to_world_9d]
last_pred_cr, _ = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
pred_cr_seq.append(last_pred_cr)
last_pts_num = PtsUtil.voxel_downsample_point_cloud(first_frame_target_pts, voxel_threshold).shape[0]
# execute the sequence
retry = 0
success = 0
#import ipdb; ipdb.set_trace()
combined_scanned_pts_tensor = torch.tensor([0,0,0])
cnt = 0
for pred_pose in pose_sequence:
cnt += 1
if retry >= max_retry or success >= max_success:
break
Log.green(f"迭代: {cnt}/{len(pose_sequence)}, 重试: {retry}/{max_retry}, 成功: {success}/{max_success}")
try:
new_target_pts, _, new_scan_points_indices = RenderUtil.render_pts(
pred_pose, scene_path, self.script_path, scan_points,
voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose
)
# check scan-point overlap
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
# check point-cloud overlap
downsampled_new_target_pts = PtsUtil.voxel_downsample_point_cloud(new_target_pts, voxel_threshold)
overlap, _ = ReconstructionUtil.check_overlap(
downsampled_new_target_pts, down_sampled_model_pts,
overlap_area_threshold=curr_overlap_area_threshold,
voxel_size=voxel_threshold,
require_new_added_pts_num=True
)
if not overlap:
Log.yellow("no overlap!")
retry += 1
retry_overlap_pose.append(pred_pose.tolist())
continue
if new_target_pts.shape[0] == 0:
Log.red("新视角无点云")
retry_no_pts_pose.append(pred_pose.tolist())
retry += 1
continue
history_indices.append(new_scan_points_indices)
# compute the coverage rate
pred_cr, _ = self.compute_coverage_rate(scanned_view_pts, new_target_pts, down_sampled_model_pts, threshold=voxel_threshold)
Log.yellow(f"coverage: {pred_cr}, last: {last_pred_cr}, max: {data['seq_max_coverage_rate']}")
# update results
pred_cr_seq.append(pred_cr)
scanned_view_pts.append(new_target_pts)
pose_6d = PoseUtil.matrix_to_rotation_6d_numpy(pred_pose[:3,:3])
pose_9d = np.concatenate([
pose_6d,
pred_pose[:3,3]
])
pose_9d_seq.append(pose_9d)
# process point clouds for combined_scanned_pts
combined_scanned_pts = np.vstack(scanned_view_pts)
voxel_downsampled_pts, _ = self.voxel_downsample_with_mapping(combined_scanned_pts, voxel_threshold)
random_downsampled_pts, _ = PtsUtil.random_downsample_point_cloud(voxel_downsampled_pts, 8192, require_idx=True)
combined_scanned_pts_tensor = torch.tensor(random_downsampled_pts, dtype=torch.float32)
# check the point-count increment
pts_num = voxel_downsampled_pts.shape[0]
Log.info(f"delta pts num: {pts_num - last_pts_num}, current: {pts_num}, last: {last_pts_num}")
if pts_num - last_pts_num < self.min_new_pts_num:
if pred_cr <= data["seq_max_coverage_rate"] - 1e-2:
retry += 1
retry_duplication_pose.append(pred_pose.tolist())
Log.red(f"点数增量过小 < {self.min_new_pts_num}")
else:
success += 1
Log.success(f"达到目标覆盖率")
last_pts_num = pts_num
last_pred_cr = pred_cr
if pred_cr >= data["seq_max_coverage_rate"] - 1e-3:
Log.success(f"达到最大覆盖率: {pred_cr}")
except Exception as e:
import traceback
traceback.print_exc()
Log.error(f"场景 {scene_path} 处理出错: {e}")
retry_no_pts_pose.append(pred_pose.tolist())
retry += 1
continue
# assemble the results
result = {
"pred_pose_9d_seq": pose_9d_seq,
"combined_scanned_pts_tensor": combined_scanned_pts_tensor,
"target_pts_seq": scanned_view_pts,
"coverage_rate_seq": pred_cr_seq,
"max_coverage_rate": data["seq_max_coverage_rate"],
"pred_max_coverage_rate": max(pred_cr_seq),
"scene_name": scene_name,
"retry_no_pts_pose": retry_no_pts_pose,
"retry_duplication_pose": retry_duplication_pose,
"retry_overlap_pose": retry_overlap_pose,
"best_seq_len": data["best_seq_len"],
}
self.stat_result[scene_name] = {
"coverage_rate_seq": pred_cr_seq,
"pred_max_coverage_rate": max(pred_cr_seq),
"pred_seq_len": len(pred_cr_seq),
}
print('max coverage rate: ', max(pred_cr_seq))
return result
def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
return downsampled_points, inverse
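# Editor's usage sketch (hypothetical values): `inverse` maps each input point
# to the row of its voxel's representative in `downsampled_points`, so points
# can later be grouped by voxel:
#   pts = np.random.rand(1000, 3)
#   down, inverse = self.voxel_downsample_with_mapping(pts, voxel_size=0.1)
#   assert np.allclose(np.floor(down[inverse] / 0.1), np.floor(pts / 0.1))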
def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005):
if new_pts is not None:
new_scanned_view_pts = scanned_view_pts + [new_pts]
else:
new_scanned_view_pts = scanned_view_pts
combined_point_cloud = np.vstack(new_scanned_view_pts)
down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold)
return ReconstructionUtil.compute_coverage_rate(model_pts, down_sampled_combined_point_cloud, threshold)
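# Editor's note: coverage is computed on the voxel-downsampled union of all
# scanned views against the already-downsampled model points, so repeated
# observations of the same voxel do not inflate the rate.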
def save_inference_result(self, dataset_name, scene_name, output):
dataset_dir = os.path.join(self.output_dir, dataset_name)
if not os.path.exists(dataset_dir):
os.makedirs(dataset_dir)
output_path = os.path.join(dataset_dir, f"{scene_name}.pkl")
pickle.dump(output, open(output_path, "wb"))
with open(self.stat_result_path, "w") as f:
json.dump(self.stat_result, f)
def get_checkpoint_path(self, is_last=False):
return os.path.join(self.experiment_path, namespace.Direcotry.CHECKPOINT_DIR_NAME,
"Epoch_{}.pth".format(
self.current_epoch if self.current_epoch != -1 and not is_last else "last"))
def load_checkpoint(self, is_last=False):
self.load(self.get_checkpoint_path(is_last))
Log.success(f"Loaded checkpoint from {self.get_checkpoint_path(is_last)}")
if is_last:
checkpoint_root = os.path.join(self.experiment_path, namespace.Direcotry.CHECKPOINT_DIR_NAME)
meta_path = os.path.join(checkpoint_root, "meta.json")
if not os.path.exists(meta_path):
raise FileNotFoundError(
"No checkpoint meta.json file in the experiment {}".format(self.experiments_config["name"]))
with open(meta_path, "r") as f:
meta = json.load(f)
self.current_epoch = meta["last_epoch"]
self.current_iter = meta["last_iter"]
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
self.current_epoch = self.experiments_config["epoch"]
def create_experiment(self, backup_name=None):
super().create_experiment(backup_name)
def print_info(self):
def print_dataset(dataset: BaseDataset):
config = dataset.get_config()
name = dataset.get_name()
Log.blue(f"Dataset: {name}")
for k,v in config.items():
Log.blue(f"\t{k}: {v}")
super().print_info()
table_size = 70
Log.blue(f"{'+' + '-' * (table_size // 2)} Datasets {'-' * (table_size // 2)}" + '+')
for i, test_set in enumerate(self.test_set_list):
Log.blue(f"test dataset {i}: ")
print_dataset(test_set)
Log.blue(f"{'+' + '-' * (table_size // 2)}----------{'-' * (table_size // 2)}" + '+')
def generate_hemisphere_random_sequence(self, max_iter, config):
"""Generate a random hemisphere sampling sequence"""
radius_fixed = config["radius_fixed"]
fixed_radius = config["fixed_radius"]
min_radius = config["min_radius"]
max_radius = config["max_radius"]
poses = []
center = np.array(config["center"])
for _ in range(max_iter):
# sample a random direction
direction = np.random.randn(3)
direction[2] = abs(direction[2]) # keep it in the upper hemisphere
direction = direction / np.linalg.norm(direction)
# choose the radius
if radius_fixed:
radius = fixed_radius
else:
radius = np.random.uniform(min_radius, max_radius)
# compute position and orientation
position = center + direction * radius
z_axis = -direction
y_axis = np.array([0, 0, 1])
x_axis = np.cross(y_axis, z_axis)
x_axis = x_axis / np.linalg.norm(x_axis)
y_axis = np.cross(z_axis, x_axis)
pose = np.eye(4)
pose[:3,:3] = np.stack([x_axis, y_axis, z_axis], axis=1)
pose[:3,3] = position
poses.append(pose)
return poses
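# Editor's sanity-check sketch (assumption: every generated pose should look at
# `center`; note the construction degenerates if `direction` is exactly
# [0, 0, 1], where the cross product with the up vector vanishes):
#   pose = poses[0]
#   look = (center - pose[:3, 3]) / np.linalg.norm(center - pose[:3, 3])
#   assert np.allclose(pose[:3, 2], look)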
def generate_hemisphere_circle_sequence(self, config):
"""Generate a circular trajectory sampling sequence"""
radius_fixed = config["radius_fixed"]
fixed_radius = config["fixed_radius"]
min_radius = config["min_radius"]
max_radius = config["max_radius"]
phi_list = config["phi_list"]
circle_times = config["circle_times"]
poses = []
center = np.array(config["center"])
for phi in phi_list: # elevation angle
phi_rad = np.deg2rad(phi)
for i in range(circle_times): # azimuth angle
theta = i * (2 * np.pi / circle_times)
# choose the radius
if radius_fixed:
radius = fixed_radius
else:
radius = np.random.uniform(min_radius, max_radius)
# spherical to Cartesian coordinates
x = radius * np.cos(theta) * np.sin(phi_rad)
y = radius * np.sin(theta) * np.sin(phi_rad)
z = radius * np.cos(phi_rad)
position = center + np.array([x, y, z])
# compute the orientation (look at the center)
direction = (center - position) / np.linalg.norm(center - position)
z_axis = direction
y_axis = np.array([0, 0, 1])
x_axis = np.cross(y_axis, z_axis)
x_axis = x_axis / np.linalg.norm(x_axis)
y_axis = np.cross(z_axis, x_axis)
pose = np.eye(4)
pose[:3,:3] = np.stack([x_axis, y_axis, z_axis], axis=1)
pose[:3,3] = position
poses.append(pose)
return poses

View File

@@ -12,7 +12,6 @@ from PytorchBoot.runners.runner import Runner
from PytorchBoot.utils import Log
from utils.pts import PtsUtil
-from beans.predict_result import PredictResult
@stereotype.runner("inferencer_server")
class InferencerServer(Runner):
@@ -51,7 +50,6 @@ class InferencerServer(Runner):
def get_result(self, output_data):
pred_pose_9d = output_data["pred_pose_9d"]
-pred_pose_9d = np.asarray(PredictResult(pred_pose_9d.cpu().numpy(), None, cluster_params=dict(eps=0.25, min_samples=3)).candidate_9d_poses, dtype=np.float32)
result = {
"pred_pose_9d": pred_pose_9d.tolist()
}

View File

@@ -4,7 +4,6 @@ from utils.render import RenderUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
from utils.reconstruction import ReconstructionUtil
-from beans.predict_result import PredictResult
import torch
from tqdm import tqdm
@@ -83,7 +82,6 @@ class Inferencer(Runner):
data = test_set.__getitem__(i)
scene_name = data["scene_name"]
inference_result_path = os.path.join(self.output_dir, test_set_name, f"{scene_name}.pkl")
if os.path.exists(inference_result_path):
Log.info(f"Inference result already exists for scene: {scene_name}")
continue
@@ -140,7 +138,7 @@ class Inferencer(Runner):
pred_cr_seq = [last_pred_cr]
success = 0
last_pts_num = PtsUtil.voxel_downsample_point_cloud(data["first_scanned_pts"][0], voxel_threshold).shape[0]
-#import time
+import time
while len(pred_cr_seq) < max_iter and retry < max_retry and success < max_success:
Log.green(f"iter: {len(pred_cr_seq)}, retry: {retry}/{max_retry}, success: {success}/{max_success}")
combined_scanned_pts = np.vstack(scanned_view_pts)
@@ -148,90 +146,91 @@ class Inferencer(Runner):
output = self.pipeline(input_data)
pred_pose_9d = output["pred_pose_9d"]
pred_pose = torch.eye(4, device=pred_pose_9d.device)
-# # save pred_pose_9d ------
-# root = "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/temp_output_result"
-# scene_dir = os.path.join(root, scene_name)
-# if not os.path.exists(scene_dir):
-#     os.makedirs(scene_dir)
-# pred_9d_path = os.path.join(scene_dir,f"pred_pose_9d_{len(pred_cr_seq)}.npy")
-# pts_path = os.path.join(scene_dir,f"combined_scanned_pts_{len(pred_cr_seq)}.txt")
-# np_combined_scanned_pts = input_data["combined_scanned_pts"][0].cpu().numpy()
-# np.save(pred_9d_path, pred_pose_9d.cpu().numpy())
-# np.savetxt(pts_path, np_combined_scanned_pts)
-# # ----- ----- -----
-predict_result = PredictResult(pred_pose_9d.cpu().numpy(), input_pts=input_data["combined_scanned_pts"][0].cpu().numpy(), cluster_params=dict(eps=0.25, min_samples=3))
-# -----------------------
-# import ipdb; ipdb.set_trace()
-# predict_result.visualize()
-# -----------------------
-pred_pose_9d_candidates = predict_result.candidate_9d_poses
-for pred_pose_9d in pred_pose_9d_candidates:
-    pred_pose_9d = torch.tensor(pred_pose_9d, dtype=torch.float32).to(self.device).unsqueeze(0)
pred_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9d[:,:6])[0]
pred_pose[:3,3] = pred_pose_9d[0,6:]
try:
new_target_pts, new_target_normals, new_scan_points_indices = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
#import ipdb; ipdb.set_trace()
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
downsampled_new_target_pts = PtsUtil.voxel_downsample_point_cloud(new_target_pts, voxel_threshold)
overlap, _ = ReconstructionUtil.check_overlap(downsampled_new_target_pts, voxel_downsampled_combined_scanned_pts_np, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=voxel_threshold, require_new_added_pts_num = True)
if not overlap:
Log.yellow("no overlap!")
retry += 1
retry_overlap_pose.append(pred_pose.cpu().numpy().tolist())
continue
history_indices.append(new_scan_points_indices)
except Exception as e:
Log.error(f"Error in scene {scene_path}, {e}")
print("current pose: ", pred_pose)
print("curr_pred_cr: ", last_pred_cr)
retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist())
retry += 1
continue
if new_target_pts.shape[0] == 0:
Log.red("no pts in new target")
retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist())
retry += 1
continue
pred_cr, _ = self.compute_coverage_rate(scanned_view_pts, new_target_pts, down_sampled_model_pts, threshold=voxel_threshold)
Log.yellow(f"{pred_cr}, {last_pred_cr}, max: , {data['seq_max_coverage_rate']}")
if pred_cr >= data["seq_max_coverage_rate"] - 1e-3:
print("max coverage rate reached!: ", pred_cr)
pred_cr_seq.append(pred_cr)
scanned_view_pts.append(new_target_pts)
input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)]
+start_indices = [0]
+total_points = 0
+for pts in scanned_view_pts:
+    total_points += pts.shape[0]
+    start_indices.append(total_points)
combined_scanned_pts = np.vstack(scanned_view_pts)
-voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_pts, voxel_threshold)
-random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N)
+voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_pts, voxel_threshold)
+random_downsampled_combined_scanned_pts_np, random_downsample_idx = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N, require_idx=True)
+all_idx_unique = np.arange(len(voxel_downsampled_combined_scanned_pts_np))
+all_random_downsample_idx = all_idx_unique[random_downsample_idx]
+scanned_pts_mask = []
+for idx, start_idx in enumerate(start_indices):
+    if idx == len(start_indices) - 1:
+        break
+    end_idx = start_indices[idx+1]
+    view_inverse = inverse[start_idx:end_idx]
+    view_unique_downsampled_idx = np.unique(view_inverse)
+    view_unique_downsampled_idx_set = set(view_unique_downsampled_idx)
+    mask = np.array([idx in view_unique_downsampled_idx_set for idx in all_random_downsample_idx])
+    scanned_pts_mask.append(mask)
input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device)
+#import ipdb; ipdb.set_trace()
+input_data["scanned_pts_mask"] = [torch.tensor(scanned_pts_mask, dtype=torch.bool)]
last_pred_cr = pred_cr
pts_num = voxel_downsampled_combined_scanned_pts_np.shape[0]
Log.info(f"delta pts num:,{pts_num - last_pts_num },{pts_num}, {last_pts_num}")
if pts_num - last_pts_num < self.min_new_pts_num and pred_cr <= data["seq_max_coverage_rate"] - 1e-2:
retry += 1
retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
Log.red(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}")
elif pts_num - last_pts_num < self.min_new_pts_num and pred_cr > data["seq_max_coverage_rate"] - 1e-2:
success += 1
Log.success(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}")
last_pts_num = pts_num
input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
@@ -274,13 +273,6 @@ class Inferencer(Runner):
down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold)
return ReconstructionUtil.compute_coverage_rate(model_pts, down_sampled_combined_point_cloud, threshold)
-def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003):
-    voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
-    unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
-    idx_sort = np.argsort(inverse)
-    idx_unique = idx_sort[np.cumsum(counts)-counts]
-    downsampled_points = point_cloud[idx_unique]
-    return downsampled_points, inverse
def save_inference_result(self, dataset_name, scene_name, output):
dataset_dir = os.path.join(self.output_dir, dataset_name)

View File

@@ -1,456 +0,0 @@
import pybullet as p
import pybullet_data
import numpy as np
import os
import time
from PytorchBoot.runners.runner import Runner
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from utils.control import ControlUtil
@stereotype.runner("simulator")
class Simulator(Runner):
CREATE: str = "create"
SIMULATE: str = "simulate"
INIT_GRIPPER_POSE:np.ndarray = np.asarray(
[[0.41869126 ,0.87596275 , 0.23951774 , 0.36005292],
[ 0.70787907 ,-0.4800251 , 0.51813998 ,-0.40499909],
[ 0.56884584, -0.04739109 ,-0.82107382 ,0.76881103],
[ 0. , 0. , 0. , 1. ]])
TURNTABLE_WORLD_TO_PYBULLET_WORLD:np.ndarray = np.asarray(
[[1, 0, 0, 0.8],
[0, 1, 0, 0],
[0, 0, 1, 0.5],
[0, 0, 0, 1]])
debug_pose = np.asarray([
[
0.992167055606842,
-0.10552699863910675,
0.06684812903404236,
-0.07388903945684433
],
[
0.10134342312812805,
0.3670985698699951,
-0.9246448874473572,
-0.41582486033439636
],
[
0.07303514331579208,
0.9241767525672913,
0.37491756677627563,
1.0754833221435547
],
[
0.0,
0.0,
0.0,
1.0
]])
def __init__(self, config_path):
super().__init__(config_path)
self.config_path = config_path
self.robot_id = None
self.turntable_id = None
self.target_id = None
camera_config = ConfigManager.get("simulation", "camera")
self.camera_params = {
'width': camera_config["width"],
'height': camera_config["height"],
'fov': camera_config["fov"],
'near': camera_config["near"],
'far': camera_config["far"]
}
self.sim_config = ConfigManager.get("simulation")
def run(self, cmd):
print(f"Simulator run {cmd}")
if cmd == self.CREATE:
self.prepare_env()
self.create_env()
elif cmd == self.SIMULATE:
self.simulate()
def simulate(self):
self.reset()
self.init()
debug_pose = Simulator.debug_pose
offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
debug_pose = debug_pose @ offset
for _ in range(10000):
debug_pose_2 = np.eye(4)
debug_pose_2[0,0] = -1
debug_pose_2[2,3] = 0.5
self.move_to(debug_pose_2)
# Wait for the system to stabilize
for _ in range(20): # Simulate 20 steps to ensure stability
p.stepSimulation()
time.sleep(0.001) # Add small delay to ensure physics simulation
depth_img, segm_img = self.take_picture()
p.stepSimulation()
def prepare_env(self):
p.connect(p.GUI)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
p.setGravity(0, 0, 0)
p.loadURDF("plane.urdf")
def create_env(self):
print(self.config)
robot_config = self.sim_config["robot"]
turntable_config = self.sim_config["turntable"]
target_config = self.sim_config["target"]
self.robot_id = p.loadURDF(
robot_config["urdf_path"],
robot_config["initial_position"],
p.getQuaternionFromEuler(robot_config["initial_orientation"]),
useFixedBase=True
)
p.changeDynamics(
self.robot_id,
linkIndex=-1,
mass=0,
linearDamping=0,
angularDamping=0,
lateralFriction=0
)
visual_shape_id = p.createVisualShape(
shapeType=p.GEOM_CYLINDER,
radius=turntable_config["radius"],
length=turntable_config["height"],
rgbaColor=[0.7, 0.7, 0.7, 1]
)
collision_shape_id = p.createCollisionShape(
shapeType=p.GEOM_CYLINDER,
radius=turntable_config["radius"],
height=turntable_config["height"]
)
self.turntable_id = p.createMultiBody(
baseMass=0, # mass 0 makes the body static
baseCollisionShapeIndex=collision_shape_id,
baseVisualShapeIndex=visual_shape_id,
basePosition=turntable_config["center_position"]
)
# disable the turntable's dynamics
p.changeDynamics(
self.turntable_id,
-1, # -1 refers to the base
mass=0,
linearDamping=0,
angularDamping=0,
lateralFriction=0
)
obj_path = os.path.join(target_config["obj_dir"], target_config["obj_name"], "mesh.obj")
assert os.path.exists(obj_path), f"Error: File not found at {obj_path}"
# load the OBJ file as the target object
target_visual = p.createVisualShape(
shapeType=p.GEOM_MESH,
fileName=obj_path,
rgbaColor=target_config["rgba_color"],
specularColor=[0.4, 0.4, 0.4],
meshScale=[target_config["scale"]] * 3
)
# use a simplified collision shape
target_collision = p.createCollisionShape(
shapeType=p.GEOM_MESH,
fileName=obj_path,
meshScale=[target_config["scale"]] * 3,
flags=p.GEOM_FORCE_CONCAVE_TRIMESH # try a concave triangle mesh
)
# create the target object
self.target_id = p.createMultiBody(
baseMass=0, # mass 0 makes the body static
baseCollisionShapeIndex=target_collision,
baseVisualShapeIndex=target_visual,
basePosition=[
turntable_config["center_position"][0],
turntable_config["center_position"][1],
turntable_config["height"] + turntable_config["center_position"][2]
],
baseOrientation=p.getQuaternionFromEuler([np.pi/2, 0, 0])
)
# disable the target object's dynamics
p.changeDynamics(
self.target_id,
-1, # -1 refers to the base
mass=0,
linearDamping=0,
angularDamping=0,
lateralFriction=0
)
# create a fixed constraint pinning the target object to the turntable
cid = p.createConstraint(
parentBodyUniqueId=self.turntable_id,
parentLinkIndex=-1, # -1 refers to the base
childBodyUniqueId=self.target_id,
childLinkIndex=-1, # -1 refers to the base
jointType=p.JOINT_FIXED,
jointAxis=[0, 0, 0],
parentFramePosition=[0, 0, 0], # offset relative to the turntable center
childFramePosition=[0, 0, 0] # offset relative to the object center
)
# constraint parameters
p.changeConstraint(cid, maxForce=100) # cap the force so the constraint stays stable
def move_robot_to_pose(self, target_matrix):
# extract the position (first 3 elements of the last column) from the 4x4 homogeneous matrix
position = target_matrix[:3, 3]
# convert the 3x3 rotation matrix to an orientation quaternion
R = target_matrix[:3, :3]
# w component of the quaternion
w = np.sqrt(max(0, 1 + R[0,0] + R[1,1] + R[2,2])) / 2
# avoid division by zero and handle the degenerate case
if abs(w) < 1e-8:
# special case when w is close to 0
x = np.sqrt(max(0, 1 + R[0,0] - R[1,1] - R[2,2])) / 2
y = np.sqrt(max(0, 1 - R[0,0] + R[1,1] - R[2,2])) / 2
z = np.sqrt(max(0, 1 - R[0,0] - R[1,1] + R[2,2])) / 2
# fix the signs
if R[2,1] - R[1,2] < 0: x = -x
if R[0,2] - R[2,0] < 0: y = -y
if R[1,0] - R[0,1] < 0: z = -z
else:
# regular case
x = (R[2,1] - R[1,2]) / (4 * w)
y = (R[0,2] - R[2,0]) / (4 * w)
z = (R[1,0] - R[0,1]) / (4 * w)
orientation = (x, y, z, w)
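# editor's note (sketch, not in the original file): scipy provides an
# equivalent, numerically robust matrix-to-quaternion conversion:
#   from scipy.spatial.transform import Rotation
#   orientation = tuple(Rotation.from_matrix(R).as_quat())  # (x, y, z, w)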
# IK solver parameters
num_joints = p.getNumJoints(self.robot_id)
lower_limits = []
upper_limits = []
joint_ranges = []
rest_poses = []
# gather joint limits and a default rest pose
for i in range(num_joints):
joint_info = p.getJointInfo(self.robot_id, i)
lower_limits.append(joint_info[8])
upper_limits.append(joint_info[9])
joint_ranges.append(joint_info[9] - joint_info[8])
rest_poses.append(0) # a better default rest pose could be set here
# solve IK under joint limits; collisions are checked while stepping toward the target below
joint_poses = p.calculateInverseKinematics(
self.robot_id,
7, # end effector link index
position,
orientation,
lowerLimits=lower_limits,
upperLimits=upper_limits,
jointRanges=joint_ranges,
restPoses=rest_poses,
maxNumIterations=100,
residualThreshold=1e-4
)
# move to the target in steps, checking for collisions along the way
current_poses = [p.getJointState(self.robot_id, i)[0] for i in range(7)]
steps = 50 # move in 50 interpolation steps
for step in range(steps):
# linearly interpolate the intermediate joint positions
intermediate_poses = []
for current, target in zip(current_poses, joint_poses):
t = (step + 1) / steps
intermediate = current + (target - current) * t
intermediate_poses.append(intermediate)
# set the joint positions
for i in range(7):
p.setJointMotorControl2(
self.robot_id,
i,
p.POSITION_CONTROL,
intermediate_poses[i]
)
# step the simulation once
p.stepSimulation()
# check for collisions
if p.getContactPoints(self.robot_id, self.turntable_id):
print("potential collision detected, stopping motion")
return False
return True
def rotate_turntable(self, angle_degrees):
# rotate the turntable
current_pos, current_orn = p.getBasePositionAndOrientation(self.turntable_id)
current_orn = p.getEulerFromQuaternion(current_orn)
new_orn = list(current_orn)
new_orn[2] += np.radians(angle_degrees)
new_orn_quat = p.getQuaternionFromEuler(new_orn)
p.resetBasePositionAndOrientation(
self.turntable_id,
current_pos,
new_orn_quat
)
# rotate the target object along with it
target_pos, target_orn = p.getBasePositionAndOrientation(self.target_id)
target_orn = p.getEulerFromQuaternion(target_orn)
# update the target object's orientation
target_orn = list(target_orn)
target_orn[2] += np.radians(angle_degrees)
target_orn_quat = p.getQuaternionFromEuler(target_orn)
# compute the object's new position (rotation about the turntable center)
turntable_center = current_pos
relative_pos = np.array(target_pos) - np.array(turntable_center)
# build the rotation matrix
theta = np.radians(angle_degrees)
rotation_matrix = np.array([
[np.cos(theta), -np.sin(theta), 0],
[np.sin(theta), np.cos(theta), 0],
[0, 0, 1]
])
# compute the new relative position
new_relative_pos = rotation_matrix.dot(relative_pos)
new_pos = np.array(turntable_center) + new_relative_pos
# update the target object's position and orientation
p.resetBasePositionAndOrientation(
self.target_id,
new_pos,
target_orn_quat
)
def get_camera_pose(self):
end_effector_link = 7 # link index of the Franka end effector
state = p.getLinkState(self.robot_id, end_effector_link)
ee_pos = state[0] # position in world coordinates
camera_orn = state[1] # orientation (quaternion) in world coordinates
# build the camera view vectors from the link orientation
rot_matrix = p.getMatrixFromQuaternion(camera_orn)
rot_matrix = np.array(rot_matrix).reshape(3, 3)
# the camera forward vector is aligned with the end effector's z axis
camera_forward = rot_matrix.dot(np.array([0, 0, 1])) # z-axis direction
# shift the camera 0.12 m forward along the view direction
offset = 0.12
camera_pos = np.array(ee_pos) + camera_forward * offset
camera_target = camera_pos + camera_forward
# the camera up vector is aligned with the end effector's x axis
camera_up = rot_matrix.dot(np.array([1, 0, 0])) # x-axis direction
return camera_pos, camera_target, camera_up
def take_picture(self):
camera_pos, camera_target, camera_up = self.get_camera_pose()
view_matrix = p.computeViewMatrix(
cameraEyePosition=camera_pos,
cameraTargetPosition=camera_target,
cameraUpVector=camera_up
)
projection_matrix = p.computeProjectionMatrixFOV(
fov=self.camera_params['fov'],
aspect=self.camera_params['width'] / self.camera_params['height'],
nearVal=self.camera_params['near'],
farVal=self.camera_params['far']
)
_,_,rgb_img,depth_img,segm_img = p.getCameraImage(
width=self.camera_params['width'],
height=self.camera_params['height'],
viewMatrix=view_matrix,
projectionMatrix=projection_matrix,
renderer=p.ER_BULLET_HARDWARE_OPENGL
)
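# convert the non-linear depth-buffer values in [0, 1] returned by
# getCameraImage into metric depth (standard perspective-projection inversion)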
depth_img = self.camera_params['far'] * self.camera_params['near'] / (
self.camera_params['far'] - (self.camera_params['far'] - self.camera_params['near']) * depth_img)
depth_img = np.array(depth_img)
segm_img = np.array(segm_img)
return depth_img, segm_img
def reset(self):
target_pos = [0.5, 0, 1]
target_orn = p.getQuaternionFromEuler([np.pi, 0, 0])
target_matrix = np.eye(4)
target_matrix[:3, 3] = target_pos
target_matrix[:3, :3] = np.asarray(p.getMatrixFromQuaternion(target_orn)).reshape(3,3)
self.move_robot_to_pose(target_matrix)
def init(self):
self.move_to(Simulator.INIT_GRIPPER_POSE)
def move_to(self, pose: np.ndarray):
#delta_degree, min_new_cam_to_world = ControlUtil.solve_display_table_rot_and_cam_to_world(pose)
#print(delta_degree)
min_new_cam_to_pybullet_world = Simulator.TURNTABLE_WORLD_TO_PYBULLET_WORLD@pose
self.move_to_cam_pose(min_new_cam_to_pybullet_world)
#self.rotate_turntable(delta_degree)
def __del__(self):
p.disconnect()
def create_experiment(self, backup_name=None):
return super().create_experiment(backup_name)
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
def move_to_cam_pose(self, camera_pose: np.ndarray):
# extract position and rotation from the camera pose matrix
camera_pos = camera_pose[:3, 3]
R_camera = camera_pose[:3, :3]
# camera forward vector (z axis)
forward = R_camera[:, 2]
# the camera is rigidly offset from the end effector,
# 0.12 m in front of it, so back the gripper off along the view direction
gripper_pos = camera_pos - forward * 0.12
# the gripper rotation must account for the fixed camera-to-gripper transform;
# here the camera frame is assumed to coincide with the gripper frame
R_gripper = R_camera
# build the 4x4 homogeneous transform
gripper_pose = np.eye(4)
gripper_pose[:3, :3] = R_gripper
gripper_pose[:3, 3] = gripper_pos
print(gripper_pose)
# move the robot to the computed pose
return self.move_robot_to_pose(gripper_pose)

View File

@@ -9,7 +9,7 @@ class ViewGenerator(Runner):
self.config_path = config_path
def run(self):
-result = subprocess.run(['/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', '../blender/run_blender.py', '--', self.config_path])
+result = subprocess.run(['blender', '-b', '-P', '../blender/run_blender.py', '--', self.config_path])
print()
def create_experiment(self, backup_name=None):

View File

@@ -1,59 +0,0 @@
import numpy as np
from scipy.spatial.transform import Rotation as R
import time
class ControlUtil:
curr_rotation = 0
@staticmethod
def check_limit(new_cam_to_world):
if new_cam_to_world[0,3] < 0 or new_cam_to_world[1,3] > 0:
# if new_cam_to_world[0,3] > 0:
return False
x = abs(new_cam_to_world[0,3])
y = abs(new_cam_to_world[1,3])
tan_y_x = y/x
min_angle = 0 / 180 * np.pi
max_angle = 90 / 180 * np.pi
if tan_y_x < np.tan(min_angle) or tan_y_x > np.tan(max_angle):
return False
return True
@staticmethod
def solve_display_table_rot_and_cam_to_world(cam_to_world: np.ndarray) -> tuple:
if ControlUtil.check_limit(cam_to_world):
return 0, cam_to_world
else:
min_display_table_rot = 180
min_new_cam_to_world = None
for display_table_rot in np.linspace(0.1,360, 1800):
new_world_to_world = ControlUtil.get_z_axis_rot_mat(display_table_rot)
new_cam_to_new_world = cam_to_world
new_cam_to_world = new_world_to_world @ new_cam_to_new_world
if ControlUtil.check_limit(new_cam_to_world):
if display_table_rot < min_display_table_rot:
min_display_table_rot, min_new_cam_to_world = display_table_rot, new_cam_to_world
if abs(display_table_rot - 360) < min_display_table_rot:
min_display_table_rot, min_new_cam_to_world = display_table_rot - 360, new_cam_to_world
if min_new_cam_to_world is None:
raise ValueError("No valid display table rotation found")
delta_degree = min_display_table_rot - ControlUtil.curr_rotation
ControlUtil.curr_rotation = min_display_table_rot
return delta_degree, min_new_cam_to_world
@staticmethod
def get_z_axis_rot_mat(degree):
radian = np.radians(degree)
return np.array([
[np.cos(radian), -np.sin(radian), 0, 0],
[np.sin(radian), np.cos(radian), 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]
])
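# Editor's usage sketch (hypothetical pose): rotate the display table until the
# camera satisfies check_limit, then apply the returned turntable delta:
#   cam_to_world = np.eye(4)
#   cam_to_world[:3, 3] = [0.5, -0.3, 0.8]
#   delta_deg, new_cam_to_world = ControlUtil.solve_display_table_rot_and_cam_to_world(cam_to_world)
#   print(delta_deg)  # 0 here, since the pose already satisfies check_limit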

View File

@@ -70,7 +70,7 @@ class RenderUtil:
@staticmethod
def render_pts(cam_pose, scene_path, script_path, scan_points, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
-import ipdb; ipdb.set_trace()
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
@@ -87,8 +87,7 @@ class RenderUtil:
result = subprocess.run([
'/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True)
-#print(result)
-#import ipdb; ipdb.set_trace()
+# print(result)
path = os.path.join(temp_dir, "tmp")
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(

View File

@@ -7,7 +7,6 @@ import trimesh
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.data_load import DataLoadUtil
from utils.pts import PtsUtil
-from utils.pose import PoseUtil
class visualizeUtil:
@@ -35,21 +34,6 @@ class visualizeUtil:
np.savetxt(os.path.join(output_dir, "all_cam_pos.txt"), all_cam_pos) np.savetxt(os.path.join(output_dir, "all_cam_pos.txt"), all_cam_pos)
np.savetxt(os.path.join(output_dir, "all_cam_axis.txt"), all_cam_axis) np.savetxt(os.path.join(output_dir, "all_cam_axis.txt"), all_cam_axis)
@staticmethod
def get_cam_pose_and_cam_axis(cam_pose, is_6d_pose):
if is_6d_pose:
matrix_cam_pose = np.eye(4)
matrix_cam_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(cam_pose[:6])
matrix_cam_pose[:3, 3] = cam_pose[6:]
else:
matrix_cam_pose = cam_pose
cam_pos = matrix_cam_pose[:3, 3]
cam_axis = matrix_cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
return cam_pos, sample_points
@staticmethod @staticmethod
def save_all_combined_pts(root, scene, output_dir): def save_all_combined_pts(root, scene, output_dir):
length = DataLoadUtil.get_scene_seq_length(root, scene) length = DataLoadUtil.get_scene_seq_length(root, scene)
@@ -191,6 +175,9 @@ class visualizeUtil:
np.savetxt(os.path.join(output_dir, "nrm.txt"), visualized_nrm) np.savetxt(os.path.join(output_dir, "nrm.txt"), visualized_nrm)
np.savetxt(os.path.join(output_dir, "pts.txt"), pts_world) np.savetxt(os.path.join(output_dir, "pts.txt"), pts_world)
# @staticmethod
# def save_
# ------ Debug ------ # ------ Debug ------
if __name__ == "__main__": if __name__ == "__main__":