From 43f22ad91ba30a24b3192bff6c2679d02ae9da6a Mon Sep 17 00:00:00 2001 From: hofee Date: Tue, 24 Sep 2024 09:10:25 +0000 Subject: [PATCH] add global_feat --- configs/local/inference_config.yaml | 4 +- configs/server/split_dataset_config.yaml | 4 +- configs/server/train_config.yaml | 51 ++++++++++++++----- core/nbv_dataset.py | 65 +++++++++++++++--------- core/pipeline.py | 49 ++++++++++++------ modules/func_lib/samplers.py | 2 +- modules/gf_view_finder.py | 10 ++-- 7 files changed, 123 insertions(+), 62 deletions(-) diff --git a/configs/local/inference_config.yaml b/configs/local/inference_config.yaml index 4cc4e01..60e979b 100644 --- a/configs/local/inference_config.yaml +++ b/configs/local/inference_config.yaml @@ -20,7 +20,7 @@ runner: dataset: OmniObject3d_train: - root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/sample_preprocessed_scenes" + root_dir: "/media/hofee/repository/nbv_reconstruction_data_512" model_dir: "/media/hofee/data/data/scaled_object_meshes" source: seq_nbv_reconstruction_dataset split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt" @@ -30,7 +30,7 @@ dataset: batch_size: 1 num_workers: 12 pts_num: 4096 - load_from_preprocess: True + load_from_preprocess: False pipeline: nbv_reconstruction_pipeline: diff --git a/configs/server/split_dataset_config.yaml b/configs/server/split_dataset_config.yaml index 0812e45..d3ddb5d 100644 --- a/configs/server/split_dataset_config.yaml +++ b/configs/server/split_dataset_config.yaml @@ -9,8 +9,8 @@ runner: name: debug root_dir: "experiments" - split: - root_dir: "../data/sample_for_training_preprocessed/sample_preprocessed_scenes" + split: # + root_dir: "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy" type: "unseen_instance" # "unseen_category" datasets: OmniObject3d_train: diff --git a/configs/server/train_config.yaml b/configs/server/train_config.yaml index 45d84cf..d3d70bd 100644 --- a/configs/server/train_config.yaml +++ b/configs/server/train_config.yaml @@ -3,16 +3,16 @@ runner: general: seed: 0 device: cuda - cuda_visible_devices: "0,1,2,3,4,5,6,7" + cuda_visible_devices: "1" parallel: False experiment: - name: new_test_overfit_to_world_preprocessed + name: overfit_w_global_feat root_dir: "experiments" use_checkpoint: False epoch: -1 # -1 stands for last epoch max_epochs: 5000 - save_checkpoint_interval: 3 + save_checkpoint_interval: 1 test_first: True train: @@ -25,16 +25,17 @@ runner: test: frequency: 3 # test frequency dataset_list: - - OmniObject3d_test + #- OmniObject3d_test + - OmniObject3d_val pipeline: nbv_reconstruction_pipeline dataset: OmniObject3d_train: - root_dir: "../data/sample_for_training_preprocessed/sample_preprocessed_scenes" + root_dir: "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy" model_dir: "../data/scaled_object_meshes" source: nbv_reconstruction_dataset - split_file: "../data/sample_for_training_preprocessed/OmniObject3d_train.txt" + split_file: "/home/data/hofee/project/nbv_rec/data/OmniObject3d_sample.txt" type: train cache: True ratio: 1 @@ -44,27 +45,49 @@ dataset: load_from_preprocess: True OmniObject3d_test: - root_dir: "../data/sample_for_training_preprocessed/sample_preprocessed_scenes" + root_dir: "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy" model_dir: "../data/scaled_object_meshes" source: nbv_reconstruction_dataset - split_file: "../data/sample_for_training_preprocessed/OmniObject3d_train.txt" + split_file: "/home/data/hofee/project/nbv_rec/data/OmniObject3d_test.txt" type: test cache: True filter_degree: 75 eval_list: - pose_diff - ratio: 0.1 + ratio: 0.05 batch_size: 1 num_workers: 12 pts_num: 4096 load_from_preprocess: True + OmniObject3d_val: + root_dir: "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy" + model_dir: "../data/scaled_object_meshes" + source: nbv_reconstruction_dataset + split_file: "/home/data/hofee/project/nbv_rec/data/OmniObject3d_sample.txt" + type: test + cache: True + filter_degree: 75 + eval_list: + - pose_diff + ratio: 0.005 + batch_size: 1 + num_workers: 12 + pts_num: 4096 + load_from_preprocess: True + + pipeline: nbv_reconstruction_pipeline: - pts_encoder: pointnet_encoder - seq_encoder: transformer_seq_encoder - pose_encoder: pose_encoder - view_finder: gf_view_finder + modules: + pts_encoder: pointnet_encoder + seq_encoder: transformer_seq_encoder + pose_encoder: pose_encoder + view_finder: gf_view_finder + eps: 1e-5 + global_scanned_feat: True + + module: @@ -85,7 +108,7 @@ module: gf_view_finder: t_feat_dim: 128 pose_feat_dim: 256 - main_feat_dim: 2048 + main_feat_dim: 3072 regression_head: Rx_Ry_and_T pose_mode: rot_matrix per_point_feature: False diff --git a/core/nbv_dataset.py b/core/nbv_dataset.py index c3bc9b8..ebe45da 100644 --- a/core/nbv_dataset.py +++ b/core/nbv_dataset.py @@ -7,12 +7,11 @@ from PytorchBoot.utils.log_util import Log import torch import os import sys -sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction") +sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction") from utils.data_load import DataLoadUtil from utils.pose import PoseUtil from utils.pts import PtsUtil -from utils.reconstruction import ReconstructionUtil @stereotype.dataset("nbv_reconstruction_dataset") @@ -35,7 +34,7 @@ class NBVReconstructionDataset(BaseDataset): self.model_dir = config["model_dir"] self.filter_degree = config["filter_degree"] if self.type == namespace.Mode.TRAIN: - scale_ratio = 1 + scale_ratio = 10 self.datalist = self.datalist*scale_ratio if self.cache: expr_root = ConfigManager.get("runner", "experiment", "root_dir") @@ -56,20 +55,34 @@ class NBVReconstructionDataset(BaseDataset): def get_datalist(self): datalist = [] for scene_name in self.scene_name_list: - label_path = DataLoadUtil.get_label_path_old(self.root_dir, scene_name) - label_data = DataLoadUtil.load_label(label_path) - for data_pair in label_data["data_pairs"]: - scanned_views = data_pair[0] - next_best_view = data_pair[1] + seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name) + scene_max_coverage_rate = 0 + max_coverage_rate_list = [] + + for seq_idx in range(seq_num): + label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx) + label_data = DataLoadUtil.load_label(label_path) max_coverage_rate = label_data["max_coverage_rate"] - datalist.append( - { - "scanned_views": scanned_views, - "next_best_view": next_best_view, - "max_coverage_rate": max_coverage_rate, - "scene_name": scene_name, - } - ) + if max_coverage_rate > scene_max_coverage_rate: + scene_max_coverage_rate = max_coverage_rate + max_coverage_rate_list.append(max_coverage_rate) + mean_coverage_rate = np.mean(max_coverage_rate_list) + + for seq_idx in range(seq_num): + label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx) + label_data = DataLoadUtil.load_label(label_path) + if max_coverage_rate_list[seq_idx] > mean_coverage_rate - 0.1: + for data_pair in label_data["data_pairs"]: + scanned_views = data_pair[0] + next_best_view = data_pair[1] + datalist.append({ + "scanned_views": scanned_views, + "next_best_view": next_best_view, + "seq_max_coverage_rate": max_coverage_rate, + "scene_name": scene_name, + "label_idx": seq_idx, + "scene_max_coverage_rate": scene_max_coverage_rate + }) return datalist def preprocess_cache(self): @@ -102,7 +115,7 @@ class NBVReconstructionDataset(BaseDataset): data_item_info = self.datalist[index] scanned_views = data_item_info["scanned_views"] nbv = data_item_info["next_best_view"] - max_coverage_rate = data_item_info["max_coverage_rate"] + max_coverage_rate = data_item_info["seq_max_coverage_rate"] scene_name = data_item_info["scene_name"] scanned_views_pts, scanned_coverages_rate, scanned_n_to_world_pose = [], [], [] @@ -151,13 +164,18 @@ class NBVReconstructionDataset(BaseDataset): best_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(best_frame_to_world[:3,:3])) best_to_world_trans = best_frame_to_world[:3,3] best_to_world_9d = np.concatenate([best_to_world_6d, best_to_world_trans], axis=0) + + combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0) + voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002) + random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num) data_item = { "scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32), + "combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np,dtype=np.float32), "scanned_coverage_rate": scanned_coverages_rate, "scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose,dtype=np.float32), "best_coverage_rate": nbv_coverage_rate, "best_to_world_pose_9d": np.asarray(best_to_world_9d,dtype=np.float32), - "max_coverage_rate": max_coverage_rate, + "seq_max_coverage_rate": max_coverage_rate, "scene_name": scene_name } @@ -195,10 +213,11 @@ class NBVReconstructionDataset(BaseDataset): collate_data["scanned_pts"] = [torch.tensor(item['scanned_pts']) for item in batch] collate_data["scanned_n_to_world_pose_9d"] = [torch.tensor(item['scanned_n_to_world_pose_9d']) for item in batch] collate_data["best_to_world_pose_9d"] = torch.stack([torch.tensor(item['best_to_world_pose_9d']) for item in batch]) + collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch]) if "first_frame_to_world" in batch[0]: collate_data["first_frame_to_world"] = torch.stack([torch.tensor(item["first_frame_to_world"]) for item in batch]) for key in batch[0].keys(): - if key not in ["scanned_pts", "scanned_n_to_world_pose_9d", "best_to_world_pose_9d", "first_frame_to_world"]: + if key not in ["scanned_pts", "scanned_n_to_world_pose_9d", "best_to_world_pose_9d", "first_frame_to_world", "combined_scanned_pts"]: collate_data[key] = [item[key] for item in batch] return collate_data return collate_fn @@ -211,11 +230,11 @@ if __name__ == "__main__": torch.manual_seed(seed) np.random.seed(seed) config = { - "root_dir": "/media/hofee/repository/nbv_reconstruction_data_512", - "model_dir": "/media/hofee/data/data/scaled_object_meshes", + "root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy", + "model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes", "source": "nbv_reconstruction_dataset", - "split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt", - "load_from_preprocess": False, + "split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_test.txt", + "load_from_preprocess": True, "ratio": 0.5, "batch_size": 2, "filter_degree": 75, diff --git a/core/pipeline.py b/core/pipeline.py index 294ed50..8079706 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -5,16 +5,20 @@ import PytorchBoot.stereotype as stereotype from PytorchBoot.factory.component_factory import ComponentFactory from PytorchBoot.utils import Log +from utils.pts import PtsUtil + @stereotype.pipeline("nbv_reconstruction_pipeline") class NBVReconstructionPipeline(nn.Module): def __init__(self, config): super(NBVReconstructionPipeline, self).__init__() self.config = config - self.pts_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["pts_encoder"]) - self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["pose_encoder"]) - self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["seq_encoder"]) - self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, config["view_finder"]) - self.eps = 1e-5 + self.module_config = config["modules"] + self.pts_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pts_encoder"]) + self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pose_encoder"]) + self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["seq_encoder"]) + self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["view_finder"]) + self.eps = float(self.config["eps"]) + self.enable_global_scanned_feat = self.config["global_scanned_feat"] def forward(self, data): mode = data["mode"] @@ -38,14 +42,14 @@ class NBVReconstructionPipeline(nn.Module): return perturbed_x, random_t, target_score, std def forward_train(self, data): - seq_feat = self.get_seq_feat(data) + main_feat = self.get_main_feat(data) ''' get std ''' best_to_world_pose_9d_batch = data["best_to_world_pose_9d"] perturbed_x, random_t, target_score, std = self.pertube_data(best_to_world_pose_9d_batch) input_data = { "sampled_pose": perturbed_x, "t": random_t, - "seq_feat": seq_feat, + "main_feat": main_feat, } estimated_score = self.view_finder(input_data) output = { @@ -56,29 +60,44 @@ class NBVReconstructionPipeline(nn.Module): return output def forward_test(self,data): - seq_feat = self.get_seq_feat(data) - estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(seq_feat) + main_feat = self.get_main_feat(data) + estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(main_feat) result = { "pred_pose_9d": estimated_delta_rot_9d, "in_process_sample": in_process_sample } return result - def get_seq_feat(self, data): + + def get_main_feat(self, data): scanned_pts_batch = data['scanned_pts'] scanned_n_to_world_pose_9d_batch = data['scanned_n_to_world_pose_9d'] + + + device = next(self.parameters()).device + + + pts_feat_seq_list = [] pose_feat_seq_list = [] - device = next(self.parameters()).device + for scanned_pts,scanned_n_to_world_pose_9d in zip(scanned_pts_batch,scanned_n_to_world_pose_9d_batch): scanned_pts = scanned_pts.to(device) scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device) pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts)) pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d)) + + main_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list) - seq_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list) - if torch.isnan(seq_feat).any(): - Log.error("nan in seq_feat", True) - return seq_feat + if self.enable_global_scanned_feat: + combined_scanned_pts_batch = data['combined_scanned_pts'] + global_scanned_feat = self.pts_encoder.encode_points(combined_scanned_pts_batch) + main_feat = torch.cat([main_feat, global_scanned_feat], dim=-1) + + + if torch.isnan(main_feat).any(): + Log.error("nan in main_feat", True) + + return main_feat diff --git a/modules/func_lib/samplers.py b/modules/func_lib/samplers.py index ae5c33c..42307c5 100644 --- a/modules/func_lib/samplers.py +++ b/modules/func_lib/samplers.py @@ -32,7 +32,7 @@ def cond_ode_sampler( init_x=None, ): pose_dim = PoseUtil.get_pose_dim(pose_mode) - batch_size = data["seq_feat"].shape[0] + batch_size = data["main_feat"].shape[0] init_x = ( prior((batch_size, pose_dim), T=T).to(device) if init_x is None diff --git a/modules/gf_view_finder.py b/modules/gf_view_finder.py index fd05843..dfccb06 100644 --- a/modules/gf_view_finder.py +++ b/modules/gf_view_finder.py @@ -80,13 +80,13 @@ class GradientFieldViewFinder(nn.Module): """ Args: data, dict { - 'seq_feat': [bs, c] + 'main_feat': [bs, c] 'pose_sample': [bs, pose_dim] 't': [bs, 1] } """ - seq_feat = data['seq_feat'] + main_feat = data['main_feat'] sampled_pose = data['sampled_pose'] t = data['t'] t_feat = self.t_encoder(t.squeeze(1)) @@ -95,7 +95,7 @@ class GradientFieldViewFinder(nn.Module): if self.per_point_feature: raise NotImplementedError else: - total_feat = torch.cat([seq_feat, t_feat, pose_feat], dim=-1) + total_feat = torch.cat([main_feat, t_feat, pose_feat], dim=-1) _, std = self.marginal_prob_fn(total_feat, t) if self.regression_head == 'Rx_Ry_and_T': @@ -134,9 +134,9 @@ class GradientFieldViewFinder(nn.Module): return in_process_sample, res - def next_best_view(self, seq_feat): + def next_best_view(self, main_feat): data = { - 'seq_feat': seq_feat, + 'main_feat': main_feat, } in_process_sample, res = self.sample(data) return res.to(dtype=torch.float32), in_process_sample