Compare commits

..

4 Commits

Author SHA1 Message Date
1123e69bff fix nan 2024-10-31 12:02:48 +00:00
5e8684d149 debug 2024-10-31 11:13:37 +00:00
96fa40cc35 global_and_partial_global: upd 2024-10-30 15:34:15 +00:00
b82b92eebb global_and_partial_global: all 2024-10-30 11:49:45 +00:00
19 changed files with 424 additions and 833 deletions

View File

@ -1,6 +1,5 @@
from PytorchBoot.application import PytorchBootApplication from PytorchBoot.application import PytorchBootApplication
from runners.inferencer import Inferencer from runners.inferencer import Inferencer
from runners.inference_server import InferencerServer
@PytorchBootApplication("inference") @PytorchBootApplication("inference")
class InferenceApp: class InferenceApp:
@ -15,17 +14,3 @@ class InferenceApp:
Evaluator("path_to_your_eval_config").run() Evaluator("path_to_your_eval_config").run()
''' '''
Inferencer("./configs/local/inference_config.yaml").run() Inferencer("./configs/local/inference_config.yaml").run()
@PytorchBootApplication("server")
class InferenceServerApp:
    """Application registered under the name "server"."""

    @staticmethod
    def start():
        """Launch the inference server runner.

        Call default or custom runners here; the code is executed
        automatically when typing "pytorch-boot run" or "ptb run" in a
        terminal, for example:
            Trainer("path_to_your_train_config").run()
            Evaluator("path_to_your_eval_config").run()
        """
        config_path = "./configs/server/server_inference_server_config.yaml"
        server = InferencerServer(config_path)
        server.run()

View File

@ -1,72 +1,76 @@
runner: runner:
general: general:
seed: 0 seed: 1
device: cuda device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7" cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment: experiment:
name: train_ab_global_only name: w_gf_wo_lf_full
root_dir: "experiments" root_dir: "experiments"
epoch: -1 # -1 stands for last epoch epoch: 1 # -1 stands for last epoch
test: test:
dataset_list: dataset_list:
- OmniObject3d_test - OmniObject3d_train
blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py" blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
output_dir: "/media/hofee/data/data/new_inference_test_output" output_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/inference_global_full_on_testset"
pipeline: nbv_reconstruction_pipeline pipeline: nbv_reconstruction_global_pts_pipeline
voxel_size: 0.003
dataset: dataset:
# OmniObject3d_train: OmniObject3d_train:
# root_dir: "C:\\Document\\Datasets\\inference_test1" root_dir: "/media/hofee/repository/nbv_reconstruction_data_512"
# model_dir: "C:\\Document\\Datasets\\scaled_object_meshes"
# source: seq_reconstruction_dataset_preprocessed
# split_file: "C:\\Document\\Datasets\\data_list\\sample.txt"
# type: test
# filter_degree: 75
# ratio: 1
# batch_size: 1
# num_workers: 12
# pts_num: 8192
# load_from_preprocess: True
OmniObject3d_test:
root_dir: "/media/hofee/data/data/new_testset_output"
model_dir: "/media/hofee/data/data/scaled_object_meshes" model_dir: "/media/hofee/data/data/scaled_object_meshes"
source: seq_reconstruction_dataset_preprocessed source: seq_nbv_reconstruction_dataset
# split_file: "C:\\Document\\Datasets\\data_list\\OmniObject3d_test.txt" split_file: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/test_set_list.txt"
type: test type: test
filter_degree: 75 filter_degree: 75
eval_list: ratio: 1
- pose_diff
- coverage_rate_increase
ratio: 0.1
batch_size: 1 batch_size: 1
num_workers: 12 num_workers: 12
pts_num: 8192 pts_num: 4096
load_from_preprocess: True load_from_preprocess: False
pipeline: pipeline:
nbv_reconstruction_pipeline: nbv_reconstruction_local_pts_pipeline:
modules: modules:
pts_encoder: pointnet_encoder pts_encoder: pointnet_encoder
seq_encoder: transformer_seq_encoder seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder pose_encoder: pose_encoder
view_finder: gf_view_finder view_finder: gf_view_finder
eps: 1e-5 eps: 1e-5
global_scanned_feat: False
nbv_reconstruction_global_pts_pipeline:
modules:
pts_encoder: pointnet_encoder
pose_seq_encoder: transformer_pose_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True global_scanned_feat: True
module: module:
pointnet_encoder: pointnet_encoder:
in_dim: 3 in_dim: 3
out_dim: 1024 out_dim: 1024
global_feat: True global_feat: True
feature_transform: False feature_transform: False
transformer_seq_encoder: transformer_seq_encoder:
embed_dim: 256 pts_embed_dim: 1024
pose_embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 2048
transformer_pose_seq_encoder:
pose_embed_dim: 256
num_heads: 4 num_heads: 4
ffn_dim: 256 ffn_dim: 256
num_layers: 3 num_layers: 3
@ -82,8 +86,7 @@ module:
sample_mode: ode sample_mode: ode
sampling_steps: 500 sampling_steps: 500
sde_mode: ve sde_mode: ve
pose_encoder: pose_encoder:
pose_dim: 9 pose_dim: 9
out_dim: 256 out_dim: 256
pts_num_encoder:
out_dim: 64

View File

@ -7,19 +7,17 @@ runner:
name: debug name: debug
root_dir: experiments root_dir: experiments
generate: generate:
port: 5000 port: 5002
from: 0 from: 600
to: -1 # -1 means all to: -1 # -1 means all
object_dir: /media/hofee/data/data/scaled_object_meshes object_dir: /media/hofee/data/data/object_meshes_part1
table_model_path: "/media/hofee/data/data/others/table.obj" table_model_path: "/media/hofee/data/data/others/table.obj"
output_dir: /media/hofee/data/data/new_testset output_dir: /media/hofee/repository/data_part_1
object_list_path: /media/hofee/data/data/OmniObject3d_test.txt
use_list: True
binocular_vision: true binocular_vision: true
plane_size: 10 plane_size: 10
max_views: 512 max_views: 512
min_views: 128 min_views: 128
random_view_ratio: 0.01 random_view_ratio: 0.02
min_cam_table_included_degree: 20 min_cam_table_included_degree: 20
max_diag: 0.7 max_diag: 0.7
min_diag: 0.01 min_diag: 0.01

View File

@ -1,53 +0,0 @@
runner:
general:
seed: 0
device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: train_ab_global_only
root_dir: "experiments"
epoch: -1 # -1 stands for last epoch
pipeline: nbv_reconstruction_pipeline
voxel_size: 0.003
pipeline:
nbv_reconstruction_pipeline:
modules:
pts_encoder: pointnet_encoder
seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True
module:
pointnet_encoder:
in_dim: 3
out_dim: 1024
global_feat: True
feature_transform: False
transformer_seq_encoder:
embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 1024
gf_view_finder:
t_feat_dim: 128
pose_feat_dim: 256
main_feat_dim: 2048
regression_head: Rx_Ry_and_T
pose_mode: rot_matrix
per_point_feature: False
sample_mode: ode
sampling_steps: 500
sde_mode: ve
pose_encoder:
pose_dim: 9
out_dim: 256
pts_num_encoder:
out_dim: 64

View File

@ -6,17 +6,17 @@ runner:
cuda_visible_devices: "0,1,2,3,4,5,6,7" cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment: experiment:
name: debug name: server_split_dataset
root_dir: "experiments" root_dir: "experiments"
split: # split: #
root_dir: "/data/hofee/data/packed_preprocessed_data" root_dir: "/data/hofee/data/new_full_data"
type: "unseen_instance" # "unseen_category" type: "unseen_instance" # "unseen_category"
datasets: datasets:
OmniObject3d_train: OmniObject3d_train:
path: "/data/hofee/data/OmniObject3d_train.txt" path: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
ratio: 0.9 ratio: 0.9
OmniObject3d_test: OmniObject3d_test:
path: "/data/hofee/data/OmniObject3d_test.txt" path: "/data/hofee/data/new_full_data_list/OmniObject3d_test.txt"
ratio: 0.1 ratio: 0.1

View File

@ -3,17 +3,17 @@ runner:
general: general:
seed: 0 seed: 0
device: cuda device: cuda
cuda_visible_devices: "0" cuda_visible_devices: "1"
parallel: False parallel: False
experiment: experiment:
name: train_ab_global_only name: train_ab_global_and_partial_global
root_dir: "experiments" root_dir: "experiments"
use_checkpoint: True use_checkpoint: False
epoch: -1 # -1 stands for last epoch epoch: -1 # -1 stands for last epoch
max_epochs: 5000 max_epochs: 5000
save_checkpoint_interval: 1 save_checkpoint_interval: 1
test_first: True test_first: False
train: train:
optimizer: optimizer:
@ -25,7 +25,7 @@ runner:
test: test:
frequency: 3 # test frequency frequency: 3 # test frequency
dataset_list: dataset_list:
- OmniObject3d_test #- OmniObject3d_test
- OmniObject3d_val - OmniObject3d_val
pipeline: nbv_reconstruction_pipeline pipeline: nbv_reconstruction_pipeline
@ -97,7 +97,7 @@ module:
feature_transform: False feature_transform: False
transformer_seq_encoder: transformer_seq_encoder:
embed_dim: 256 embed_dim: 320
num_heads: 4 num_heads: 4
ffn_dim: 256 ffn_dim: 256
num_layers: 3 num_layers: 3

View File

@ -7,6 +7,7 @@ from PytorchBoot.utils.log_util import Log
import torch import torch
import os import os
import sys import sys
import time
sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction") sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction")
@ -114,8 +115,13 @@ class NBVReconstructionDataset(BaseDataset):
except Exception as e: except Exception as e:
Log.error(f"Save cache failed: {e}") Log.error(f"Save cache failed: {e}")
def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003):
    """Voxel-downsample a point cloud and report which voxel each point fell in.

    Args:
        point_cloud: array of points, one point per row; each coordinate is
            divided by ``voxel_size`` and floored to obtain its voxel index.
        voxel_size: edge length of a voxel, in the same unit as the points.

    Returns:
        A tuple ``(downsampled_points, inverse)``:
        ``downsampled_points`` holds one original point per occupied voxel
        (the first point of ``point_cloud`` that landed in that voxel), and
        ``inverse`` is a 1-D integer array with one entry per input point
        giving the row of ``downsampled_points`` that represents its voxel.
    """
    voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
    # ``return_index`` yields the first occurrence of every unique voxel
    # directly, which makes the representative point deterministic instead
    # of depending on the order produced by an unstable argsort.
    _, first_occurrence, inverse = np.unique(
        voxel_indices, axis=0, return_index=True, return_inverse=True
    )
    downsampled_points = point_cloud[first_occurrence]
    # The shape of ``inverse`` when ``axis`` is given has differed between
    # NumPy releases; flatten so callers can always slice it per point.
    return downsampled_points, np.asarray(inverse).reshape(-1)
def __getitem__(self, index): def __getitem__(self, index):
@ -129,6 +135,9 @@ class NBVReconstructionDataset(BaseDataset):
scanned_coverages_rate, scanned_coverages_rate,
scanned_n_to_world_pose, scanned_n_to_world_pose,
) = ([], [], []) ) = ([], [], [])
start_time = time.time()
start_indices = [0]
total_points = 0
for view in scanned_views: for view in scanned_views:
frame_idx = view[0] frame_idx = view[0]
coverage_rate = view[1] coverage_rate = view[1]
@ -150,8 +159,12 @@ class NBVReconstructionDataset(BaseDataset):
n_to_world_trans = n_to_world_pose[:3, 3] n_to_world_trans = n_to_world_pose[:3, 3]
n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0) n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
scanned_n_to_world_pose.append(n_to_world_9d) scanned_n_to_world_pose.append(n_to_world_9d)
total_points += len(downsampled_target_point_cloud)
start_indices.append(total_points)
end_time = time.time()
#Log.info(f"load data time: {end_time - start_time}")
nbv_idx, nbv_coverage_rate = nbv[0], nbv[1] nbv_idx, nbv_coverage_rate = nbv[0], nbv[1]
nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx) nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx)
cam_info = DataLoadUtil.load_cam_info(nbv_path) cam_info = DataLoadUtil.load_cam_info(nbv_path)
@ -164,14 +177,27 @@ class NBVReconstructionDataset(BaseDataset):
best_to_world_9d = np.concatenate( best_to_world_9d = np.concatenate(
[best_to_world_6d, best_to_world_trans], axis=0 [best_to_world_6d, best_to_world_trans], axis=0
) )
combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0)
voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num)
combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0)
voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_views_pts, 0.003)
random_downsampled_combined_scanned_pts_np, random_downsample_idx = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num, require_idx=True)
all_idx_unique = np.arange(len(voxel_downsampled_combined_scanned_pts_np))
all_random_downsample_idx = all_idx_unique[random_downsample_idx]
scanned_pts_mask = []
for idx, start_idx in enumerate(start_indices):
if idx == len(start_indices) - 1:
break
end_idx = start_indices[idx+1]
view_inverse = inverse[start_idx:end_idx]
view_unique_downsampled_idx = np.unique(view_inverse)
view_unique_downsampled_idx_set = set(view_unique_downsampled_idx)
mask = np.array([idx in view_unique_downsampled_idx_set for idx in all_random_downsample_idx])
scanned_pts_mask.append(mask)
data_item = { data_item = {
"scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3) "scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3)
"combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32), # Ndarray(N x 3) "combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32), # Ndarray(N x 3)
"scanned_pts_mask": np.asarray(scanned_pts_mask, dtype=np.bool), # Ndarray(N)
"scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1) "scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1)
"scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9) "scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9)
"best_coverage_rate": nbv_coverage_rate, # Float, range(0, 1) "best_coverage_rate": nbv_coverage_rate, # Float, range(0, 1)
@ -197,7 +223,9 @@ class NBVReconstructionDataset(BaseDataset):
collate_data["scanned_n_to_world_pose_9d"] = [ collate_data["scanned_n_to_world_pose_9d"] = [
torch.tensor(item["scanned_n_to_world_pose_9d"]) for item in batch torch.tensor(item["scanned_n_to_world_pose_9d"]) for item in batch
] ]
collate_data["scanned_pts_mask"] = [
torch.tensor(item["scanned_pts_mask"]) for item in batch
]
''' ------ Fixed Length ------ ''' ''' ------ Fixed Length ------ '''
collate_data["best_to_world_pose_9d"] = torch.stack( collate_data["best_to_world_pose_9d"] = torch.stack(
@ -206,12 +234,14 @@ class NBVReconstructionDataset(BaseDataset):
collate_data["combined_scanned_pts"] = torch.stack( collate_data["combined_scanned_pts"] = torch.stack(
[torch.tensor(item["combined_scanned_pts"]) for item in batch] [torch.tensor(item["combined_scanned_pts"]) for item in batch]
) )
for key in batch[0].keys(): for key in batch[0].keys():
if key not in [ if key not in [
"scanned_pts", "scanned_pts",
"scanned_n_to_world_pose_9d", "scanned_n_to_world_pose_9d",
"best_to_world_pose_9d", "best_to_world_pose_9d",
"combined_scanned_pts", "combined_scanned_pts",
"scanned_pts_mask",
]: ]:
collate_data[key] = [item[key] for item in batch] collate_data[key] = [item[key] for item in batch]
return collate_data return collate_data
@ -227,9 +257,9 @@ if __name__ == "__main__":
torch.manual_seed(seed) torch.manual_seed(seed)
np.random.seed(seed) np.random.seed(seed)
config = { config = {
"root_dir": "/data/hofee/data/packed_preprocessed_data", "root_dir": "/data/hofee/nbv_rec_part2_preprocessed",
"source": "nbv_reconstruction_dataset", "source": "nbv_reconstruction_dataset",
"split_file": "/data/hofee/data/OmniObject3d_train.txt", "split_file": "/data/hofee/data/sample.txt",
"load_from_preprocess": True, "load_from_preprocess": True,
"ratio": 0.5, "ratio": 0.5,
"batch_size": 2, "batch_size": 2,

View File

@ -1,154 +0,0 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys
sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("old_seq_nbv_reconstruction_dataset")
class SeqNBVReconstructionDataset(BaseDataset):
    """Test-mode dataset yielding the first frame of each scene's best sequence.

    One item is produced per scene listed in the split file. For every scene
    the label with the highest ``max_coverage_rate`` is selected and the first
    entry of its ``best_sequence`` becomes the starting view of the item.
    """

    def __init__(self, config):
        """Read the dataset settings from ``config`` and build the item list.

        Required keys: ``type``, ``root_dir``, ``split_file``, ``pts_num``,
        ``model_dir``, ``filter_degree``. Optional: ``load_from_preprocess``
        (defaults to ``False``).
        """
        super(SeqNBVReconstructionDataset, self).__init__(config)
        self.type = config["type"]
        if self.type != namespace.Mode.TEST:
            # Only test mode is supported; report it through the project logger.
            Log.error("Dataset <seq_nbv_reconstruction_dataset> Only support test mode", terminate=True)
        self.config = config
        self.root_dir = config["root_dir"]
        self.split_file_path = config["split_file"]
        self.scene_name_list = self.load_scene_name_list()
        self.datalist = self.get_datalist()
        self.pts_num = config["pts_num"]
        self.model_dir = config["model_dir"]
        self.filter_degree = config["filter_degree"]
        self.load_from_preprocess = config.get("load_from_preprocess", False)

    def load_scene_name_list(self):
        """Return the scene names listed in the split file, one per line.

        Surrounding whitespace is stripped. Blank lines are skipped so that a
        trailing empty line does not turn into an empty scene name (which
        would later be joined onto ``root_dir`` as if it were a scene).
        """
        with open(self.split_file_path, "r") as f:
            stripped_lines = (line.strip() for line in f)
            return [scene_name for scene_name in stripped_lines if scene_name]

    def get_datalist(self):
        """Build one record per scene from its best-covering label file.

        For each scene, every label index in ``range(seq_num)`` is loaded and
        the one with the strictly largest ``max_coverage_rate`` wins (ties
        keep the earliest index; index 0 is used if none exceeds 0).
        """
        datalist = []
        for scene_name in self.scene_name_list:
            seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
            scene_max_coverage_rate = 0
            scene_max_cr_idx = 0

            for seq_idx in range(seq_num):
                label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
                label_data = DataLoadUtil.load_label(label_path)
                max_coverage_rate = label_data["max_coverage_rate"]
                if max_coverage_rate > scene_max_coverage_rate:
                    scene_max_coverage_rate = max_coverage_rate
                    scene_max_cr_idx = seq_idx

            # Reload the winning label to read its best sequence.
            label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
            label_data = DataLoadUtil.load_label(label_path)
            best_sequence = label_data["best_sequence"]
            datalist.append({
                "scene_name": scene_name,
                "first_frame": best_sequence[0],
                "max_coverage_rate": scene_max_coverage_rate,
                "best_seq_len": len(best_sequence),
                "label_idx": scene_max_cr_idx,
            })
        return datalist

    def __getitem__(self, index):
        """Load the first view of the scene at ``index`` and pack it as a dict.

        The returned dict carries the down-sampled first-view points (also
        reused as ``combined_scanned_pts``), the 9-value pose of the first
        view (6 rotation values followed by the translation), and scene
        metadata such as the coverage figures and the bounding-box based
        ``voxel_threshold``.
        """
        data_item_info = self.datalist[index]
        # ``first_frame`` is indexed as (frame index, coverage rate).
        first_frame_idx = data_item_info["first_frame"][0]
        first_frame_coverage = data_item_info["first_frame"][1]
        max_coverage_rate = data_item_info["max_coverage_rate"]
        scene_name = data_item_info["scene_name"]

        # The same view path serves both the camera info and the point cloud.
        first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx)
        first_cam_info = DataLoadUtil.load_cam_info(first_view_path, binocular=True)
        first_left_cam_pose = first_cam_info["cam_to_world"]
        first_center_cam_pose = first_cam_info["cam_to_world_O"]

        first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path)
        first_pts_num = first_target_point_cloud.shape[0]
        first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(first_target_point_cloud, self.pts_num)

        first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3, :3]))
        first_to_world_trans = first_left_cam_pose[:3, 3]
        first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0)

        diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
        voxel_threshold = diag * 0.02
        # inverse(left camera pose) composed with the "O" camera pose.
        first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
        scene_path = os.path.join(self.root_dir, scene_name)
        model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)

        data_item = {
            "first_pts_num": np.asarray(
                first_pts_num, dtype=np.int32
            ),
            "first_pts": np.asarray([first_downsampled_target_point_cloud], dtype=np.float32),
            "combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud, dtype=np.float32),
            "first_to_world_9d": np.asarray([first_to_world_9d], dtype=np.float32),
            "scene_name": scene_name,
            "max_coverage_rate": max_coverage_rate,
            "voxel_threshold": voxel_threshold,
            "filter_degree": self.filter_degree,
            "O_to_L_pose": first_O_to_first_L_pose,
            "first_frame_coverage": first_frame_coverage,
            "scene_path": scene_path,
            "model_points_normals": model_points_normals,
            "best_seq_len": data_item_info["best_seq_len"],
            "first_frame_id": first_frame_idx,
        }
        return data_item

    def __len__(self):
        """Return the number of items (one per scene in the data list)."""
        return len(self.datalist)

    def get_collate_fn(self):
        """Return the batch collation function used for this dataset."""
        def collate_fn(batch):
            """Collate items: stack ``combined_scanned_pts``, list everything else."""
            collate_data = {}
            collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
            collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
            collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
            # Remaining keys are passed through as plain per-item lists.
            for key in batch[0].keys():
                if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
                    collate_data[key] = [item[key] for item in batch]
            return collate_data
        return collate_fn
# -------------- Debug ---------------- #
# Manual debug driver: builds the dataset from a hard-coded config, prints its
# length and then prints batches produced by its loader.
# NOTE(review): indentation was lost in this view, so the nesting of the lines
# below (in particular whether the breakpoint sits inside the loop) must be
# confirmed against the real file.
if __name__ == "__main__":
import torch
# Fix both RNG seeds so the run is repeatable.
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
# Machine-specific absolute paths; adjust before running elsewhere.
config = {
"root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy",
"split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt",
"model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes",
"ratio": 0.005,
"batch_size": 2,
"filter_degree": 75,
"num_workers": 0,
# NOTE(review): 32684 is not a power of two — confirm 32768 was not intended.
"pts_num": 32684,
"type": namespace.Mode.TEST,
"load_from_preprocess": True
}
ds = SeqNBVReconstructionDataset(config)
print(len(ds))
#ds.__getitem__(10)
# get_loader / process_batch are not defined in this class; presumably
# inherited from BaseDataset — verify.
dl = ds.get_loader(shuffle=True)
for idx, data in enumerate(dl):
data = ds.process_batch(data, "cuda:0")
print(data)
# ------ Debug Start ------
# Interactive breakpoint; requires the ipdb package and a TTY.
import ipdb;ipdb.set_trace()
# ------ Debug End ------+

View File

@ -89,25 +89,49 @@ class NBVReconstructionPipeline(nn.Module):
"scanned_n_to_world_pose_9d" "scanned_n_to_world_pose_9d"
] # List(B): Tensor(S x 9) ] # List(B): Tensor(S x 9)
scanned_pts_mask_batch = data["scanned_pts_mask"] # List(B): Tensor(N)
device = next(self.parameters()).device device = next(self.parameters()).device
embedding_list_batch = [] embedding_list_batch = []
combined_scanned_pts_batch = data["combined_scanned_pts"] # Tensor(B x N x 3) combined_scanned_pts_batch = data["combined_scanned_pts"] # Tensor(B x N x 3)
global_scanned_feat = self.pts_encoder.encode_points( global_scanned_feat, per_point_feat_batch = self.pts_encoder.encode_points(
combined_scanned_pts_batch, require_per_point_feat=False combined_scanned_pts_batch, require_per_point_feat=True
) # global_scanned_feat: Tensor(B x Dg) ) # global_scanned_feat: Tensor(B x Dg)
batch_size = len(scanned_n_to_world_pose_9d_batch)
for scanned_n_to_world_pose_9d in scanned_n_to_world_pose_9d_batch: for i in range(batch_size):
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device) # Tensor(S x 9) seq_len = len(scanned_n_to_world_pose_9d_batch[i])
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d_batch[i].to(device) # Tensor(S x 9)
scanned_pts_mask = scanned_pts_mask_batch[i] # Tensor(S x N)
per_point_feat = per_point_feat_batch[i] # Tensor(N x Dp)
partial_point_feat_seq = []
for j in range(seq_len):
partial_per_point_feat = per_point_feat[scanned_pts_mask[j]]
if partial_per_point_feat.shape[0] == 0:
partial_point_feat = torch.zeros(per_point_feat.shape[1], device=device)
else:
partial_point_feat = torch.mean(partial_per_point_feat, dim=0) # Tensor(Dp)
partial_point_feat_seq.append(partial_point_feat)
partial_point_feat_seq = torch.stack(partial_point_feat_seq, dim=0) # Tensor(S x Dp)
pose_feat_seq = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d) # Tensor(S x Dp) pose_feat_seq = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d) # Tensor(S x Dp)
seq_embedding = pose_feat_seq
seq_embedding = torch.cat([partial_point_feat_seq, pose_feat_seq], dim=-1)
embedding_list_batch.append(seq_embedding) # List(B): Tensor(S x (Dp)) embedding_list_batch.append(seq_embedding) # List(B): Tensor(S x (Dp))
seq_feat = self.seq_encoder.encode_sequence(embedding_list_batch) # Tensor(B x Ds) seq_feat = self.seq_encoder.encode_sequence(embedding_list_batch) # Tensor(B x Ds)
main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1) # Tensor(B x (Ds+Dg)) main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1) # Tensor(B x (Ds+Dg))
if torch.isnan(main_feat).any(): if torch.isnan(main_feat).any():
for i in range(len(main_feat)):
if torch.isnan(main_feat[i]).any():
scanned_pts_mask = scanned_pts_mask_batch[i]
Log.info(f"scanned_pts_mask shape: {scanned_pts_mask.shape}")
Log.info(f"scanned_pts_mask sum: {scanned_pts_mask.sum()}")
import ipdb
ipdb.set_trace()
Log.error("nan in main_feat", True) Log.error("nan in main_feat", True)
return main_feat return main_feat

View File

@ -1,204 +1,154 @@
import numpy as np import numpy as np
from PytorchBoot.dataset import BaseDataset from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager from PytorchBoot.utils.log_util import Log
from PytorchBoot.utils.log_util import Log import torch
import torch import os
import os import sys
import sys sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction") from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.data_load import DataLoadUtil from utils.pts import PtsUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil @stereotype.dataset("seq_nbv_reconstruction_dataset")
class SeqNBVReconstructionDataset(BaseDataset):
def __init__(self, config):
@stereotype.dataset("seq_reconstruction_dataset") super(SeqNBVReconstructionDataset, self).__init__(config)
class SeqReconstructionDataset(BaseDataset): self.type = config["type"]
def __init__(self, config): if self.type != namespace.Mode.TEST:
super(SeqReconstructionDataset, self).__init__(config) Log.error("Dataset <seq_nbv_reconstruction_dataset> Only support test mode", terminate=True)
self.config = config self.config = config
self.root_dir = config["root_dir"] self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"] self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list() self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist() self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
self.pts_num = config["pts_num"]
self.type = config["type"] self.model_dir = config["model_dir"]
self.cache = config.get("cache") self.filter_degree = config["filter_degree"]
self.load_from_preprocess = config.get("load_from_preprocess", False) self.load_from_preprocess = config.get("load_from_preprocess", False)
if self.type == namespace.Mode.TEST:
#self.model_dir = config["model_dir"] def load_scene_name_list(self):
self.filter_degree = config["filter_degree"] scene_name_list = []
if self.type == namespace.Mode.TRAIN: with open(self.split_file_path, "r") as f:
scale_ratio = 1 for line in f:
self.datalist = self.datalist*scale_ratio scene_name = line.strip()
if self.cache: scene_name_list.append(scene_name)
expr_root = ConfigManager.get("runner", "experiment", "root_dir") return scene_name_list
expr_name = ConfigManager.get("runner", "experiment", "name")
self.cache_dir = os.path.join(expr_root, expr_name, "cache") def get_datalist(self):
# self.preprocess_cache() datalist = []
for scene_name in self.scene_name_list:
def load_scene_name_list(self): seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
scene_name_list = [] scene_max_coverage_rate = 0
with open(self.split_file_path, "r") as f: scene_max_cr_idx = 0
for line in f:
scene_name = line.strip() for seq_idx in range(seq_num):
if os.path.exists(os.path.join(self.root_dir, scene_name)): label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
scene_name_list.append(scene_name) label_data = DataLoadUtil.load_label(label_path)
return scene_name_list max_coverage_rate = label_data["max_coverage_rate"]
if max_coverage_rate > scene_max_coverage_rate:
def get_scene_name_list(self): scene_max_coverage_rate = max_coverage_rate
return self.scene_name_list scene_max_cr_idx = seq_idx
def get_datalist(self): label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
datalist = [] label_data = DataLoadUtil.load_label(label_path)
total = len(self.scene_name_list) first_frame = label_data["best_sequence"][0]
for idx, scene_name in enumerate(self.scene_name_list): best_seq_len = len(label_data["best_sequence"])
print(f"processing {scene_name} ({idx}/{total})") datalist.append({
scene_max_cr_idx = 0 "scene_name": scene_name,
frame_len = DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name) "first_frame": first_frame,
"max_coverage_rate": scene_max_coverage_rate,
for i in range(frame_len): "best_seq_len": best_seq_len,
path = DataLoadUtil.get_path(self.root_dir, scene_name, i) "label_idx": scene_max_cr_idx,
pts = DataLoadUtil.load_from_preprocessed_pts(path, "npy") })
if pts.shape[0] == 0: return datalist
continue
datalist.append({ def __getitem__(self, index):
"scene_name": scene_name, data_item_info = self.datalist[index]
"first_frame": i, first_frame_idx = data_item_info["first_frame"][0]
"best_seq_len": -1, first_frame_coverage = data_item_info["first_frame"][1]
"max_coverage_rate": 1.0, max_coverage_rate = data_item_info["max_coverage_rate"]
"label_idx": scene_max_cr_idx, scene_name = data_item_info["scene_name"]
}) first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
return datalist first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx)
first_left_cam_pose = first_cam_info["cam_to_world"]
def preprocess_cache(self): first_center_cam_pose = first_cam_info["cam_to_world_O"]
Log.info("preprocessing cache...") first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path)
for item_idx in range(len(self.datalist)): first_pts_num = first_target_point_cloud.shape[0]
self.__getitem__(item_idx) first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(first_target_point_cloud, self.pts_num)
Log.success("finish preprocessing cache.") first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3,:3]))
first_to_world_trans = first_left_cam_pose[:3,3]
def load_from_cache(self, scene_name, curr_frame_idx): first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0)
cache_name = f"{scene_name}_{curr_frame_idx}.txt" diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
cache_path = os.path.join(self.cache_dir, cache_name) voxel_threshold = diag*0.02
if os.path.exists(cache_path): first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
data = np.loadtxt(cache_path) scene_path = os.path.join(self.root_dir, scene_name)
return data model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)
else:
return None data_item = {
"first_pts_num": np.asarray(
def save_to_cache(self, scene_name, curr_frame_idx, data): first_pts_num, dtype=np.int32
cache_name = f"{scene_name}_{curr_frame_idx}.txt" ),
cache_path = os.path.join(self.cache_dir, cache_name) "first_pts": np.asarray([first_downsampled_target_point_cloud],dtype=np.float32),
try: "combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud,dtype=np.float32),
np.savetxt(cache_path, data) "first_to_world_9d": np.asarray([first_to_world_9d],dtype=np.float32),
except Exception as e: "scene_name": scene_name,
Log.error(f"Save cache failed: {e}") "max_coverage_rate": max_coverage_rate,
"voxel_threshold": voxel_threshold,
def seq_combined_pts(self, scene, frame_idx_list): "filter_degree": self.filter_degree,
all_combined_pts = [] "O_to_L_pose": first_O_to_first_L_pose,
for i in frame_idx_list: "first_frame_coverage": first_frame_coverage,
path = DataLoadUtil.get_path(self.root_dir, scene, i) "scene_path": scene_path,
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy") "model_points_normals": model_points_normals,
if pts.shape[0] == 0: "best_seq_len": data_item_info["best_seq_len"],
continue "first_frame_id": first_frame_idx,
all_combined_pts.append(pts) }
all_combined_pts = np.vstack(all_combined_pts) return data_item
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.003)
return downsampled_all_pts def __len__(self):
return len(self.datalist)
def __getitem__(self, index):
data_item_info = self.datalist[index] def get_collate_fn(self):
max_coverage_rate = data_item_info["max_coverage_rate"] def collate_fn(batch):
best_seq_len = data_item_info["best_seq_len"] collate_data = {}
scene_name = data_item_info["scene_name"] collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
( collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
scanned_views_pts, collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
scanned_coverages_rate, for key in batch[0].keys():
scanned_n_to_world_pose, if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
) = ([], [], []) collate_data[key] = [item[key] for item in batch]
view = data_item_info["first_frame"] return collate_data
frame_idx = view return collate_fn
view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True) # -------------- Debug ---------------- #
if __name__ == "__main__":
n_to_world_pose = cam_info["cam_to_world"] import torch
target_point_cloud = ( seed = 0
DataLoadUtil.load_from_preprocessed_pts(view_path) torch.manual_seed(seed)
) np.random.seed(seed)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud( config = {
target_point_cloud, self.pts_num "root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy",
) "split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt",
scanned_views_pts.append(downsampled_target_point_cloud) "model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes",
"ratio": 0.005,
n_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy( "batch_size": 2,
np.asarray(n_to_world_pose[:3, :3]) "filter_degree": 75,
) "num_workers": 0,
first_left_cam_pose = cam_info["cam_to_world"] "pts_num": 32684,
first_center_cam_pose = cam_info["cam_to_world_O"] "type": namespace.Mode.TEST,
first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose) "load_from_preprocess": True
n_to_world_trans = n_to_world_pose[:3, 3] }
n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0) ds = SeqNBVReconstructionDataset(config)
scanned_n_to_world_pose.append(n_to_world_9d) print(len(ds))
#ds.__getitem__(10)
frame_list = [] dl = ds.get_loader(shuffle=True)
for i in range(DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)): for idx, data in enumerate(dl):
frame_list.append(i) data = ds.process_batch(data, "cuda:0")
gt_pts = self.seq_combined_pts(scene_name, frame_list) print(data)
data_item = { # ------ Debug Start ------
"first_scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3) import ipdb;ipdb.set_trace()
"first_scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9) # ------ Debug End ------+
"seq_max_coverage_rate": max_coverage_rate, # Float, range(0, 1)
"best_seq_len": best_seq_len, # Int
"scene_name": scene_name, # String
"gt_pts": gt_pts, # Ndarray(N x 3)
"scene_path": os.path.join(self.root_dir, scene_name), # String
"O_to_L_pose": first_O_to_first_L_pose,
}
return data_item
def __len__(self):
return len(self.datalist)
# -------------- Debug ---------------- #
if __name__ == "__main__":
    # Debug/export entry point: load every item of the dataset and pickle it
    # to `output_dir` as item_<index>.pkl, with ndarrays converted to lists.
    import torch
    from tqdm import tqdm
    import pickle
    import os

    # Seed both RNGs before the dataset is built and iterated.
    seed = 0
    torch.manual_seed(seed)
    np.random.seed(seed)
    config = {
        "root_dir": "/media/hofee/data/data/new_testset",
        "source": "seq_reconstruction_dataset",
        "split_file": "/media/hofee/data/data/OmniObject3d_test.txt",
        "load_from_preprocess": True,
        "filter_degree": 75,
        "num_workers": 0,
        "pts_num": 8192,
        "type": namespace.Mode.TEST,
    }
    output_dir = "/media/hofee/data/data/new_testset_output"
    os.makedirs(output_dir, exist_ok=True)
    ds = SeqReconstructionDataset(config)
    for i in tqdm(range(len(ds)), desc="processing dataset"):
        output_path = os.path.join(output_dir, f"item_{i}.pkl")
        item = ds[i]
        # Build a fresh dict rather than rewriting `item` while iterating it;
        # ndarrays become plain lists, every other value is stored unchanged.
        serializable_item = {
            key: value.tolist() if isinstance(value, np.ndarray) else value
            for key, value in item.items()
        }
        # NOTE: the interactive ipdb breakpoint that used to sit here was
        # removed; it halted the export on every single item.
        with open(output_path, "wb") as f:
            pickle.dump(serializable_item, f)

View File

@ -1,84 +0,0 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log
import pickle
import torch
import os
import sys
sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("seq_reconstruction_dataset_preprocessed")
class SeqReconstructionDatasetPreprocessed(BaseDataset):
    """Dataset that serves items previously pickled to disk, one file per item.

    Every entry of ``config["root_dir"]`` is treated as one pickled item dict.
    ``__getitem__`` converts the array-like fields back to ``float32`` ndarrays
    and rebuilds ``scene_path`` from ``real_root_dir`` and the stored scene name.
    """

    # Default location of the raw scenes; kept for backward compatibility and
    # overridable through the optional ``real_root_dir`` config key.
    DEFAULT_REAL_ROOT_DIR = r"/media/hofee/data/data/new_testset"

    def __init__(self, config):
        """Store the config and index the item files found under ``root_dir``.

        Args:
            config: Mapping with at least ``root_dir``; may additionally carry
                ``real_root_dir`` to override the default raw-scene directory.
        """
        super(SeqReconstructionDatasetPreprocessed, self).__init__(config)
        self.config = config
        self.root_dir = config["root_dir"]
        if "real_root_dir" in config:
            self.real_root_dir = config["real_root_dir"]
        else:
            self.real_root_dir = self.DEFAULT_REAL_ROOT_DIR
        # os.listdir returns entries in arbitrary order; sort them so that a
        # given index refers to the same file on every run and every machine.
        self.item_list = sorted(os.listdir(self.root_dir))

    def __getitem__(self, index):
        """Load the pickled item at ``index`` and return it as a dict.

        Raises:
            IndexError: if ``index`` is outside the indexed file list.
        """
        item_path = os.path.join(self.root_dir, self.item_list[index])
        # NOTE(review): pickle must only be used on trusted files; these are
        # expected to be produced locally by the export script — confirm.
        # The context manager closes the handle the original code leaked.
        with open(item_path, "rb") as f:
            data = pickle.load(f)
        data_item = {
            "first_scanned_pts": np.asarray(data["first_scanned_pts"], dtype=np.float32),  # Ndarray(S x Nv x 3)
            "first_scanned_n_to_world_pose_9d": np.asarray(data["first_scanned_n_to_world_pose_9d"], dtype=np.float32),  # Ndarray(S x 9)
            "seq_max_coverage_rate": data["seq_max_coverage_rate"],  # Float, range(0, 1)
            "best_seq_len": data["best_seq_len"],  # Int
            "scene_name": data["scene_name"],  # String
            "gt_pts": np.asarray(data["gt_pts"], dtype=np.float32),  # Ndarray(N x 3)
            "scene_path": os.path.join(self.real_root_dir, data["scene_name"]),  # String
            "O_to_L_pose": np.asarray(data["O_to_L_pose"], dtype=np.float32),
        }
        return data_item

    def __len__(self):
        """Return the number of item files found under ``root_dir``."""
        return len(self.item_list)
# -------------- Debug ---------------- #
if __name__ == "__main__":
    # Debug entry point: build the dataset defined in this file, print its
    # size and one sample item.
    import torch
    seed = 0
    torch.manual_seed(seed)
    np.random.seed(seed)
    '''
    OmniObject3d_test:
    root_dir: "H:\\AI\\Datasets\\packed_test_data"
    model_dir: "H:\\AI\\Datasets\\scaled_object_meshes"
    source: seq_reconstruction_dataset
    split_file: "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt"
    type: test
    filter_degree: 75
    eval_list:
    - pose_diff
    - coverage_rate_increase
    ratio: 0.1
    batch_size: 1
    num_workers: 12
    pts_num: 8192
    load_from_preprocess: True
    '''
    config = {
        "root_dir": "H:\\AI\\Datasets\\packed_test_data",
        "source": "seq_reconstruction_dataset",
        "split_file": "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt",
        "load_from_preprocess": True,
        "ratio": 1,
        "filter_degree": 75,
        "num_workers": 0,
        "pts_num": 8192,
        "type": "test",
    }
    # The original instantiated `SeqReconstructionDataset`, which this module
    # neither defines nor imports; use the class declared in this file.
    ds = SeqReconstructionDatasetPreprocessed(config)
    print(len(ds))
    # Print item 10 as before, but fall back to the last item on small
    # datasets instead of raising IndexError.
    if len(ds) > 0:
        print(ds[min(10, len(ds) - 1)])

View File

@ -29,8 +29,8 @@ def pack_all_scenes(root, scene_list, output_dir):
pack_scene_data(root, scene, output_dir) pack_scene_data(root, scene, output_dir)
if __name__ == "__main__": if __name__ == "__main__":
root = r"/media/hofee/repository/data_part_1" root = r"H:\AI\Datasets\nbv_rec_part2"
output_dir = r"/media/hofee/repository/upload_part1" output_dir = r"H:\AI\Datasets\upload_part2"
scene_list = os.listdir(root) scene_list = os.listdir(root)
from_idx = 0 from_idx = 0
to_idx = len(scene_list) to_idx = len(scene_list)

View File

@ -164,10 +164,10 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
if __name__ == "__main__": if __name__ == "__main__":
#root = "/media/hofee/repository/new_data_with_normal" #root = "/media/hofee/repository/new_data_with_normal"
root = "/media/hofee/data/data/new_testset" root = r"H:\AI\Datasets\nbv_rec_part2"
scene_list = os.listdir(root) scene_list = os.listdir(root)
from_idx = 0 # 1000 from_idx = 0 # 1000
to_idx = len(scene_list) # 1500 to_idx = 600 # 1500
cnt = 0 cnt = 0
@ -179,11 +179,7 @@ if __name__ == "__main__":
print(f"Scene {scene} has been processed") print(f"Scene {scene} has been processed")
cnt+=1 cnt+=1
continue continue
try: save_scene_data(root, scene, cnt, total, file_type="npy")
save_scene_data(root, scene, cnt, total, file_type="npy")
except Exception as e:
print(f"Error occurred when processing scene {scene}")
print(e)
cnt+=1 cnt+=1
end = time.time() end = time.time()
print(f"Time cost: {end-start}") print(f"Time cost: {end-start}")

View File

@ -13,7 +13,7 @@ from PytorchBoot.utils import Log
from utils.pts import PtsUtil from utils.pts import PtsUtil
@stereotype.runner("inferencer_server") @stereotype.runner("inferencer")
class InferencerServer(Runner): class InferencerServer(Runner):
def __init__(self, config_path): def __init__(self, config_path):
super().__init__(config_path) super().__init__(config_path)
@ -24,10 +24,9 @@ class InferencerServer(Runner):
self.pipeline_name = self.config[namespace.Stereotype.PIPELINE] self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name) self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
self.pipeline = self.pipeline.to(self.device) self.pipeline = self.pipeline.to(self.device)
self.pts_num = 8192
''' Experiment ''' ''' Experiment '''
self.load_experiment("inferencer_server") self.load_experiment("nbv_evaluator")
def get_input_data(self, data): def get_input_data(self, data):
input_data = {} input_data = {}
@ -37,36 +36,28 @@ class InferencerServer(Runner):
fps_downsampled_combined_scanned_pts, fps_idx = PtsUtil.fps_downsample_point_cloud( fps_downsampled_combined_scanned_pts, fps_idx = PtsUtil.fps_downsample_point_cloud(
combined_scanned_views_pts, self.pts_num, require_idx=True combined_scanned_views_pts, self.pts_num, require_idx=True
) )
# combined_scanned_views_pts_mask = np.zeros(len(scanned_pts), dtype=np.uint8) combined_scanned_views_pts_mask = np.zeros(len(scanned_pts), dtype=np.uint8)
# start_idx = 0 start_idx = 0
# for i in range(len(scanned_pts)): for i in range(len(scanned_pts)):
# end_idx = start_idx + len(scanned_pts[i]) end_idx = start_idx + len(scanned_pts[i])
# combined_scanned_views_pts_mask[start_idx:end_idx] = i combined_scanned_views_pts_mask[start_idx:end_idx] = i
# start_idx = end_idx start_idx = end_idx
# fps_downsampled_combined_scanned_pts_mask = combined_scanned_views_pts_mask[fps_idx] fps_downsampled_combined_scanned_pts_mask = combined_scanned_views_pts_mask[fps_idx]
input_data["scanned_pts"] = scanned_pts input_data["scanned_pts_mask"] = np.asarray(fps_downsampled_combined_scanned_pts_mask, dtype=np.uint8)
# input_data["scanned_pts_mask"] = np.asarray(fps_downsampled_combined_scanned_pts_mask, dtype=np.uint8)
input_data["scanned_n_to_world_pose_9d"] = np.asarray(scanned_n_to_world_pose_9d, dtype=np.float32) input_data["scanned_n_to_world_pose_9d"] = np.asarray(scanned_n_to_world_pose_9d, dtype=np.float32)
input_data["combined_scanned_pts"] = np.asarray(fps_downsampled_combined_scanned_pts, dtype=np.float32) input_data["combined_scanned_pts"] = np.asarray(fps_downsampled_combined_scanned_pts, dtype=np.float32)
return input_data return input_data
def get_result(self, output_data): def get_result(self, output_data):
pred_pose_9d = output_data["pred_pose_9d"] estimated_delta_rot_9d = output_data["pred_pose_9d"]
result = { result = {
"pred_pose_9d": pred_pose_9d.tolist() "estimated_delta_rot_9d": estimated_delta_rot_9d.tolist()
} }
return result return result
def collate_input(self, input_data):
collated_input_data = {}
collated_input_data["scanned_pts"] = [torch.tensor(input_data["scanned_pts"], dtype=torch.float32, device=self.device)]
collated_input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(input_data["scanned_n_to_world_pose_9d"], dtype=torch.float32, device=self.device)]
collated_input_data["combined_scanned_pts"] = torch.tensor(input_data["combined_scanned_pts"], dtype=torch.float32, device=self.device).unsqueeze(0)
return collated_input_data
def run(self): def run(self):
Log.info("Loading from epoch {}.".format(self.current_epoch)) Log.info("Loading from epoch {}.".format(self.current_epoch))
@ -74,8 +65,7 @@ class InferencerServer(Runner):
def inference(): def inference():
data = request.json data = request.json
input_data = self.get_input_data(data) input_data = self.get_input_data(data)
collated_input_data = self.collate_input(input_data) output_data = self.pipeline.forward_test(input_data)
output_data = self.pipeline.forward_test(collated_input_data)
result = self.get_result(output_data) result = self.get_result(output_data)
return jsonify(result) return jsonify(result)

View File

@ -19,7 +19,7 @@ from PytorchBoot.dataset import BaseDataset
from PytorchBoot.runners.runner import Runner from PytorchBoot.runners.runner import Runner
from PytorchBoot.utils import Log from PytorchBoot.utils import Log
from PytorchBoot.status import status_manager from PytorchBoot.status import status_manager
from utils.data_load import DataLoadUtil
@stereotype.runner("inferencer") @stereotype.runner("inferencer")
class Inferencer(Runner): class Inferencer(Runner):
def __init__(self, config_path): def __init__(self, config_path):
@ -27,7 +27,6 @@ class Inferencer(Runner):
self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path") self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path")
self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir") self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir")
self.voxel_size = ConfigManager.get(namespace.Stereotype.RUNNER, "voxel_size")
''' Pipeline ''' ''' Pipeline '''
self.pipeline_name = self.config[namespace.Stereotype.PIPELINE] self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name) self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
@ -35,12 +34,7 @@ class Inferencer(Runner):
''' Experiment ''' ''' Experiment '''
self.load_experiment("nbv_evaluator") self.load_experiment("nbv_evaluator")
self.stat_result_path = os.path.join(self.output_dir, "stat.json") self.stat_result = {}
if os.path.exists(self.stat_result_path):
with open(self.stat_result_path, "r") as f:
self.stat_result = json.load(f)
else:
self.stat_result = {}
''' Test ''' ''' Test '''
self.test_config = ConfigManager.get(namespace.Stereotype.RUNNER, namespace.Mode.TEST) self.test_config = ConfigManager.get(namespace.Stereotype.RUNNER, namespace.Mode.TEST)
@ -71,71 +65,59 @@ class Inferencer(Runner):
for dataset_idx, test_set in enumerate(self.test_set_list): for dataset_idx, test_set in enumerate(self.test_set_list):
status_manager.set_progress("inference", "inferencer", f"dataset", dataset_idx, len(self.test_set_list)) status_manager.set_progress("inference", "inferencer", f"dataset", dataset_idx, len(self.test_set_list))
test_set_name = test_set.get_name() test_set_name = test_set.get_name()
test_loader = test_set.get_loader()
total=int(len(test_set)) if test_loader.batch_size > 1:
for i in tqdm(range(total), desc=f"Processing {test_set_name}", ncols=100): Log.error("Batch size should be 1 for inference, found {} in {}".format(test_loader.batch_size, test_set_name), terminate=True)
data = test_set.__getitem__(i)
scene_name = data["scene_name"] total=int(len(test_loader))
if scene_name != "omniobject3d-book_004": loop = tqdm(enumerate(test_loader), total=total)
continue for i, data in loop:
inference_result_path = os.path.join(self.output_dir, test_set_name, f"{scene_name}.pkl")
if os.path.exists(inference_result_path):
Log.info(f"Inference result already exists for scene: {scene_name}")
continue
status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total) status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total)
test_set.process_batch(data, self.device)
output = self.predict_sequence(data) output = self.predict_sequence(data)
self.save_inference_result(test_set_name, data["scene_name"], output) self.save_inference_result(test_set_name, data["scene_name"][0], output)
status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list)) status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list))
def predict_sequence(self, data, cr_increase_threshold=0, overlap_area_threshold=25, scan_points_threshold=10, max_iter=50, max_retry = 5): def predict_sequence(self, data, cr_increase_threshold=0, max_iter=50, max_retry=5):
scene_name = data["scene_name"] scene_name = data["scene_name"][0]
Log.info(f"Processing scene: {scene_name}") Log.info(f"Processing scene: {scene_name}")
status_manager.set_status("inference", "inferencer", "scene", scene_name) status_manager.set_status("inference", "inferencer", "scene", scene_name)
''' data for rendering ''' ''' data for rendering '''
scene_path = data["scene_path"] scene_path = data["scene_path"][0]
O_to_L_pose = data["O_to_L_pose"] O_to_L_pose = data["O_to_L_pose"][0]
voxel_threshold = self.voxel_size voxel_threshold = data["voxel_threshold"][0]
filter_degree = 75 filter_degree = data["filter_degree"][0]
down_sampled_model_pts = data["gt_pts"] model_points_normals = data["model_points_normals"][0]
model_pts = model_points_normals[:,:3]
first_frame_to_world_9d = data["first_scanned_n_to_world_pose_9d"][0] down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold)
first_frame_to_world = np.eye(4) first_frame_to_world_9d = data["first_to_world_9d"][0]
first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(first_frame_to_world_9d[:6]) first_frame_to_world = torch.eye(4, device=first_frame_to_world_9d.device)
first_frame_to_world[:3,3] = first_frame_to_world_9d[6:] first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(first_frame_to_world_9d[:,:6])[0]
first_frame_to_world[:3,3] = first_frame_to_world_9d[0,6:]
first_frame_to_world = first_frame_to_world.to(self.device)
''' data for inference ''' ''' data for inference '''
input_data = {} input_data = {}
input_data["combined_scanned_pts"] = torch.tensor(data["first_scanned_pts"][0], dtype=torch.float32).to(self.device).unsqueeze(0) input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(data["first_scanned_n_to_world_pose_9d"], dtype=torch.float32).to(self.device)] input_data["scanned_n_to_world_pose_9d"] = [data["first_to_world_9d"][0].to(self.device)]
input_data["mode"] = namespace.Mode.TEST input_data["mode"] = namespace.Mode.TEST
input_pts_N = input_data["combined_scanned_pts"].shape[1] input_data["combined_scanned_pts"] = data["combined_scanned_pts"]
input_pts_N = input_data["scanned_pts"][0].shape[1]
root = os.path.dirname(scene_path) first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
display_table_info = DataLoadUtil.get_display_table_info(root, scene_name)
radius = display_table_info["radius"]
scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=radius))
first_frame_target_pts, first_frame_target_normals, first_frame_scan_points_indices = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
scanned_view_pts = [first_frame_target_pts] scanned_view_pts = [first_frame_target_pts]
history_indices = [first_frame_scan_points_indices] last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
last_pred_cr, added_pts_num = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
retry_duplication_pose = [] retry_duplication_pose = []
retry_no_pts_pose = [] retry_no_pts_pose = []
retry_overlap_pose = []
retry = 0 retry = 0
pred_cr_seq = [last_pred_cr] pred_cr_seq = [last_pred_cr]
success = 0
last_pts_num = PtsUtil.voxel_downsample_point_cloud(data["first_scanned_pts"][0], 0.002).shape[0]
import time
while len(pred_cr_seq) < max_iter and retry < max_retry: while len(pred_cr_seq) < max_iter and retry < max_retry:
start_time = time.time()
output = self.pipeline(input_data) output = self.pipeline(input_data)
end_time = time.time()
print(f"Time taken for inference: {end_time - start_time} seconds")
pred_pose_9d = output["pred_pose_9d"] pred_pose_9d = output["pred_pose_9d"]
pred_pose = torch.eye(4, device=pred_pose_9d.device) pred_pose = torch.eye(4, device=pred_pose_9d.device)
@ -143,24 +125,7 @@ class Inferencer(Runner):
pred_pose[:3,3] = pred_pose_9d[0,6:] pred_pose[:3,3] = pred_pose_9d[0,6:]
try: try:
start_time = time.time() new_target_pts_world, new_pts_world = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose, require_full_scene=True)
new_target_pts, new_target_normals, new_scan_points_indices = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
#import ipdb; ipdb.set_trace()
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
downsampled_new_target_pts = PtsUtil.voxel_downsample_point_cloud(new_target_pts, voxel_threshold)
overlap, _ = ReconstructionUtil.check_overlap(downsampled_new_target_pts, down_sampled_model_pts, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=voxel_threshold, require_new_added_pts_num = True)
if not overlap:
retry += 1
retry_overlap_pose.append(pred_pose.cpu().numpy().tolist())
continue
history_indices.append(new_scan_points_indices)
end_time = time.time()
print(f"Time taken for rendering: {end_time - start_time} seconds")
except Exception as e: except Exception as e:
Log.warning(f"Error in scene {scene_path}, {e}") Log.warning(f"Error in scene {scene_path}, {e}")
print("current pose: ", pred_pose) print("current pose: ", pred_pose)
@ -169,42 +134,61 @@ class Inferencer(Runner):
retry += 1 retry += 1
continue continue
if new_target_pts.shape[0] == 0:
print("no pts in new target")
retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist())
retry += 1
continue
start_time = time.time() pred_cr = self.compute_coverage_rate(scanned_view_pts, new_target_pts_world, down_sampled_model_pts, threshold=voxel_threshold)
pred_cr, _ = self.compute_coverage_rate(scanned_view_pts, new_target_pts, down_sampled_model_pts, threshold=voxel_threshold)
end_time = time.time() print(pred_cr, last_pred_cr, " max: ", data["max_coverage_rate"])
print(f"Time taken for coverage rate computation: {end_time - start_time} seconds") if pred_cr >= data["max_coverage_rate"]:
print(pred_cr, last_pred_cr, " max: ", data["seq_max_coverage_rate"]) print("max coverage rate reached!")
if pred_cr >= data["seq_max_coverage_rate"] - 1e-3: if pred_cr <= last_pred_cr + cr_increase_threshold:
print("max coverage rate reached!: ", pred_cr) retry += 1
success += 1 retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
continue
retry = 0 retry = 0
pred_cr_seq.append(pred_cr) pred_cr_seq.append(pred_cr)
scanned_view_pts.append(new_target_pts) scanned_view_pts.append(new_target_pts_world)
down_sampled_new_pts_world = PtsUtil.random_downsample_point_cloud(new_pts_world, input_pts_N)
new_pts_world_aug = np.hstack([down_sampled_new_pts_world, np.ones((down_sampled_new_pts_world.shape[0], 1))])
new_pts = np.dot(np.linalg.inv(first_frame_to_world.cpu()), new_pts_world_aug.T).T[:,:3]
new_pts_tensor = torch.tensor(new_pts, dtype=torch.float32).unsqueeze(0).to(self.device)
input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0] , new_pts_tensor], dim=0)]
input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)] input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)]
combined_scanned_views_pts = np.concatenate(input_data["scanned_pts"][0].tolist(), axis=0)
combined_scanned_pts = np.vstack(scanned_view_pts) voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_pts, 0.002)
random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N) random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N)
input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device) input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device)
if success > 3:
break
last_pred_cr = pred_cr last_pred_cr = pred_cr
pts_num = voxel_downsampled_combined_scanned_pts_np.shape[0]
if pts_num - last_pts_num < 10 and pred_cr < data["seq_max_coverage_rate"] - 1e-3:
retry += 1 input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist()
retry_duplication_pose.append(pred_pose.cpu().numpy().tolist()) input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
print("delta pts num < 10:", pts_num, last_pts_num) result = {
last_pts_num = pts_num "pred_pose_9d_seq": input_data["scanned_n_to_world_pose_9d"],
"pts_seq": input_data["scanned_pts"],
"target_pts_seq": scanned_view_pts,
"coverage_rate_seq": pred_cr_seq,
"max_coverage_rate": data["max_coverage_rate"][0],
"pred_max_coverage_rate": max(pred_cr_seq),
"scene_name": scene_name,
"retry_no_pts_pose": retry_no_pts_pose,
"retry_duplication_pose": retry_duplication_pose,
"best_seq_len": data["best_seq_len"][0],
}
self.stat_result[scene_name] = {
"max_coverage_rate": data["max_coverage_rate"][0],
"success_rate": max(pred_cr_seq)/ data["max_coverage_rate"][0],
"coverage_rate_seq": pred_cr_seq,
"pred_max_coverage_rate": max(pred_cr_seq),
"pred_seq_len": len(pred_cr_seq),
}
print('success rate: ', max(pred_cr_seq) / data["max_coverage_rate"][0])
return result
def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005): def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005):
if new_pts is not None: if new_pts is not None:
@ -222,7 +206,7 @@ class Inferencer(Runner):
os.makedirs(dataset_dir) os.makedirs(dataset_dir)
output_path = os.path.join(dataset_dir, f"{scene_name}.pkl") output_path = os.path.join(dataset_dir, f"{scene_name}.pkl")
pickle.dump(output, open(output_path, "wb")) pickle.dump(output, open(output_path, "wb"))
with open(self.stat_result_path, "w") as f: with open(os.path.join(dataset_dir, "stat.json"), "w") as f:
json.dump(self.stat_result, f) json.dump(self.stat_result, f)

View File

@ -24,6 +24,8 @@ class DataLoadUtil:
for channel in float_channels: for channel in float_channels:
channel_data = exr_file.channel(channel) channel_data = exr_file.channel(channel)
img_data.append(np.frombuffer(channel_data, dtype=np.float16).reshape((height, width))) img_data.append(np.frombuffer(channel_data, dtype=np.float16).reshape((height, width)))
# 将各通道组合成一个 (height, width, 3) 的 RGB 图像
img = np.stack(img_data, axis=-1) img = np.stack(img_data, axis=-1)
return img return img

View File

@ -14,27 +14,38 @@ class PtsUtil:
downsampled_points = point_cloud[idx_unique] downsampled_points = point_cloud[idx_unique]
return downsampled_points, idx_unique return downsampled_points, idx_unique
else: else:
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True) import ipdb; ipdb.set_trace()
return unique_voxels[0]*voxel_size unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=False)
return unique_voxels*voxel_size
@staticmethod @staticmethod
def voxel_downsample_point_cloud_random(point_cloud, voxel_size=0.005, require_idx=False): def voxel_downsample_point_cloud_o3d(point_cloud, voxel_size=0.005):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32) pcd = o3d.geometry.PointCloud()
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True) pcd.points = o3d.utility.Vector3dVector(point_cloud)
idx_sort = np.argsort(inverse) pcd = pcd.voxel_down_sample(voxel_size)
idx_unique = idx_sort[np.cumsum(counts)-counts] return np.asarray(pcd.points)
downsampled_points = point_cloud[idx_unique]
if require_idx:
return downsampled_points, inverse
return downsampled_points
@staticmethod @staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False): def voxel_downsample_point_cloud_and_trace_o3d(point_cloud, voxel_size=0.005):
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(point_cloud)
max_bound = pcd.get_max_bound()
min_bound = pcd.get_min_bound()
pcd = pcd.voxel_down_sample_and_trace(voxel_size, max_bound, min_bound, True)
return np.asarray(pcd.points)
@staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False, replace=True):
if point_cloud.shape[0] == 0: if point_cloud.shape[0] == 0:
if require_idx: if require_idx:
return point_cloud, np.array([]) return point_cloud, np.array([])
return point_cloud return point_cloud
idx = np.random.choice(len(point_cloud), num_points, replace=True) if not replace and num_points > len(point_cloud):
if require_idx:
return point_cloud, np.arange(len(point_cloud))
return point_cloud
idx = np.random.choice(len(point_cloud), num_points, replace=replace)
if require_idx: if require_idx:
return point_cloud[idx], idx return point_cloud[idx], idx
return point_cloud[idx] return point_cloud[idx]

View File

@ -32,15 +32,13 @@ class ReconstructionUtil:
@staticmethod @staticmethod
def check_overlap(new_point_cloud, combined_point_cloud, overlap_area_threshold=25, voxel_size=0.01, require_new_added_pts_num=False): def check_overlap(new_point_cloud, combined_point_cloud, overlap_area_threshold=25, voxel_size=0.01):
kdtree = cKDTree(combined_point_cloud) kdtree = cKDTree(combined_point_cloud)
distances, _ = kdtree.query(new_point_cloud) distances, _ = kdtree.query(new_point_cloud)
overlapping_points_num = np.sum(distances < voxel_size*2) overlapping_points = np.sum(distances < voxel_size*2)
cm = 0.01 cm = 0.01
voxel_size_cm = voxel_size / cm voxel_size_cm = voxel_size / cm
overlap_area = overlapping_points_num * voxel_size_cm * voxel_size_cm overlap_area = overlapping_points * voxel_size_cm * voxel_size_cm
if require_new_added_pts_num:
return overlap_area > overlap_area_threshold, len(new_point_cloud)-np.sum(distances < voxel_size*1.2)
return overlap_area > overlap_area_threshold return overlap_area > overlap_area_threshold

View File

@ -1,75 +1,16 @@
import os import os
import json import json
import time
import subprocess import subprocess
import tempfile import tempfile
import shutil import shutil
import numpy as np
from utils.data_load import DataLoadUtil from utils.data_load import DataLoadUtil
from utils.reconstruction import ReconstructionUtil from utils.reconstruction import ReconstructionUtil
from utils.pts import PtsUtil from utils.pts import PtsUtil
class RenderUtil: class RenderUtil:
target_mask_label = (0, 255, 0)
display_table_mask_label = (0, 0, 255)
random_downsample_N = 32768
min_z = 0.2
max_z = 0.5
@staticmethod
def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N):
    """Back-project masked depth pixels and return sampled points with normals.

    Pixels selected by ``mask`` are lifted to 3D with the pinhole model
    (``cam_intrinsic[0, 0]``/``[1, 1]`` as focal lengths, ``[0, 2]``/``[1, 2]``
    as principal point), randomly down-sampled, and transformed by
    ``cam_extrinsic`` applied to homogeneous coordinates.

    Args:
        depth: 2D depth array indexed by ``mask``.
        mask: boolean array of the same shape as ``depth``.
        normal: per-pixel normals, indexed by ``mask`` and reshaped to (-1, 3).
        cam_intrinsic: camera intrinsic matrix.
        cam_extrinsic: 4x4 transform applied to the camera-frame points.
        random_downsample_N: sample count passed to the down-sampler.

    Returns:
        ``(points, normals)``: the transformed points and the normals of the
        same sampled pixels. The normals are returned as read from ``normal``;
        ``cam_extrinsic`` is not applied to them. Both are ``(0, 3)`` arrays
        when the mask selects nothing.
    """
    z = depth[mask]
    if z.shape[0] == 0:
        # Guard before sampling: the down-sampler cannot draw from an empty cloud.
        return np.zeros((0, 3)), np.zeros((0, 3))
    # np.nonzero yields row/column indices in the same order as depth[mask].
    i, j = np.nonzero(mask)
    x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
    y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]

    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
    normal_camera = normal[mask].reshape(-1, 3)
    sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
        points_camera, random_downsample_N, require_idx=True
    )
    if len(sampled_target_points) == 0:
        return np.zeros((0, 3)), np.zeros((0, 3))
    # Reuse the sampling indices so each point keeps its own normal.
    sampled_normal_camera = normal_camera[idx]

    points_camera_aug = np.concatenate(
        (sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1
    )
    points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
    return points_camera_world, sampled_normal_camera
@staticmethod
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
    """Back-project masked depth pixels and return the sampled, transformed points.

    Pixels selected by ``mask`` are lifted to 3D with the pinhole model
    (``cam_intrinsic[0, 0]``/``[1, 1]`` as focal lengths, ``[0, 2]``/``[1, 2]``
    as principal point), randomly down-sampled, and transformed by
    ``cam_extrinsic`` applied to homogeneous coordinates.

    Args:
        depth: 2D depth array indexed by ``mask``.
        mask: boolean array of the same shape as ``depth``.
        cam_intrinsic: camera intrinsic matrix.
        cam_extrinsic: 4x4 transform applied to the camera-frame points.
        random_downsample_N: sample count passed to the down-sampler.

    Returns:
        (K, 3) array of transformed points; ``(0, 3)`` when the mask selects
        no pixel.
    """
    z = depth[mask]
    if z.shape[0] == 0:
        # Guard before sampling: the down-sampler cannot draw from an empty cloud.
        return np.zeros((0, 3))
    # np.nonzero yields row/column indices in the same order as depth[mask].
    i, j = np.nonzero(mask)
    x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
    y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]

    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
    sampled_target_points = PtsUtil.random_downsample_point_cloud(
        points_camera, random_downsample_N
    )
    points_camera_aug = np.concatenate(
        (sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1
    )
    points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
    return points_camera_world
@staticmethod
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
    """Return indices of scan points whose projection hits a labelled mask pixel.

    ``scan_points`` are moved into the camera frame with the inverse of
    ``cam_extrinsic``, projected with ``cam_intrinsic``, truncated to integer
    pixel coordinates and looked up in ``mask``. A point is selected when its
    pixel lies inside the image and every channel of that pixel equals
    ``display_table_mask_label``.

    Points with non-positive camera-frame depth are discarded before
    projecting. Otherwise the perspective divide would mirror points behind
    the camera onto valid pixels and produce NaN/inf for zero depth.

    Args:
        scan_points: (N, 3) array of points.
        mask: (H, W, C) label image; only ``mask.shape[:2]`` is used as size.
        display_table_mask_label: per-channel label value to match.
        cam_intrinsic: 3x3 camera intrinsic matrix.
        cam_extrinsic: 4x4 transform; its inverse is applied to ``scan_points``.

    Returns:
        1D integer array of indices into ``scan_points``, in ascending order.
    """
    scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
    points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]

    # Keep only points in front of the camera; remember their original indices.
    front_indices = np.nonzero(points_camera[:, 2] > 0)[0]
    points_image_homogeneous = np.dot(cam_intrinsic, points_camera[front_indices].T).T
    points_image_homogeneous = points_image_homogeneous / points_image_homogeneous[:, 2:]

    pixel_x = points_image_homogeneous[:, 0].astype(int)
    pixel_y = points_image_homogeneous[:, 1].astype(int)
    h, w = mask.shape[:2]
    in_image = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)

    mask_colors = mask[pixel_y[in_image], pixel_x[in_image]]
    on_label = (mask_colors == display_table_mask_label).all(axis=-1)
    # Map back through both filters to indices into the original scan_points.
    return front_indices[in_image][on_label]
@staticmethod
def render_pts(cam_pose, scene_path, script_path, scan_points, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path) nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
@ -84,58 +25,28 @@ class RenderUtil:
params_data_path = os.path.join(temp_dir, "params.json") params_data_path = os.path.join(temp_dir, "params.json")
with open(params_data_path, 'w') as f: with open(params_data_path, 'w') as f:
json.dump(params, f) json.dump(params, f)
start_time = time.time()
result = subprocess.run([ result = subprocess.run([
'/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', script_path, '--', temp_dir 'blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True) ], capture_output=True, text=True)
end_time = time.time() if result.returncode != 0:
print("Blender script failed:")
print(f"-- Time taken for blender: {end_time - start_time} seconds") print(result.stderr)
return None
path = os.path.join(temp_dir, "tmp") path = os.path.join(temp_dir, "tmp")
cam_info = DataLoadUtil.load_cam_info(path, binocular=True) point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth( cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
path, cam_info["near_plane"],
cam_info["far_plane"], ''' TODO: old code: filter_points api is changed, need to update the code '''
binocular=True filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
) full_scene_point_cloud = None
start_time = time.time() if require_full_scene:
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True) depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True) point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
''' target points ''' point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
mask_img_L = mask_L
mask_img_R = mask_R point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
target_mask_img_L = (mask_L == RenderUtil.target_mask_label).all(axis=-1) full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
target_mask_img_R = (mask_R == RenderUtil.target_mask_label).all(axis=-1)
sampled_target_points_L, sampled_target_normal_L = RenderUtil.get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], RenderUtil.random_downsample_N) return filtered_point_cloud, full_scene_point_cloud
sampled_target_points_R = RenderUtil.get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], RenderUtil.random_downsample_N )
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
if has_points:
target_points, overlap_idx = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_threshold, require_idx=True
)
sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
if has_points:
has_points = target_points.shape[0] > 0
if has_points:
target_points, target_normals = PtsUtil.filter_points(
target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit = filter_degree, z_range=(RenderUtil.min_z, RenderUtil.max_z)
)
scan_points_indices_L = RenderUtil.get_scan_points_indices(scan_points, mask_img_L, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
scan_points_indices_R = RenderUtil.get_scan_points_indices(scan_points, mask_img_R, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
if not has_points:
target_points = np.zeros((0, 3))
target_normals = np.zeros((0, 3))
end_time = time.time()
print(f"-- Time taken for processing: {end_time - start_time} seconds")
#import ipdb; ipdb.set_trace()
return target_points, target_normals, scan_points_indices