Fix training bugs

This commit is contained in:
2024-09-12 15:11:09 +08:00
parent a79ca7749d
commit 4c69ed777b
15 changed files with 201 additions and 120 deletions

View File

@@ -1,10 +1,10 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.stereotype as stereotype
from torch.nn.utils.rnn import pad_sequence
import torch
import sys
sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")
sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
@@ -56,18 +56,25 @@ class NBVReconstructionDataset(BaseDataset):
scene_name = data_item_info["scene_name"]
scanned_views_pts, scanned_coverages_rate, scanned_n_to_1_pose = [], [], []
first_frame_idx = scanned_views[0][0]
first_frame_to_world = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx))["cam_to_world"]
first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
first_frame_to_world = first_cam_info["cam_to_world"]
for view in scanned_views:
frame_idx = view[0]
coverage_rate = view[1]
view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
depth = DataLoadUtil.load_depth(view_path)
cam_info = DataLoadUtil.load_cam_info(view_path)
mask = DataLoadUtil.load_seg(view_path)
frame_curr_to_world = cam_info["cam_to_world"]
n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), frame_curr_to_world)
target_point_cloud = DataLoadUtil.get_target_point_cloud(depth, cam_info["cam_intrinsic"], n_to_1_pose, mask)["points_world"]
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(target_point_cloud, self.pts_num)
cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
n_to_world_pose = cam_info["cam_to_world"]
nR_to_world_pose = cam_info["cam_to_world_R"]
n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), n_to_world_pose)
nR_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), nR_to_world_pose)
depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_1_pose)['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_1_pose)['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
scanned_views_pts.append(downsampled_target_point_cloud)
scanned_coverages_rate.append(coverage_rate)
n_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(n_to_1_pose[:3,:3]))
@@ -86,10 +93,10 @@ class NBVReconstructionDataset(BaseDataset):
data_item = {
"scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32),
"scanned_coverage_rate": np.asarray(scanned_coverages_rate,dtype=np.float32),
"scanned_coverage_rate": scanned_coverages_rate,
"scanned_n_to_1_pose_9d": np.asarray(scanned_n_to_1_pose,dtype=np.float32),
"best_coverage_rate": nbv_coverage_rate,
"best_to_1_pose_9d": best_to_1_9d,
"best_to_1_pose_9d": np.asarray(best_to_1_9d,dtype=np.float32),
"max_coverage_rate": max_coverage_rate,
"scene_name": scene_name
}
@@ -101,23 +108,14 @@ class NBVReconstructionDataset(BaseDataset):
def get_collate_fn(self):
    """Return the collate function used to assemble a batch of data items.

    The returned callable takes ``batch`` (a non-empty list of per-sample
    dicts) and produces one dict with:

    * ``"scanned_pts"`` and ``"scanned_n_to_1_pose_9d"``: a list holding one
      tensor per sample. They are deliberately *not* stacked -- presumably
      because the number of scanned views differs between samples (TODO
      confirm against the dataset).
    * ``"best_to_1_pose_9d"``: the per-sample values converted to tensors
      and stacked along a new leading batch dimension.
    * every other key found in the first sample: a plain list with one
      entry per sample, left unconverted.
    """
    per_sample_tensor_keys = ("scanned_pts", "scanned_n_to_1_pose_9d")
    stacked_key = "best_to_1_pose_9d"

    def collate_fn(batch):
        # Per-sample tensors that stay in a Python list (not stacked).
        collate_data = {
            key: [torch.tensor(item[key]) for item in batch]
            for key in per_sample_tensor_keys
        }
        # The target pose is stacked into one batch tensor.
        collate_data[stacked_key] = torch.stack(
            [torch.tensor(item[stacked_key]) for item in batch]
        )
        # Everything else (names, coverage rates, ...) is passed through
        # as a list, in the key order of the first sample.
        for key in batch[0]:
            if key not in collate_data:
                collate_data[key] = [item[key] for item in batch]
        return collate_data

    return collate_fn
if __name__ == "__main__":
@@ -126,36 +124,48 @@ if __name__ == "__main__":
torch.manual_seed(seed)
np.random.seed(seed)
config = {
"root_dir": "C:\\Document\\Local Project\\nbv_rec\\data\\sample",
"split_file": "C:\\Document\\Local Project\\nbv_rec\\data\\OmniObject3d_train.txt",
"root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes",
"split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt",
"ratio": 0.5,
"batch_size": 2,
"num_workers": 0,
"pts_num": 2048
"pts_num": 32684
}
ds = NBVReconstructionDataset(config)
print(len(ds))
#ds.__getitem__(10)
dl = ds.get_loader(shuffle=True)
for idx, data in enumerate(dl):
cnt=0
print(data["scene_name"])
print(data["scanned_coverage_rate"])
print(data["best_coverage_rate"])
for pts in data["scanned_pts"][0]:
#np.savetxt(f"pts_{cnt}.txt", pts)
cnt+=1
#np.savetxt("best_pts.txt", best_pts)
for key, value in data.items():
if isinstance(value, torch.Tensor):
print(key, ":" ,value.shape)
else:
print(key, ":" ,len(value))
if key == "scanned_n_to_1_pose_9d":
for val in value:
print(val.shape)
if key == "scanned_pts":
for val in value:
print(val.shape)
data = ds.process_batch(data, "cuda:0")
print(data)
break
#
# for idx, data in enumerate(dl):
# cnt=0
# print(data["scene_name"])
# print(data["scanned_coverage_rate"])
# print(data["best_coverage_rate"])
# for pts in data["scanned_pts"][0]:
# #np.savetxt(f"pts_{cnt}.txt", pts)
# cnt+=1
# #np.savetxt("best_pts.txt", best_pts)
# for key, value in data.items():
# if isinstance(value, torch.Tensor):
# print(key, ":" ,value.shape)
# else:
# print(key, ":" ,len(value))
# if key == "scanned_n_to_1_pose_9d":
# for val in value:
# print(val.shape)
# if key == "scanned_pts":
# print("scanned_pts")
# for val in value:
# print(val.shape)
# cnt = 0
# for v in val:
# import ipdb;ipdb.set_trace()
# np.savetxt(f"pts_{cnt}.txt", v)
# cnt+=1
print()
# print()

View File

@@ -14,12 +14,11 @@ class NBVReconstructionPipeline(nn.Module):
self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["pose_encoder"])
self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["seq_encoder"])
self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, config["view_finder"])
self.eps = 1e-5
def forward(self, data):
mode = data["mode"]
# ----- Debug Trace ----- #
import ipdb; ipdb.set_trace()
# ------------------------ #
if mode == namespace.Mode.TRAIN:
return self.forward_train(data)
elif mode == namespace.Mode.TEST:
@@ -27,29 +26,22 @@ class NBVReconstructionPipeline(nn.Module):
else:
Log.error("Unknown mode: {}".format(mode), True)
def pertube_data(self, gt_delta_9d):
    """Perturb a batch of ground-truth poses with noise at a random time.

    Args:
        gt_delta_9d: tensor whose first dimension is the batch size
            (assumed to be ``(batch, 9)`` pose vectors -- TODO confirm).

    Returns:
        A tuple ``(perturbed_x, random_t, target_score, std)`` where
        ``random_t`` has shape ``(batch, 1)`` with values in ``[eps, 1)``,
        ``std`` is reshaped to ``(batch, 1)``, ``perturbed_x`` is
        ``mu + z * std`` for standard-normal ``z``, and ``target_score`` is
        ``-z * std / std**2``.
    """
    bs = gt_delta_9d.shape[0]
    # Sample the time on the same device as the input; scaling torch.rand's
    # [0, 1) range keeps t at or above self.eps.
    random_t = torch.rand(bs, device=gt_delta_9d.device) * (1. - self.eps) + self.eps
    random_t = random_t.unsqueeze(-1)
    # Mean and std of the perturbation kernel come from the view finder.
    mu, std = self.view_finder.marginal_prob(gt_delta_9d, random_t)
    std = std.view(-1, 1)
    z = torch.randn_like(gt_delta_9d)
    perturbed_x = mu + z * std
    target_score = - z * std / (std ** 2)
    return perturbed_x, random_t, target_score, std
def forward_train(self, data):
pts_list = data['pts_list']
pose_list = data['pose_list']
gt_rot_6d = data["nbv_cam_pose"]
pts_feat_list = []
pose_feat_list = []
for pts,pose in zip(pts_list,pose_list):
pts_feat_list.append(self.pts_encoder.encode_points(pts))
pose_feat_list.append(self.pose_encoder.encode_pose(pose))
seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
seq_feat = self.get_seq_feat(data)
''' get std '''
perturbed_x, random_t, target_score, std = self.pertube_data(gt_rot_6d)
best_to_1_pose_9d_batch = data["best_to_1_pose_9d"]
perturbed_x, random_t, target_score, std = self.pertube_data(best_to_1_pose_9d_batch)
input_data = {
"sampled_pose": perturbed_x,
"t": random_t,
@@ -64,14 +56,7 @@ class NBVReconstructionPipeline(nn.Module):
return output
def forward_test(self,data):
pts_list = data['pts_list']
pose_list = data['pose_list']
pts_feat_list = []
pose_feat_list = []
for pts,pose in zip(pts_list,pose_list):
pts_feat_list.append(self.pts_encoder.encode_points(pts))
pose_feat_list.append(self.pose_encoder.encode_pose(pose))
seq_feat = self.seq_encoder.encode_sequence(pts_feat_list, pose_feat_list)
seq_feat = self.get_seq_feat(data)
estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(seq_feat)
result = {
"pred_pose_9d": estimated_delta_rot_9d,
@@ -79,4 +64,19 @@ class NBVReconstructionPipeline(nn.Module):
}
return result
def get_seq_feat(self, data):
    """Encode every sample's scanned views into a sequence feature.

    Reads ``data['scanned_pts']`` and ``data['scanned_n_to_1_pose_9d']``
    (iterated in lockstep, one entry per sample) and
    ``data['best_to_1_pose_9d']``, whose device is used as the target
    device for the per-sample tensors. Each sample's points and poses are
    moved to that device, encoded with ``self.pts_encoder`` and
    ``self.pose_encoder``, and the two resulting feature lists are handed
    to ``self.seq_encoder.encode_sequence``.

    Returns:
        Whatever ``self.seq_encoder.encode_sequence`` returns.
    """
    scanned_pts_batch = data['scanned_pts']
    scanned_n_to_1_pose_9d_batch = data['scanned_n_to_1_pose_9d']
    # The per-sample tensors arrive as lists and are moved here, using the
    # device of the batched target-pose tensor.
    device = data["best_to_1_pose_9d"].device

    pts_feat_seq_list = []
    pose_feat_seq_list = []
    for scanned_pts, scanned_n_to_1_pose_9d in zip(scanned_pts_batch, scanned_n_to_1_pose_9d_batch):
        scanned_pts = scanned_pts.to(device)
        scanned_n_to_1_pose_9d = scanned_n_to_1_pose_9d.to(device)
        pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts))
        pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_1_pose_9d))

    seq_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list)
    return seq_feat