success
This commit is contained in:
224
baselines/grasping/GSNet/models/backbone_resunet14.py
Executable file
224
baselines/grasping/GSNet/models/backbone_resunet14.py
Executable file
@@ -0,0 +1,224 @@
|
||||
import MinkowskiEngine as ME
|
||||
from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck
|
||||
from models.resnet import ResNetBase
|
||||
|
||||
|
||||
class MinkUNetBase(ResNetBase):
    """Sparse U-Net assembled from MinkowskiEngine residual blocks.

    Encoder: a stride-1 stem convolution, then four stride-2 convolutions each
    followed by a residual stage (``block1`` .. ``block4``).
    Decoder: four stride-2 transposed convolutions; the output of each is
    concatenated with the matching encoder output and fed to a residual stage
    (``block5`` .. ``block8``).  A final 1x1 convolution maps the features to
    ``out_channels``.

    Class attributes:
        BLOCK: residual block class; concrete subclasses must set it
            (``ResNetBase.__init__`` asserts it is not ``None``).
        LAYERS: number of residual blocks in each of the eight stages.
        PLANES: block width of each of the eight stages; entries 0-3 are used
            by the encoder, entries 4-7 by the decoder.
        INIT_DIM: number of channels produced by the stem convolution.
        DILATIONS, OUT_TENSOR_STRIDE: not read anywhere in this class.
    """

    BLOCK = None
    DILATIONS = (1, 1, 1, 1, 1, 1, 1, 1)
    LAYERS = (2, 2, 2, 2, 2, 2, 2, 2)
    # A redundant ``PLANES = None`` that was immediately overwritten by this
    # tuple has been removed; the effective class attribute is unchanged.
    PLANES = (32, 64, 128, 256, 256, 128, 96, 96)
    INIT_DIM = 32
    OUT_TENSOR_STRIDE = 1

    # NOTE(review): an earlier comment here said ``initialize_coords`` must be
    # called before the forward pass and ``clear`` afterwards.  Neither method
    # is defined on this class or on ResNetBase, so the note looks stale --
    # confirm before relying on it.
    def __init__(self, in_channels, out_channels, D=3):
        """Build the network.

        Args:
            in_channels: number of input feature channels.
            out_channels: number of channels produced by the final 1x1 conv.
            D: spatial dimension passed to every MinkowskiEngine layer.
        """
        ResNetBase.__init__(self, in_channels, out_channels, D)

    def network_initialization(self, in_channels, out_channels, D):
        """Create all layers.

        ``self.inplanes`` is mutated by ``_make_layer`` and re-assigned before
        every decoder stage, so the statement order below is significant.
        """
        # Stem (stride 1).  Its output is concatenated with the last decoder
        # feature map right before ``block8`` in ``forward``.
        self.inplanes = self.INIT_DIM
        self.conv0p1s1 = ME.MinkowskiConvolution(
            in_channels, self.inplanes, kernel_size=5, dimension=D)

        self.bn0 = ME.MinkowskiBatchNorm(self.inplanes)

        # ---- encoder ----
        self.conv1p1s2 = ME.MinkowskiConvolution(
            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
        self.bn1 = ME.MinkowskiBatchNorm(self.inplanes)

        self.block1 = self._make_layer(self.BLOCK, self.PLANES[0],
                                       self.LAYERS[0])

        self.conv2p2s2 = ME.MinkowskiConvolution(
            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
        self.bn2 = ME.MinkowskiBatchNorm(self.inplanes)

        self.block2 = self._make_layer(self.BLOCK, self.PLANES[1],
                                       self.LAYERS[1])

        self.conv3p4s2 = ME.MinkowskiConvolution(
            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)

        self.bn3 = ME.MinkowskiBatchNorm(self.inplanes)
        self.block3 = self._make_layer(self.BLOCK, self.PLANES[2],
                                       self.LAYERS[2])

        self.conv4p8s2 = ME.MinkowskiConvolution(
            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
        self.bn4 = ME.MinkowskiBatchNorm(self.inplanes)
        self.block4 = self._make_layer(self.BLOCK, self.PLANES[3],
                                       self.LAYERS[3])

        # ---- decoder ----
        # Before each decoder stage ``inplanes`` is set to the channel count
        # of the concatenation (upsampled features + encoder skip features).
        self.convtr4p16s2 = ME.MinkowskiConvolutionTranspose(
            self.inplanes, self.PLANES[4], kernel_size=2, stride=2, dimension=D)
        self.bntr4 = ME.MinkowskiBatchNorm(self.PLANES[4])

        self.inplanes = self.PLANES[4] + self.PLANES[2] * self.BLOCK.expansion
        self.block5 = self._make_layer(self.BLOCK, self.PLANES[4],
                                       self.LAYERS[4])
        self.convtr5p8s2 = ME.MinkowskiConvolutionTranspose(
            self.inplanes, self.PLANES[5], kernel_size=2, stride=2, dimension=D)
        self.bntr5 = ME.MinkowskiBatchNorm(self.PLANES[5])

        self.inplanes = self.PLANES[5] + self.PLANES[1] * self.BLOCK.expansion
        self.block6 = self._make_layer(self.BLOCK, self.PLANES[5],
                                       self.LAYERS[5])
        self.convtr6p4s2 = ME.MinkowskiConvolutionTranspose(
            self.inplanes, self.PLANES[6], kernel_size=2, stride=2, dimension=D)
        self.bntr6 = ME.MinkowskiBatchNorm(self.PLANES[6])

        self.inplanes = self.PLANES[6] + self.PLANES[0] * self.BLOCK.expansion
        self.block7 = self._make_layer(self.BLOCK, self.PLANES[6],
                                       self.LAYERS[6])
        self.convtr7p2s2 = ME.MinkowskiConvolutionTranspose(
            self.inplanes, self.PLANES[7], kernel_size=2, stride=2, dimension=D)
        self.bntr7 = ME.MinkowskiBatchNorm(self.PLANES[7])

        self.inplanes = self.PLANES[7] + self.INIT_DIM
        self.block8 = self._make_layer(self.BLOCK, self.PLANES[7],
                                       self.LAYERS[7])

        self.final = ME.MinkowskiConvolution(
            self.PLANES[7] * self.BLOCK.expansion,
            out_channels,
            kernel_size=1,
            bias=True,
            dimension=D)
        self.relu = ME.MinkowskiReLU(inplace=True)

    def forward(self, x):
        """Run the U-Net on ``x`` and return the output of the final 1x1 conv."""
        out = self.conv0p1s1(x)
        out = self.bn0(out)
        out_p1 = self.relu(out)

        out = self.conv1p1s2(out_p1)
        out = self.bn1(out)
        out = self.relu(out)
        out_b1p2 = self.block1(out)

        out = self.conv2p2s2(out_b1p2)
        out = self.bn2(out)
        out = self.relu(out)
        out_b2p4 = self.block2(out)

        out = self.conv3p4s2(out_b2p4)
        out = self.bn3(out)
        out = self.relu(out)
        out_b3p8 = self.block3(out)

        # tensor_stride=16
        out = self.conv4p8s2(out_b3p8)
        out = self.bn4(out)
        out = self.relu(out)
        out = self.block4(out)

        # tensor_stride=8
        out = self.convtr4p16s2(out)
        out = self.bntr4(out)
        out = self.relu(out)

        out = ME.cat(out, out_b3p8)
        out = self.block5(out)

        # tensor_stride=4
        out = self.convtr5p8s2(out)
        out = self.bntr5(out)
        out = self.relu(out)

        out = ME.cat(out, out_b2p4)
        out = self.block6(out)

        # tensor_stride=2
        out = self.convtr6p4s2(out)
        out = self.bntr6(out)
        out = self.relu(out)

        out = ME.cat(out, out_b1p2)
        out = self.block7(out)

        # tensor_stride=1
        out = self.convtr7p2s2(out)
        out = self.bntr7(out)
        out = self.relu(out)

        out = ME.cat(out, out_p1)
        out = self.block8(out)

        return self.final(out)
|
||||
|
||||
|
||||
class MinkUNet14(MinkUNetBase):
    """MinkUNet using ``BasicBlock`` with one residual block per stage."""

    BLOCK = BasicBlock
    LAYERS = (1, 1, 1, 1, 1, 1, 1, 1)
|
||||
|
||||
|
||||
class MinkUNet18(MinkUNetBase):
    """MinkUNet using ``BasicBlock`` with two residual blocks per stage."""

    BLOCK = BasicBlock
    LAYERS = (2, 2, 2, 2, 2, 2, 2, 2)
|
||||
|
||||
|
||||
class MinkUNet34(MinkUNetBase):
    """MinkUNet using ``BasicBlock`` with a (2, 3, 4, 6) encoder and 2-block decoder stages."""

    BLOCK = BasicBlock
    LAYERS = (2, 3, 4, 6, 2, 2, 2, 2)
|
||||
|
||||
|
||||
class MinkUNet50(MinkUNetBase):
    """MinkUNet using ``Bottleneck`` with a (2, 3, 4, 6) encoder and 2-block decoder stages."""

    BLOCK = Bottleneck
    LAYERS = (2, 3, 4, 6, 2, 2, 2, 2)
|
||||
|
||||
|
||||
class MinkUNet101(MinkUNetBase):
    """MinkUNet using ``Bottleneck`` with a (2, 3, 4, 23) encoder and 2-block decoder stages."""

    BLOCK = Bottleneck
    LAYERS = (2, 3, 4, 23, 2, 2, 2, 2)
|
||||
|
||||
|
||||
class MinkUNet14A(MinkUNet14):
    """MinkUNet14 with decoder stage widths (128, 128, 96, 96)."""

    PLANES = (32, 64, 128, 256, 128, 128, 96, 96)
|
||||
|
||||
|
||||
class MinkUNet14B(MinkUNet14):
    """MinkUNet14 with decoder stage widths (128, 128, 128, 128)."""

    PLANES = (32, 64, 128, 256, 128, 128, 128, 128)
|
||||
|
||||
|
||||
class MinkUNet14C(MinkUNet14):
    """MinkUNet14 with decoder stage widths (192, 192, 128, 128)."""

    PLANES = (32, 64, 128, 256, 192, 192, 128, 128)
|
||||
|
||||
|
||||
class MinkUNet14Dori(MinkUNet14):
    """MinkUNet14 with decoder stage widths (384, 384, 384, 384).

    The ``PLANES`` tuple is identical to the one declared by ``MinkUNet14E``.
    """

    PLANES = (32, 64, 128, 256, 384, 384, 384, 384)
|
||||
|
||||
|
||||
class MinkUNet14E(MinkUNet14):
    """MinkUNet14 with decoder stage widths (384, 384, 384, 384)."""

    PLANES = (32, 64, 128, 256, 384, 384, 384, 384)
|
||||
|
||||
|
||||
class MinkUNet14D(MinkUNet14):
    """MinkUNet14 with decoder stage widths (192, 192, 192, 192)."""

    PLANES = (32, 64, 128, 256, 192, 192, 192, 192)
|
||||
|
||||
|
||||
class MinkUNet18A(MinkUNet18):
    """MinkUNet18 with decoder stage widths (128, 128, 96, 96)."""

    PLANES = (32, 64, 128, 256, 128, 128, 96, 96)
|
||||
|
||||
|
||||
class MinkUNet18B(MinkUNet18):
    """MinkUNet18 with decoder stage widths (128, 128, 128, 128)."""

    PLANES = (32, 64, 128, 256, 128, 128, 128, 128)
|
||||
|
||||
|
||||
class MinkUNet18D(MinkUNet18):
    """MinkUNet18 with decoder stage widths (384, 384, 384, 384)."""

    PLANES = (32, 64, 128, 256, 384, 384, 384, 384)
|
||||
|
||||
|
||||
class MinkUNet34A(MinkUNet34):
    """MinkUNet34 with decoder stage widths (256, 128, 64, 64)."""

    PLANES = (32, 64, 128, 256, 256, 128, 64, 64)
|
||||
|
||||
|
||||
class MinkUNet34B(MinkUNet34):
    """MinkUNet34 with decoder stage widths (256, 128, 64, 32)."""

    PLANES = (32, 64, 128, 256, 256, 128, 64, 32)
|
||||
|
||||
|
||||
class MinkUNet34C(MinkUNet34):
    """MinkUNet34 with decoder stage widths (256, 128, 96, 96)."""

    PLANES = (32, 64, 128, 256, 256, 128, 96, 96)
|
126
baselines/grasping/GSNet/models/graspnet.py
Executable file
126
baselines/grasping/GSNet/models/graspnet.py
Executable file
@@ -0,0 +1,126 @@
|
||||
""" GraspNet baseline model definition.
|
||||
Author: chenxi-wang
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import MinkowskiEngine as ME
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
ROOT_DIR = os.path.dirname(BASE_DIR)
|
||||
sys.path.append(ROOT_DIR)
|
||||
|
||||
from models.backbone_resunet14 import MinkUNet14D
|
||||
from models.modules import ApproachNet, GraspableNet, CloudCrop, SWADNet
|
||||
from loss_utils import GRASP_MAX_WIDTH, NUM_VIEW, NUM_ANGLE, NUM_DEPTH, GRASPNESS_THRESHOLD, M_POINT
|
||||
from label_generation import process_grasp_labels, match_grasp_view_and_label, batch_viewpoint_params_to_matrix
|
||||
from pointnet2.pointnet2_utils import furthest_point_sample, gather_operation
|
||||
|
||||
|
||||
class GraspNet(nn.Module):
    """End-to-end grasp prediction network.

    Pipeline, as wired in ``forward``:
      1. ``backbone`` (MinkUNet14D) computes point-wise features.
      2. ``graspable`` predicts objectness and graspness per point.
      3. Points passing both masks are down-sampled to ``M_points`` seeds per
         sample with furthest point sampling.
      4. ``rotation`` (ApproachNet) scores approach views and picks one per
         seed; its intermediate features are added back residually.
      5. ``crop`` (CloudCrop) groups features around each seed and ``swad``
         (SWADNet) predicts grasp scores and widths.

    Args:
        cylinder_radius: radius forwarded to ``CloudCrop``.
        seed_feat_dim: backbone output / seed feature dimension.
        is_training: when true, ``forward`` generates labels via
            ``process_grasp_labels`` / ``match_grasp_view_and_label``;
            otherwise the predicted top-view rotation is used.  This flag is
            independent of ``nn.Module.training``.
    """

    def __init__(self, cylinder_radius=0.05, seed_feat_dim=512, is_training=True):
        super().__init__()
        self.is_training = is_training
        self.seed_feature_dim = seed_feat_dim
        self.num_depth = NUM_DEPTH
        self.num_angle = NUM_ANGLE
        self.M_points = M_POINT
        self.num_view = NUM_VIEW

        self.backbone = MinkUNet14D(in_channels=3, out_channels=self.seed_feature_dim, D=3)
        self.graspable = GraspableNet(seed_feature_dim=self.seed_feature_dim)
        self.rotation = ApproachNet(self.num_view, seed_feature_dim=self.seed_feature_dim, is_training=self.is_training)
        self.crop = CloudCrop(nsample=16, cylinder_radius=cylinder_radius, seed_feature_dim=self.seed_feature_dim)
        self.swad = SWADNet(num_angle=self.num_angle, num_depth=self.num_depth)

    def forward(self, end_points):
        """Run the full pipeline.

        Args:
            end_points: mapping that must provide ``'point_clouds'``,
                ``'coors'``, ``'feats'`` and ``'quantize2original'`` (plus
                whatever the label-generation helpers read when
                ``is_training`` is true).

        Returns:
            The updated ``end_points`` mapping, or ``None`` when any sample in
            the batch has no graspable point.
        """
        seed_xyz = end_points['point_clouds']  # use all sampled point cloud, B*Ns*3
        B, point_num, _ = seed_xyz.shape  # batch size
        # point-wise features
        coordinates_batch = end_points['coors']
        features_batch = end_points['feats']
        mink_input = ME.SparseTensor(features_batch, coordinates=coordinates_batch)
        seed_features = self.backbone(mink_input).F
        # Map the quantized backbone output back onto the original points.
        seed_features = seed_features[end_points['quantize2original']].view(B, point_num, -1).transpose(1, 2)

        end_points = self.graspable(seed_features, end_points)
        seed_features_flipped = seed_features.transpose(1, 2)  # B*Ns*feat_dim
        objectness_score = end_points['objectness_score']
        graspness_score = end_points['graspness_score'].squeeze(1)
        objectness_pred = torch.argmax(objectness_score, 1)
        objectness_mask = (objectness_pred == 1)
        graspness_mask = graspness_score > GRASPNESS_THRESHOLD
        graspable_mask = objectness_mask & graspness_mask

        seed_features_graspable = []
        seed_xyz_graspable = []
        graspable_num_batch = 0.
        for i in range(B):
            cur_mask = graspable_mask[i]
            cur_graspable_num = cur_mask.sum()
            graspable_num_batch += cur_graspable_num
            # Guard on the *current* sample.  The previous check used the
            # running batch total, so an empty sample after a non-empty one
            # slipped through and reached furthest_point_sample with an
            # empty point set.
            if cur_graspable_num == 0:
                return None
            cur_feat = seed_features_flipped[i][cur_mask]  # Ns*feat_dim
            cur_seed_xyz = seed_xyz[i][cur_mask]  # Ns*3

            cur_seed_xyz = cur_seed_xyz.unsqueeze(0)  # 1*Ns*3
            fps_idxs = furthest_point_sample(cur_seed_xyz, self.M_points)
            cur_seed_xyz_flipped = cur_seed_xyz.transpose(1, 2).contiguous()  # 1*3*Ns
            cur_seed_xyz = gather_operation(cur_seed_xyz_flipped, fps_idxs).transpose(1, 2).squeeze(0).contiguous()  # Ns*3
            cur_feat_flipped = cur_feat.unsqueeze(0).transpose(1, 2).contiguous()  # 1*feat_dim*Ns
            cur_feat = gather_operation(cur_feat_flipped, fps_idxs).squeeze(0).contiguous()  # feat_dim*Ns

            seed_features_graspable.append(cur_feat)
            seed_xyz_graspable.append(cur_seed_xyz)
        seed_xyz_graspable = torch.stack(seed_xyz_graspable, 0)  # B*Ns*3
        seed_features_graspable = torch.stack(seed_features_graspable)  # B*feat_dim*Ns

        end_points['xyz_graspable'] = seed_xyz_graspable
        end_points['graspable_count_stage1'] = graspable_num_batch / B

        end_points, res_feat = self.rotation(seed_features_graspable, end_points)
        seed_features_graspable = seed_features_graspable + res_feat

        if self.is_training:
            end_points = process_grasp_labels(end_points)
            grasp_top_views_rot, end_points = match_grasp_view_and_label(end_points)
        else:
            grasp_top_views_rot = end_points['grasp_top_view_rot']

        group_features = self.crop(seed_xyz_graspable.contiguous(), seed_features_graspable.contiguous(), grasp_top_views_rot)
        end_points = self.swad(group_features, end_points)

        return end_points
|
||||
|
||||
|
||||
def pred_decode(end_points):
    """Decode network outputs into one grasp tensor per batch sample.

    For every seed point the (angle, depth) bin with the highest predicted
    score is selected.  Each returned row is the concatenation
    ``[score, width, height, depth, rotation(9), center, obj_id]`` where
    ``height`` is the constant 0.02 and ``obj_id`` is -1.

    Args:
        end_points: mapping holding ``'point_clouds'``, ``'xyz_graspable'``,
            ``'grasp_score_pred'``, ``'grasp_width_pred'`` and
            ``'grasp_top_view_xyz'``.

    Returns:
        A list with one tensor per sample.
    """
    num_bins = NUM_ANGLE * NUM_DEPTH
    decoded = []
    for sample_idx in range(len(end_points['point_clouds'])):
        centers = end_points['xyz_graspable'][sample_idx].float()

        # Best (angle, depth) bin for every seed.
        flat_scores = end_points['grasp_score_pred'][sample_idx].float().view(M_POINT, num_bins)
        best_scores, best_bins = torch.max(flat_scores, -1)  # [M_POINT]
        best_scores = best_scores.view(-1, 1)

        # The flat bin index is angle-major: angle = idx // NUM_DEPTH, depth = idx % NUM_DEPTH.
        angles = (best_bins // NUM_DEPTH) * np.pi / 12
        depths = ((best_bins % NUM_DEPTH + 1) * 0.01).view(-1, 1)

        widths = (1.2 * end_points['grasp_width_pred'][sample_idx] / 10.).view(M_POINT, num_bins)
        widths = torch.gather(widths, 1, best_bins.view(-1, 1))
        widths = torch.clamp(widths, min=0., max=GRASP_MAX_WIDTH)

        approach_dirs = -end_points['grasp_top_view_xyz'][sample_idx].float()
        rotations = batch_viewpoint_params_to_matrix(approach_dirs, angles).view(M_POINT, 9)

        # merge preds
        heights = 0.02 * torch.ones_like(best_scores)
        object_ids = -1 * torch.ones_like(best_scores)
        decoded.append(
            torch.cat([best_scores, widths, heights, depths, rotations, centers, object_ids], dim=-1))
    return decoded
|
80
baselines/grasping/GSNet/models/loss.py
Executable file
80
baselines/grasping/GSNet/models/loss.py
Executable file
@@ -0,0 +1,80 @@
|
||||
import torch.nn as nn
|
||||
import torch
|
||||
|
||||
|
||||
def get_loss(end_points):
    """Compute the weighted sum of all stage losses.

    Weights: objectness 1, graspness 10, view 100, score 15, width 10.
    The total is stored under ``'loss/overall_loss'``.

    Returns:
        ``(loss, end_points)``.
    """
    obj_term, end_points = compute_objectness_loss(end_points)
    graspness_term, end_points = compute_graspness_loss(end_points)
    view_term, end_points = compute_view_graspness_loss(end_points)
    score_term, end_points = compute_score_loss(end_points)
    width_term, end_points = compute_width_loss(end_points)

    # Accumulate in the same left-to-right order as a single summed expression.
    total = obj_term + 10 * graspness_term
    total = total + 100 * view_term
    total = total + 15 * score_term
    total = total + 10 * width_term

    end_points['loss/overall_loss'] = total
    return total, end_points
|
||||
|
||||
|
||||
def compute_objectness_loss(end_points):
    """Cross-entropy objectness loss plus accuracy / precision / recall metrics.

    Reads ``'objectness_score'`` (class dimension is dim 1) and
    ``'objectness_label'``; writes ``'loss/stage1_objectness_loss'``,
    ``'stage1_objectness_acc'``, ``'stage1_objectness_prec'`` and
    ``'stage1_objectness_recall'``.

    Returns:
        ``(loss, end_points)``.
    """
    scores = end_points['objectness_score']
    labels = end_points['objectness_label']

    loss = nn.CrossEntropyLoss(reduction='mean')(scores, labels)
    end_points['loss/stage1_objectness_loss'] = loss

    predicted = torch.argmax(scores, 1)
    is_correct = predicted == labels.long()
    end_points['stage1_objectness_acc'] = is_correct.float().mean()
    # Precision: correctness restricted to points predicted as class 1.
    end_points['stage1_objectness_prec'] = is_correct[predicted == 1].float().mean()
    # Recall: correctness restricted to points labelled as class 1.
    end_points['stage1_objectness_recall'] = is_correct[labels == 1].float().mean()
    return loss, end_points
|
||||
|
||||
|
||||
def compute_graspness_loss(end_points):
    """Smooth-L1 graspness loss over points whose objectness label is non-zero.

    Also records a rank error: predictions and labels are clamped to
    [0, 0.99], bucketed into 20 bins, and the mean absolute bin distance
    (divided by 20) is stored under ``'stage1_graspness_acc_rank_error'``.

    Returns:
        ``(loss, end_points)``; the loss is also stored under
        ``'loss/stage1_graspness_loss'``.
    """
    pred = end_points['graspness_score'].squeeze(1)
    target = end_points['graspness_label'].squeeze(-1)
    on_object = end_points['objectness_label'].bool()

    per_point = nn.SmoothL1Loss(reduction='none')(pred, target)
    loss = per_point[on_object].mean()

    # Metric only: computed on detached copies so it never affects gradients.
    pred_clamped = torch.clamp(pred.detach().clone()[on_object], 0., 0.99)
    target_clamped = torch.clamp(target.detach().clone()[on_object], 0., 0.99)
    bin_gap = torch.abs(torch.trunc(pred_clamped * 20) - torch.trunc(target_clamped * 20))
    end_points['stage1_graspness_acc_rank_error'] = (bin_gap / 20.).mean()

    end_points['loss/stage1_graspness_loss'] = loss
    return loss, end_points
|
||||
|
||||
|
||||
def compute_view_graspness_loss(end_points):
    """Mean smooth-L1 loss between ``'view_score'`` and ``'batch_grasp_view_graspness'``.

    Returns:
        ``(loss, end_points)``; the loss is also stored under
        ``'loss/stage2_view_loss'``.
    """
    smooth_l1 = nn.SmoothL1Loss(reduction='mean')
    loss = smooth_l1(end_points['view_score'], end_points['batch_grasp_view_graspness'])
    end_points['loss/stage2_view_loss'] = loss
    return loss, end_points
|
||||
|
||||
|
||||
def compute_score_loss(end_points):
    """Mean smooth-L1 loss between ``'grasp_score_pred'`` and ``'batch_grasp_score'``.

    Returns:
        ``(loss, end_points)``; the loss is also stored under
        ``'loss/stage3_score_loss'``.
    """
    smooth_l1 = nn.SmoothL1Loss(reduction='mean')
    loss = smooth_l1(end_points['grasp_score_pred'], end_points['batch_grasp_score'])
    end_points['loss/stage3_score_loss'] = loss
    return loss, end_points
|
||||
|
||||
|
||||
def compute_width_loss(end_points):
    """Smooth-L1 width loss restricted to entries with a positive score label.

    The width label ``'batch_grasp_width'`` is multiplied by 10 before being
    compared with ``'grasp_width_pred'``.

    Returns:
        ``(loss, end_points)``; the loss is also stored under
        ``'loss/stage3_width_loss'``.
    """
    width_pred = end_points['grasp_width_pred']
    width_target = end_points['batch_grasp_width'] * 10
    per_entry = nn.SmoothL1Loss(reduction='none')(width_pred, width_target)

    # Only entries whose score label is positive contribute.
    has_positive_score = end_points['batch_grasp_score'] > 0
    loss = per_entry[has_positive_score].mean()

    end_points['loss/stage3_width_loss'] = loss
    return loss, end_points
|
116
baselines/grasping/GSNet/models/modules.py
Executable file
116
baselines/grasping/GSNet/models/modules.py
Executable file
@@ -0,0 +1,116 @@
|
||||
import os
|
||||
import sys
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
ROOT_DIR = os.path.dirname(BASE_DIR)
|
||||
sys.path.append(ROOT_DIR)
|
||||
|
||||
import pointnet2.pytorch_utils as pt_utils
|
||||
from pointnet2.pointnet2_utils import CylinderQueryAndGroup
|
||||
from loss_utils import generate_grasp_views, batch_viewpoint_params_to_matrix
|
||||
|
||||
|
||||
class GraspableNet(nn.Module):
    """Point-wise head producing objectness logits and a graspness score.

    A single 1x1 ``Conv1d`` maps ``seed_feature_dim`` channels to 3 channels:
    channels 0-1 are stored as ``'objectness_score'`` and channel 2 as
    ``'graspness_score'``.
    """

    def __init__(self, seed_feature_dim):
        super().__init__()
        self.in_dim = seed_feature_dim
        self.conv_graspable = nn.Conv1d(self.in_dim, 3, 1)

    def forward(self, seed_features, end_points):
        """Write both predictions into ``end_points`` and return it."""
        head_out = self.conv_graspable(seed_features)  # (B, 3, num_seed)
        objectness_logits = head_out[:, :2]
        graspness = head_out[:, 2]
        end_points['objectness_score'] = objectness_logits
        end_points['graspness_score'] = graspness
        return end_points
|
||||
|
||||
|
||||
class ApproachNet(nn.Module):
    """Scores ``num_view`` template approach views per seed and selects one.

    When ``is_training`` is true the view is sampled with
    ``torch.multinomial`` from the per-seed min-max normalised scores;
    otherwise the highest-scoring view is taken.
    """

    def __init__(self, num_view, seed_feature_dim, is_training=True):
        super().__init__()
        self.num_view = num_view
        self.in_dim = seed_feature_dim
        self.is_training = is_training
        self.conv1 = nn.Conv1d(self.in_dim, self.in_dim, 1)
        self.conv2 = nn.Conv1d(self.in_dim, self.num_view, 1)

    def forward(self, seed_features, end_points):
        """Predict view scores and the selected view for every seed.

        Writes ``'view_score'``, ``'grasp_top_view_xyz'``,
        ``'grasp_top_view_rot'`` and ``'grasp_top_view_inds'`` into
        ``end_points``.

        Returns:
            ``(end_points, res_features)`` where ``res_features`` is the
            ReLU-activated output of ``conv1``.
        """
        B, _, num_seed = seed_features.size()
        res_features = F.relu(self.conv1(seed_features), inplace=True)
        features = self.conv2(res_features)
        view_score = features.transpose(1, 2).contiguous()  # (B, num_seed, num_view)
        end_points['view_score'] = view_score

        if not self.is_training:
            top_view_inds = torch.max(view_score, dim=2)[1]  # (B, num_seed)
        else:
            # normalize view graspness score to 0~1 (per seed, broadcast over views)
            detached = view_score.clone().detach()
            per_seed_max = torch.max(detached, dim=2, keepdim=True)[0]
            per_seed_min = torch.min(detached, dim=2, keepdim=True)[0]
            sampling_weights = (detached - per_seed_min) / (per_seed_max - per_seed_min + 1e-8)

            # One multinomial call per sample, in batch order.
            sampled = [
                torch.multinomial(sampling_weights[b], 1, replacement=False)
                for b in range(B)
            ]
            top_view_inds = torch.stack(sampled, dim=0).squeeze(-1)  # B, num_seed

        # Look up the xyz direction of the chosen template view for each seed.
        gather_index = top_view_inds.view(B, num_seed, 1, 1).expand(-1, -1, -1, 3).contiguous()
        template_views = generate_grasp_views(self.num_view).to(features.device)  # (num_view, 3)
        template_views = template_views.view(1, 1, self.num_view, 3).expand(B, num_seed, -1, -1).contiguous()
        vp_xyz = torch.gather(template_views, 2, gather_index).squeeze(2)  # (B, num_seed, 3)

        # Rotation for the negated view direction with a zero in-plane angle.
        flat_xyz = vp_xyz.view(-1, 3)
        zero_angle = torch.zeros(flat_xyz.size(0), dtype=vp_xyz.dtype, device=vp_xyz.device)
        vp_rot = batch_viewpoint_params_to_matrix(-flat_xyz, zero_angle).view(B, num_seed, 3, 3)

        end_points['grasp_top_view_xyz'] = vp_xyz
        end_points['grasp_top_view_rot'] = vp_rot
        end_points['grasp_top_view_inds'] = top_view_inds
        return end_points, res_features
|
||||
|
||||
|
||||
class CloudCrop(nn.Module):
    """Groups features around each seed with a cylinder query and pools them.

    Grouped features (xyz + seed features) go through a shared MLP
    ``[3 + seed_feature_dim, 256, 256]`` and are max-pooled over the samples
    of each group.
    """

    def __init__(self, nsample, seed_feature_dim, cylinder_radius=0.05, hmin=-0.02, hmax=0.04):
        super().__init__()
        self.nsample = nsample
        self.in_dim = seed_feature_dim
        self.cylinder_radius = cylinder_radius

        self.grouper = CylinderQueryAndGroup(
            radius=cylinder_radius,
            hmin=hmin,
            hmax=hmax,
            nsample=nsample,
            use_xyz=True,
            normalize_xyz=True,
        )
        # use xyz, so plus 3
        channel_spec = [3 + self.in_dim, 256, 256]
        self.mlps = pt_utils.SharedMLP(channel_spec, bn=True)

    def forward(self, seed_xyz_graspable, seed_features_graspable, vp_rot):
        """Return the pooled per-seed features, ``(batch_size, mlps[-1], M)``."""
        # The seed cloud serves both as the queried cloud and as the query centres.
        grouped = self.grouper(
            seed_xyz_graspable,
            seed_xyz_graspable,
            vp_rot,
            seed_features_graspable,
        )  # B*3 + feat_dim*M*K
        pooled = self.mlps(grouped)  # (batch_size, mlps[-1], M, K)
        num_samples = pooled.size(3)
        pooled = F.max_pool2d(pooled, kernel_size=[1, num_samples])  # (batch_size, mlps[-1], M, 1)
        return pooled.squeeze(-1)  # (batch_size, mlps[-1], M)
|
||||
|
||||
|
||||
class SWADNet(nn.Module):
    """Predicts a grasp score and a grasp width for every (angle, depth) bin.

    Two 1x1 ``Conv1d`` layers map 256 input channels to
    ``2 * num_angle * num_depth`` outputs per seed.
    """

    def __init__(self, num_angle, num_depth):
        super().__init__()
        self.num_angle = num_angle
        self.num_depth = num_depth

        # input feat dim need to be consistent with CloudCrop module
        self.conv1 = nn.Conv1d(256, 256, 1)
        self.conv_swad = nn.Conv1d(256, 2*num_angle*num_depth, 1)

    def forward(self, vp_features, end_points):
        """Store ``'grasp_score_pred'`` and ``'grasp_width_pred'`` in ``end_points``."""
        B, _, num_seed = vp_features.size()
        hidden = F.relu(self.conv1(vp_features), inplace=True)
        raw = self.conv_swad(hidden)
        # (B, 2, angle, depth, seed) -> (B, 2, seed, angle, depth)
        raw = raw.view(B, 2, self.num_angle, self.num_depth, num_seed)
        raw = raw.permute(0, 1, 4, 2, 3)

        # split prediction
        score_pred = raw[:, 0]  # B * num_seed * num angle * num_depth
        width_pred = raw[:, 1]
        end_points['grasp_score_pred'] = score_pred
        end_points['grasp_width_pred'] = width_pred
        return end_points
|
196
baselines/grasping/GSNet/models/resnet.py
Executable file
196
baselines/grasping/GSNet/models/resnet.py
Executable file
@@ -0,0 +1,196 @@
|
||||
import torch.nn as nn
|
||||
|
||||
try:
|
||||
import open3d as o3d
|
||||
except ImportError:
|
||||
raise ImportError("Please install open3d with `pip install open3d`.")
|
||||
|
||||
import MinkowskiEngine as ME
|
||||
from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck
|
||||
|
||||
|
||||
class ResNetBase(nn.Module):
    """Base class for sparse ResNets built on MinkowskiEngine.

    Subclasses set ``BLOCK`` (required) and ``LAYERS``; ``PLANES`` and
    ``INIT_DIM`` may be overridden.  Construction calls
    ``network_initialization`` followed by ``weight_initialization``.
    """

    BLOCK = None
    LAYERS = ()
    INIT_DIM = 64
    PLANES = (64, 128, 256, 512)

    def __init__(self, in_channels, out_channels, D=3):
        super().__init__()
        self.D = D
        assert self.BLOCK is not None

        self.network_initialization(in_channels, out_channels, D)
        self.weight_initialization()

    def network_initialization(self, in_channels, out_channels, D):
        """Create the stem, four residual stages, the head conv, pooling and classifier."""
        self.inplanes = self.INIT_DIM
        self.conv1 = nn.Sequential(
            ME.MinkowskiConvolution(
                in_channels, self.inplanes, kernel_size=3, stride=2, dimension=D),
            ME.MinkowskiInstanceNorm(self.inplanes),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiMaxPooling(kernel_size=2, stride=2, dimension=D),
        )

        # ``_make_layer`` updates ``self.inplanes``; the stages must be built in order.
        self.layer1 = self._make_layer(self.BLOCK, self.PLANES[0], self.LAYERS[0], stride=2)
        self.layer2 = self._make_layer(self.BLOCK, self.PLANES[1], self.LAYERS[1], stride=2)
        self.layer3 = self._make_layer(self.BLOCK, self.PLANES[2], self.LAYERS[2], stride=2)
        self.layer4 = self._make_layer(self.BLOCK, self.PLANES[3], self.LAYERS[3], stride=2)

        self.conv5 = nn.Sequential(
            ME.MinkowskiDropout(),
            ME.MinkowskiConvolution(
                self.inplanes, self.inplanes, kernel_size=3, stride=3, dimension=D),
            ME.MinkowskiInstanceNorm(self.inplanes),
            ME.MinkowskiGELU(),
        )

        self.glob_pool = ME.MinkowskiGlobalMaxPooling()

        self.final = ME.MinkowskiLinear(self.inplanes, out_channels, bias=True)

    def weight_initialization(self):
        """Kaiming-initialise ``MinkowskiConvolution`` kernels; reset batch-norm affine params."""
        for module in self.modules():
            if isinstance(module, ME.MinkowskiConvolution):
                ME.utils.kaiming_normal_(module.kernel, mode="fan_out", nonlinearity="relu")

            if isinstance(module, ME.MinkowskiBatchNorm):
                nn.init.constant_(module.bn.weight, 1)
                nn.init.constant_(module.bn.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_momentum=0.1):
        """Build one residual stage of ``blocks`` blocks and update ``self.inplanes``.

        A 1x1 conv + batch-norm shortcut is created for the first block when
        the stride is not 1 or the channel count changes.  ``bn_momentum`` is
        accepted but not used in this method.
        """
        out_planes = planes * block.expansion

        shortcut = None
        if stride != 1 or self.inplanes != out_planes:
            shortcut = nn.Sequential(
                ME.MinkowskiConvolution(
                    self.inplanes,
                    out_planes,
                    kernel_size=1,
                    stride=stride,
                    dimension=self.D,
                ),
                ME.MinkowskiBatchNorm(out_planes),
            )

        stage = [
            block(
                self.inplanes,
                planes,
                stride=stride,
                dilation=dilation,
                downsample=shortcut,
                dimension=self.D,
            )
        ]
        self.inplanes = out_planes
        # Remaining blocks keep the resolution (stride 1) and have no shortcut conv.
        stage.extend(
            block(self.inplanes, planes, stride=1, dilation=dilation, dimension=self.D)
            for _ in range(1, blocks)
        )

        return nn.Sequential(*stage)

    def forward(self, x: ME.SparseTensor):
        """Apply stem, residual stages, head conv, global pooling and the classifier."""
        for stage in (self.conv1, self.layer1, self.layer2,
                      self.layer3, self.layer4, self.conv5):
            x = stage(x)
        pooled = self.glob_pool(x)
        return self.final(pooled)
|
||||
|
||||
|
||||
class ResNet14(ResNetBase):
    """ResNet using ``BasicBlock`` with one block per stage."""

    BLOCK = BasicBlock
    LAYERS = (1, 1, 1, 1)
|
||||
|
||||
|
||||
class ResNet18(ResNetBase):
    """ResNet using ``BasicBlock`` with two blocks per stage."""

    BLOCK = BasicBlock
    LAYERS = (2, 2, 2, 2)
|
||||
|
||||
|
||||
class ResNet34(ResNetBase):
    """ResNet using ``BasicBlock`` with (3, 4, 6, 3) blocks per stage."""

    BLOCK = BasicBlock
    LAYERS = (3, 4, 6, 3)
|
||||
|
||||
|
||||
class ResNet50(ResNetBase):
    """ResNet using ``Bottleneck`` with (3, 4, 6, 3) blocks per stage."""

    BLOCK = Bottleneck
    LAYERS = (3, 4, 6, 3)
|
||||
|
||||
|
||||
class ResNet101(ResNetBase):
    """ResNet using ``Bottleneck`` with (3, 4, 23, 3) blocks per stage."""

    BLOCK = Bottleneck
    LAYERS = (3, 4, 23, 3)
|
||||
|
||||
|
||||
class ResFieldNetBase(ResNetBase):
    """ResNet preceded by two sinusoidal field networks.

    The first field network embeds the input field into 32 channels; its
    output is sliced back onto the input field, concatenated with it
    (``cat_slice``) and embedded into 64 channels by the second field
    network.  The regular ``ResNetBase`` layers then consume those 64 channels.
    """

    def network_initialization(self, in_channels, out_channels, D):
        """Create both field networks, then the ``ResNetBase`` layers on 64 input channels."""
        first_width = 32
        second_width = 64

        self.field_network = nn.Sequential(
            ME.MinkowskiSinusoidal(in_channels, first_width),
            ME.MinkowskiBatchNorm(first_width),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiLinear(first_width, first_width),
            ME.MinkowskiBatchNorm(first_width),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiToSparseTensor(),
        )
        # Input width is first_width + in_channels because of the concatenation in ``forward``.
        self.field_network2 = nn.Sequential(
            ME.MinkowskiSinusoidal(first_width + in_channels, second_width),
            ME.MinkowskiBatchNorm(second_width),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiLinear(second_width, second_width),
            ME.MinkowskiBatchNorm(second_width),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiToSparseTensor(),
        )

        super().network_initialization(second_width, out_channels, D)

    def forward(self, x: ME.TensorField):
        """Embed the field twice, then run the ``ResNetBase`` forward pass."""
        first_embedding = self.field_network(x)
        second_embedding = self.field_network2(first_embedding.cat_slice(x))
        return super().forward(second_embedding)
|
||||
|
||||
|
||||
class ResFieldNet14(ResFieldNetBase):
    """ResFieldNet using ``BasicBlock`` with one block per stage."""

    BLOCK = BasicBlock
    LAYERS = (1, 1, 1, 1)
|
||||
|
||||
|
||||
class ResFieldNet18(ResFieldNetBase):
    """ResFieldNet using ``BasicBlock`` with two blocks per stage."""

    BLOCK = BasicBlock
    LAYERS = (2, 2, 2, 2)
|
||||
|
||||
|
||||
class ResFieldNet34(ResFieldNetBase):
    """ResFieldNet using ``BasicBlock`` with (3, 4, 6, 3) blocks per stage."""

    BLOCK = BasicBlock
    LAYERS = (3, 4, 6, 3)
|
||||
|
||||
|
||||
class ResFieldNet50(ResFieldNetBase):
    """ResFieldNet using ``Bottleneck`` with (3, 4, 6, 3) blocks per stage."""

    BLOCK = Bottleneck
    LAYERS = (3, 4, 6, 3)
|
||||
|
||||
|
||||
class ResFieldNet101(ResFieldNetBase):
    """ResFieldNet using ``Bottleneck`` with (3, 4, 23, 3) blocks per stage."""

    BLOCK = Bottleneck
    LAYERS = (3, 4, 23, 3)
|
Reference in New Issue
Block a user