import cv2
import numpy as np
import open3d as o3d
import sys, os
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from utils.control_util import ControlUtil
from utils.communicate_util import CommunicateUtil
import pickle
from tqdm import tqdm
import json
import pyrealsense2 as rs
# marker parameters
MARKER_TYPE = "aruco"
MARKER_INFO = {"id": 582, "size": 0.1}
# view parameters
VIEW_NUM = 6
LOOK_AT_PT = np.array([0.5, 0, 0.2])
TOP_DOWN_DIST = [0.3, 0.45, 0.55]
TOP_DOWN_DIRECTION = np.array([0., 0., -1.])
RADIUS = [0.25, 0.2, 0.1]
INIT_GRIPPER_POSE = np.array([
[1, 0, 0, 0.56],
[0, -1, 0, 0],
[0, 0, -1, 0.6],
[0, 0, 0, 1]
])
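# fixed rotation (90 degrees about z) right-multiplied onto each generated camera
# pose to obtain the commanded gripper pose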
DEFAULT_EE_TO_BASE = np.array([
[0, -1, 0, 0],
[1, 0, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]
])
class HandEyeCalibration:
"""
A class to perform hand-eye calibration for a robotic system using various camera types and markers.
Attributes:
marker_to_cam (list): List to store marker to camera transformation matrices.
end_effector_to_base (list): List to store end-effector to base transformation matrices.
marker_type (str): Type of marker used for calibration (default is "aruco").
marker_info (dict): Information about the marker, including its ID and size.
camera (object): Camera object used for capturing images.
intrinsics (dict): Intrinsic parameters of the camera.
dist (optional): Distortion coefficients of the camera.
marker_solver (object): Solver object for detecting and solving marker poses.
camera_type (str): Type of camera used (default is "RealSense"). "RealSense" and "Inspire" are supported.
Methods:
get_marker_to_cam_pose(color_image, visualize=False):
Gets the pose of the marker relative to the camera from a color image.
capture_single_frame():
Captures a single frame from the camera and gets the current end-effector pose.
        capture_frames(view_num, look_at_pt, top_down_dist, top_down_direction, radius, visualize=False):
Captures multiple frames by moving the end-effector to different poses and records the marker and end-effector poses.
get_hand_eye(marker_to_cam_poses, end_effector_to_base_poses):
Computes the hand-eye calibration matrix using the captured marker and end-effector poses.
        calibrate(view_num, look_at_pt, top_down_dist, top_down_direction, radius):
Performs the full calibration process by capturing frames and computing the hand-eye calibration matrix.
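
    Example (a minimal usage sketch; assumes a connected robot and a visible marker):
        calib = HandEyeCalibration(marker_type=MARKER_TYPE, marker_info=MARKER_INFO)
        hand_eye = calib.calibrate(VIEW_NUM, LOOK_AT_PT, TOP_DOWN_DIST, TOP_DOWN_DIRECTION, RADIUS)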
"""
def __init__(self, dist=None, marker_type="aruco", marker_info={"id": 0, "size": 0.1}, camera_type="RealSense"):
self.marker_to_cam = []
self.end_effector_to_base = []
self.marker_type = marker_type
self.marker_info = marker_info
ControlUtil.connect_robot()
ControlUtil.franka_reset()
if camera_type == "RealSense":
self.camera = RealSenseCamera()
elif camera_type == "Inspire":
self.camera = InspireCamera()
else:
            raise NotImplementedError(f"Unsupported camera type: {camera_type}")
self.intrinsics = self.camera.get_color_intrinsics()
self.dist = dist
if self.marker_type == "aruco":
            self.marker_solver = ArucoMarkerSolver(self.intrinsics, dist, marker_info)
self.camera_type = camera_type
def get_marker_to_cam_pose(self, color_image, visualize=False):
res = self.marker_solver.get_marker_to_cam_pose(color_image, visualize)
return res
def capture_single_frame(self):
color_image = self.camera.get_color_image()
end_effector_pose = ControlUtil.get_curr_gripper_to_base_pose()
return color_image, end_effector_pose
    def capture_frames(self, view_num, look_at_pt, top_down_dist, top_down_direction, radius, visualize=False):
        circle_num = len(radius)
        poses = []
        for i in range(circle_num):
            # alternate the sweep direction on every other ring to shorten travel between rings
            reverse_order = i % 2 == 1
            pose_generator = EndEffectorPoseGenerator(view_num, look_at_pt, top_down_dist[i], top_down_direction, radius[i], reverse_order)
            poses += [pose @ DEFAULT_EE_TO_BASE for pose in pose_generator.poses]
        if visualize:
pose_visualizer = PoseVisualizer(poses)
pose_visualizer.visualize()
pbar = tqdm(total=len(poses), desc="Capturing frames")
for pose in poses:
pbar.update(1)
ControlUtil.set_gripper_pose(pose)
color_image, end_effector_pose = self.capture_single_frame()
if color_image is None:
print("Failed to capture the frame")
continue
res = self.get_marker_to_cam_pose(color_image, visualize=True)
if res is None:
print("Failed to get marker pose")
continue
else:
                marker_to_cam_pose = np.eye(4)
                # estimatePoseSingleMarkers returns (1, 1, 3)-shaped vectors; flatten them before use
                rvec = np.asarray(res["rvec"]).flatten()
                tvec = np.asarray(res["tvec"]).flatten()
                rot_mat = cv2.Rodrigues(rvec)[0]
                marker_to_cam_pose[:3, :3] = rot_mat
                marker_to_cam_pose[:3, 3] = tvec
self.marker_to_cam.append(marker_to_cam_pose)
self.end_effector_to_base.append(end_effector_pose)
def get_hand_eye(self, marker_to_cam_poses, end_effector_to_base_poses):
############################## Debug ################################
# with open("marker_to_cam_poses.pkl", "wb") as f:
# pickle.dump(marker_to_cam_poses, f)
# with open("end_effector_to_base_poses.pkl", "wb") as f:
# pickle.dump(end_effector_to_base_poses, f)
# with open("marker_to_cam_poses.pkl", "rb") as f:
# marker_to_cam_poses = pickle.load(f)
# with open("end_effector_to_base_poses.pkl", "rb") as f:
# end_effector_to_base_poses = pickle.load(f)
#####################################################################
R_marker_to_cam = []
t_marker_to_cam = []
R_end_effector_to_base = []
t_end_effector_to_base = []
for index, marker_to_cam_pose in enumerate(marker_to_cam_poses):
if marker_to_cam_pose is None:
continue
R_marker_to_cam.append(marker_to_cam_pose[:3, :3])
t_marker_to_cam.append(marker_to_cam_pose[:3, 3])
R_end_effector_to_base.append(end_effector_to_base_poses[index][:3, :3])
t_end_effector_to_base.append(end_effector_to_base_poses[index][:3, 3])
if len(R_marker_to_cam) < 3:
print("Not enough data to calibrate")
return None
R_marker_to_cam = np.array(R_marker_to_cam)
t_marker_to_cam = np.array(t_marker_to_cam)
R_end_effector_to_base = np.array(R_end_effector_to_base)
t_end_effector_to_base = np.array(t_end_effector_to_base)
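        # Eye-in-hand configuration: the camera rides on the gripper and the marker is
        # fixed in the base frame, so cv2.calibrateHandEye is fed gripper->base and
        # marker->camera pose pairs and returns the camera->gripper transform.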
        hand_eye = cv2.calibrateHandEye(R_end_effector_to_base, t_end_effector_to_base, R_marker_to_cam, t_marker_to_cam, method=cv2.CALIB_HAND_EYE_TSAI)
cv2.destroyAllWindows()
return hand_eye
    def calibrate(self, view_num, look_at_pt, top_down_dist, top_down_direction, radius):
        self.capture_frames(view_num, look_at_pt, top_down_dist, top_down_direction, radius)
marker_to_cam = np.array(self.marker_to_cam)
end_effector_to_base = np.array(self.end_effector_to_base)
hand_eye = self.get_hand_eye(marker_to_cam, end_effector_to_base)
# hand_eye = self.get_hand_eye(None, None)
return hand_eye
class ArucoMarkerSolver:
def __init__(self, intrinsics, dist=None, marker_info={"id": 0, "size": 0.1}):
self.intrinsics = intrinsics
self.dist = dist
self.marker_info = marker_info
    def solve_aruco_marker_pose(self, color_image):
self.res = []
aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_ARUCO_ORIGINAL)
aruco_params = cv2.aruco.DetectorParameters()
cv2.imshow("color_image", color_image)
cv2.waitKey(500)
corners, ids, rejectedImgPoints = cv2.aruco.detectMarkers(color_image, aruco_dict, parameters=aruco_params)
if ids is None:
return None
for i in range(len(ids)):
rvec, tvec, _ = cv2.aruco.estimatePoseSingleMarkers(corners[i], self.marker_info["size"], self.intrinsics, self.dist)
self.res.append({
'rvec': rvec,
'tvec': tvec,
'corners': corners[i],
'ids': ids[i]
})
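        # return the detection whose id matches the calibration marker, if any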
for res in self.res:
if res["ids"][0] == self.marker_info["id"]:
return res
return None
def get_marker_to_cam_pose(self, color_image, visualize=False):
        res = self.solve_aruco_marker_pose(color_image)
if visualize and res is not None:
self.visualize_res(color_image, res)
return res
def visualize_res(self, color_image, res):
rvec = res["rvec"]
tvec = res["tvec"]
corners = res["corners"]
ids = res["ids"]
aruco_frame = cv2.drawFrameAxes(color_image, self.intrinsics, self.dist, rvec, tvec, 0.05)
aruco_frame = cv2.aruco.drawDetectedMarkers(aruco_frame, (corners, ), ids)
cv2.imshow("aruco_frame", aruco_frame)
cv2.waitKey(500)
class EndEffectorPoseGenerator:
    def __init__(self, view_num, look_at_pt, top_down_dist, top_down_direction, radius, reverse_order=False):
        self.view_num = view_num
        self.look_at_pt = look_at_pt
        self.top_down_dist = top_down_dist
        self.top_down_direction = top_down_direction
        self.radius = radius
        self.poses = self.generate_poses(reverse_order)
def generate_poses(self, reverse_order=False):
poses = []
for i in range(self.view_num):
order = i if not reverse_order else self.view_num - i - 1
pose = self.get_pose(order)
poses.append(pose)
return poses
    def get_pose(self, i):
        # sample the i-th viewpoint on an arc (0.8 * pi wide, starting at 0.7 * pi) around the look-at point
        angle = 0.8 * np.pi * i / self.view_num + 0.7 * np.pi
        start_point_x = self.look_at_pt[0] + self.radius * np.cos(angle)
        start_point_y = self.look_at_pt[1] + self.radius * np.sin(angle)
        start_point_z = self.look_at_pt[2] + self.top_down_dist
        # build an orthonormal look-at frame; the small random perturbation of the up
        # direction keeps the view orientations from being identical across frames
        look_at_vector = self.look_at_pt - np.array([start_point_x, start_point_y, start_point_z])
        look_at_vector = look_at_vector / np.linalg.norm(look_at_vector)
        up_vector = self.top_down_direction
        noise = np.random.uniform(0, 0.1, up_vector.shape)
        right_vector = np.cross(look_at_vector, up_vector + noise)
        right_vector = right_vector / np.linalg.norm(right_vector)
        up_vector = np.cross(look_at_vector, right_vector)
        up_vector = up_vector / np.linalg.norm(up_vector)
pose = np.eye(4)
pose[:3, :3] = self.get_rotation(right_vector, up_vector, look_at_vector)
pose[:3, 3] = np.array([start_point_x, start_point_y, start_point_z])
return pose
def get_rotation(self, right_vector, up_vector, look_at_vector):
rotation = np.eye(3)
rotation[:, 0] = right_vector
rotation[:, 1] = up_vector
rotation[:, 2] = look_at_vector
return rotation
class PoseVisualizer:
def __init__(self, poses):
self.poses = poses
def visualize(self):
vis = o3d.visualization.Visualizer()
vis.create_window()
for pose in self.poses:
mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.05)
mesh_frame.transform(pose)
vis.add_geometry(mesh_frame)
print("Press q to close the window")
vis.run()
class CAMERA:
def __init__(self):
self.color_intrinsics = None
self.depth_intrinsics = None
self.color = None
self.depth = None
def get_color_intrinsics(self):
pass
def get_depth_intrinsics(self):
pass
def get_color_image(self):
pass
def get_depth_image(self):
pass
class RealSenseCamera(CAMERA):
def __init__(self):
super().__init__()
self.pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
        # depth-to-color alignment, applied when fetching depth frames
        self.align = rs.align(rs.stream.color)
        self.pipeline.start(config)
self.color_intrinsics = self.get_intrinsics("color")
self.depth_intrinsics = self.get_intrinsics("depth")
def get_color_intrinsics(self):
return self.color_intrinsics
def get_depth_intrinsics(self):
return self.depth_intrinsics
def get_color_image(self):
frames = self.pipeline.wait_for_frames()
color_frame = frames.get_color_frame()
color_image = np.asanyarray(color_frame.get_data())
return color_image
    def get_depth_image(self):
        frames = self.pipeline.wait_for_frames()
        frames = self.align.process(frames)
        depth_frame = frames.get_depth_frame()
        depth_image = np.asanyarray(depth_frame.get_data())
        return depth_image
    def get_intrinsics(self, camera_type):
        if camera_type == "color":
profile = self.pipeline.get_active_profile()
color_profile = rs.video_stream_profile(profile.get_stream(rs.stream.color))
color_intrinsics = color_profile.get_intrinsics()
K = [[color_intrinsics.fx, 0, color_intrinsics.ppx], [0, color_intrinsics.fy, color_intrinsics.ppy], [0, 0, 1]]
return np.array(K)
        elif camera_type == "depth":
profile = self.pipeline.get_active_profile()
depth_profile = rs.video_stream_profile(profile.get_stream(rs.stream.depth))
depth_intrinsics = depth_profile.get_intrinsics()
K = [[depth_intrinsics.fx, 0, depth_intrinsics.ppx], [0, depth_intrinsics.fy, depth_intrinsics.ppy], [0, 0, 1]]
return np.array(K)
class InspireCamera(CAMERA):
def __init__(self):
super().__init__()
ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE)
view_data = CommunicateUtil.get_view_data(init=True)
self.color_intrinsics = self.get_intrinsics(view_data["color_intrinsics"], view_data["color_image"])
self.depth_intrinsics = self.get_intrinsics(view_data["depth_intrinsics"], view_data["depth_image"])
def get_color_intrinsics(self):
return self.color_intrinsics
def get_depth_intrinsics(self):
return self.depth_intrinsics
def get_color_image(self):
view_data = CommunicateUtil.get_view_data()
if view_data is None:
return None
else:
color_image = view_data["color_image"]
return color_image
def get_depth_image(self):
view_data = CommunicateUtil.get_view_data()
if view_data is None:
return None
else:
depth_image = view_data["depth_image"]
return depth_image
def get_intrinsics(self, intrinsics, image):
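        # the delivered image resolution may differ from the resolution the intrinsics
        # were reported at, so rescale fx/fy/cx/cy to the actual image size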
height = image.shape[0]
width = image.shape[1]
scale_h = height / intrinsics["height"]
scale_w = width / intrinsics["width"]
fx = intrinsics["fx"] * scale_w
fy = intrinsics["fy"] * scale_h
cx = intrinsics["cx"] * scale_w
cy = intrinsics["cy"] * scale_h
K = np.array([
[fx, 0, cx],
[ 0, fy, cy],
[ 0, 0, 1]
])
return K
def example_view_generator():
    # EndEffectorPoseGenerator takes a scalar distance and radius (one ring), so preview the first ring only
    pose_generator = EndEffectorPoseGenerator(VIEW_NUM, LOOK_AT_PT, TOP_DOWN_DIST[0], TOP_DOWN_DIRECTION, RADIUS[0])
base_pose = np.eye(4)
test_pose = np.array([
[1, 0, 0, 0],
[0, -1, 0, 0],
[0, 0, -1, 0.2],
[0, 0, 0, 1]
])
    poses = [pose @ DEFAULT_EE_TO_BASE for pose in pose_generator.poses] + [base_pose, test_pose]
visualizer = PoseVisualizer(poses)
visualizer.visualize()
def example_calibration():
# ControlUtil.connect_robot()
# ControlUtil.franka_reset()
# ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE)
# view_data = CommunicateUtil.get_view_data(init=True)
# intrinsics = get_intrinsics(view_data["color_intrinsics"], view_data["color_image"])
calibration = HandEyeCalibration(
# intrinsics=intrinsics,
dist=None,
marker_type=MARKER_TYPE,
marker_info=MARKER_INFO,
camera_type="Inspire"
)
hand_eye = calibration.calibrate(
view_num=VIEW_NUM,
look_at_pt=LOOK_AT_PT,
top_down_dist=TOP_DOWN_DIST,
        top_down_direction=TOP_DOWN_DIRECTION,
radius=RADIUS
)
    if hand_eye is None:
        print("Calibration failed: not enough valid marker detections")
        return None
    print(hand_eye)
    res = {
        'rotation': hand_eye[0].tolist(),
        'translation': hand_eye[1].tolist()
    }
with open("hand_eye.json", "w") as f:
json.dump(res, f, indent=4)
print("Hand eye calibration finished")
return res
def get_depth_to_end_effector_pose(calibration_res_path):
with open(calibration_res_path, "r") as f:
data = json.load(f)
rotation = np.array(data["rotation"])
translation = np.array(data["translation"])
color_to_ee = np.eye(4)
color_to_ee[:3, :3] = rotation
color_to_ee[:3, 3:] = translation
ControlUtil.connect_robot()
ControlUtil.franka_reset()
ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE)
view_data = CommunicateUtil.get_view_data(init=True)
depth_to_color = np.array(view_data["depth_to_color"])
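    # compose depth -> color (reported by the camera) with color -> end-effector
    # (the hand-eye result) to obtain depth -> end-effector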
depth_to_ee = color_to_ee @ depth_to_color
print(depth_to_ee)
with open("depth_to_ee.json", "w") as f:
json.dump(depth_to_ee.tolist(), f, indent=4)
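# A minimal sketch (not part of the original pipeline) of how the saved hand-eye
# result could be applied: compose camera->gripper with the current gripper->base
# pose to map a camera-frame point into the robot base frame.
def example_apply_hand_eye(calibration_res_path, point_in_cam):
    with open(calibration_res_path, "r") as f:
        data = json.load(f)
    cam_to_ee = np.eye(4)
    cam_to_ee[:3, :3] = np.array(data["rotation"])
    cam_to_ee[:3, 3] = np.array(data["translation"]).flatten()
    ee_to_base = ControlUtil.get_curr_gripper_to_base_pose()
    cam_to_base = ee_to_base @ cam_to_ee
    point_h = np.append(np.asarray(point_in_cam, dtype=float), 1.0)
    return (cam_to_base @ point_h)[:3]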
if __name__ == "__main__":
res = example_calibration()
get_depth_to_end_effector_pose("hand_eye.json")
# example_view_generator()