import cv2 import numpy as np import open3d as o3d import sys, os sys.path.append(os.path.join(os.path.dirname(__file__), "..")) from utils.control_util import ControlUtil from utils.communicate_util import CommunicateUtil import pickle from tqdm import tqdm import json import pyrealsense2 as rs # marker parameters MARKER_TYPE = "aruco" MARKER_INFO = {"id": 582, "size": 0.1} # view parameters VIEW_NUM = 6 LOOK_AT_PT = np.array([0.5, 0, 0.2]) TOP_DOWN_DIST = [0.3, 0.45, 0.55] TOP_DOWN_DIRECTION = np.array([0., 0., -1.]) RADIUS = [0.25, 0.2, 0.1] INIT_GRIPPER_POSE = np.array([ [1, 0, 0, 0.56], [0, -1, 0, 0], [0, 0, -1, 0.6], [0, 0, 0, 1] ]) DEFAULT_EE_TO_BASE = np.array([ [0, -1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1] ]) class HandEyeCalibration: """ A class to perform hand-eye calibration for a robotic system using various camera types and markers. Attributes: marker_to_cam (list): List to store marker to camera transformation matrices. end_effector_to_base (list): List to store end-effector to base transformation matrices. marker_type (str): Type of marker used for calibration (default is "aruco"). marker_info (dict): Information about the marker, including its ID and size. camera (object): Camera object used for capturing images. intrinsics (dict): Intrinsic parameters of the camera. dist (optional): Distortion coefficients of the camera. marker_solver (object): Solver object for detecting and solving marker poses. camera_type (str): Type of camera used (default is "RealSense"). "RealSense" and "Inspire" are supported. Methods: get_marker_to_cam_pose(color_image, visualize=False): Gets the pose of the marker relative to the camera from a color image. capture_single_frame(): Captures a single frame from the camera and gets the current end-effector pose. capture_frames(view_num, look_at_pt, top_down_dist, top_down_direrction, radius, visualize=False): Captures multiple frames by moving the end-effector to different poses and records the marker and end-effector poses. get_hand_eye(marker_to_cam_poses, end_effector_to_base_poses): Computes the hand-eye calibration matrix using the captured marker and end-effector poses. calibrate(view_num, look_at_pt, top_down_dist, top_down_direrction, radius): Performs the full calibration process by capturing frames and computing the hand-eye calibration matrix. """ def __init__(self, dist=None, marker_type="aruco", marker_info={"id": 0, "size": 0.1}, camera_type="RealSense"): self.marker_to_cam = [] self.end_effector_to_base = [] self.marker_type = marker_type self.marker_info = marker_info ControlUtil.connect_robot() ControlUtil.franka_reset() if camera_type == "RealSense": self.camera = RealSenseCamera() elif camera_type == "Inspire": self.camera = InspireCamera() else: assert False, "Not implemented yet" self.intrinsics = self.camera.get_color_intrinsics() self.dist = dist self.marker_type = marker_type if self.marker_type == "aruco": self.marker_solver = ArcuoMarkerSolver(self.intrinsics, dist, marker_info) self.camera_type = camera_type def get_marker_to_cam_pose(self, color_image, visualize=False): res = self.marker_solver.get_marker_to_cam_pose(color_image, visualize) return res def capture_single_frame(self): color_image = self.camera.get_color_image() end_effector_pose = ControlUtil.get_curr_gripper_to_base_pose() return color_image, end_effector_pose def capture_frames(self, view_num, look_at_pt, top_down_dist, top_down_direrction, radius, visualize=False): circle_num = len(radius) poses = [] for i in range(circle_num): reverse_order = i % 2 == 1 pose_generator = EndEffectorPoseGenerator(view_num, look_at_pt, top_down_dist[i], top_down_direrction, radius[i], reverse_order) poses += [pose @ DEFAULT_EE_TO_BASE for pose in pose_generator.poses] if visualize == True: pose_visualizer = PoseVisualizer(poses) pose_visualizer.visualize() pbar = tqdm(total=len(poses), desc="Capturing frames") for pose in poses: pbar.update(1) ControlUtil.set_gripper_pose(pose) color_image, end_effector_pose = self.capture_single_frame() if color_image is None: print("Failed to capture the frame") continue res = self.get_marker_to_cam_pose(color_image, visualize=True) if res is None: print("Failed to get marker pose") continue else: marker_to_cam_pose = np.eye(4) rvec = res["rvec"] tvec = res["tvec"] rot_mat = cv2.Rodrigues(rvec)[0] marker_to_cam_pose[:3, :3] = rot_mat marker_to_cam_pose[:3, 3] = tvec self.marker_to_cam.append(marker_to_cam_pose) self.end_effector_to_base.append(end_effector_pose) def get_hand_eye(self, marker_to_cam_poses, end_effector_to_base_poses): ############################## Debug ################################ # with open("marker_to_cam_poses.pkl", "wb") as f: # pickle.dump(marker_to_cam_poses, f) # with open("end_effector_to_base_poses.pkl", "wb") as f: # pickle.dump(end_effector_to_base_poses, f) # with open("marker_to_cam_poses.pkl", "rb") as f: # marker_to_cam_poses = pickle.load(f) # with open("end_effector_to_base_poses.pkl", "rb") as f: # end_effector_to_base_poses = pickle.load(f) ##################################################################### R_marker_to_cam = [] t_marker_to_cam = [] R_end_effector_to_base = [] t_end_effector_to_base = [] for index, marker_to_cam_pose in enumerate(marker_to_cam_poses): if marker_to_cam_pose is None: continue R_marker_to_cam.append(marker_to_cam_pose[:3, :3]) t_marker_to_cam.append(marker_to_cam_pose[:3, 3]) R_end_effector_to_base.append(end_effector_to_base_poses[index][:3, :3]) t_end_effector_to_base.append(end_effector_to_base_poses[index][:3, 3]) if len(R_marker_to_cam) < 3: print("Not enough data to calibrate") return None R_marker_to_cam = np.array(R_marker_to_cam) t_marker_to_cam = np.array(t_marker_to_cam) R_end_effector_to_base = np.array(R_end_effector_to_base) t_end_effector_to_base = np.array(t_end_effector_to_base) from ipdb import set_trace; set_trace() hand_eye = cv2.calibrateHandEye(R_end_effector_to_base, t_end_effector_to_base, R_marker_to_cam, t_marker_to_cam, cv2.CALIB_HAND_EYE_TSAI) cv2.destroyAllWindows() return hand_eye def calibrate(self, view_num, look_at_pt, top_down_dist, top_down_direrction, radius): self.capture_frames(view_num, look_at_pt, top_down_dist, top_down_direrction, radius) marker_to_cam = np.array(self.marker_to_cam) end_effector_to_base = np.array(self.end_effector_to_base) hand_eye = self.get_hand_eye(marker_to_cam, end_effector_to_base) # hand_eye = self.get_hand_eye(None, None) return hand_eye class ArcuoMarkerSolver: def __init__(self, intrinsics, dist=None, marker_info={"id": 0, "size": 0.1}): self.intrinsics = intrinsics self.dist = dist self.marker_info = marker_info def solve_arcuo_marker_pose(self, color_image): self.res = [] aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_ARUCO_ORIGINAL) aruco_params = cv2.aruco.DetectorParameters() cv2.imshow("color_image", color_image) cv2.waitKey(500) corners, ids, rejectedImgPoints = cv2.aruco.detectMarkers(color_image, aruco_dict, parameters=aruco_params) if ids is None: return None for i in range(len(ids)): rvec, tvec, _ = cv2.aruco.estimatePoseSingleMarkers(corners[i], self.marker_info["size"], self.intrinsics, self.dist) self.res.append({ 'rvec': rvec, 'tvec': tvec, 'corners': corners[i], 'ids': ids[i] }) for res in self.res: if res["ids"][0] == self.marker_info["id"]: return res return None def get_marker_to_cam_pose(self, color_image, visualize=False): res = self.solve_arcuo_marker_pose(color_image) if visualize and res is not None: self.visualize_res(color_image, res) return res def visualize_res(self, color_image, res): rvec = res["rvec"] tvec = res["tvec"] corners = res["corners"] ids = res["ids"] aruco_frame = cv2.drawFrameAxes(color_image, self.intrinsics, self.dist, rvec, tvec, 0.05) aruco_frame = cv2.aruco.drawDetectedMarkers(aruco_frame, (corners, ), ids) cv2.imshow("aruco_frame", aruco_frame) cv2.waitKey(500) class EndEffectorPoseGenerator: def __init__(self, view_num, look_at_pt, top_down_dist, top_down_direrction, radius, reverse_order=False): self.view_num = view_num self.look_at_pt = look_at_pt self.top_down_dist = top_down_dist self.top_down_direrction = top_down_direrction self.radius = radius self.poses = self.generate_poses(reverse_order) def generate_poses(self, reverse_order=False): poses = [] for i in range(self.view_num): order = i if not reverse_order else self.view_num - i - 1 pose = self.get_pose(order) poses.append(pose) return poses def get_pose(self, i): angle = 0.8 * np.pi * i / self.view_num + 0.7 * np.pi start_point_x = self.look_at_pt[0] + self.radius * np.cos(angle) start_point_y = self.look_at_pt[1] + self.radius * np.sin(angle) start_point_z = self.look_at_pt[2] + self.top_down_dist look_at_vector = self.look_at_pt - np.array([start_point_x, start_point_y, start_point_z]) look_at_vector = look_at_vector / np.linalg.norm(look_at_vector) up_vector = self.top_down_direrction noise = np.random.uniform(0, 0.1, up_vector.shape) right_vector = np.cross(look_at_vector, up_vector + noise) right_vector = right_vector / np.linalg.norm(right_vector) up_vector = np.cross(look_at_vector, right_vector) up_vector = up_vector / np.linalg.norm(up_vector) pose = np.eye(4) pose[:3, :3] = self.get_rotation(right_vector, up_vector, look_at_vector) pose[:3, 3] = np.array([start_point_x, start_point_y, start_point_z]) return pose def get_rotation(self, right_vector, up_vector, look_at_vector): rotation = np.eye(3) rotation[:, 0] = right_vector rotation[:, 1] = up_vector rotation[:, 2] = look_at_vector return rotation class PoseVisualizer: def __init__(self, poses): self.poses = poses def visualize(self): vis = o3d.visualization.Visualizer() vis.create_window() for pose in self.poses: mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.05) mesh_frame.transform(pose) vis.add_geometry(mesh_frame) print("Press q to close the window") vis.run() class CAMERA: def __init__(self): self.color_intrinsics = None self.depth_intrinsics = None self.color = None self.depth = None def get_color_intrinsics(self): pass def get_depth_intrinsics(self): pass def get_color_image(self): pass def get_depth_image(self): pass class RealSenseCamera(CAMERA): def __init__(self): super().__init__() self.pipeline = rs.pipeline() config = rs.config() config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) align = rs.align(rs.stream.color) self.pipeline.start(config) self.color_intrinsics = self.get_intrinsics("color") self.depth_intrinsics = self.get_intrinsics("depth") def get_color_intrinsics(self): return self.color_intrinsics def get_depth_intrinsics(self): return self.depth_intrinsics def get_color_image(self): frames = self.pipeline.wait_for_frames() color_frame = frames.get_color_frame() color_image = np.asanyarray(color_frame.get_data()) return color_image def get_depth_image(self): frames = self.pipeline.wait_for_frames() depth_frame = frames.get_depth_frame() depth_image = np.asanyarray(depth_frame.get_data()) return depth_image def get_intrinsics(self, camra_type): if camra_type == "color": profile = self.pipeline.get_active_profile() color_profile = rs.video_stream_profile(profile.get_stream(rs.stream.color)) color_intrinsics = color_profile.get_intrinsics() K = [[color_intrinsics.fx, 0, color_intrinsics.ppx], [0, color_intrinsics.fy, color_intrinsics.ppy], [0, 0, 1]] return np.array(K) elif camra_type == "depth": profile = self.pipeline.get_active_profile() depth_profile = rs.video_stream_profile(profile.get_stream(rs.stream.depth)) depth_intrinsics = depth_profile.get_intrinsics() K = [[depth_intrinsics.fx, 0, depth_intrinsics.ppx], [0, depth_intrinsics.fy, depth_intrinsics.ppy], [0, 0, 1]] return np.array(K) class InspireCamera(CAMERA): def __init__(self): super().__init__() ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE) view_data = CommunicateUtil.get_view_data(init=True) self.color_intrinsics = self.get_intrinsics(view_data["color_intrinsics"], view_data["color_image"]) self.depth_intrinsics = self.get_intrinsics(view_data["depth_intrinsics"], view_data["depth_image"]) def get_color_intrinsics(self): return self.color_intrinsics def get_depth_intrinsics(self): return self.depth_intrinsics def get_color_image(self): view_data = CommunicateUtil.get_view_data() if view_data is None: return None else: color_image = view_data["color_image"] return color_image def get_depth_image(self): view_data = CommunicateUtil.get_view_data() if view_data is None: return None else: depth_image = view_data["depth_image"] return depth_image def get_intrinsics(self, intrinsics, image): height = image.shape[0] width = image.shape[1] scale_h = height / intrinsics["height"] scale_w = width / intrinsics["width"] fx = intrinsics["fx"] * scale_w fy = intrinsics["fy"] * scale_h cx = intrinsics["cx"] * scale_w cy = intrinsics["cy"] * scale_h K = np.array([ [fx, 0, cx], [ 0, fy, cy], [ 0, 0, 1] ]) return K def example_view_generator(): pose_generator = EndEffectorPoseGenerator(VIEW_NUM, LOOK_AT_PT, TOP_DOWN_DIST, TOP_DOWN_DIRECTION, RADIUS) base_pose = np.eye(4) test_pose = np.array([ [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0.2], [0, 0, 0, 1] ]) defalut_ee2base = np.array([ [0, -1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1] ]) poses = [pose @ defalut_ee2base for pose in pose_generator.poses] + [base_pose, test_pose] visualizer = PoseVisualizer(poses) visualizer.visualize() def example_calibration(): # ControlUtil.connect_robot() # ControlUtil.franka_reset() # ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE) # view_data = CommunicateUtil.get_view_data(init=True) # intrinsics = get_intrinsics(view_data["color_intrinsics"], view_data["color_image"]) calibration = HandEyeCalibration( # intrinsics=intrinsics, dist=None, marker_type=MARKER_TYPE, marker_info=MARKER_INFO, camera_type="Inspire" ) hand_eye = calibration.calibrate( view_num=VIEW_NUM, look_at_pt=LOOK_AT_PT, top_down_dist=TOP_DOWN_DIST, top_down_direrction=TOP_DOWN_DIRECTION, radius=RADIUS ) print(hand_eye) res = { 'rotation': hand_eye[0].tolist(), 'translation': hand_eye[1].tolist() } with open("hand_eye.json", "w") as f: json.dump(res, f, indent=4) print("Hand eye calibration finished") return res def get_depth_to_end_effector_pose(calibration_res_path): with open(calibration_res_path, "r") as f: data = json.load(f) rotation = np.array(data["rotation"]) translation = np.array(data["translation"]) color_to_ee = np.eye(4) color_to_ee[:3, :3] = rotation color_to_ee[:3, 3:] = translation ControlUtil.connect_robot() ControlUtil.franka_reset() ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE) view_data = CommunicateUtil.get_view_data(init=True) depth_to_color = np.array(view_data["depth_to_color"]) depth_to_ee = color_to_ee @ depth_to_color print(depth_to_ee) with open("depth_to_ee.json", "w") as f: json.dump(depth_to_ee.tolist(), f, indent=4) if __name__ == "__main__": res = example_calibration() get_depth_to_end_effector_pose("hand_eye.json") # example_view_generator()