update

2024-10-19 19:06:09 +08:00
parent 5dae3c53db
commit be7ec1a433
4 changed files with 71 additions and 42 deletions
--- a/utils/data_load.py
+++ b/utils/data_load.py
@@ -4,11 +4,37 @@ import json
 import cv2
 import trimesh
 import torch
+import OpenEXR
+import Imath
 from utils.pts import PtsUtil


 class DataLoadUtil:
    TABLE_POSITION = np.asarray([0, 0, 0.8215])
+    
+    @staticmethod
+    def load_exr_image(file_path):
+        # Open the EXR file. NOTE(review): the handle is never closed explicitly in this function.
+        exr_file = OpenEXR.InputFile(file_path)
+        
+        # Read the EXR header and derive the image size from its data window (bounds are inclusive, hence +1)
+        header = exr_file.header()
+        dw = header['dataWindow']
+        width = dw.max.x - dw.min.x + 1
+        height = dw.max.y - dw.min.y + 1
+
+        # Channels to read; normal images are usually stored as RGB
+        float_channels = ['R', 'G', 'B']
+
+        # Read each channel from the EXR file and convert it to a float32 array of shape (height, width)
+        img_data = []
+        for channel in float_channels:
+            channel_data = exr_file.channel(channel, Imath.PixelType(Imath.PixelType.FLOAT))
+            img_data.append(np.frombuffer(channel_data, dtype=np.float32).reshape((height, width)))
+
+        # Stack the channels into a single (height, width, 3) RGB image
+        img = np.stack(img_data, axis=-1)
+        return img

    @staticmethod
    def get_display_table_info(root, scene_name):
@@ -148,34 +174,31 @@ class DataLoadUtil:
            return mask_image
        
    @staticmethod
-    def load_normal(path, binocular=False, left_only=False):
+    def load_normal(path, binocular=False, left_only=False, file_type="exr"):  # NOTE(review): file_type only selects the file extension; every branch still reads via load_exr_image
        if binocular and not left_only:
            normal_path_L = os.path.join(
-                os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+                os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
            )
-            normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
-            normal_image_L = cv2.cvtColor(normal_image_L, cv2.COLOR_BGR2RGB)
-            normal_path_R = os.path.join(
-                os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
-            )
-            normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
-            normal_image_R = cv2.cvtColor(normal_image_R, cv2.COLOR_BGR2RGB)
+            normal_image_L = DataLoadUtil.load_exr_image(normal_path_L)
            
-            normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
-            normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
+            normal_path_R = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + f"_R.{file_type}"
+            )
+            normal_image_R = DataLoadUtil.load_exr_image(normal_path_R)
+            normalized_normal_image_L = normal_image_L * 2.0 - 1.0  # rescales x to 2x - 1; presumably the EXR stores normals in [0, 1] -- TODO confirm
+            normalized_normal_image_R = normal_image_R * 2.0 - 1.0
            return normalized_normal_image_L, normalized_normal_image_R
        else:
            if binocular and left_only:
                normal_path = os.path.join(
-                    os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+                    os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
                )
            else:
                normal_path = os.path.join(
-                    os.path.dirname(path), "normal", os.path.basename(path) + ".png"
+                    os.path.dirname(path), "normal", os.path.basename(path) + f".{file_type}"
                )
-            normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
-            normal_image = cv2.cvtColor(normal_image, cv2.COLOR_BGR2RGB)
-            normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
+            normal_image = DataLoadUtil.load_exr_image(normal_path)
+            normalized_normal_image = normal_image * 2.0 - 1.0  # rescales x to 2x - 1; presumably the EXR stores normals in [0, 1] -- TODO confirm
            return normalized_normal_image

    @staticmethod
@@ -213,11 +236,12 @@ class DataLoadUtil:
            label_data = json.load(f)
        cam_to_world = np.asarray(label_data["extrinsic"])
        cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
-        world_to_display_table = np.eye(4)
-        world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
-            root_dir, scene_name
-        )
+        
        if display_table_as_world_space_origin:
+            world_to_display_table = np.eye(4)
+            world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
+                root_dir, scene_name
+            )
            cam_to_world = np.dot(world_to_display_table, cam_to_world)
        cam_intrinsic = np.asarray(label_data["intrinsic"])
        cam_info = {