success

2024-10-09 16:13:22 +00:00
commit 0ea3f048dc
437 changed files with 44406 additions and 0 deletions
--- a/modules/rgb_encoder/init.py
+++ b/modules/rgb_encoder/init.py
--- a/modules/rgb_encoder/abstract_rgb_encoder.py
+++ b/modules/rgb_encoder/abstract_rgb_encoder.py
@@ -0,0 +1,51 @@
+from abc import abstractmethod
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+import torch
+from torch import nn
+import numpy as np
+
+
+class RGBEncoder(nn.Module):
+    """Base class for RGB image encoders.
+
+    Subclasses are expected to override :meth:`encode_rgb`. The class does
+    not use ``ABCMeta``, so ``@abstractmethod`` is advisory only and does not
+    block instantiation.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    @abstractmethod
+    def encode_rgb(self, rgb):
+        """Encode ``rgb`` into features; implemented by subclasses."""
+        pass
+
+    @staticmethod
+    def _min_max_normalize(values):
+        """Rescale ``values`` to ``[0, 1]``; a constant input maps to zeros."""
+        lowest = values.min()
+        span = values.max() - lowest
+        if span == 0:
+            # Avoid 0/0 -> NaN when every entry is identical.
+            return np.zeros_like(values)
+        return (values - lowest) / span
+
+    @staticmethod
+    def visualize_features(features, save_path=None):
+        """Plot a 3-component PCA projection of per-patch features.
+
+        Three panels are drawn with matplotlib: the normalised first PCA
+        component, the ``> 0.5`` threshold mask of that component, and the
+        three normalised PCA components rendered as an RGB image.
+
+        Args:
+            features: 2-D ``(patch, feat_dim)`` ``torch.Tensor`` or array-like
+                object exposing ``shape`` and ``reshape``. ``patch`` must be a
+                perfect square because the patches are laid out on a square
+                grid.
+            save_path: If truthy, the RGB panel is written there with
+                ``plt.imsave``; otherwise the figure is shown with
+                ``plt.show()``.
+
+        Raises:
+            ValueError: If ``patch`` is not a perfect square.
+        """
+        patch, feat_dim = features.shape
+        patch_h = int(round(patch ** 0.5))
+        patch_w = patch_h
+        if patch_h * patch_w != patch:
+            raise ValueError(
+                f"Expected a square number of patches, got {patch}"
+            )
+        total_features = features.reshape(patch_h * patch_w, feat_dim)
+        if isinstance(total_features, torch.Tensor):
+            # detach() so tensors that require grad can be converted too.
+            total_features = total_features.detach().cpu().numpy()
+
+        pca = PCA(n_components=3)
+        pca.fit(total_features)
+        pca_features = pca.transform(total_features)
+        pca_features[:, 0] = RGBEncoder._min_max_normalize(pca_features[:, 0])
+        plt.subplot(1, 3, 1)
+        plt.imshow(pca_features[:, 0].reshape(patch_h, patch_w))
+
+        pca_features_bg = pca_features[:, 0] > 0.5  # from first histogram
+        # The foreground mask selects every patch, so the second PCA below is
+        # fitted on all features and its result overwrites the zeroed
+        # background entries further down.
+        pca_features_fg = np.ones_like(pca_features_bg)
+        plt.subplot(1, 3, 2)
+        plt.imshow(pca_features_bg.reshape(patch_h, patch_w))
+
+        pca.fit(total_features[pca_features_fg])
+        pca_features_left = pca.transform(total_features[pca_features_fg])
+        for i in range(3):
+            pca_features_left[:, i] = RGBEncoder._min_max_normalize(
+                pca_features_left[:, i]
+            )
+
+        pca_features_rgb = pca_features.copy()
+        pca_features_rgb[pca_features_bg] = 0
+        pca_features_rgb[pca_features_fg] = pca_features_left
+        pca_features_rgb = pca_features_rgb.reshape(1, patch_h, patch_w, 3)
+
+        plt.subplot(1, 3, 3)
+        if save_path:
+            plt.imsave(save_path, pca_features_rgb[0])
+        else:
+            plt.imshow(pca_features_rgb[0])
+            plt.show()
--- a/modules/rgb_encoder/dinov2_encoder.py
+++ b/modules/rgb_encoder/dinov2_encoder.py
@@ -0,0 +1,20 @@
+
+import torch
+from modules.rgb_encoder.abstract_rgb_encoder import RGBEncoder
+from annotations.external_module import external_freeze
+
+@external_freeze
+class Dinov2Encoder(RGBEncoder):
+    """RGB encoder backed by a model loaded from the local
+    ``modules/module_lib/dinov2`` torch.hub directory."""
+
+    def __init__(self, model_name):
+        """Remember ``model_name`` and load the model immediately."""
+        super(Dinov2Encoder, self).__init__()
+        self.model_name = model_name
+        self.load()
+
+    def load(self):
+        """Load ``self.model_name`` via ``torch.hub`` and move it to CUDA."""
+        hub_model = torch.hub.load(
+            'modules/module_lib/dinov2',
+            self.model_name,
+            source='local',
+        )
+        self.dinov2 = hub_model.cuda()
+
+    def encode_rgb(self, rgb):
+        """Return the ``'x_norm_patchtokens'`` entry of the model's
+        ``forward_features(rgb)`` output, computed with gradients disabled."""
+        with torch.no_grad():
+            outputs = self.dinov2.forward_features(rgb)
+            return outputs['x_norm_patchtokens']
--- a/modules/rgb_encoder/rgb_encoder_factory.py
+++ b/modules/rgb_encoder/rgb_encoder_factory.py
@@ -0,0 +1,59 @@
+import sys
+import os
+# Climb three directory levels up from this file and put the resulting
+# directory on sys.path so the absolute ``modules.*`` imports below resolve
+# when this file is run directly.
+PROJECT_ROOT = os.path.abspath(__file__)
+for i in range(3):
+    PROJECT_ROOT = os.path.split(PROJECT_ROOT)[0]
+path = PROJECT_ROOT
+sys.path.append(path)
+
+from modules.rgb_encoder.abstract_rgb_encoder import RGBEncoder
+from modules.rgb_encoder.dinov2_encoder import Dinov2Encoder
+
+
+class RGBEncoderFactory:
+    """Factory that builds an RGB encoder from a name and a config mapping."""
+
+    @staticmethod
+    def create(name, config) -> RGBEncoder:
+        """Create the encoder registered under ``name``.
+
+        Args:
+            name: Encoder identifier; only ``"dinov2"`` is supported.
+            config: Mapping in which ``config["rgb_encoder"][name]`` holds
+                the encoder settings (``"model_name"`` for ``"dinov2"``).
+
+        Returns:
+            The constructed encoder instance.
+
+        Raises:
+            ValueError: If ``name`` is not a supported encoder.
+            KeyError: If the settings for a supported encoder are missing
+                from ``config``.
+        """
+        # Check the name before touching the config so an unsupported
+        # encoder always yields the descriptive ValueError rather than a
+        # KeyError from the config lookup.
+        if name == "dinov2":
+            rgb_encoder_config = config["rgb_encoder"][name]
+            return Dinov2Encoder(
+                model_name=rgb_encoder_config["model_name"]
+            )
+        raise ValueError(f"Unknown encoder name: {name}")
+
+
+''' ------------ Debug ------------ '''
+if __name__ == "__main__":
+    # Manual smoke test: load the config, encode one image with the
+    # "dinov2" encoder on cuda:0 and visualise the resulting features.
+    from configs.config import ConfigManager
+    import torch
+    from PIL import Image
+    import cv2
+    from torchvision import transforms
+    ConfigManager.load_config_with('configs/local_train_config.yaml')
+    ConfigManager.print_config()
+    image_size = 480
+    path = "/mnt/h/BaiduSyncdisk/workspace/ws_active_pose/project/ActivePerception/test/img0.jpg"
+    img = cv2.imread(path)
+    if img is None:
+        # cv2.imread signals an unreadable file by returning None instead of
+        # raising; fail here with a clear message rather than in cvtColor.
+        raise FileNotFoundError(f"Could not read image: {path}")
+    img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+
+    # Crop to a multiple of 14 pixels per side.
+    crop_size = (image_size // 14) * 14
+    transform = transforms.Compose([
+        transforms.Resize(image_size),
+        transforms.CenterCrop(crop_size),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=0.5, std=0.2),
+    ])
+
+    rgb = transform(img)
+    print(rgb.shape)
+    # The Dinov2Encoder constructor already calls load(), so no explicit
+    # reload is needed after creation.
+    rgb_encoder = RGBEncoderFactory.create(name="dinov2", config=ConfigManager.get("modules"))
+    print(rgb_encoder)
+    rgb = rgb.to("cuda:0")
+    rgb = rgb.unsqueeze(0)  # add a leading batch dimension
+    rgb_encoder = rgb_encoder.to("cuda:0")
+
+    rgb_feat = rgb_encoder.encode_rgb(rgb)
+
+    print(rgb_feat.shape)
+    rgb_encoder.visualize_features(rgb_feat[0])