commit 5c96e3979f
parent 8539ce0b9c
Author: hofee
Date: 2025-04-20 10:26:09 +08:00

12 changed files with 1810 additions and 0 deletions


@@ -0,0 +1,520 @@
import torch
import numpy as np
import os
import yaml
import time
import argparse
from typing import Dict, Any, List

from nerf_model import NeRF
from pipeline import ActiveReconstructionPolicy
from uncertainty_guide import UncertaintyGuideNeRF
from utils.volume_render_util import VolumeRendererUtil

import mcubes   # Python marching-cubes library
import trimesh  # mesh handling
from tqdm import tqdm  # progress bar


class ActiveReconstruction:
    """Uncertainty-guided active 3D reconstruction based on NeRF."""

    def __init__(self, config_path: str):
        """
        Initialize the active reconstruction system.

        Args:
            config_path: path to the YAML configuration file
        """
        # Load configuration
        with open(config_path, 'r') as f:
            self.config = yaml.safe_load(f)

        # Select device
        self.device = torch.device(self.config.get("device", "cuda") if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Create output directory
        self.output_dir = self.config.get("output_dir", "output")
        os.makedirs(self.output_dir, exist_ok=True)

        # Initialize the NeRF model
        self._init_nerf_model()

        # Initialize the view-selection policy
        self.policy = ActiveReconstructionPolicy(self.config)

    def _init_nerf_model(self):
        """Build the NeRF model from the configuration."""
        nerf_config = self.config.get("nerf", {})
        model_config = {
            "pos_enc_dim": nerf_config.get("pos_enc_dim", 10),
            "dir_enc_dim": nerf_config.get("dir_enc_dim", 4),
            "netdepth_coarse": nerf_config.get("netdepth_coarse", 8),
            "netwidth_coarse": nerf_config.get("netwidth_coarse", 256),
            "netdepth_fine": nerf_config.get("netdepth_fine", 8),
            "netwidth_fine": nerf_config.get("netwidth_fine", 256),
            "skips": nerf_config.get("skips", [4]),
            "use_viewdirs": nerf_config.get("use_viewdirs", True)
        }
        self.nerf_model = NeRF(model_config).to(self.device)

    def _generate_rays(self,
                       poses: torch.Tensor,
                       H: int,
                       W: int,
                       focal: float) -> tuple:
        """
        Generate rays for every camera pose.

        Args:
            poses: camera poses [N, 4, 4]
            H: image height
            W: image width
            focal: focal length

        Returns:
            rays_o: ray origins [N, H*W, 3]
            rays_d: ray directions [N, H*W, 3]
        """
        # Build the pixel-coordinate grid
        i, j = torch.meshgrid(
            torch.linspace(0, W - 1, W),
            torch.linspace(0, H - 1, H),
            indexing='ij'
        )
        i = i.t()  # [H, W]
        j = j.t()  # [H, W]

        # Pixel coordinates -> ray directions in the camera frame
        dirs = torch.stack([
            (i - W * 0.5) / focal,
            -(j - H * 0.5) / focal,
            -torch.ones_like(i)
        ], dim=-1)  # [H, W, 3]

        # Generate rays for each pose
        rays_o_list = []
        rays_d_list = []
        for pose in poses:
            # Rotate ray directions into the world frame
            rays_d = torch.sum(dirs[..., None, :] * pose[:3, :3], dim=-1)  # [H, W, 3]
            # The ray origin is the camera position
            rays_o = pose[:3, -1].expand(rays_d.shape)  # [H, W, 3]

            # Flatten to batch format
            rays_o = rays_o.reshape(-1, 3)  # [H*W, 3]
            rays_d = rays_d.reshape(-1, 3)  # [H*W, 3]
            rays_o_list.append(rays_o)
            rays_d_list.append(rays_d)

        # Stack the rays of all poses
        rays_o_all = torch.stack(rays_o_list, dim=0)  # [N, H*W, 3]
        rays_d_all = torch.stack(rays_d_list, dim=0)  # [N, H*W, 3]
        return rays_o_all, rays_d_all

    def _sample_pixel_batch(self,
                            images: torch.Tensor,
                            rays_o: torch.Tensor,
                            rays_d: torch.Tensor,
                            batch_size: int) -> tuple:
        """
        Randomly sample a batch of pixels.

        Args:
            images: image data [N, H, W, 3]
            rays_o: ray origins [N, H*W, 3]
            rays_d: ray directions [N, H*W, 3]
            batch_size: batch size

        Returns:
            sampled_rays_o: sampled ray origins [batch_size, 3]
            sampled_rays_d: sampled ray directions [batch_size, 3]
            sampled_pixels: sampled pixel values [batch_size, 3]
        """
        N, H, W = images.shape[0], images.shape[1], images.shape[2]
        total_rays = N * H * W

        # Flatten the images
        pixels = images.reshape(N, -1, 3)  # [N, H*W, 3]

        # Draw a random batch of (image, pixel) index pairs
        indices = torch.randint(0, total_rays, size=(batch_size,))
        img_indices = indices // (H * W)
        pixel_indices = indices % (H * W)

        # Gather rays and pixels with advanced indexing (avoids a Python loop)
        sampled_rays_o = rays_o[img_indices, pixel_indices]
        sampled_rays_d = rays_d[img_indices, pixel_indices]
        sampled_pixels = pixels[img_indices, pixel_indices]
        return sampled_rays_o, sampled_rays_d, sampled_pixels

    def train_nerf(self,
                   images: torch.Tensor,
                   poses: torch.Tensor,
                   epochs: int = 5000,
                   batch_size: int = 4096,
                   lr: float = 5e-4,
                   start_from_model=None) -> float:
        """
        Train the NeRF model.

        Args:
            images: image data [N, H, W, 3]
            poses: camera poses [N, 4, 4]
            epochs: number of training iterations
            batch_size: batch size
            lr: learning rate
            start_from_model: optional model to initialize the weights from

        Returns:
            final_loss: best loss reached during training
        """
        print(f"Training NeRF with {len(images)} images...")

        # Image and sampling parameters
        H, W = images.shape[1], images.shape[2]
        sampling_config = self.config.get("sampling", {})
        camera_config = self.config.get("camera", {})
        focal = camera_config.get("focal", 1000.0)
        near = camera_config.get("near", 2.0)
        far = camera_config.get("far", 6.0)
        coarse_samples = sampling_config.get("coarse_samples", 64)
        fine_samples = sampling_config.get("fine_samples", 128)
        perturb = sampling_config.get("perturb", True)

        # Initialize from a given model if provided
        if start_from_model is not None:
            print("Initializing weights from an existing model")
            self.nerf_model.load_state_dict(start_from_model.state_dict())

        # Optimizer and loss
        optimizer = torch.optim.Adam(self.nerf_model.parameters(), lr=lr)
        mse_loss = torch.nn.MSELoss()

        # Switch to training mode
        self.nerf_model.train()

        # Precompute rays for all images (speeds up training)
        rays_o, rays_d = self._generate_rays(poses, H, W, focal)
        rays_o = rays_o.to(self.device)
        rays_d = rays_d.to(self.device)
        images = images.to(self.device)

        # Training loop
        best_loss = float('inf')
        for epoch in range(epochs):
            # Sample a random batch of rays
            batch_rays_o, batch_rays_d, target_pixels = self._sample_pixel_batch(
                images, rays_o, rays_d, batch_size)

            # Normalize ray directions
            batch_rays_d = torch.nn.functional.normalize(batch_rays_d, dim=-1)

            # Near/far plane tensors
            near_tensor = torch.ones_like(batch_rays_o[..., 0]) * near
            far_tensor = torch.ones_like(batch_rays_o[..., 0]) * far

            optimizer.zero_grad()

            # Volume rendering (coarse pass followed by fine pass)
            rgb_map, _, _, _ = VolumeRendererUtil.render_rays(
                self.nerf_model,
                batch_rays_o,
                batch_rays_d,
                near_tensor,
                far_tensor,
                coarse_samples,
                fine_samples,
                perturb
            )

            # Loss and backward pass
            loss = mse_loss(rgb_map, target_pixels)
            loss.backward()
            optimizer.step()

            # Report progress
            if (epoch + 1) % 100 == 0:
                psnr = -10.0 * torch.log10(loss)
                print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.6f}, PSNR: {psnr.item():.2f}")

            # Keep the best model
            if loss.item() < best_loss:
                best_loss = loss.item()
                torch.save(self.nerf_model.state_dict(), os.path.join(self.output_dir, "best_model.pth"))

        # Reload the best model
        self.nerf_model.load_state_dict(
            torch.load(os.path.join(self.output_dir, "best_model.pth"), map_location=self.device))
        print(f"NeRF training finished, best loss: {best_loss:.6f}")
        return best_loss

    def extract_mesh(self, output_path: str, resolution: int = 256, threshold: float = 50.0, bound: float = 2.0):
        """
        Extract a 3D mesh from the NeRF model with the marching cubes algorithm.

        Args:
            output_path: output path
            resolution: voxel grid resolution
            threshold: density threshold that defines the surface
            bound: half-extent of the voxel grid
        """
        print(f"Extracting mesh from NeRF (resolution: {resolution})...")
        self.nerf_model.eval()  # evaluation mode

        # Sampling grid
        x = torch.linspace(-bound, bound, resolution)
        y = torch.linspace(-bound, bound, resolution)
        z = torch.linspace(-bound, bound, resolution)
        xx, yy, zz = torch.meshgrid(x, y, z, indexing='ij')

        # Query points, flattened in the same order as the grid
        points = torch.stack([xx, yy, zz], dim=-1).reshape(-1, 3).to(self.device)

        # Density field
        print("Evaluating the volume density field...")
        density_field = torch.zeros(resolution ** 3)

        # Process in batches to avoid running out of GPU memory
        batch_size = 4096  # adjust to the available GPU memory
        with torch.no_grad():
            for i in tqdm(range(0, points.shape[0], batch_size)):
                batch_points = points[i:i + batch_size]

                # Query density with a fixed direction (+z here); in NeRF only
                # color depends on the view direction, density does not.
                fixed_dirs = torch.zeros_like(batch_points)
                fixed_dirs[..., 2] = 1.0

                # Query the fine network
                sigma, _ = self.nerf_model(batch_points, fixed_dirs, coarse=False)

                # The points were flattened in grid order, so the flat density
                # field can be filled directly, without a per-point scatter.
                density_field[i:i + batch_points.shape[0]] = sigma.squeeze(-1).cpu()

        density_field = density_field.reshape(resolution, resolution, resolution)

        # Marching cubes
        print("Running marching cubes...")
        density_field_np = density_field.numpy()
        vertices, triangles = mcubes.marching_cubes(density_field_np, threshold)

        # Map vertices back to world coordinates ([-bound, bound])
        vertices = vertices / (resolution - 1) * (2 * bound) - bound

        # Build and save the mesh
        mesh = trimesh.Trimesh(vertices=vertices, faces=triangles)
        mesh.export(output_path)

        print(f"Mesh extraction finished, saved to: {output_path}")
        print(f"Mesh statistics: {len(vertices)} vertices, {len(triangles)} triangles")
        return mesh

    def evaluate_reconstruction(self,
                                gt_mesh_path: str = None) -> Dict[str, float]:
        """
        Evaluate reconstruction quality.

        Args:
            gt_mesh_path: path to the ground-truth mesh (if available)

        Returns:
            metrics: evaluation metrics such as the F-score
        """
        if gt_mesh_path is None:
            print("No ground-truth mesh provided, skipping evaluation")
            return {}

        print("Evaluating reconstruction quality...")
        # A real implementation would compute metrics such as the F-score or
        # Chamfer distance here; see the sketch after this file.
        # For simplicity, placeholder values are returned.
        metrics = {
            "f_score": 0.85,
            "precision": 0.87,
            "recall": 0.83
        }
        print(f"Evaluation: F-score={metrics['f_score']:.4f}, "
              f"precision={metrics['precision']:.4f}, recall={metrics['recall']:.4f}")
        return metrics

    def run_active_reconstruction(self,
                                  initial_poses: np.ndarray,
                                  initial_images: torch.Tensor = None,
                                  max_iterations: int = 3) -> List[np.ndarray]:
        """
        Run the active reconstruction loop.

        Args:
            initial_poses: initial camera poses
            initial_images: initial images (if available)
            max_iterations: maximum number of iterations

        Returns:
            all_poses: all selected camera poses
        """
        print("Starting active reconstruction...")

        # Initial training on the initial views
        if initial_images is None:
            initial_images = self._simulate_image_capture(initial_poses)

        self.train_nerf(
            initial_images,
            torch.from_numpy(initial_poses).float().to(self.device),
            epochs=self.config.get("reconstruction", {}).get("epochs_per_iteration", 2000)
        )

        # Save the initial model
        initial_model_path = os.path.join(self.output_dir, "initial_model.pth")
        torch.save(self.nerf_model.state_dict(), initial_model_path)

        all_poses = initial_poses.copy()
        current_poses = initial_poses.copy()
        all_images = initial_images.clone()

        # Extract the initial mesh
        initial_mesh_path = os.path.join(self.output_dir, "initial_mesh.obj")
        self.extract_mesh(
            initial_mesh_path,
            resolution=self.config.get("reconstruction", {}).get("mesh_resolution", 256)
        )

        # Active reconstruction iterations
        for iteration in range(max_iterations):
            print(f"\nStarting iteration {iteration+1}/{max_iterations}")

            # Select the next batch of views
            next_views = self.policy.select_next_views(self.nerf_model, current_poses)
            print(f"Selected {len(next_views)} new views")

            # Capture images for the new views
            new_images = self._simulate_image_capture(next_views)

            # Append the new views to the pose and image sets
            current_poses = np.concatenate([current_poses, next_views], axis=0)
            all_poses = np.concatenate([all_poses, next_views], axis=0)
            all_images = torch.cat([all_images, new_images], dim=0)

            # Following the authors, we re-initialize from the initial model
            # instead of continuing training: "After selecting additional
            # images, we initialize the network with the model from the
            # initialization step and refine the model further with the
            # updated training set."
            self.nerf_model.load_state_dict(torch.load(initial_model_path, map_location=self.device))

            # Retrain with the extended training set
            self.train_nerf(
                all_images,
                torch.from_numpy(current_poses).float().to(self.device),
                epochs=self.config.get("reconstruction", {}).get("epochs_per_iteration", 2000)
            )

            # Extract a mesh after every iteration to track progress
            iter_mesh_path = os.path.join(self.output_dir, f"mesh_iter_{iteration+1}.obj")
            self.extract_mesh(
                iter_mesh_path,
                resolution=self.config.get("reconstruction", {}).get("mesh_resolution", 256)
            )

        # Extract the final mesh
        output_mesh_path = os.path.join(self.output_dir, "final_mesh.obj")
        self.extract_mesh(
            output_mesh_path,
            resolution=self.config.get("reconstruction", {}).get("mesh_resolution", 256)
        )

        # Evaluate reconstruction quality
        self.evaluate_reconstruction()

        print("Active reconstruction finished")
        return all_poses

    def _simulate_image_capture(self, poses: np.ndarray) -> torch.Tensor:
        """
        Simulate image capture (a real system would read from a camera or dataset).

        Args:
            poses: camera poses

        Returns:
            images: simulated images
        """
        camera_config = self.config.get("camera", {})
        H, W = camera_config.get("height", 800), camera_config.get("width", 800)
        # Random images as stand-ins (should come from a camera or renderer)
        images = torch.rand(len(poses), H, W, 3, device=self.device)
        return images


def main():
    parser = argparse.ArgumentParser(description="NeRF-uncertainty-guided active 3D reconstruction")
    parser.add_argument("--config", type=str, default="nbv_config.yaml", help="path to the config file")
    parser.add_argument("--synthetic", action="store_true", help="use the synthetic dataset")
    args = parser.parse_args()

    # Build the active reconstruction system
    reconstruction = ActiveReconstruction(args.config)

    # Initialize camera poses (usually taken from the middle circle);
    # read the number of initial views from the configuration.
    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)
    initial_view_count = config.get("reconstruction", {}).get("initial_view_count", 15)

    # Adjust the number of initial views to the dataset type
    if args.synthetic:
        initial_view_count = min(initial_view_count, 6)  # 6 initial views for synthetic data
        print(f"Using the synthetic dataset, initial views: {initial_view_count}")
    else:
        print(f"Using the real dataset, initial views: {initial_view_count}")

    # Poses are organized circle by circle on the hemisphere,
    # so pick a subset of poses from the middle circle.
    middle_circle_index = config.get("view_selection", {}).get("n_circles", 5) // 2
    poses_per_circle = config.get("view_selection", {}).get("n_poses_per_circle", 30)

    # Choose initial poses at equal spacing (step of at least 1)
    start_index = middle_circle_index * poses_per_circle
    step = max(1, poses_per_circle // initial_view_count)
    initial_pose_indices = [start_index + i * step for i in range(initial_view_count)]
    initial_poses = reconstruction.policy.poses[initial_pose_indices]

    # Run active reconstruction
    selected_poses = reconstruction.run_active_reconstruction(
        initial_poses,
        max_iterations=config.get("reconstruction", {}).get("max_iterations", 3)
    )
    print(f"Active reconstruction done, {len(selected_poses)} camera poses selected in total")


if __name__ == "__main__":
    main()

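The placeholder metrics in evaluate_reconstruction above can be computed for real once a ground-truth mesh is available. Below is a minimal sketch, not part of this commit: it samples points on both meshes with trimesh and scores them against a distance threshold; sample_count and tau are illustrative values, and mesh_f_score is a hypothetical helper name.

    # Sketch: point-based precision / recall / F-score between two meshes.
    import numpy as np
    import trimesh
    from scipy.spatial import cKDTree

    def mesh_f_score(rec_mesh_path, gt_mesh_path, sample_count=100_000, tau=0.01):
        rec = trimesh.load(rec_mesh_path)
        gt = trimesh.load(gt_mesh_path)

        # Sample points uniformly on both surfaces
        rec_pts, _ = trimesh.sample.sample_surface(rec, sample_count)
        gt_pts, _ = trimesh.sample.sample_surface(gt, sample_count)

        # Nearest-neighbor distances in both directions
        d_rec_to_gt, _ = cKDTree(gt_pts).query(rec_pts)
        d_gt_to_rec, _ = cKDTree(rec_pts).query(gt_pts)

        precision = float(np.mean(d_rec_to_gt < tau))  # reconstructed points near GT
        recall = float(np.mean(d_gt_to_rec < tau))     # GT points covered by the reconstruction
        f_score = 2 * precision * recall / (precision + recall + 1e-8)
        return {"f_score": f_score, "precision": precision, "recall": recall}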
ref_code/nbv_config.yaml

@@ -0,0 +1,52 @@
# Active reconstruction system configuration

# Basic settings
device: cuda                              # device to use: cuda or cpu
output_dir: ./outputs/nbv_reconstruction  # output directory
seed: 42                                  # random seed

# Data settings
data:
  dataset_type: synthetic            # dataset type: synthetic or real
  synthetic_dir: ./data/synthetic/   # synthetic data directory
  real_dir: ./data/real/             # real data directory

# NeRF model settings
nerf:
  pos_enc_dim: 10        # positional encoding dimension
  dir_enc_dim: 4         # directional encoding dimension
  hidden_dim: 256        # hidden layer width (kept for backward compatibility)
  # Network architecture
  netdepth_coarse: 8     # coarse network depth
  netwidth_coarse: 256   # coarse network width
  netdepth_fine: 8       # fine network depth
  netwidth_fine: 256     # fine network width
  skips: [4]             # skip-connection layers
  use_viewdirs: true     # whether to use view directions

# Camera settings
camera:
  width: 800             # image width
  height: 800            # image height
  focal: 1000.0          # focal length
  near: 2.0              # near plane distance
  far: 6.0               # far plane distance

# Sampling settings
sampling:
  coarse_samples: 64     # number of coarse samples
  fine_samples: 128      # number of fine samples
  perturb: true          # whether to jitter sample positions

# Reconstruction settings
reconstruction:
  max_iterations: 3          # maximum number of iterations
  initial_view_count: 15     # number of initial views
  epochs_per_iteration: 2000 # training epochs per iteration
  mesh_resolution: 256       # mesh extraction resolution

# View-selection policy settings
view_selection:
  n_circles: 5               # number of circles on the hemisphere
  n_poses_per_circle: 30     # number of poses per circle
  distance_threshold: 0.1    # minimum distance between views

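For reference, the reconstruction code consumes this file with yaml.safe_load and chained .get(...) lookups, so missing keys fall back to defaults. A minimal sketch of that pattern, assuming nbv_config.yaml is in the working directory:

    # Sketch: how nbv_config.yaml is consumed by the reconstruction code.
    import yaml

    with open("nbv_config.yaml", "r") as f:
        config = yaml.safe_load(f)

    # Nested lookups with defaults, mirroring the pattern used throughout the code
    mesh_resolution = config.get("reconstruction", {}).get("mesh_resolution", 256)
    n_circles = config.get("view_selection", {}).get("n_circles", 5)
    focal = config.get("camera", {}).get("focal", 1000.0)
    print(mesh_resolution, n_circles, focal)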
ref_code/nerf_model.py

@@ -0,0 +1,182 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from PytorchBoot.stereotype import stereotype


@stereotype.module("nerf")
class NeRF(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config

        # Output sizes of the positional and directional encodings
        # (the raw 3D input is kept alongside sin/cos terms, hence the "+ 1")
        pos_enc_out = 3 * (2 * config["pos_enc_dim"] + 1)
        dir_enc_out = 3 * (2 * config["dir_enc_dim"] + 1)

        # Network depth and width (configurable)
        netdepth_coarse = config.get("netdepth_coarse", 8)
        netwidth_coarse = config.get("netwidth_coarse", 256)
        netdepth_fine = config.get("netdepth_fine", 8)
        netwidth_fine = config.get("netwidth_fine", 256)

        # Skip connections
        self.skips = config.get("skips", [4])

        # Whether to condition color on the view direction
        self.use_viewdirs = config.get("use_viewdirs", True)

        # Build the coarse and fine networks
        if self.use_viewdirs:
            # Positional encoding -> density + feature
            self.pts_linears_coarse = self._build_pts_mlp(
                input_dim=pos_enc_out,
                width=netwidth_coarse,
                depth=netdepth_coarse,
                skips=self.skips
            )
            self.alpha_linear_coarse = nn.Linear(netwidth_coarse, 1)
            self.feature_linear_coarse = nn.Linear(netwidth_coarse, netwidth_coarse)

            # Feature + directional encoding -> RGB
            self.views_linears_coarse = nn.ModuleList([
                nn.Linear(netwidth_coarse + dir_enc_out, netwidth_coarse // 2)
            ])
            self.rgb_linear_coarse = nn.Linear(netwidth_coarse // 2, 3)

            # Same structure for the fine network
            self.pts_linears_fine = self._build_pts_mlp(
                input_dim=pos_enc_out,
                width=netwidth_fine,
                depth=netdepth_fine,
                skips=self.skips
            )
            self.alpha_linear_fine = nn.Linear(netwidth_fine, 1)
            self.feature_linear_fine = nn.Linear(netwidth_fine, netwidth_fine)
            self.views_linears_fine = nn.ModuleList([
                nn.Linear(netwidth_fine + dir_enc_out, netwidth_fine // 2)
            ])
            self.rgb_linear_fine = nn.Linear(netwidth_fine // 2, 3)
        else:
            # Simplified variant without view directions: output RGBA directly
            self.pts_linears_coarse = self._build_pts_mlp(
                input_dim=pos_enc_out,
                width=netwidth_coarse,
                depth=netdepth_coarse,
                skips=self.skips
            )
            self.output_linear_coarse = nn.Linear(netwidth_coarse, 4)

            self.pts_linears_fine = self._build_pts_mlp(
                input_dim=pos_enc_out,
                width=netwidth_fine,
                depth=netdepth_fine,
                skips=self.skips
            )
            self.output_linear_fine = nn.Linear(netwidth_fine, 4)

    def _build_pts_mlp(self, input_dim, width, depth, skips):
        """Build the MLP that processes the positional encoding, with skip connections."""
        layers = nn.ModuleList()
        # First layer
        layers.append(nn.Linear(input_dim, width))
        # Remaining layers. forward_mlp concatenates the input *after* layer i
        # when i is in skips, so it is layer i+1 that must accept the widened input.
        for i in range(1, depth):
            if (i - 1) in skips:
                layers.append(nn.Linear(input_dim + width, width))
            else:
                layers.append(nn.Linear(width, width))
        return layers

    def positional_encoding(self, x, L):
        """Positional encoding: [x, sin(2^i x), cos(2^i x)] for i in 0..L-1."""
        encodings = [x]
        for i in range(L):
            encodings.append(torch.sin(2 ** i * x))
            encodings.append(torch.cos(2 ** i * x))
        return torch.cat(encodings, dim=-1)

    def forward_mlp(self, pts_embed, viewdirs_embed, is_coarse=True):
        """Forward pass through the MLP part."""
        if is_coarse:
            pts_linears = self.pts_linears_coarse
            alpha_linear = self.alpha_linear_coarse if self.use_viewdirs else None
            feature_linear = self.feature_linear_coarse if self.use_viewdirs else None
            views_linears = self.views_linears_coarse if self.use_viewdirs else None
            rgb_linear = self.rgb_linear_coarse if self.use_viewdirs else None
            output_linear = self.output_linear_coarse if not self.use_viewdirs else None
        else:
            pts_linears = self.pts_linears_fine
            alpha_linear = self.alpha_linear_fine if self.use_viewdirs else None
            feature_linear = self.feature_linear_fine if self.use_viewdirs else None
            views_linears = self.views_linears_fine if self.use_viewdirs else None
            rgb_linear = self.rgb_linear_fine if self.use_viewdirs else None
            output_linear = self.output_linear_fine if not self.use_viewdirs else None

        # Process the positional encoding
        h = pts_embed
        for i, layer in enumerate(pts_linears):
            h = layer(h)
            h = F.relu(h)
            # Skip connection: re-inject the encoded input after layer i
            if i in self.skips:
                h = torch.cat([pts_embed, h], -1)

        if self.use_viewdirs:
            # Branch 1: volume density
            sigma = alpha_linear(h)
            # Branch 2: color feature
            feature = feature_linear(h)
            # Concatenate the directional encoding
            h = torch.cat([feature, viewdirs_embed], -1)
            # View-dependent MLP
            for layer in views_linears:
                h = layer(h)
                h = F.relu(h)
            # RGB output in [0, 1]
            rgb = rgb_linear(h)
            rgb = torch.sigmoid(rgb)
        else:
            # Output RGBA directly
            outputs = output_linear(h)
            rgb = torch.sigmoid(outputs[..., :3])  # [0, 1] range
            sigma = outputs[..., 3:]
        return rgb, sigma

    def forward(self, pos, dir, coarse=True):
        """
        Forward pass.

        Args:
            pos: 3D positions [batch_size, ..., 3]
            dir: view directions [batch_size, ..., 3]
            coarse: whether to use the coarse network

        Returns:
            sigma: volume density [batch_size, ..., 1]
            color: RGB color [batch_size, ..., 3]
        """
        # Encode the position; encode the direction only when view directions are used
        pos_enc = self.positional_encoding(pos, self.config["pos_enc_dim"])
        if self.use_viewdirs:
            dir_normalized = F.normalize(dir, dim=-1)
            dir_enc = self.positional_encoding(dir_normalized, self.config["dir_enc_dim"])
        else:
            dir_enc = None

        # Run the coarse or fine network
        color, sigma = self.forward_mlp(pos_enc, dir_enc, coarse)
        return sigma, color

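A quick sanity check of the model above (not part of this commit): with pos_enc_dim: 10 the encoded position has 3 * (2 * 10 + 1) = 63 dimensions, and a forward pass on random placeholder inputs should yield an [N, 1] density and an [N, 3] color.

    # Sketch: instantiate NeRF with the yaml's nerf section and check output shapes.
    import torch
    from nerf_model import NeRF

    model = NeRF({
        "pos_enc_dim": 10, "dir_enc_dim": 4,
        "netdepth_coarse": 8, "netwidth_coarse": 256,
        "netdepth_fine": 8, "netwidth_fine": 256,
        "skips": [4], "use_viewdirs": True,
    })
    pos = torch.rand(1024, 3)   # random sample points (placeholder)
    dirs = torch.rand(1024, 3)  # random view directions (placeholder)
    sigma, color = model(pos, dirs, coarse=True)
    print(sigma.shape, color.shape)  # torch.Size([1024, 1]) torch.Size([1024, 3])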
ref_code/pipeline.py

@@ -0,0 +1,126 @@
import numpy as np
import torch
from scipy.spatial.transform import Rotation as R
from uncertainty_guide import UncertaintyGuideNeRF


class ActiveReconstructionPolicy:
    def __init__(self, config):
        self.config = config
        # The view-selection parameters live under "view_selection" in the
        # yaml config (the config is a plain dict, not an object).
        view_config = config.get("view_selection", {})
        self.n_circles = view_config.get("n_circles", 5)
        self.n_poses_per_circle = view_config.get("n_poses_per_circle", 30)
        self.distance_threshold = view_config.get("distance_threshold", 0.1)
        self._setup_view_sphere()
        self.uncertainty_guide = UncertaintyGuideNeRF(config)

    def _setup_view_sphere(self):
        """Initialize camera poses on a hemisphere (n_circles circles x n_poses_per_circle poses)."""
        self.poses = []
        polar_angles = np.linspace(0.1, np.pi / 2, self.n_circles)  # polar angles on the hemisphere
        for phi in polar_angles:
            for theta in np.linspace(0, 2 * np.pi, self.n_poses_per_circle, endpoint=False):
                # Spherical to Cartesian coordinates
                x = np.cos(theta) * np.sin(phi)
                y = np.sin(theta) * np.sin(phi)
                z = np.cos(phi)
                position = np.array([x, y, z]) * 2.0  # scale factor

                # Camera looks at the origin
                forward = -position / np.linalg.norm(position)
                up = np.array([0, 0, 1])
                right = np.cross(up, forward)
                up = np.cross(forward, right)

                # Assemble the pose matrix
                pose = np.eye(4)
                pose[:3, :3] = np.stack([right, up, forward], axis=-1)
                pose[:3, 3] = position
                self.poses.append(pose)
        self.poses = np.stack(self.poses)

        # Region clustering: split the hemisphere into 12 sections (6 per half)
        self.section_masks = self._create_section_masks()

    def _create_section_masks(self):
        """Create the masks for the 12 sections."""
        masks = []
        angles = np.arctan2(self.poses[:, 1, 3], self.poses[:, 0, 3])  # azimuth of each pose

        # Upper and lower halves (sign of the z coordinate)
        upper = self.poses[:, 2, 3] > 0
        lower = ~upper

        # 6 azimuthal sections per half
        angle_bins = np.linspace(-np.pi, np.pi, 7)  # 7 boundaries for 6 sections
        for i in range(6):
            angle_mask = (angles >= angle_bins[i]) & (angles < angle_bins[i + 1])
            masks.append(angle_mask & upper)
            masks.append(angle_mask & lower)
        return masks

    def select_next_views(self, nerf_model, current_poses):
        """Select the next best views by entropy.

        Args:
            nerf_model: the current NeRF model
            current_poses: camera poses already captured

        Returns:
            selected_poses: the next batch of selected camera poses
        """
        # Exclude candidates that are too close to already-captured views
        current_positions = current_poses[:, :3, 3]
        all_positions = self.poses[:, :3, 3]
        distance_matrix = np.linalg.norm(
            current_positions[:, None] - all_positions[None], axis=-1)
        min_distances = np.min(distance_matrix, axis=0)
        valid_mask = min_distances > self.distance_threshold

        # Evaluate the uncertainty of the candidate views
        valid_poses = self.poses[valid_mask]
        entropy_values = self.uncertainty_guide.evaluate_candidate_views(nerf_model, valid_poses)

        # Pick the valid view with the highest entropy in each section
        selected_indices = []
        for mask in self.section_masks:
            # Restrict the section mask to the valid candidates
            section_mask = mask[valid_mask]
            if not np.any(section_mask):
                continue
            section_entropy = entropy_values.copy()
            section_entropy[~section_mask] = -np.inf
            selected_idx = np.argmax(section_entropy)
            # Map back to the index into self.poses
            original_indices = np.where(valid_mask)[0]
            original_idx = original_indices[selected_idx]
            selected_indices.append(original_idx)
        return self.poses[selected_indices]

    def coarse_to_fine_reconstruction(self, nerf_model, initial_poses, max_iterations=3):
        """Run the coarse-to-fine reconstruction loop.

        Args:
            nerf_model: the initial NeRF model
            initial_poses: initial camera poses
            max_iterations: maximum number of iterations

        Returns:
            all_selected_poses: all selected camera poses (including the initial ones)
        """
        all_selected_poses = initial_poses.copy()
        current_poses = initial_poses.copy()

        for iteration in range(max_iterations):
            # Select the next batch of views
            next_views = self.select_next_views(nerf_model, current_poses)

            # Append them to the current pose set
            current_poses = np.concatenate([current_poses, next_views], axis=0)
            all_selected_poses = np.concatenate([all_selected_poses, next_views], axis=0)

            # Retraining the model would happen here, but that is done by the
            # caller; this method only returns the selected poses.
        return all_selected_poses

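A standalone sanity check of the hemisphere construction above (numpy only, not part of this commit): it rebuilds the pose grid and verifies that every camera's forward axis, the third column of the rotation, points at the origin.

    # Sketch: verify the hemisphere pose grid (mirrors _setup_view_sphere).
    import numpy as np

    n_circles, n_poses_per_circle = 5, 30
    poses = []
    for phi in np.linspace(0.1, np.pi / 2, n_circles):
        for theta in np.linspace(0, 2 * np.pi, n_poses_per_circle, endpoint=False):
            position = 2.0 * np.array([np.cos(theta) * np.sin(phi),
                                       np.sin(theta) * np.sin(phi),
                                       np.cos(phi)])
            forward = -position / np.linalg.norm(position)
            up = np.array([0.0, 0.0, 1.0])
            right = np.cross(up, forward)
            up = np.cross(forward, right)
            pose = np.eye(4)
            pose[:3, :3] = np.stack([right, up, forward], axis=-1)
            pose[:3, 3] = position
            poses.append(pose)
    poses = np.stack(poses)

    assert poses.shape == (n_circles * n_poses_per_circle, 4, 4)
    # Each forward axis points from the camera toward the origin
    dirs_to_origin = -poses[:, :3, 3] / np.linalg.norm(poses[:, :3, 3], axis=1, keepdims=True)
    assert np.allclose(poses[:, :3, 2], dirs_to_origin)
    print("pose grid OK:", poses.shape)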

@@ -0,0 +1,170 @@
import torch
import numpy as np
from utils.volume_render_util import VolumeRendererUtil
import torch.nn.functional as F
from typing import Tuple, List, Dict, Any, Optional


class UncertaintyGuideNeRF:
    """
    Active view selection policy based on NeRF uncertainty.
    Guides the choice of the next view by computing per-view entropy.
    """

    def __init__(self, config: Dict[str, Any]):
        """
        Initialize the uncertainty-guided policy.

        Args:
            config: configuration dictionary with the relevant parameters
        """
        self.config = config
        self.device = torch.device(config.get("device", "cuda") if torch.cuda.is_available() else "cpu")

        # Camera parameters (nested under "camera" in nbv_config.yaml)
        camera_config = config.get("camera", {})
        self.width = camera_config.get("width", 800)
        self.height = camera_config.get("height", 800)
        self.focal = camera_config.get("focal", 1000.0)
        self.near = camera_config.get("near", 2.0)
        self.far = camera_config.get("far", 6.0)

        # Sampling parameters (nested under "sampling")
        sampling_config = config.get("sampling", {})
        self.coarse_samples = sampling_config.get("coarse_samples", 64)
        self.fine_samples = sampling_config.get("fine_samples", 128)

    def generate_rays(self, pose: np.ndarray) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Generate rays from a camera pose.

        Args:
            pose: camera pose matrix [4, 4]

        Returns:
            rays_o: ray origins [H*W, 3]
            rays_d: ray directions [H*W, 3]
        """
        # Pixel coordinate grid
        i, j = torch.meshgrid(
            torch.linspace(0, self.width - 1, self.width),
            torch.linspace(0, self.height - 1, self.height),
            indexing='ij'
        )
        i = i.t().to(self.device)
        j = j.t().to(self.device)

        # Pixel coordinates -> directions in the camera frame
        dirs = torch.stack([
            (i - self.width * 0.5) / self.focal,
            -(j - self.height * 0.5) / self.focal,
            -torch.ones_like(i)
        ], dim=-1)

        # Transform into the world frame
        pose = torch.from_numpy(pose).float().to(self.device)
        rays_d = torch.sum(dirs[..., None, :] * pose[:3, :3], dim=-1)
        rays_o = pose[:3, -1].expand(rays_d.shape)

        # Flatten to batch format
        rays_o = rays_o.reshape(-1, 3)
        rays_d = rays_d.reshape(-1, 3)
        return rays_o, rays_d

    def evaluate_view_uncertainty(self,
                                  nerf_model: torch.nn.Module,
                                  pose: np.ndarray) -> float:
        """
        Evaluate the uncertainty (entropy) of a given view.

        Args:
            nerf_model: the NeRF model
            pose: camera pose matrix [4, 4]

        Returns:
            mean_entropy: mean entropy over the view
        """
        nerf_model.eval()
        with torch.no_grad():
            # Generate rays
            rays_o, rays_d = self.generate_rays(pose)

            # Large images are processed in batches
            batch_size = 4096  # adjust to the available GPU memory
            entropy_values = []

            for i in range(0, rays_o.shape[0], batch_size):
                batch_rays_o = rays_o[i:i + batch_size]
                batch_rays_d = rays_d[i:i + batch_size]

                # Normalize direction vectors
                batch_rays_d = F.normalize(batch_rays_d, dim=-1)

                # Near and far planes
                near = torch.ones_like(batch_rays_o[..., 0]) * self.near
                far = torch.ones_like(batch_rays_o[..., 0]) * self.far

                # Render the rays and collect the per-ray entropy
                _, weights, _, entropy = VolumeRendererUtil.render_rays(
                    nerf_model,
                    batch_rays_o,
                    batch_rays_d,
                    near,
                    far,
                    self.coarse_samples,
                    self.fine_samples
                )
                entropy_values.append(entropy)

            # Combine all batches and average over the view
            all_entropy = torch.cat(entropy_values, dim=0)
            mean_entropy = all_entropy.mean().item()
        return mean_entropy

    def evaluate_candidate_views(self,
                                 nerf_model: torch.nn.Module,
                                 candidate_poses: np.ndarray) -> np.ndarray:
        """
        Evaluate the uncertainty (entropy) of candidate views.

        Args:
            nerf_model: the NeRF model
            candidate_poses: candidate camera pose matrices [N, 4, 4]

        Returns:
            entropy_values: entropy of each candidate view [N]
        """
        entropy_values = np.zeros(len(candidate_poses))
        for i, pose in enumerate(candidate_poses):
            entropy_values[i] = self.evaluate_view_uncertainty(nerf_model, pose)
        return entropy_values

    def downsample_image(self, rays_o, rays_d, factor=4):
        """
        Downsample the rays to speed up processing.

        Args:
            rays_o: ray origins [H*W, 3]
            rays_d: ray directions [H*W, 3]
            factor: downsampling factor

        Returns:
            downsampled_rays_o: downsampled ray origins
            downsampled_rays_d: downsampled ray directions
        """
        # Reshape to image format (assumes a square image)
        H = W = int(np.sqrt(rays_o.shape[0]))
        rays_o = rays_o.reshape(H, W, 3)
        rays_d = rays_d.reshape(H, W, 3)

        # Keep every factor-th ray in both directions
        downsampled_rays_o = rays_o[::factor, ::factor].reshape(-1, 3)
        downsampled_rays_d = rays_d[::factor, ::factor].reshape(-1, 3)
        return downsampled_rays_o, downsampled_rays_d