WaveGen / nano_WaveGen /utils /visualize_training.py
FangSen9000's picture
Update nano_WaveGen/utils/visualize_training.py
1173bb2 verified
#!/usr/bin/env python3
"""
WaveGen 训练结果可视化工具 (独立版本)
自动检索 core_space 目录并可视化训练输出
Usage:
cd code/WaveGen/nano_WaveGen
python utils/visualize_training.py
"""
import numpy as np
import viser
import viser.transforms as viser_tf
from typing import Optional, Dict, List, Tuple, Any
import os
from pathlib import Path
import json
import cv2
import time
import webbrowser
from scipy.spatial.transform import Rotation
import threading
# 导入深度转点云模块
try:
from depth_to_pointcloud import DepthToPointCloud
except ImportError:
# 尝试从当前目录导入
import sys
sys.path.append(str(Path(__file__).parent))
from depth_to_pointcloud import DepthToPointCloud
class TrainingVisualizer:
"""WaveGen训练结果可视化器"""
def __init__(self, core_space_dir: str = "core_space", port: int = 8080):
"""
初始化可视化器
Args:
core_space_dir: core_space目录路径(相对于当前工作目录)
port: 起始端口号(如果占用会自动尝试下一个)
"""
self.core_space_dir = Path(core_space_dir)
if not self.core_space_dir.is_absolute():
self.core_space_dir = Path.cwd() / self.core_space_dir
# 启动Viser服务器,自动寻找可用端口
self.server = None
self.port = port
max_attempts = 10
for attempt in range(max_attempts):
try_port = port + attempt
try:
# 不显示默认的config/diagnostics页,直接进入UI
self.server = viser.ViserServer(port=try_port, show_config=False)
self.port = try_port
print(f"🌐 Viser服务器已启动: http://localhost:{try_port}")
if attempt > 0:
print(f" (端口 {port} 被占用,自动使用端口 {try_port})")
break
except OSError as e:
if "Address already in use" in str(e):
if attempt == max_attempts - 1:
print(f"❌ 无法找到可用端口 (尝试了 {port}-{try_port})")
print(f" 请手动关闭其他实例: pkill -f visualize_training.py")
raise
continue
else:
raise
# 可视化句柄
self.superquadric_handles = []
self.gt_superquadric_handles = []
self.camera_handles = []
self.camera_frustum_handles = []
self.point_cloud_handle = None
self.camera_rgb_handle = None
self.coordinate_frame_handle = None
self.mesh_handles_pool = {}
self.object_label_handles = [] # 物体信息标签
# 当前数据
self.predictions_npz = None
self.targets_npz = None
self.current_sample_path = None
self.current_frame = 0
self.original_frame_count = 0
self.scene_center = np.array([0, 0, 0])
self.scene_scale = 1.0
# GUI控件
self.gui_controls = {}
# 播放状态
self.is_playing = False
# 视频导出状态
self.is_exporting = False
self.export_progress = 0
self.export_camera_pos = None
self.export_camera_wxyz = None
# 设置场景
self.setup_scene()
# 扫描训练输出
self.scan_training_outputs()
# 设置GUI (立即创建,不等待客户端连接)
self.setup_gui()
print("✅ 训练可视化器已初始化")
print(f"📁 监控目录: {self.core_space_dir}")
if len(self.training_outputs) == 0:
print("⚠️ 未找到训练输出,请检查 core_space 目录")
def setup_scene(self):
"""设置场景背景和坐标系"""
# 设置深蓝色背景(默认)
self.update_background(wireframe_mode=False)
# 设置坐标系方向
self.server.scene.set_up_direction("+y")
def update_background(self, wireframe_mode: bool):
"""更新场景背景颜色"""
if wireframe_mode:
# 线框模式:全黑背景
bg_color = [0, 0, 0]
else:
# 正常模式:深蓝色背景
bg_color = [13, 13, 38]
width, height = 1920, 1080
solid_color_image = np.full((height, width, 3), bg_color, dtype=np.uint8)
self.server.scene.set_background_image(solid_color_image, format="png")
def scan_training_outputs(self):
"""扫描core_space目录下的训练输出"""
self.training_outputs = []
if not self.core_space_dir.exists():
print(f"⚠️ core_space目录不存在: {self.core_space_dir}")
return
# 查找所有训练输出目录 (格式: YYYYMMDD_HHMMSS_stepN_text2wave)
# 按时间倒序(最新在前)方便默认选择最新样本
for output_dir in sorted(self.core_space_dir.glob("*_text2wave"), reverse=True):
if output_dir.is_dir():
# 查找样本目录
sample_dirs = sorted(output_dir.glob("sample_*"))
if sample_dirs:
self.training_outputs.append({
'path': output_dir,
'name': output_dir.name,
'samples': len(sample_dirs)
})
print(f"📦 找到 {len(self.training_outputs)} 个训练输出")
for output in self.training_outputs:
print(f" - {output['name']} ({output['samples']} 样本)")
def setup_gui(self):
"""设置GUI控件"""
# 训练输出选择
with self.server.gui.add_folder("训练输出"):
if self.training_outputs:
output_names = [out['name'] for out in self.training_outputs]
self.gui_controls['output_selector'] = self.server.gui.add_dropdown(
"选择训练输出",
options=output_names,
initial_value=output_names[0]
)
self.gui_controls['output_selector'].on_update(self._on_output_change)
# 样本选择
self.gui_controls['sample_slider'] = self.server.gui.add_slider(
"样本索引",
min=0,
max=max(0, self.training_outputs[0]['samples'] - 1),
step=1,
initial_value=0
)
self.gui_controls['sample_slider'].on_update(self._on_sample_change)
self.gui_controls['load_button'] = self.server.gui.add_button("加载样本")
self.gui_controls['load_button'].on_click(self._on_load_sample)
else:
self.server.gui.add_text("状态", initial_value="未找到训练输出")
# 帧控制
with self.server.gui.add_folder("帧控制"):
self.gui_controls['frame_slider'] = self.server.gui.add_slider(
"当前帧",
min=0,
max=23,
step=1,
initial_value=0
)
self.gui_controls['frame_slider'].on_update(self._on_frame_change)
self.gui_controls['play_button'] = self.server.gui.add_button("▶ 播放")
self.gui_controls['play_button'].on_click(self._on_play)
self.gui_controls['pause_button'] = self.server.gui.add_button("⏸ 暂停")
self.gui_controls['pause_button'].on_click(self._on_pause)
self.gui_controls['fps_slider'] = self.server.gui.add_slider(
"播放FPS",
min=1,
max=30,
step=1,
initial_value=8
)
# 生成结果控制
with self.server.gui.add_folder("生成结果"):
self.gui_controls['show_generated'] = self.server.gui.add_checkbox(
"显示生成的超二次曲面", initial_value=True
)
self.gui_controls['show_generated'].on_update(self._on_visibility_change)
self.gui_controls['generated_opacity'] = self.server.gui.add_slider(
"生成结果透明度", min=0.1, max=1.0, step=0.05, initial_value=0.7
)
self.gui_controls['generated_opacity'].on_update(self._on_opacity_change)
self.gui_controls['generated_color'] = self.server.gui.add_rgb(
"生成结果颜色", initial_value=(100, 149, 237) # 蓝色
)
self.gui_controls['generated_color'].on_update(self._on_color_change)
# Ground Truth控制
with self.server.gui.add_folder("Ground Truth"):
self.gui_controls['show_gt'] = self.server.gui.add_checkbox(
"显示GT超二次曲面", initial_value=True
)
self.gui_controls['show_gt'].on_update(self._on_visibility_change)
self.gui_controls['gt_opacity'] = self.server.gui.add_slider(
"GT透明度", min=0.1, max=1.0, step=0.05, initial_value=0.5
)
self.gui_controls['gt_opacity'].on_update(self._on_opacity_change)
self.gui_controls['gt_color'] = self.server.gui.add_rgb(
"GT颜色", initial_value=(255, 99, 71) # 红色
)
self.gui_controls['gt_color'].on_update(self._on_color_change)
self.gui_controls['show_object_info'] = self.server.gui.add_checkbox(
"显示物体信息", initial_value=False
)
self.gui_controls['show_object_info'].on_update(self._on_visibility_change)
# 点云控制
with self.server.gui.add_folder("点云显示"):
self.gui_controls['show_pointcloud'] = self.server.gui.add_checkbox(
"显示点云", initial_value=True
)
self.gui_controls['show_pointcloud'].on_update(self._on_visibility_change)
self.gui_controls['pointcloud_size'] = self.server.gui.add_slider(
"点大小", min=0.001, max=0.02, step=0.001, initial_value=0.008
)
self.gui_controls['pointcloud_size'].on_update(self._on_visibility_change)
# 网格质量
with self.server.gui.add_folder("渲染设置"):
self.gui_controls['mesh_resolution'] = self.server.gui.add_slider(
"网格分辨率", min=10, max=50, step=5, initial_value=25
)
self.gui_controls['mesh_resolution'].on_update(self._on_mesh_resolution_change)
self.gui_controls['show_coordinate'] = self.server.gui.add_checkbox(
"显示坐标系", initial_value=False
)
self.gui_controls['show_coordinate'].on_update(self._on_visibility_change)
self.gui_controls['wireframe_mode'] = self.server.gui.add_checkbox(
"线框模式 (黑白边缘)", initial_value=False
)
self.gui_controls['wireframe_mode'].on_update(self._on_wireframe_mode_change)
# 相机控制
with self.server.gui.add_folder("相机控制"):
self.gui_controls['reset_view'] = self.server.gui.add_button("重置视角")
self.gui_controls['reset_view'].on_click(self._on_reset_view)
self.gui_controls['match_camera'] = self.server.gui.add_button("匹配GT相机")
self.gui_controls['match_camera'].on_click(self._on_match_camera)
self.gui_controls['show_target_frustum'] = self.server.gui.add_checkbox(
"显示GT相机椎体", initial_value=True
)
self.gui_controls['show_pred_frustum'] = self.server.gui.add_checkbox(
"显示预测相机椎体", initial_value=True
)
self.gui_controls['show_camera_rgb'] = self.server.gui.add_checkbox(
"相机视锥显示RGB", initial_value=True
)
self.gui_controls['show_target_frustum'].on_update(self._on_visibility_change)
self.gui_controls['show_pred_frustum'].on_update(self._on_visibility_change)
self.gui_controls['show_camera_rgb'].on_update(self._on_visibility_change)
# 视频导出
with self.server.gui.add_folder("视频导出"):
self.gui_controls['export_status'] = self.server.gui.add_text(
"状态", initial_value="就绪"
)
self.gui_controls['export_resolution'] = self.server.gui.add_slider(
"导出分辨率", min=480, max=1080, step=120, initial_value=720
)
self.gui_controls['capture_camera_button'] = self.server.gui.add_button(
"📸 捕获当前视角"
)
self.gui_controls['capture_camera_button'].on_click(self._on_capture_camera)
self.gui_controls['export_viser_button'] = self.server.gui.add_button(
"💾 导出场景(.viser)"
)
self.gui_controls['export_viser_button'].on_click(self._on_export_viser)
self.gui_controls['export_button'] = self.server.gui.add_button("🎬 导出视频(MP4)")
self.gui_controls['export_button'].on_click(self._on_export_video)
print(f"✅ GUI 已设置 - 创建了 {len(self.gui_controls)} 个控件")
def _on_output_change(self, event):
"""训练输出选择改变"""
selected_name = event.target.value
for i, output in enumerate(self.training_outputs):
if output['name'] == selected_name:
# 更新样本滑块范围
max_sample = max(0, output['samples'] - 1)
self.gui_controls['sample_slider'].max = max_sample
self.gui_controls['sample_slider'].value = 0
break
def _on_sample_change(self, event):
"""样本索引改变"""
pass # 用户需要点击"加载样本"按钮
def _on_load_sample(self, event):
"""加载选中的样本"""
selected_name = self.gui_controls['output_selector'].value
sample_idx = int(self.gui_controls['sample_slider'].value)
# 找到对应的训练输出
output_path = None
for output in self.training_outputs:
if output['name'] == selected_name:
output_path = output['path']
break
if output_path is None:
print(f"❌ 未找到训练输出: {selected_name}")
return
self.load_sample(output_path, sample_idx)
def load_sample(self, output_path: Path, sample_idx: int):
"""加载样本数据"""
sample_path = output_path / f"sample_{sample_idx}"
if not sample_path.exists():
print(f"❌ 样本目录不存在: {sample_path}")
return
print(f"\n{'='*60}")
print(f"📂 加载样本: {output_path.name}/sample_{sample_idx}")
print(f"{'='*60}")
self.current_sample_path = sample_path
# 加载predictions.npz
pred_file = sample_path / "predictions.npz"
if pred_file.exists():
npz_data = np.load(pred_file, allow_pickle=True)
self.predictions_npz = {key: npz_data[key] for key in npz_data.files}
npz_data.close()
print(f"✅ 加载predictions.npz: {pred_file}")
if 'frames' in self.predictions_npz:
print(f" 帧数: {len(self.predictions_npz['frames'])}")
if 'text' in self.predictions_npz:
print(f" 文本: {self.predictions_npz['text']}")
else:
self.predictions_npz = None
print(f"⚠️ 未找到predictions.npz")
# 加载targets.npz
target_file = sample_path / "targets.npz"
if target_file.exists():
npz_data = np.load(target_file, allow_pickle=True)
self.targets_npz = {key: npz_data[key] for key in npz_data.files}
npz_data.close()
print(f"✅ 加载targets.npz: {target_file}")
if 'frames' in self.targets_npz:
print(f" 帧数: {len(self.targets_npz['frames'])}")
if 'text' in self.targets_npz:
print(f" 文本: {self.targets_npz['text']}")
else:
self.targets_npz = None
print(f"⚠️ 未找到targets.npz")
# 更新帧数
self.original_frame_count = 0
if self.predictions_npz and 'frames' in self.predictions_npz:
self.original_frame_count = len(self.predictions_npz['frames'])
elif self.targets_npz and 'objects' in self.targets_npz:
objects = self.targets_npz['objects']
if hasattr(objects, 'shape') and len(objects.shape) >= 1:
self.original_frame_count = objects.shape[0]
if self.original_frame_count > 0:
self.gui_controls['frame_slider'].max = self.original_frame_count - 1
self.gui_controls['frame_slider'].value = 0
self.current_frame = 0
print(f"📊 总帧数: {self.original_frame_count}")
# 可视化第一帧
self.visualize_frame(0)
def _on_frame_change(self, event):
"""帧滑块改变"""
frame_idx = int(event.target.value)
self.visualize_frame(frame_idx)
def _on_play(self, event):
"""开始播放"""
self.is_playing = True
print("▶ 开始播放")
# 在后台线程播放
import threading
threading.Thread(target=self._playback_loop, daemon=True).start()
def _on_pause(self, event):
"""暂停播放"""
self.is_playing = False
print("⏸ 暂停播放")
def _playback_loop(self):
"""播放循环"""
while self.is_playing:
current_frame = int(self.gui_controls['frame_slider'].value)
next_frame = (current_frame + 1) % self.original_frame_count
self.gui_controls['frame_slider'].value = next_frame
self.visualize_frame(next_frame)
fps = int(self.gui_controls['fps_slider'].value)
time.sleep(1.0 / fps)
def _on_visibility_change(self, event):
"""可见性改变"""
self.visualize_frame(self.current_frame)
def _on_opacity_change(self, event):
"""透明度改变"""
self.visualize_frame(self.current_frame)
def _on_color_change(self, event):
"""颜色改变"""
self.visualize_frame(self.current_frame)
def _on_mesh_resolution_change(self, event):
"""网格分辨率改变"""
# 清空对象池,强制重新生成mesh
for mesh in self.mesh_handles_pool.values():
mesh.remove()
self.mesh_handles_pool.clear()
self.visualize_frame(self.current_frame)
def _on_wireframe_mode_change(self, event):
"""线框模式改变"""
wireframe_mode = event.target.value
# 更新背景颜色
self.update_background(wireframe_mode)
# 清空对象池,强制重新生成mesh(应用线框模式)
for mesh in self.mesh_handles_pool.values():
mesh.remove()
self.mesh_handles_pool.clear()
# 重新可视化当前帧
self.visualize_frame(self.current_frame)
def _on_reset_view(self, event):
"""重置视角"""
# 设置默认相机位置
for client in self.server.get_clients().values():
client.camera.position = (3.0, 2.0, 3.0)
client.camera.look_at = (0.0, 0.0, 0.0)
def _on_match_camera(self, event):
"""匹配GT相机视角 (新格式)"""
if self.targets_npz is None or 'frames' not in self.targets_npz:
print("⚠️ 没有GT相机数据")
return
frame_idx = self.current_frame
frames = self.targets_npz['frames']
if frame_idx >= len(frames):
print("⚠️ 帧索引超出范围")
return
frame_data = frames[frame_idx]
# 转换为字典格式
if isinstance(frame_data, np.ndarray):
frame_data = frame_data.item()
if 'world_info' not in frame_data:
print("⚠️ 未找到world_info数据")
return
world_info = frame_data['world_info']
camera_position = world_info['camera_position']
# 存储的是xyzw,需要转成viser的wxyz
q_xyzw = np.array(world_info['camera_quaternion'], dtype=np.float32)
wxyz = (float(q_xyzw[3]), float(q_xyzw[0]), float(q_xyzw[1]), float(q_xyzw[2]))
# 对齐到可视化坐标系:减去scene_center并乘以scene_scale
cam_pos_vis = (np.array(camera_position) - self.scene_center) * self.scene_scale
print(f"📷 匹配相机: pos={camera_position}, quat={wxyz}")
# 设置所有客户端的相机
for client in self.server.get_clients().values():
client.camera.position = tuple(cam_pos_vis)
client.camera.wxyz = wxyz
def visualize_frame(self, frame_idx: int):
"""可视化指定帧"""
if self.original_frame_count <= 0:
return
frame_idx = int(np.clip(frame_idx, 0, self.original_frame_count - 1))
self.current_frame = frame_idx
print(f"\n🎨 可视化帧 {frame_idx}/{self.original_frame_count-1}")
# 清空旧的可视化
self.clear_visualization()
# 获取GUI参数
show_generated = self.gui_controls['show_generated'].value
show_gt = self.gui_controls['show_gt'].value
show_pointcloud = self.gui_controls['show_pointcloud'].value
show_coordinate = self.gui_controls['show_coordinate'].value
generated_opacity = self.gui_controls['generated_opacity'].value
gt_opacity = self.gui_controls['gt_opacity'].value
generated_color = tuple(self.gui_controls['generated_color'].value)
gt_color = tuple(self.gui_controls['gt_color'].value)
mesh_resolution = int(self.gui_controls['mesh_resolution'].value)
# 提取帧数据
predictions = self._extract_predictions(frame_idx)
targets = self._extract_targets(frame_idx)
# 场景对齐信息:优先使用scene_normalization.json或GT world_info的center/scale;超二次体保持原坐标,点云/相机用该center/scale
self.scene_center = np.zeros(3, dtype=np.float32)
self.scene_scale = 1.0
norm_path = None
if self.current_sample_path is not None:
norm_path = self.current_sample_path / "original_data" / "scene_normalization.json"
loaded_norm = False
if norm_path is not None and norm_path.exists():
try:
with open(norm_path) as f:
norm = json.load(f)
if 'scene_center' in norm:
self.scene_center = np.array(norm['scene_center'], dtype=np.float32)
if 'scene_scale' in norm:
self.scene_scale = float(norm['scene_scale'])
elif 'scene_extent' in norm and norm['scene_extent']:
self.scene_scale = 20.0 / float(norm['scene_extent'])
loaded_norm = True
except Exception:
loaded_norm = False
if not loaded_norm:
wi = self._get_world_info(frame_idx, source="targets")
if wi is not None:
if 'scene_center' in wi:
self.scene_center = np.array(wi['scene_center'], dtype=np.float32)
if 'scene_scale' in wi:
try:
self.scene_scale = float(wi['scene_scale'])
except Exception:
pass
# 线框模式下不显示点云(黑背景下点云不清晰)
wireframe_mode = self.gui_controls.get('wireframe_mode', None)
is_wireframe = wireframe_mode.value if wireframe_mode else False
if show_pointcloud and not is_wireframe:
# 点云用同一center/scale做归一化
self._visualize_pointcloud(frame_idx, scene_center=self.scene_center, scene_scale=self.scene_scale)
# 可视化生成的超二次曲面
if show_generated and predictions is not None:
self._visualize_superquadrics(
predictions,
color=generated_color,
opacity=generated_opacity,
mesh_resolution=mesh_resolution,
is_gt=False
)
# 可视化GT超二次曲面
if show_gt and targets is not None:
self._visualize_superquadrics(
targets,
color=gt_color,
opacity=gt_opacity,
mesh_resolution=mesh_resolution,
is_gt=True
)
# 显示物体信息(如果启用且不在线框模式)
show_info = self.gui_controls['show_object_info'].value
if show_info and not is_wireframe:
self._visualize_object_labels(frame_idx, targets, is_gt=True)
# 显示坐标系
if show_coordinate:
self.coordinate_frame_handle = self.server.scene.add_frame(
"/coordinate",
wxyz=(1, 0, 0, 0),
position=(0, 0, 0),
axes_length=1.0,
axes_radius=0.01
)
# 可视化相机椎体/RGB(线框模式下不显示)
if not is_wireframe:
self._visualize_cameras(frame_idx)
def _extract_predictions(self, frame_idx: int) -> Optional[np.ndarray]:
"""提取预测数据 (新格式)"""
if self.predictions_npz is None or 'frames' not in self.predictions_npz:
return None
frames = self.predictions_npz['frames']
if frame_idx >= len(frames):
return None
frame_data = frames[frame_idx]
# 转换为字典格式
if isinstance(frame_data, np.ndarray):
frame_data = frame_data.item()
if 'superquadrics' not in frame_data:
return None
superquadrics = frame_data['superquadrics']
objects_array = []
for sq in superquadrics:
# 转换为数组格式 [15 params]
obj_params = np.zeros(15, dtype=np.float32)
obj_params[0] = 1.0 if sq['exists'] else 0.0
obj_params[1:3] = sq['shape'] # epsilon1, epsilon2
obj_params[3:6] = sq['scale'] # a, b, c
obj_params[6:9] = sq['translation'] # x, y, z
obj_params[9:12] = sq['rotation'] # euler angles
obj_params[12:15] = sq['velocity'] # vx, vy, vz
objects_array.append(obj_params)
return np.array(objects_array, dtype=np.float32)
def _extract_targets(self, frame_idx: int) -> Optional[np.ndarray]:
"""提取GT数据 (新格式)"""
if self.targets_npz is None or 'frames' not in self.targets_npz:
return None
frames = self.targets_npz['frames']
if frame_idx >= len(frames):
return None
frame_data = frames[frame_idx]
# 转换为字典格式
if isinstance(frame_data, np.ndarray):
frame_data = frame_data.item()
if 'superquadrics' not in frame_data:
return None
superquadrics = frame_data['superquadrics']
objects_array = []
for sq in superquadrics:
# 转换为数组格式 [16 params - GT格式包含 inlier_ratio]
obj_params = np.zeros(16, dtype=np.float32)
obj_params[0] = 1.0 if sq['exists'] else 0.0
obj_params[1:3] = sq['shape'] # epsilon1, epsilon2
obj_params[3:6] = sq['scale'] # a, b, c
obj_params[6:9] = sq['translation'] # x, y, z
obj_params[9:12] = sq['rotation'] # euler angles
obj_params[12] = sq['inlier_ratio'] # inlier ratio (GT specific)
obj_params[13:16] = sq['velocity'] # vx, vy, vz
objects_array.append(obj_params)
return np.array(objects_array, dtype=np.float32)
def _visualize_superquadrics(self, objects: np.ndarray, color: Tuple,
opacity: float, mesh_resolution: int, is_gt: bool):
"""可视化超二次曲面"""
prefix = "gt" if is_gt else "gen"
num_active = 0
for obj_idx, obj_params in enumerate(objects):
# 检查存在标志
if obj_params[0] > 0.5:
num_active += 1
try:
# 生成mesh
vertices, faces = self.generate_superquadric_mesh(
obj_params, num_samples=mesh_resolution
)
# 使用对象池
mesh_key = f"{prefix}_{obj_idx}"
mesh = self.get_or_create_mesh(
mesh_key, vertices, faces, color, opacity
)
if is_gt:
self.gt_superquadric_handles.append(mesh)
else:
self.superquadric_handles.append(mesh)
except Exception as e:
print(f"❌ 可视化对象{obj_idx}失败: {e}")
label = "GT" if is_gt else "生成"
print(f" {label}对象数: {num_active}")
def _visualize_object_labels(self, frame_idx: int, objects: np.ndarray, is_gt: bool):
"""在物体上显示信息标签"""
# 获取原始字典数据以访问inlier_ratio等
if is_gt and self.targets_npz is not None and 'frames' in self.targets_npz:
frames = self.targets_npz['frames']
if frame_idx >= len(frames):
return
frame_data = frames[frame_idx]
if isinstance(frame_data, np.ndarray):
frame_data = frame_data.item()
if 'superquadrics' not in frame_data:
return
superquadrics = frame_data['superquadrics']
for obj_idx, sq in enumerate(superquadrics):
if not sq['exists']:
continue
# 获取物体位置(用于放置标签)
translation = sq['translation']
scale = sq['scale']
# 标签位置:物体中心上方
label_position = (
float(translation[0]),
float(translation[1]) + float(scale[1]) * 1.5, # 在物体上方
float(translation[2])
)
# 构建信息文本
inlier_ratio = sq.get('inlier_ratio', 0.0)
shape = sq.get('shape', [0, 0])
info_text = (
f"ID: {obj_idx}\n"
f"Density: {inlier_ratio:.3f}\n"
f"Shape: ε1={shape[0]:.2f}, ε2={shape[1]:.2f}\n"
f"Size: {scale[0]:.2f}×{scale[1]:.2f}×{scale[2]:.2f}"
)
# 添加文本标签
# 使用时间戳确保名称唯一,避免冲突
label_name = f"/object_label_f{frame_idx}_o{obj_idx}"
try:
label_handle = self.server.scene.add_label(
label_name,
text=info_text,
position=label_position
)
self.object_label_handles.append(label_handle)
except Exception as e:
print(f"⚠️ 创建标签失败: {e}")
def _visualize_pointcloud(self, frame_idx: int, scene_center: Optional[np.ndarray] = None, scene_scale: Optional[float] = None):
"""可视化点云"""
if self.current_sample_path is None:
return
# 查找原始数据
original_data_dir = self.current_sample_path / "original_data"
if not original_data_dir.exists():
print("⚠️ 未找到original_data目录")
return
# 加载深度图和RGB
depth_file = self._find_depth_file(original_data_dir, frame_idx)
rgb_file = original_data_dir / "rgb" / f"frame_{frame_idx:03d}.png"
if depth_file is None or not rgb_file.exists():
print(f"⚠️ 未找到帧{frame_idx}的深度图或RGB")
return
try:
# 加载数据
depth = self._load_depth(depth_file, frame_idx)
if depth.ndim == 2:
depth = depth[:, :, None]
rgb = self._load_rgb(rgb_file)
# 加载相机内参
camera_K = None
metadata_file = original_data_dir / "metadata.json"
if metadata_file.exists():
with open(metadata_file) as f:
metadata = json.load(f)
if 'camera' in metadata and 'K' in metadata['camera']:
camera_K = np.array(metadata['camera']['K'], dtype=np.float32)
if camera_K is None:
h, w = depth.shape[:2]
camera_K = np.array([[w, 0, w/2], [0, h, h/2], [0, 0, 1]], dtype=np.float32)
# 使用GT world_info作为相机位姿
world_info = self._get_world_info(frame_idx, source="targets")
camera_position = np.zeros(3, dtype=np.float32)
camera_quat_xyzw = np.array([0, 0, 0, 1], dtype=np.float32)
if world_info is not None and 'camera_position' in world_info:
camera_position = np.array(world_info['camera_position'], dtype=np.float32)
if 'camera_quaternion' in world_info:
# 存储为xyzw,直接使用
camera_quat_xyzw = np.array(world_info['camera_quaternion'], dtype=np.float32)
# 转换为点云(标准化到[-10,10]),使用提供的center/scale以与训练对齐
converter = DepthToPointCloud()
_, points_norm, _, depth_center, depth_extent = converter.depth_to_normalized_pointcloud_movi(
depth=depth,
segmentation=None,
camera_K=camera_K,
camera_position=camera_position,
camera_quaternion=camera_quat_xyzw,
resolution=depth.shape[0],
convert_to_zdepth=True,
scene_center_override=scene_center,
scene_scale_override=scene_scale
)
valid_mask = depth[:, :, 0] > 0
points = points_norm[valid_mask]
colors = rgb.reshape(-1, 3)[valid_mask.reshape(-1)]
# 更新场景中心和尺度(供相机椎体使用)
if scene_center is not None and scene_scale is not None:
self.scene_center = np.array(scene_center, dtype=np.float32)
self.scene_scale = float(scene_scale)
else:
self.scene_center = depth_center
self.scene_scale = 20.0 / max(depth_extent, 1e-6)
# 显示点云
point_size = self.gui_controls['pointcloud_size'].value
self.point_cloud_handle = self.server.scene.add_point_cloud(
"/pointcloud",
points=points,
colors=colors,
point_size=point_size
)
print(f" 点云: {len(points)} 个点")
except Exception as e:
print(f"❌ 加载点云失败: {e}")
def _find_depth_file(self, original_data_dir: Path, frame_idx: int) -> Optional[Path]:
"""查找深度文件(支持合并的npz和单独的npy)"""
depth_dir = original_data_dir / "depth"
if not depth_dir.exists():
return None
# 检查合并的npz
merged_npz = depth_dir / "depth_merge.npz"
if merged_npz.exists():
return merged_npz
# 检查单独的npy
npy_file = depth_dir / f"frame_{frame_idx:03d}.npy"
if npy_file.exists():
return npy_file
return None
def _load_depth(self, depth_file: Path, frame_idx: int) -> np.ndarray:
"""加载深度数据"""
if depth_file.suffix == '.npz':
# 从合并的npz加载
data = np.load(depth_file)
frame_key = f"frame_{frame_idx:03d}"
return data[frame_key]
else:
# 从单独的npy加载
return np.load(depth_file)
def _load_rgb(self, rgb_path: Path) -> np.ndarray:
"""加载RGB图像"""
img = cv2.imread(str(rgb_path))
if img is None:
raise FileNotFoundError(f"Failed to load RGB image: {rgb_path}")
return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
def _get_world_info(self, frame_idx: int, source: str = "targets") -> Optional[Dict[str, np.ndarray]]:
"""从pred/target获取世界/相机信息"""
data = self.targets_npz if source == "targets" else self.predictions_npz
if data is None:
return None
if 'frames' in data:
frames = data['frames']
if frame_idx < len(frames):
entry = frames[frame_idx]
if hasattr(entry, 'item'):
try:
entry = entry.item()
except Exception:
pass
if isinstance(entry, dict) and 'world_info' in entry:
return entry['world_info']
# 兼容旧格式 world 张量
if 'world' in data:
world = data['world']
if hasattr(world, 'shape') and world.shape[0] > frame_idx and world.shape[-1] >= 7:
wp = world[frame_idx]
scene_center = world[frame_idx, 8:11] if world.shape[-1] >= 11 else np.zeros(3, dtype=np.float32)
return {
'camera_position': wp[:3],
'camera_quaternion': wp[3:7],
'scene_scale': float(wp[7]) if len(wp) > 7 else 1.0,
'scene_center': scene_center,
}
return None
def _visualize_cameras(self, frame_idx: int):
"""可视化相机椎体与RGB"""
for h in self.camera_frustum_handles:
h.remove()
self.camera_frustum_handles = []
if self.camera_rgb_handle is not None:
self.camera_rgb_handle.remove()
self.camera_rgb_handle = None
show_target = self.gui_controls.get('show_target_frustum', None)
show_pred = self.gui_controls.get('show_pred_frustum', None)
show_rgb = self.gui_controls.get('show_camera_rgb', None)
if show_target is None or show_pred is None or show_rgb is None:
return
if not (show_target.value or show_pred.value):
return
original_data_dir = None
rgb_image = None
if show_rgb.value and self.current_sample_path is not None:
original_data_dir = self.current_sample_path / "original_data"
if original_data_dir.exists():
rgb_path = original_data_dir / "rgb" / f"frame_{frame_idx:03d}.png"
if rgb_path.exists():
try:
rgb_image = self._load_rgb(rgb_path)
except Exception:
rgb_image = None
# FOV估计
fov = np.deg2rad(60.0)
aspect = 1.0
if rgb_image is not None:
h, w = rgb_image.shape[:2]
aspect = w / max(h, 1)
metadata_file = (self.current_sample_path / "original_data" / "metadata.json") if self.current_sample_path else None
fx = None
if metadata_file and metadata_file.exists():
try:
with open(metadata_file) as f:
metadata = json.load(f)
if 'camera' in metadata and 'K' in metadata['camera']:
K = np.array(metadata['camera']['K'], dtype=np.float32)
fx = K[0, 0]
except Exception:
fx = None
if fx is not None and w > 0:
fov = 2 * np.arctan(w / (2 * fx))
def add_frustum(world_info: Dict, name: str, color: Tuple[int, int, int]):
if world_info is None:
return
cam_pos = np.array(world_info.get('camera_position', np.zeros(3)), dtype=np.float32)
cam_quat = np.array(world_info.get('camera_quaternion', [0, 0, 0, 1]), dtype=np.float32) # xyzw
if cam_quat.shape[0] == 4:
wxyz = (float(cam_quat[3]), float(cam_quat[0]), float(cam_quat[1]), float(cam_quat[2]))
else:
wxyz = (1.0, 0.0, 0.0, 0.0)
# 将位置移到可视化坐标系(减去中心再按场景尺度缩放)
pos = (cam_pos - self.scene_center) * getattr(self, "scene_scale", 1.0)
frustum = self.server.scene.add_camera_frustum(
f"/{name}",
fov=fov,
aspect=aspect,
scale=2.0,
wxyz=wxyz,
position=pos,
image=rgb_image if show_rgb.value else None,
color=tuple(int(c) for c in color)
)
self.camera_frustum_handles.append(frustum)
if show_pred.value:
add_frustum(self._get_world_info(frame_idx, source="predictions"), "pred_camera_frustum", (100, 149, 237))
if show_target.value:
add_frustum(self._get_world_info(frame_idx, source="targets"), "gt_camera_frustum", (255, 99, 71))
def generate_superquadric_mesh(self, params, num_samples=25):
"""生成超二次曲面mesh"""
# 解析参数
epsilon = [params[1], params[2]]
scale = [params[3], params[4], params[5]]
translation = [params[6], params[7], params[8]]
rotation = [params[9], params[10], params[11]] if len(params) >= 12 else [0, 0, 0]
# 生成参数网格
eta = np.linspace(-np.pi/2, np.pi/2, num_samples)
omega = np.linspace(-np.pi, np.pi, num_samples)
vertices = []
faces = []
# 生成旋转矩阵
rot = Rotation.from_euler('ZYX', rotation)
rot_matrix = rot.as_matrix()
# 生成顶点
for i, e in enumerate(eta):
for j, w in enumerate(omega):
# 超二次曲面参数方程
cos_eta = np.sign(np.cos(e)) * np.abs(np.cos(e))**epsilon[0]
sin_eta = np.sign(np.sin(e)) * np.abs(np.sin(e))**epsilon[0]
cos_omega = np.sign(np.cos(w)) * np.abs(np.cos(w))**epsilon[1]
sin_omega = np.sign(np.sin(w)) * np.abs(np.sin(w))**epsilon[1]
# 局部坐标
x_local = scale[0] * cos_eta * cos_omega
y_local = scale[1] * cos_eta * sin_omega
z_local = scale[2] * sin_eta
# 应用旋转和平移
point_local = np.array([x_local, y_local, z_local])
point_global = rot_matrix @ point_local + np.array(translation)
vertices.append(point_global)
vertices = np.array(vertices)
# 生成面片
for i in range(num_samples - 1):
for j in range(num_samples - 1):
idx1 = i * num_samples + j
idx2 = i * num_samples + (j + 1) % num_samples
idx3 = (i + 1) * num_samples + j
idx4 = (i + 1) * num_samples + (j + 1) % num_samples
faces.append([idx1, idx2, idx3])
faces.append([idx2, idx4, idx3])
return vertices, np.array(faces)
def get_or_create_mesh(self, key: str, vertices, faces, color, opacity):
"""获取或创建mesh(对象池)"""
# 检查是否启用线框模式
wireframe_mode = self.gui_controls.get('wireframe_mode', None)
is_wireframe = wireframe_mode.value if wireframe_mode else False
# 线框模式:强制白色,完全不透明
if is_wireframe:
display_color = (255, 255, 255)
display_opacity = 1.0
else:
display_color = color
display_opacity = opacity
if key in self.mesh_handles_pool:
mesh = self.mesh_handles_pool[key]
mesh.vertices = vertices
mesh.vertex_colors = None
mesh.wireframe = is_wireframe
mesh.opacity = display_opacity
mesh.visible = True
# 更新颜色
color_array = np.array(display_color, dtype=np.uint8)
if color_array.max() <= 1.0:
color_array = (color_array * 255).astype(np.uint8)
mesh.color = tuple(color_array)
else:
# 创建新mesh
color_array = np.array(display_color, dtype=np.uint8)
if color_array.max() <= 1.0:
color_array = (color_array * 255).astype(np.uint8)
mesh = self.server.scene.add_mesh_simple(
name=f"/mesh_{key}",
vertices=vertices,
faces=faces,
color=tuple(color_array),
opacity=display_opacity,
wireframe=is_wireframe,
flat_shading=False
)
self.mesh_handles_pool[key] = mesh
return mesh
def clear_visualization(self):
"""清空可视化"""
# 隐藏所有mesh
for mesh in self.mesh_handles_pool.values():
mesh.visible = False
# 清空句柄列表
self.superquadric_handles = []
self.gt_superquadric_handles = []
# 删除点云
if self.point_cloud_handle is not None:
self.point_cloud_handle.remove()
self.point_cloud_handle = None
# 删除相机椎体/RGB
for handle in self.camera_frustum_handles:
handle.remove()
self.camera_frustum_handles = []
if self.camera_rgb_handle is not None:
self.camera_rgb_handle.remove()
self.camera_rgb_handle = None
# 删除坐标系
if self.coordinate_frame_handle is not None:
self.coordinate_frame_handle.remove()
self.coordinate_frame_handle = None
# 删除物体信息标签
for handle in self.object_label_handles:
try:
handle.remove()
except (KeyError, AttributeError):
# 标签可能已经被删除,忽略错误
pass
self.object_label_handles = []
def _on_capture_camera(self, event):
"""捕获当前相机视角"""
clients = list(self.server.get_clients().values())
if not clients:
print("⚠️ 没有连接的客户端")
self.gui_controls['export_status'].value = "错误: 没有连接的客户端"
return
# 获取第一个客户端的相机参数
client = clients[0]
self.export_camera_pos = np.array(client.camera.position)
self.export_camera_wxyz = np.array(client.camera.wxyz)
print(f"📸 已捕获相机视角: pos={self.export_camera_pos}, wxyz={self.export_camera_wxyz}")
self.gui_controls['export_status'].value = f"已捕获视角: {self.export_camera_pos}"
def _on_export_viser(self, event):
"""导出为viser场景文件(可交互)"""
if self.current_sample_path is None:
print("⚠️ 请先加载样本")
self.gui_controls['export_status'].value = "错误: 请先加载样本"
return
if self.original_frame_count <= 0:
print("⚠️ 没有帧可以导出")
self.gui_controls['export_status'].value = "错误: 没有帧可以导出"
return
# 在后台线程导出
threading.Thread(target=self._export_viser_thread, daemon=True).start()
def _export_viser_thread(self):
"""导出viser场景文件(带动画)"""
try:
print(f"\n{'='*60}")
print(f"💾 开始导出Viser场景")
print(f"{'='*60}")
# 获取当前客户端的相机参数
clients = list(self.server.get_clients().values())
camera_params = None
if clients:
client = clients[0]
cam_pos = client.camera.position
cam_lookat = client.camera.look_at
cam_up = client.camera.up_direction
# 生成viser URL参数格式
camera_params = (
f"&initialCameraPosition={cam_pos[0]:.3f},{cam_pos[1]:.3f},{cam_pos[2]:.3f}"
f"&initialCameraLookAt={cam_lookat[0]:.3f},{cam_lookat[1]:.3f},{cam_lookat[2]:.3f}"
f"&initialCameraUp={cam_up[0]:.3f},{cam_up[1]:.3f},{cam_up[2]:.3f}"
)
print(f" 📸 记录相机视角:")
print(f" 位置: {cam_pos}")
print(f" 朝向: {cam_lookat}")
print(f" 向上: {cam_up}")
# 获取FPS
fps = int(self.gui_controls['fps_slider'].value)
# 创建输出目录
output_dir = self.core_space_dir / "exports"
output_dir.mkdir(exist_ok=True)
# 生成文件名
selected_output = self.gui_controls['output_selector'].value
sample_idx = int(self.gui_controls['sample_slider'].value)
step_info = "unknown"
if "step" in selected_output:
try:
step_part = selected_output.split("_step")[1].split("_")[0]
step_info = f"step{step_part}"
except:
pass
timestamp = time.strftime("%Y%m%d_%H%M%S")
experiment_name = selected_output.split("_")[0]
output_file = output_dir / f"{experiment_name}_{step_info}_sample{sample_idx}_{timestamp}.viser"
print(f" 输出文件: {output_file}")
print(f" 帧数: {self.original_frame_count}")
print(f" FPS: {fps}")
# 获取场景序列化器
serializer = self.server.get_scene_serializer()
# 记录初始状态(第一帧)
self.visualize_frame(0)
serializer.insert_sleep(1.0 / fps)
# 逐帧更新并记录
for frame_idx in range(1, self.original_frame_count):
self.export_progress = int((frame_idx + 1) / self.original_frame_count * 100)
self.gui_controls['export_status'].value = f"导出中... {self.export_progress}%"
# 更新场景(会自动更新viser场景)
self.visualize_frame(frame_idx)
# 添加帧延迟
serializer.insert_sleep(1.0 / fps)
print(f" 记录帧 {frame_idx+1}/{self.original_frame_count}")
# 序列化并保存
data = serializer.serialize()
output_file.write_bytes(data)
print(f"✅ 场景导出完成: {output_file}")
print(f" 文件大小: {len(data) / 1024 / 1024:.2f} MB")
print(f"\n📖 查看方式:")
print(f" 1. 安装viser客户端: viser-build-client --output-dir viser-client/")
print(f" 2. 启动HTTP服务器: python -m http.server 8000")
# 生成完整URL(带相机参数)
base_url = f"http://localhost:8000/viser-client/?playbackPath=http://localhost:8000/exports/{output_file.name}"
if camera_params:
full_url = base_url + camera_params
print(f" 3. 打开浏览器(带相机视角):")
print(f" {full_url}")
else:
print(f" 3. 打开浏览器:")
print(f" {base_url}")
relative_path = output_file.relative_to(self.core_space_dir)
self.gui_controls['export_status'].value = f"完成! {relative_path}"
# 提供下载
clients = list(self.server.get_clients().values())
if clients:
clients[0].send_file_download(output_file.name, data)
print(f" 💾 已发送下载到浏览器")
except Exception as e:
print(f"❌ 导出失败: {e}")
import traceback
traceback.print_exc()
self.gui_controls['export_status'].value = f"错误: {str(e)}"
def _on_export_video(self, event):
"""导出视频"""
if self.is_exporting:
print("⚠️ 正在导出中,请等待...")
return
if self.current_sample_path is None:
print("⚠️ 请先加载样本")
self.gui_controls['export_status'].value = "错误: 请先加载样本"
return
if self.original_frame_count <= 0:
print("⚠️ 没有帧可以导出")
self.gui_controls['export_status'].value = "错误: 没有帧可以导出"
return
# 检查是否有连接的客户端
clients = list(self.server.get_clients().values())
if not clients:
print("⚠️ 没有连接的客户端")
self.gui_controls['export_status'].value = "错误: 请先在浏览器中打开viser界面"
return
# 每次导出都获取最新的相机视角(重要!)
# 无论之前是否捕获过,都使用当前最新的视角
client = clients[0]
self.export_camera_pos = np.array(client.camera.position)
self.export_camera_wxyz = np.array(client.camera.wxyz)
print(f"📸 使用当前视角: pos={self.export_camera_pos}, wxyz={self.export_camera_wxyz}")
# 在后台线程导出视频
threading.Thread(target=self._export_video_thread_screenshot, daemon=True).start()
def _export_video_thread(self):
"""视频导出线程"""
try:
self.is_exporting = True
self.gui_controls['export_status'].value = "正在导出..."
# 确保场景归一化参数已设置(通过可视化当前帧来初始化)
if not hasattr(self, 'scene_center') or self.scene_center is None:
print(" 初始化场景参数...")
self.visualize_frame(self.current_frame)
# 获取参数
fps = int(self.gui_controls['fps_slider'].value)
resolution = int(self.gui_controls['export_resolution'].value)
# 创建输出目录 - 放在core_space根目录下
output_dir = self.core_space_dir / "exports"
output_dir.mkdir(exist_ok=True)
# 提取实验信息
selected_output = self.gui_controls['output_selector'].value
sample_idx = int(self.gui_controls['sample_slider'].value)
# 从输出名称提取步数 (例如: 20251205_184253_step5_text2wave -> step5)
step_info = "unknown"
if "step" in selected_output:
try:
step_part = selected_output.split("_step")[1].split("_")[0]
step_info = f"step{step_part}"
except:
pass
# 生成输出文件名: {实验名}_{step}_sample{idx}_{timestamp}.mp4
timestamp = time.strftime("%Y%m%d_%H%M%S")
experiment_name = selected_output.split("_")[0] # 取日期部分作为实验名
output_file = output_dir / f"{experiment_name}_{step_info}_sample{sample_idx}_{timestamp}.mp4"
print(f"\n{'='*60}")
print(f"🎬 开始导出视频")
print(f"{'='*60}")
print(f" 实验: {selected_output}")
print(f" 样本: {sample_idx}")
print(f" 输出文件: {output_file}")
print(f" 帧数: {self.original_frame_count}")
print(f" FPS: {fps}")
print(f" 分辨率: {resolution}x{resolution}")
print(f" 相机位置: {self.export_camera_pos}")
print(f" 相机旋转: {self.export_camera_wxyz}")
# 尝试使用imageio(更好的兼容性),如果不可用则使用OpenCV
try:
import imageio
use_imageio = True
print(" 使用 imageio 进行视频编码(H.264)")
except ImportError:
use_imageio = False
print(" 使用 OpenCV 进行视频编码")
if use_imageio:
# 使用imageio-ffmpeg,生成高兼容性的H.264视频
# 注意:必须指定format='FFMPEG'来确保使用FFmpeg插件
writer = imageio.get_writer(
str(output_file),
format='FFMPEG',
mode='I',
fps=fps,
codec='libx264',
pixelformat='yuv420p', # 确保兼容性
output_params=['-crf', '18'] # H.264质量参数,18是高质量
)
# 渲染每一帧
for frame_idx in range(self.original_frame_count):
self.export_progress = int((frame_idx + 1) / self.original_frame_count * 100)
self.gui_controls['export_status'].value = f"导出中... {self.export_progress}%"
# 渲染帧
frame_image = self._render_frame_offline(
frame_idx,
resolution=resolution,
camera_pos=self.export_camera_pos,
camera_wxyz=self.export_camera_wxyz
)
# 写入视频(imageio需要RGB格式)
if frame_image is not None:
writer.append_data(frame_image)
print(f" 渲染帧 {frame_idx+1}/{self.original_frame_count}")
writer.close()
else:
# 使用OpenCV,尝试更兼容的编码器
# 尝试顺序: H264 -> avc1 -> X264 -> mp4v
codecs_to_try = [
('H264', 'H.264'),
('avc1', 'H.264 (AVC1)'),
('X264', 'X264'),
('mp4v', 'MPEG-4')
]
writer = None
used_codec = None
for codec_fourcc, codec_name in codecs_to_try:
try:
fourcc = cv2.VideoWriter_fourcc(*codec_fourcc)
test_writer = cv2.VideoWriter(
str(output_file),
fourcc,
fps,
(resolution, resolution)
)
if test_writer.isOpened():
writer = test_writer
used_codec = codec_name
print(f" 使用编码器: {codec_name}")
break
else:
test_writer.release()
except:
continue
if writer is None:
raise RuntimeError("无法初始化视频编码器")
# 渲染每一帧
for frame_idx in range(self.original_frame_count):
self.export_progress = int((frame_idx + 1) / self.original_frame_count * 100)
self.gui_controls['export_status'].value = f"导出中... {self.export_progress}%"
# 渲染帧
frame_image = self._render_frame_offline(
frame_idx,
resolution=resolution,
camera_pos=self.export_camera_pos,
camera_wxyz=self.export_camera_wxyz
)
# 写入视频(OpenCV需要BGR格式)
if frame_image is not None:
writer.write(cv2.cvtColor(frame_image, cv2.COLOR_RGB2BGR))
print(f" 渲染帧 {frame_idx+1}/{self.original_frame_count}")
writer.release()
print(f"✅ 视频导出完成: {output_file}")
relative_path = output_file.relative_to(self.core_space_dir)
self.gui_controls['export_status'].value = f"完成! {relative_path}"
except Exception as e:
print(f"❌ 导出视频失败: {e}")
import traceback
traceback.print_exc()
self.gui_controls['export_status'].value = f"错误: {str(e)}"
finally:
self.is_exporting = False
def _export_video_thread_screenshot(self):
"""视频导出线程(基于截图viser界面)"""
try:
self.is_exporting = True
self.gui_controls['export_status'].value = "正在导出..."
# 获取参数
fps = int(self.gui_controls['fps_slider'].value)
# 创建输出目录
output_dir = self.core_space_dir / "exports"
output_dir.mkdir(exist_ok=True)
# 提取实验信息并生成文件名
selected_output = self.gui_controls['output_selector'].value
sample_idx = int(self.gui_controls['sample_slider'].value)
step_info = "unknown"
if "step" in selected_output:
try:
step_part = selected_output.split("_step")[1].split("_")[0]
step_info = f"step{step_part}"
except:
pass
timestamp = time.strftime("%Y%m%d_%H%M%S")
experiment_name = selected_output.split("_")[0]
output_file = output_dir / f"{experiment_name}_{step_info}_sample{sample_idx}_{timestamp}.mp4"
print(f"\n{'='*60}")
print(f"🎬 开始导出视频(截图模式)")
print(f"{'='*60}")
print(f" 实验: {selected_output}")
print(f" 样本: {sample_idx}")
print(f" 输出文件: {output_file}")
print(f" 帧数: {self.original_frame_count}")
print(f" FPS: {fps}")
print(f" 方法: 直接截取Viser显示画面")
# 检查selenium
try:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time as time_module
use_selenium = True
print(" ✅ 使用 Selenium 截图")
except ImportError:
print(" ⚠️ Selenium未安装,使用逐帧渲染方法")
print(" 提示: pip install selenium")
use_selenium = False
if use_selenium:
# 使用Selenium截图方法
frames = []
# 配置Chrome
chrome_options = Options()
chrome_options.add_argument('--headless') # 无头模式
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--window-size=1920,1080')
try:
driver = webdriver.Chrome(options=chrome_options)
url = f"http://localhost:{self.port}"
driver.get(url)
print(f" 📱 打开浏览器: {url}")
# 等待页面加载
time_module.sleep(3)
# 逐帧截图
for frame_idx in range(self.original_frame_count):
self.export_progress = int((frame_idx + 1) / self.original_frame_count * 100)
self.gui_controls['export_status'].value = f"截图中... {self.export_progress}%"
# 通过GUI更新帧
self.gui_controls['frame_slider'].value = frame_idx
time_module.sleep(0.3) # 等待渲染
# 截图
screenshot = driver.get_screenshot_as_png()
img = cv2.imdecode(np.frombuffer(screenshot, np.uint8), cv2.IMREAD_COLOR)
frames.append(img)
print(f" 截图帧 {frame_idx+1}/{self.original_frame_count}")
driver.quit()
# 使用imageio写入视频
try:
import imageio
writer = imageio.get_writer(
str(output_file),
format='FFMPEG',
mode='I',
fps=fps,
codec='libx264',
pixelformat='yuv420p',
output_params=['-crf', '18']
)
for frame in frames:
# 转换BGR到RGB
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
writer.append_data(frame_rgb)
writer.close()
print(f"✅ 视频导出完成: {output_file}")
relative_path = output_file.relative_to(self.core_space_dir)
self.gui_controls['export_status'].value = f"完成! {relative_path}"
except ImportError:
# 使用OpenCV写入
height, width = frames[0].shape[:2]
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter(str(output_file), fourcc, fps, (width, height))
for frame in frames:
writer.write(frame)
writer.release()
print(f"✅ 视频导出完成: {output_file}")
relative_path = output_file.relative_to(self.core_space_dir)
self.gui_controls['export_status'].value = f"完成! {relative_path}"
except Exception as e:
print(f"❌ Selenium截图失败: {e}")
import traceback
traceback.print_exc()
# 回退到渲染方法
use_selenium = False
if not use_selenium:
# 回退到原来的渲染方法
print(" 使用PyRender离线渲染...")
self._export_video_thread()
return
except Exception as e:
print(f"❌ 导出视频失败: {e}")
import traceback
traceback.print_exc()
self.gui_controls['export_status'].value = f"错误: {str(e)}"
finally:
self.is_exporting = False
def _render_frame_offline(self, frame_idx: int, resolution: int,
camera_pos: np.ndarray, camera_wxyz: np.ndarray) -> Optional[np.ndarray]:
"""离线渲染一帧"""
# 尝试导入pyrender
try:
import pyrender
import trimesh
except ImportError:
if frame_idx == 0:
print("⚠️ pyrender未安装,使用简化渲染...")
print(" 提示: 安装 pyrender 以获得完整3D渲染")
print(" pip install pyrender trimesh")
return self._render_frame_simple(frame_idx, resolution)
# 设置PyRender使用离屏渲染(EGL或OSMesa)
# 优先尝试EGL,如果失败则尝试OSMesa
for platform in ['egl', 'osmesa']:
try:
os.environ['PYOPENGL_PLATFORM'] = platform
# 创建场景 - 设置深蓝色背景(与viser一致)
scene = pyrender.Scene(
ambient_light=[0.3, 0.3, 0.3],
bg_color=[13/255, 13/255, 38/255, 1.0] # 深蓝色背景
)
# 获取GUI参数
show_generated = self.gui_controls['show_generated'].value
show_gt = self.gui_controls['show_gt'].value
generated_color = np.array(self.gui_controls['generated_color'].value) / 255.0
gt_color = np.array(self.gui_controls['gt_color'].value) / 255.0
mesh_resolution = int(self.gui_controls['mesh_resolution'].value)
mesh_count = 0
# 添加生成的超二次曲面
# 重要:需要应用场景归一化,使物体坐标与相机坐标在同一空间
if show_generated:
predictions = self._extract_predictions(frame_idx)
if predictions is not None:
for obj_idx, obj_params in enumerate(predictions):
if obj_params[0] > 0.5:
# 复制参数并应用场景归一化到平移部分
obj_params_normalized = obj_params.copy()
# 归一化平移: (translation - scene_center) * scene_scale
translation = obj_params[6:9]
translation_normalized = (translation - self.scene_center) * self.scene_scale
obj_params_normalized[6:9] = translation_normalized
# 归一化缩放: scale * scene_scale
obj_params_normalized[3:6] = obj_params[3:6] * self.scene_scale
vertices, faces = self.generate_superquadric_mesh(
obj_params_normalized, num_samples=mesh_resolution
)
if frame_idx == 0 and obj_idx == 0:
print(f" 物体原始位置: {translation}")
print(f" 物体归一化位置: {translation_normalized}")
print(f" 场景中心: {self.scene_center}, 缩放: {self.scene_scale}")
mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
# 为每个顶点设置颜色 (N, 4) - RGBA
num_verts = len(vertices)
vertex_colors = np.zeros((num_verts, 4), dtype=np.uint8)
vertex_colors[:, :3] = (generated_color * 255).astype(np.uint8) # RGB
vertex_colors[:, 3] = 255 # 完全不透明
mesh.visual.vertex_colors = vertex_colors
# 创建PyRender材质
material = pyrender.MetallicRoughnessMaterial(
baseColorFactor=list(generated_color) + [1.0],
metallicFactor=0.3,
roughnessFactor=0.7
)
mesh_obj = pyrender.Mesh.from_trimesh(mesh, material=material)
scene.add(mesh_obj)
mesh_count += 1
# 添加GT超二次曲面
if show_gt:
targets = self._extract_targets(frame_idx)
if targets is not None:
for obj_idx, obj_params in enumerate(targets):
if obj_params[0] > 0.5:
# 复制参数并应用场景归一化
obj_params_normalized = obj_params.copy()
translation = obj_params[6:9]
translation_normalized = (translation - self.scene_center) * self.scene_scale
obj_params_normalized[6:9] = translation_normalized
obj_params_normalized[3:6] = obj_params[3:6] * self.scene_scale
vertices, faces = self.generate_superquadric_mesh(
obj_params_normalized, num_samples=mesh_resolution
)
mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
# 为每个顶点设置颜色 (N, 4) - RGBA
num_verts = len(vertices)
vertex_colors = np.zeros((num_verts, 4), dtype=np.uint8)
vertex_colors[:, :3] = (gt_color * 255).astype(np.uint8) # RGB
vertex_colors[:, 3] = 255 # 完全不透明
mesh.visual.vertex_colors = vertex_colors
# 创建PyRender材质
material = pyrender.MetallicRoughnessMaterial(
baseColorFactor=list(gt_color) + [0.5],
metallicFactor=0.3,
roughnessFactor=0.7
)
mesh_obj = pyrender.Mesh.from_trimesh(mesh, material=material)
scene.add(mesh_obj)
mesh_count += 1
if frame_idx == 0:
print(f" 场景中添加了 {mesh_count} 个mesh")
# 设置相机
# Viser使用的是wxyz四元数,需要转换为PyRender的变换矩阵
from scipy.spatial.transform import Rotation as R
# wxyz -> xyzw for scipy
rot = R.from_quat([camera_wxyz[1], camera_wxyz[2], camera_wxyz[3], camera_wxyz[0]])
rot_matrix = rot.as_matrix()
# PyRender使用OpenGL坐标系
# 构建相机变换矩阵
camera_pose = np.eye(4)
camera_pose[:3, :3] = rot_matrix
camera_pose[:3, 3] = camera_pos
if frame_idx == 0:
print(f" 相机位置: {camera_pos}")
print(f" 相机旋转矩阵:\n{rot_matrix}")
# 创建透视相机
camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.0)
scene.add(camera, pose=camera_pose)
# 添加多个光源以确保场景被充分照亮
# 主光源跟随相机
light1 = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=2.0)
scene.add(light1, pose=camera_pose)
# 额外的环境光源
light2 = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=1.0)
light_pose = np.eye(4)
light_pose[:3, 3] = [10, 10, 10]
scene.add(light2, pose=light_pose)
# 渲染
renderer = pyrender.OffscreenRenderer(resolution, resolution)
color, depth = renderer.render(scene)
renderer.delete()
# 首次成功时打印使用的平台和渲染统计
if frame_idx == 0:
print(f" ✅ 使用 {platform.upper()} 进行离线渲染")
print(f" 渲染输出范围: [{color.min()}, {color.max()}]")
print(f" 深度范围: [{depth.min()}, {depth.max()}]")
return color
except Exception as e:
if platform == 'osmesa':
# 两种方式都失败了
if frame_idx == 0:
print(f"❌ PyRender渲染失败 (EGL和OSMesa都不可用): {e}")
print(" 使用简化渲染模式...")
return self._render_frame_simple(frame_idx, resolution)
# EGL失败,继续尝试OSMesa
continue
# 不应该到达这里,但以防万一
return self._render_frame_simple(frame_idx, resolution)
def _render_frame_simple(self, frame_idx: int, resolution: int) -> np.ndarray:
"""简化渲染(纯色背景 + 文字提示)"""
# 创建空白图像
image = np.full((resolution, resolution, 3), [13, 13, 38], dtype=np.uint8)
# 添加文字
text = f"Frame {frame_idx + 1}/{self.original_frame_count}"
font = cv2.FONT_HERSHEY_SIMPLEX
text_size = cv2.getTextSize(text, font, 1, 2)[0]
text_x = (resolution - text_size[0]) // 2
text_y = (resolution + text_size[1]) // 2
cv2.putText(image, text, (text_x, text_y), font, 1, (255, 255, 255), 2)
# 添加提示信息
hint = "Install pyrender for full rendering"
hint_size = cv2.getTextSize(hint, font, 0.5, 1)[0]
hint_x = (resolution - hint_size[0]) // 2
hint_y = text_y + 40
cv2.putText(image, hint, (hint_x, hint_y), font, 0.5, (150, 150, 150), 1)
return image
def run(self, auto_open_browser: bool = True):
"""运行可视化器"""
print("\n" + "="*60)
print("🎨 WaveGen 训练可视化器")
print("="*60)
print(f"📁 监控目录: {self.core_space_dir}")
print(f"🌐 Web界面: http://localhost:{self.port}")
print("="*60)
print("\n💡 提示:")
print(" - 如果页面空白一直加载,请刷新浏览器 (Ctrl+Shift+R)")
print(" - 建议使用 Chrome 或 Firefox 浏览器")
print("\n按 Ctrl+C 退出\n")
# 自动打开浏览器
if auto_open_browser:
url = f"http://localhost:{self.port}"
print(f"🌐 正在打开浏览器: {url}")
try:
webbrowser.open(url)
except Exception as e:
print(f"⚠️ 无法自动打开浏览器: {e}")
print(f" 请手动访问: {url}")
try:
while True:
time.sleep(0.1)
except KeyboardInterrupt:
print("\n👋 再见!")
print("正在关闭服务器...")
# 清理资源
try:
for mesh in self.mesh_handles_pool.values():
mesh.remove()
except:
pass
def main():
"""主函数"""
import argparse
parser = argparse.ArgumentParser(description="WaveGen训练结果可视化工具")
parser.add_argument(
'--core-space',
type=str,
default='core_space',
help='core_space目录路径(默认: ./core_space)'
)
parser.add_argument(
'--port',
type=int,
default=8080,
help='Viser服务器端口(默认: 8080,如果被占用会自动尝试下一个端口)'
)
parser.add_argument(
'--no-browser',
action='store_true',
help='不自动打开浏览器'
)
args = parser.parse_args()
# 创建并运行可视化器
visualizer = TrainingVisualizer(core_space_dir=args.core_space, port=args.port)
visualizer.run(auto_open_browser=not args.no_browser)
if __name__ == "__main__":
main()