# depthsplat / src /model /model_wrapper.py
# Yeqing0814's picture
# Upload folder using huggingface_hub
# a6dd040 verified
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Protocol, runtime_checkable
import moviepy.editor as mpy
import torch
# import wandb
import swanlab as wandb
from einops import pack, rearrange, repeat, einsum
from jaxtyping import Float
from pytorch_lightning import LightningModule
# from pytorch_lightning.loggers.wandb import WandbLogger
from swanlab.integration.pytorch_lightning import SwanLabLogger
from pytorch_lightning.utilities import rank_zero_only
from torch import Tensor, nn, optim
import numpy as np
import json
import os
import time
from tqdm import tqdm
import torch.nn.functional as F
import math
from ..dataset.data_module import get_data_shim
from ..dataset.types import BatchedExample
from ..dataset import DatasetCfg
from ..evaluation.metrics import compute_lpips, compute_psnr, compute_ssim
from ..global_cfg import get_cfg
from ..loss import Loss
from ..misc.benchmarker import Benchmarker
from ..misc.image_io import prep_image, save_image, save_video
from ..misc.LocalLogger import LOG_PATH, LocalLogger
from ..misc.step_tracker import StepTracker
from ..visualization.annotation import add_label
from ..visualization.camera_trajectory.interpolation import (
interpolate_extrinsics,
interpolate_intrinsics,
)
from ..visualization.camera_trajectory.wobble import (
generate_wobble,
generate_wobble_transformation,
)
from ..visualization.color_map import apply_color_map_to_image
from ..visualization.layout import add_border, hcat, vcat
from ..visualization.validation_in_3d import render_cameras, render_projections
from .decoder.decoder import Decoder, DepthRenderingMode, DecoderOutput
from .encoder import Encoder
from .encoder.visualization.encoder_visualizer import EncoderVisualizer
from src.visualization.vis_depth import viz_depth_tensor
from PIL import Image
from ..misc.stablize_camera import render_stabilization_path
from .ply_export import save_gaussian_ply
from .encoder.encoder_volsplat import print_mem
import MinkowskiEngine as ME
### testing helpers ###
from ..test.visual import save_output_images
from ..test.export_ply import export_raw_points_step
# import debugpy
# try:
# # 5678 is the default attach port in the VS Code debug configurations. Unless a host and port are specified, host defaults to 127.0.0.1
# debugpy.listen(("localhost", 9326))
# print("Waiting for debugger attach")
# debugpy.wait_for_client()
# except Exception as e:
# pass
@dataclass
class OptimizerCfg:
    """Hyper-parameters for the optimizer and learning-rate schedule."""

    lr: float  # base learning rate for newly initialized parameters
    warm_up_steps: int  # number of warm-up steps for the LR scheduler
    lr_monodepth: float  # separate LR for "pretrained" parameters; <= 0 uses a single param group
    weight_decay: float  # AdamW weight decay
@dataclass
class TestCfg:
    """Flags controlling what `test_step` saves and computes."""

    output_path: Path  # where metric/score JSON files are written (overwritten from the global cfg in test_step)
    compute_scores: bool  # accumulate PSNR/SSIM/LPIPS and timing stats
    save_image: bool  # save rendered target views as PNGs
    save_video: bool  # save rendered target views as an mp4
    eval_time_skip_steps: int  # warm-up batches excluded from timing stats
    save_gt_image: bool  # also save ground-truth images next to renders
    save_input_images: bool  # save the context (input) views
    save_depth: bool  # save colorized depth visualizations
    save_depth_concat_img: bool  # save a single strip of images + depths
    save_depth_npy: bool  # also dump raw depth arrays as .npy
    save_gaussian: bool  # export the predicted Gaussians as a .ply
    render_chunk_size: int | None  # render target views in chunks of this many; None renders all at once
    stablize_camera: bool  # smooth the target camera path before rendering
    stab_camera_kernel: int  # smoothing kernel size for camera stabilization
@dataclass
class TrainCfg:
    """Flags controlling training-time losses, logging, and validation eval."""

    depth_mode: DepthRenderingMode | None  # depth rendering mode passed to the decoder; None disables depth rendering
    extended_visualization: bool  # also render the exaggerated interpolation video
    print_log_every_n_steps: int  # console-log interval during training
    eval_model_every_n_val: int  # run the full test-set eval every N validation epochs
    eval_data_length: int  # maximum number of batches in the full eval
    eval_deterministic: bool  # additionally evaluate the deterministic encoder output
    eval_time_skip_steps: int  # warm-up batches excluded from eval timing
    eval_save_model: bool  # back up the checkpoint before running the full eval
    l1_loss: bool  # forwarded to the "mse" loss as l1_loss
    intermediate_loss_weight: float  # geometric decay weight for intermediate-depth losses
    no_viz_video: bool  # skip video rendering during validation
    viz_depth: bool  # include depth metrics (abs_rel/rmse/a1) in the full eval dict
    forward_depth_only: bool  # skip Gaussian rendering; depth prediction only
    train_ignore_large_loss: float  # forwarded to the "mse" loss as clamp_large_error
    no_log_projections: bool  # skip projection/camera visualizations during validation
@runtime_checkable
class TrajectoryFn(Protocol):
    """Callable mapping interpolation parameters t to per-frame cameras."""

    def __call__(
        self,
        t: Float[Tensor, " t"],
    ) -> tuple[
        Float[Tensor, "batch view 4 4"],  # extrinsics
        Float[Tensor, "batch view 3 3"],  # intrinsics
    ]:
        ...
class ModelWrapper(LightningModule):
    """Lightning module bundling the encoder (context views -> 3D Gaussians),
    the decoder (Gaussians -> rendered target views), and the losses, and
    implementing the training / validation / test loops.
    """

    logger: Optional[SwanLabLogger]
    encoder: nn.Module
    encoder_visualizer: Optional[EncoderVisualizer]
    decoder: Decoder
    losses: nn.ModuleList
    optimizer_cfg: OptimizerCfg
    test_cfg: TestCfg
    train_cfg: TrainCfg
    step_tracker: StepTracker | None
    eval_data_cfg: Optional[DatasetCfg | None]
def __init__(
self,
optimizer_cfg: OptimizerCfg,
test_cfg: TestCfg,
train_cfg: TrainCfg,
encoder: Encoder,
encoder_visualizer: Optional[EncoderVisualizer],
decoder: Decoder,
losses: list[Loss],
step_tracker: StepTracker | None,
eval_data_cfg: Optional[DatasetCfg | None] = None,
) -> None:
super().__init__()
self.optimizer_cfg = optimizer_cfg
self.test_cfg = test_cfg
self.train_cfg = train_cfg
self.step_tracker = step_tracker
self.eval_data_cfg = eval_data_cfg
# Set up the model.
self.encoder = encoder
self.encoder_visualizer = encoder_visualizer
self.decoder = decoder
self.data_shim = get_data_shim(self.encoder)
self.losses = nn.ModuleList(losses)
# This is used for testing.
self.benchmarker = Benchmarker()
self.eval_cnt = 0
if self.test_cfg.compute_scores:
self.test_step_outputs = {}
self.time_skip_steps_dict = {"encoder": 0, "decoder": 0}
# This is used for testing.
self.benchmarker = Benchmarker()
self.eval_cnt = 0
if self.test_cfg.compute_scores:
self.test_step_outputs = {}
self.time_skip_steps_dict = {"encoder": 0, "decoder": 0}
# MOD: debug开关(默认False)。要启用参数更新检查,在训练脚本里设置 model._check_param_updates = True
self._check_param_updates = False
# MOD: 保存训练 step 开始时的参数快照(仅在_check_param_updates=True时填充)
self._before_params_snapshot = None
def training_step(self, batch, batch_idx):
batch: BatchedExample = self.data_shim(batch)
_, _, _, h, w = batch["target"]["image"].shape
_, views, _, _, _ = batch["context"]["image"].shape
# MOD: 如果开启了参数更新检查,保存当前参数快照(用于在 optimizer.step 后对比)
if getattr(self, "_check_param_updates", False):
try:
self._before_params_snapshot = self.snapshot_params()
except Exception as e:
print("[DEBUG] failed to snapshot params:", e)
#######打印场景ID########
print(f"Training step{self.global_step},Number of images:{views}:scene IDs:{batch['scene']}")
# if self.global_step < 5000:
# ues_voxelnet = True
# else:
# ues_voxelnet = True
print_mem("before encoder")
# Run the model. #ues_voxelnet=ues_voxelnet
gaussians = self.encoder(
batch["context"], self.global_step, False, scene_names=batch["scene"]
)
print_mem("after encoder")
if isinstance(gaussians, dict) and len(gaussians) == 2:
supervise_intermediate_depth = False
pred_depths = gaussians["depths"]
gaussians = gaussians["gaussians"]
if isinstance(gaussians, dict) and len(gaussians) == 3:
supervise_intermediate_depth = True
pred_depths = gaussians["depths"]
intermediate_gaussians = gaussians["intermediate_gaussians"]
gaussians = gaussians["gaussians"]
# ------------------ < 新增:检查 pred_depths 是否全为 0,并中断训练 > -----------------
try:
with torch.no_grad():
pd = pred_depths.detach()
# 期望 pred_depths shape = [B, V, H, W]
B = pd.shape[0]
# 若存在 NaN 或 非数值项,先转换为 0(避免 sum 产生 NaN)
pd = torch.nan_to_num(pd, nan=0.0, posinf=0.0, neginf=0.0)
per_sample_sum = pd.abs().view(B, -1).sum(dim=1)
zero_mask = (per_sample_sum == 0)
if zero_mask.any():
zero_idx = torch.nonzero(zero_mask, as_tuple=False).squeeze(1)
# 处理 single-element 情况,统一为 list[int]
if zero_idx.numel() == 1:
zero_idx = [int(zero_idx.item())]
else:
zero_idx = [int(i.item()) for i in zero_idx]
# 提取对应的 scene id(兼容 list/tuple/tensor/string 等多种形式)
scene_ids = []
for i in zero_idx:
try:
s = batch["scene"][i]
except Exception:
# 如果索引失败,跳过
s = None
# 将各种类型转换为可打印的 python 值
try:
if isinstance(s, torch.Tensor):
# 若是单值 tensor
if s.numel() == 1:
scene_ids.append(s.item())
else:
# 向量 tensor,转为 list
scene_ids.append(s.cpu().tolist())
else:
scene_ids.append(s)
except Exception:
scene_ids.append(str(s))
# 多卡/分布式训练中只由 rank0 打印,避免重复日志
is_rank0 = True
try:
is_rank0 = getattr(self.trainer, "global_rank", getattr(self, "global_rank", 0)) == 0
except Exception:
pass
if is_rank0:
print(f"[STOPPING] pred_depths all-zero detected for batch indices {zero_idx}; scene IDs: {scene_ids}")
# 方式 A:设置 trainer 的停止标志(较为优雅)
try:
self.trainer.should_stop = True
except Exception:
pass
# 方式 B:立即抛出异常以立刻中断训练(根据你的需求,选择保留或注释)
raise RuntimeError(f"Stopping training because pred_depths are all zero for scenes: {scene_ids}")
except Exception as e:
# 如果检测逻辑本身出错,则打印 debug 信息并继续训练(以避免无意中挂起训练)
print("[DEBUG] pred_depths zero-check failed or triggered stop. info:", e)
# 如果你希望检测失败时也中断训练,可以在这里改为 raise
# ------------------ < 新增结束 > ------------------
print_mem("before decoder")
#############设置中间深度监督###################
if gaussians.means.size(0) != batch["target"]["extrinsics"].size(0):
supervise_intermediate_depth = True
assert gaussians.means.size(0) % batch["target"]["extrinsics"].size(0) == 0
num_depths = gaussians.means.size(0) // batch["target"]["extrinsics"].size(
0
)
# add loss to intermediate depth predictions
target_extrinsics = torch.cat(
[batch["target"]["extrinsics"]] * num_depths, dim=0
)
target_intrinsics = torch.cat(
[batch["target"]["intrinsics"]] * num_depths, dim=0
)
target_near = torch.cat([batch["target"]["near"]] * num_depths, dim=0)
target_far = torch.cat([batch["target"]["far"]] * num_depths, dim=0)
#[2B, V, 3, 256, 448]
output_all = self.decoder.forward(
gaussians,
target_extrinsics,
target_intrinsics,
target_near,
target_far,
(h, w),
depth_mode=self.train_cfg.depth_mode,
)
# split
batch_size = batch["target"]["extrinsics"].size(0)
# order: intermediate depth, final depth
output_intermediate = DecoderOutput(
color=output_all.color[:-batch_size], #[B, V, 3, H, W]
depth=(
output_all.depth[:-batch_size]
if output_all.depth is not None
else None
),
)
output = DecoderOutput(
color=output_all.color[-batch_size:], #[B, V, 3, H, W]
depth=(
output_all.depth[-batch_size:]
if output_all.depth is not None
else None
),
)
else:
output = self.decoder.forward(
gaussians,
batch["target"]["extrinsics"],
batch["target"]["intrinsics"],
batch["target"]["near"],
batch["target"]["far"],
(h, w),
depth_mode=self.train_cfg.depth_mode,
)
############################
if supervise_intermediate_depth:
output_intermediate = self.decoder.forward(
intermediate_gaussians,
batch["target"]["extrinsics"],
batch["target"]["intrinsics"],
batch["target"]["near"],
batch["target"]["far"],
(h, w),
depth_mode=self.train_cfg.depth_mode,
)
print_mem("after decoder")
target_gt = batch["target"]["image"]
# Compute metrics.
psnr_probabilistic = compute_psnr(
rearrange(target_gt, "b v c h w -> (b v) c h w"),
rearrange(output.color, "b v c h w -> (b v) c h w"),
)
#把图片打印出来看看效果
# save_output_images(output.color, save_dir="/mnt/pfs/users/wangweijie/yeqing/BEV-Splat/outputs/out_image", prefix="prob_output")
# save_output_images(target_gt, "/mnt/pfs/users/wangweijie/yeqing/BEV-Splat/outputs/rgb_image")
self.log("train/psnr", psnr_probabilistic.mean())
# Compute and log loss.
total_loss = 0
valid_depth_mask = None
for loss_fn in self.losses:
if loss_fn.name == "mse":
loss = loss_fn.forward(
output,
batch,
gaussians,
self.global_step,
l1_loss=self.train_cfg.l1_loss,
clamp_large_error=self.train_cfg.train_ignore_large_loss,
valid_depth_mask=valid_depth_mask,
)
else:
loss = loss_fn.forward(
output,
batch,
gaussians,
self.global_step,
valid_depth_mask=valid_depth_mask,
)
self.log(f"loss/{loss_fn.name}", loss)
total_loss = total_loss + loss
# color loss on intermediate output
if supervise_intermediate_depth:
for loss_fn in self.losses:
batch_size = batch["target"]["extrinsics"].size(0)
if output_intermediate.color.size(0) != batch_size:
assert output_intermediate.color.size(0) % batch_size == 0
num_intermediate = output_intermediate.color.size(0) // batch_size
intermediate_loss = 0
for i in range(num_intermediate):
curr_output = DecoderOutput(
color=output_intermediate.color[
(batch_size * i) : (batch_size * (i + 1))
],
depth=(
output_intermediate.depth[
(batch_size * i) : (batch_size * (i + 1))
]
if output_intermediate.depth is not None
else None
),
)
curr_loss_weight = self.train_cfg.intermediate_loss_weight ** (
num_intermediate - i
)
if loss_fn.name == "mse":
loss = loss_fn.forward(
curr_output,
batch,
gaussians,
self.global_step,
l1_loss=self.train_cfg.l1_loss,
clamp_large_error=self.train_cfg.train_ignore_large_loss,
valid_depth_mask=valid_depth_mask,
)
else:
loss = loss_fn.forward(
curr_output,
batch,
gaussians,
self.global_step,
valid_depth_mask=valid_depth_mask,
)
intermediate_loss = intermediate_loss + curr_loss_weight * loss
self.log(f"loss/{loss_fn.name}_intermediate", intermediate_loss)
total_loss = total_loss + intermediate_loss
else:
if loss_fn.name == "mse":
loss = loss_fn.forward(
output_intermediate,
batch,
gaussians,
self.global_step,
l1_loss=self.train_cfg.l1_loss,
clamp_large_error=self.train_cfg.train_ignore_large_loss,
valid_depth_mask=valid_depth_mask,
)
else:
loss = loss_fn.forward(
output_intermediate,
batch,
gaussians,
self.global_step,
valid_depth_mask=valid_depth_mask,
)
self.log(f"loss/{loss_fn.name}_intermediate", loss)
total_loss = (
total_loss + self.train_cfg.intermediate_loss_weight * loss
)
self.log("loss/total", total_loss)
if (
self.global_rank == 0
and self.global_step % self.train_cfg.print_log_every_n_steps == 0
):
print(
f"train step {self.global_step}; "
f"scene = {[x[:20] for x in batch['scene']]}; "
f"context = {batch['context']['index'].tolist()}; "
f"bound = [{batch['context']['near'].detach().cpu().numpy().mean()} "
f"{batch['context']['far'].detach().cpu().numpy().mean()}]; "
f"loss = {total_loss:.6f}"
)
self.log("info/near", batch["context"]["near"].detach().cpu().numpy().mean())
self.log("info/far", batch["context"]["far"].detach().cpu().numpy().mean())
self.log("info/global_step", self.global_step) # hack for ckpt monitor
# Tell the data loader processes about the current step.
if self.step_tracker is not None:
self.step_tracker.set_step(self.global_step)
if self.global_step == 5 and self.global_rank == 0:
os.system("nvidia-smi")
return total_loss
    def test_step(self, batch, batch_idx):
        """Run one test step: encode context views, render the target views,
        and save the configured artifacts (input images, depth maps,
        Gaussians, renders, videos) and metrics.

        Assumes batch size 1.
        """
        batch: BatchedExample = self.data_shim(batch)
        b, v, _, h, w = batch["target"]["image"].shape
        assert b == 1
        pred_depths = None
        # save input views for visualization
        if self.test_cfg.save_input_images:
            (scene,) = batch["scene"]
            self.test_cfg.output_path = os.path.join(get_cfg()["output_dir"], "metrics")
            path = Path(get_cfg()["output_dir"])
            input_images = batch["context"]["image"][0]  # [V, 3, H, W]
            index = batch["context"]["index"][0]
            for idx, color in zip(index, input_images):
                save_image(color, path / "images" / scene / f"color/input_{idx:0>6}.png")
        # save depth vis — the encoder fills this dict with intermediate
        # tensors when it is provided.
        if self.test_cfg.save_depth or self.test_cfg.save_gaussian:
            visualization_dump = {}
        else:
            visualization_dump = None
        # Render Gaussians.
        with self.benchmarker.time("encoder"):
            gaussians = self.encoder(
                batch["context"],
                self.global_step,
                deterministic=False,
                visualization_dump=visualization_dump,
            )
            # The encoder may return a dict carrying predicted depths
            # alongside the Gaussians.
            if isinstance(gaussians, dict):
                pred_depths = gaussians["depths"]
                if "depth" in batch["context"]:
                    depth_gt = batch["context"]["depth"]
                gaussians = gaussians["gaussians"]
        # save gaussians
        if self.test_cfg.save_gaussian:
            scene = batch["scene"][0]
            save_path = Path(get_cfg()['output_dir']) / 'gaussians' / (scene + '.ply')
            save_gaussian_ply(gaussians, visualization_dump, batch, save_path)
        if not self.train_cfg.forward_depth_only:
            with self.benchmarker.time("decoder", num_calls=v):
                camera_poses = batch["target"]["extrinsics"]
                # Optionally smooth the target camera path for stabilized
                # video rendering.
                if self.test_cfg.stablize_camera:
                    stable_poses = render_stabilization_path(
                        camera_poses[0].detach().cpu().numpy(),
                        k_size=self.test_cfg.stab_camera_kernel,
                    )
                    # Re-append the homogeneous bottom row [0, 0, 0, 1].
                    stable_poses = list(
                        map(
                            lambda x: np.concatenate(
                                (x, np.array([[0.0, 0.0, 0.0, 1.0]])), axis=0
                            ),
                            stable_poses,
                        )
                    )
                    stable_poses = torch.from_numpy(np.stack(stable_poses, axis=0)).to(
                        camera_poses
                    )
                    camera_poses = stable_poses.unsqueeze(0)
                if self.test_cfg.render_chunk_size is not None:
                    # Render the target views in chunks to bound peak memory.
                    chunk_size = self.test_cfg.render_chunk_size
                    num_chunks = math.ceil(camera_poses.shape[1] / chunk_size)
                    output = None
                    for i in range(num_chunks):
                        start = chunk_size * i
                        end = chunk_size * (i + 1)
                        render_intrinsics = batch["target"]["intrinsics"]
                        render_near = batch["target"]["near"]
                        render_far = batch["target"]["far"]
                        curr_output = self.decoder.forward(
                            gaussians,
                            camera_poses[:, start:end],
                            render_intrinsics[:, start:end],
                            render_near[:, start:end],
                            render_far[:, start:end],
                            (h, w),
                            depth_mode=None,
                        )
                        if i == 0:
                            output = curr_output
                        else:
                            # ignore depth
                            output.color = torch.cat(
                                (output.color, curr_output.color), dim=1
                            )
                else:
                    output = self.decoder.forward(
                        gaussians,
                        camera_poses,
                        batch["target"]["intrinsics"],
                        batch["target"]["near"],
                        batch["target"]["far"],
                        (h, w),
                        depth_mode=None,
                    )
        (scene,) = batch["scene"]
        self.test_cfg.output_path = os.path.join(get_cfg()["output_dir"], "metrics")
        path = Path(get_cfg()["output_dir"])
        # save depth
        if self.test_cfg.save_depth:
            if self.train_cfg.forward_depth_only:
                depth = pred_depths[0].cpu().detach()  # [V, H, W]
            else:
                depth = (
                    visualization_dump["depth"][0, :, :, :, 0, 0].cpu().detach()
                )  # [V, H, W]
            index = batch["context"]["index"][0]
            if self.test_cfg.save_depth_concat_img:
                # concat (img0, img1, depth0, depth1)
                image = batch['context']['image'][0]  # [V, 3, H, W] in [0,1]
                image = rearrange(image, "b c h w -> h (b w) c")  # [H, VW, 3]
                image_concat = (image.detach().cpu().numpy() * 255).astype(np.uint8)  # [H, VW, 3]
                depth_concat = []
            for idx, depth_i in zip(index, depth):
                # Visualize inverse depth (disparity) as a color image.
                depth_viz = viz_depth_tensor(
                    1.0 / depth_i, return_numpy=True
                )  # [H, W, 3]
                if self.test_cfg.save_depth_concat_img:
                    depth_concat.append(depth_viz)
                save_path = path / "images" / scene / "depth" / f"{idx:0>6}.png"
                save_dir = os.path.dirname(save_path)
                os.makedirs(save_dir, exist_ok=True)
                Image.fromarray(depth_viz).save(save_path)
                # save depth as npy
                if self.test_cfg.save_depth_npy:
                    depth_npy = depth_i.detach().cpu().numpy()
                    save_path = path / "images" / scene / "depth" / f"{idx:0>6}.npy"
                    save_dir = os.path.dirname(save_path)
                    os.makedirs(save_dir, exist_ok=True)
                    np.save(save_path, depth_npy)
            if self.test_cfg.save_depth_concat_img:
                depth_concat = np.concatenate(depth_concat, axis=1)  # [H, VW, 3]
                concat = np.concatenate((image_concat, depth_concat), axis=0)  # [2H, VW, 3]
                save_path = path / "images" / scene / "depth" / f"img_depth_{scene}.png"
                save_dir = os.path.dirname(save_path)
                os.makedirs(save_dir, exist_ok=True)
                Image.fromarray(concat).save(save_path)
        # Nothing was rendered in depth-only mode; skip image/video/metric
        # handling below.
        if self.train_cfg.forward_depth_only:
            return
        images_prob = output.color[0]
        rgb_gt = batch["target"]["image"][0]
        # Save images.
        if self.test_cfg.save_image:
            if self.test_cfg.save_gt_image:
                for index, color, gt in zip(
                    batch["target"]["index"][0], images_prob, rgb_gt
                ):
                    save_image(color, path / "images" / scene / f"color/{index:0>6}.png")
                    save_image(gt, path / "images" / scene / f"color/{index:0>6}_gt.png")
            else:
                for index, color in zip(batch["target"]["index"][0], images_prob):
                    save_image(color, path / "images" / scene / f"color/{index:0>6}.png")
        # save video
        if self.test_cfg.save_video:
            frame_str = "_".join([str(x.item()) for x in batch["context"]["index"][0]])
            save_video(
                [a for a in images_prob],
                path / "videos" / f"{scene}_frame_{frame_str}.mp4",
            )
        # compute scores
        if self.test_cfg.compute_scores:
            # The first few batches are treated as warm-up and excluded from
            # the timing statistics in on_test_end.
            if batch_idx < self.test_cfg.eval_time_skip_steps:
                self.time_skip_steps_dict["encoder"] += 1
                self.time_skip_steps_dict["decoder"] += v
            if not self.train_cfg.forward_depth_only:
                rgb = images_prob
                if f"psnr" not in self.test_step_outputs:
                    self.test_step_outputs[f"psnr"] = []
                if f"ssim" not in self.test_step_outputs:
                    self.test_step_outputs[f"ssim"] = []
                if f"lpips" not in self.test_step_outputs:
                    self.test_step_outputs[f"lpips"] = []
                self.test_step_outputs[f"psnr"].append(
                    compute_psnr(rgb_gt, rgb).mean().item()
                )
                self.test_step_outputs[f"ssim"].append(
                    compute_ssim(rgb_gt, rgb).mean().item()
                )
                self.test_step_outputs[f"lpips"].append(
                    compute_lpips(rgb_gt, rgb).mean().item()
                )
def on_test_end(self) -> None:
out_dir = Path(self.test_cfg.output_path)
saved_scores = {}
if self.test_cfg.compute_scores:
self.benchmarker.dump_memory(out_dir / "peak_memory.json")
self.benchmarker.dump(out_dir / "benchmark.json")
for metric_name, metric_scores in self.test_step_outputs.items():
avg_scores = sum(metric_scores) / len(metric_scores)
saved_scores[metric_name] = avg_scores
print(metric_name, avg_scores)
with (out_dir / f"scores_{metric_name}_all.json").open("w") as f:
json.dump(metric_scores, f)
metric_scores.clear()
for tag, times in self.benchmarker.execution_times.items():
times = times[int(self.time_skip_steps_dict[tag]) :]
saved_scores[tag] = [len(times), np.mean(times)]
print(
f"{tag}: {len(times)} calls, avg. {np.mean(times)} seconds per call"
)
self.time_skip_steps_dict[tag] = 0
with (out_dir / f"scores_all_avg.json").open("w") as f:
json.dump(saved_scores, f)
self.benchmarker.clear_history()
else:
self.benchmarker.dump(out_dir / "benchmark.json")
self.benchmarker.dump_memory(out_dir / "peak_memory.json")
self.benchmarker.summarize()
    @rank_zero_only
    def validation_step(self, batch, batch_idx):
        """Run one validation step on rank 0: render the target views, log
        PSNR/SSIM/LPIPS, and log depth/comparison/projection visualizations.

        Assumes batch size 1.
        """
        batch: BatchedExample = self.data_shim(batch)
        if self.global_rank == 0:
            print(
                f"validation step {self.global_step}; "
                f"scene = {[a[:20] for a in batch['scene']]}; "
                f"context = {batch['context']['index'].tolist()}"
            )
        # Render Gaussians.
        b, _, _, h, w = batch["target"]["image"].shape
        assert b == 1
        gaussians_softmax = self.encoder(
            batch["context"],
            self.global_step,
            deterministic=False,
        )
        pred_depths = None
        # The encoder may return a dict carrying predicted depths alongside
        # the Gaussians.
        if isinstance(gaussians_softmax, dict):
            pred_depths = gaussians_softmax["depths"]
            if "depth" in batch["context"]:
                depth_gt = batch["context"]["depth"]  # [B, V, H, W]
            gaussians_softmax = gaussians_softmax["gaussians"]
        if not self.train_cfg.forward_depth_only:
            output_softmax = self.decoder.forward(
                gaussians_softmax,
                batch["target"]["extrinsics"],
                batch["target"]["intrinsics"],
                batch["target"]["near"],
                batch["target"]["far"],
                (h, w),
            )
            rgb_softmax = output_softmax.color[0]
            # Compute validation metrics.
            rgb_gt = batch["target"]["image"][0]
            for tag, rgb in zip(("val",), (rgb_softmax,)):
                psnr = compute_psnr(rgb_gt, rgb).mean()
                self.log(f"val/psnr_{tag}", psnr)
                lpips = compute_lpips(rgb_gt, rgb).mean()
                self.log(f"val/lpips_{tag}", lpips)
                ssim = compute_ssim(rgb_gt, rgb).mean()
                self.log(f"val/ssim_{tag}", ssim)
        # viz depth
        if pred_depths is not None:
            # only visualize predicted depth
            pred_depths = pred_depths[0]  # [V, H, W]
            # Upsample predicted depth to the input-image resolution if the
            # encoder produced it at a lower resolution.
            if pred_depths.shape[1:] != batch["context"]["image"].shape[-2:]:
                pred_depths = F.interpolate(
                    pred_depths.unsqueeze(1),
                    size=batch["context"]["image"].shape[-2:],
                    mode="bilinear",
                    align_corners=True,
                ).squeeze(1)
            inverse_depth_pred = 1.0 / pred_depths
            concat = []
            for i in range(inverse_depth_pred.size(0)):
                concat.append(inverse_depth_pred[i])
            concat = torch.cat(concat, dim=1)  # [H, W*N]
            depth_viz = viz_depth_tensor(concat.cpu().detach())  # [3, H, W*N]
            # also concat images
            input_images = batch["context"]["image"][0]  # [N, 3, H, W]
            concat_img = [img for img in input_images]
            concat_img = torch.cat(concat_img, dim=-1) * 255  # [3, H, W*N]
            concat = torch.cat(
                (concat_img.cpu().detach(), depth_viz), dim=1
            )  # [3, H*2, W*N]
            self.logger.log_image(
                "depth",
                [concat],
                step=self.global_step,
                caption=batch["scene"],
            )
        if not self.train_cfg.forward_depth_only:
            # Construct comparison image.
            comparison = hcat(
                add_label(vcat(*batch["context"]["image"][0]), "Context"),
                add_label(vcat(*rgb_gt), "Target (Ground Truth)"),
                add_label(vcat(*rgb_softmax), "Target (Prediction)"),
                # add_label(vcat(batch["context"]["image"][0]), "Context"),
                # add_label(vcat(rgb_gt), "Target (Ground Truth)"),
                # add_label(vcat(rgb_softmax), "Target (Prediction)"),
            )
            self.logger.log_image(
                "comparison",
                [prep_image(add_border(comparison))],
                step=self.global_step,
                caption=batch["scene"],
            )
            if not self.train_cfg.no_log_projections:
                # Render projections and construct projection image.
                projections = hcat(
                    *render_projections(
                        gaussians_softmax,
                        256,
                        extra_label="(Prediction)",
                    )[0]
                )
                self.logger.log_image(
                    "projection",
                    [prep_image(add_border(projections))],
                    step=self.global_step,
                )
                # Draw cameras.
                cameras = hcat(*render_cameras(batch, 256))
                self.logger.log_image(
                    "cameras", [prep_image(add_border(cameras))], step=self.global_step
                )
        if self.encoder_visualizer is not None:
            for k, image in self.encoder_visualizer.visualize(
                batch["context"], self.global_step
            ).items():
                self.logger.log_image(k, [prep_image(image)], step=self.global_step)
        # Run video validation step.
        if not self.train_cfg.no_viz_video:
            self.render_video_interpolation(batch)
            self.render_video_wobble(batch)
            if self.train_cfg.extended_visualization:
                self.render_video_interpolation_exaggerated(batch)
    def on_validation_epoch_end(self) -> None:
        """Hack to run the full validation: every `eval_model_every_n_val`
        validation epochs, optionally back up the current checkpoint and then
        evaluate on the full test sets.
        """
        if self.trainer.sanity_checking and self.global_rank == 0:
            print(self.encoder)  # log the model to wandb log files
        if (not self.trainer.sanity_checking) and (self.eval_data_cfg is not None):
            self.eval_cnt = self.eval_cnt + 1
            if self.eval_cnt % self.train_cfg.eval_model_every_n_val == 0:
                # backup current ckpt before running full test sets eval
                if self.train_cfg.eval_save_model:
                    ckpt_saved_path = (
                        self.trainer.checkpoint_callback.format_checkpoint_name(
                            dict(
                                epoch=self.trainer.current_epoch,
                                step=self.trainer.global_step,
                            )
                        )
                    )
                    backup_dir = str(
                        Path(ckpt_saved_path).parent.parent / "checkpoints_backups"
                    )
                    if self.global_rank == 0:
                        os.makedirs(backup_dir, exist_ok=True)
                    ckpt_saved_path = os.path.join(
                        backup_dir, os.path.basename(ckpt_saved_path)
                    )
                    # call save_checkpoint on ALL process as suggested by pytorch_lightning
                    self.trainer.save_checkpoint(
                        ckpt_saved_path,
                        weights_only=True,
                    )
                    if self.global_rank == 0:
                        print(f"backup model to {ckpt_saved_path}.")
                # run full test sets eval on rank=0 device
                self.run_full_test_sets_eval()
    @rank_zero_only
    def run_full_test_sets_eval(self) -> None:
        """Evaluate the current model over the full test dataloader (rank 0
        only), logging averaged PSNR/SSIM/LPIPS scores and timing stats.
        """
        start_t = time.time()
        pred_depths = None
        depth_gt = None
        full_testsets = self.trainer.datamodule.test_dataloader(
            # dataset_cfg=self.eval_data_cfg
        )
        # Nested dict: scores_dict[metric][method] -> list of per-batch scores.
        scores_dict = {}
        if not self.train_cfg.forward_depth_only:
            for score_tag in ("psnr", "ssim", "lpips"):
                scores_dict[score_tag] = {}
                for method_tag in ("deterministic", "probabilistic"):
                    scores_dict[score_tag][method_tag] = []
        # evaluate depth
        if self.train_cfg.viz_depth:
            for score_tag in ("abs_rel", "rmse", "a1"):
                scores_dict[score_tag] = {}
                for method_tag in ("deterministic", "probabilistic"):
                    scores_dict[score_tag][method_tag] = []
        self.benchmarker.clear_history()
        # Warm-up batches whose timings will be excluded from the averages.
        time_skip_first_n_steps = min(
            self.train_cfg.eval_time_skip_steps, len(full_testsets)
        )
        time_skip_steps_dict = {"encoder": 0, "decoder": 0}
        for batch_idx, batch in tqdm(
            enumerate(full_testsets),
            total=min(len(full_testsets), self.train_cfg.eval_data_length),
        ):
            if batch_idx >= self.train_cfg.eval_data_length:
                break
            batch = self.data_shim(batch)
            batch = self.transfer_batch_to_device(batch, "cuda", dataloader_idx=0)
            # Render Gaussians.
            b, v, _, h, w = batch["target"]["image"].shape
            assert b == 1
            if batch_idx < time_skip_first_n_steps:
                time_skip_steps_dict["encoder"] += 1
                time_skip_steps_dict["decoder"] += v
            with self.benchmarker.time("encoder"):
                gaussians_probabilistic = self.encoder(
                    batch["context"],
                    self.global_step,
                    deterministic=False,
                )
            # The encoder may return a dict carrying predicted depths.
            if isinstance(gaussians_probabilistic, dict):
                pred_depths = gaussians_probabilistic["depths"]
                if "depth" in batch["context"]:
                    depth_gt = batch["context"]["depth"]
                gaussians_probabilistic = gaussians_probabilistic["gaussians"]
            if not self.train_cfg.forward_depth_only:
                with self.benchmarker.time("decoder", num_calls=v):
                    output_probabilistic = self.decoder.forward(
                        gaussians_probabilistic,
                        batch["target"]["extrinsics"],
                        batch["target"]["intrinsics"],
                        batch["target"]["near"],
                        batch["target"]["far"],
                        (h, w),
                    )
                rgbs = [output_probabilistic.color[0]]
                tags = ["probabilistic"]
                if self.train_cfg.eval_deterministic:
                    gaussians_deterministic = self.encoder(
                        batch["context"],
                        self.global_step,
                        deterministic=True,
                    )
                    output_deterministic = self.decoder.forward(
                        gaussians_deterministic,
                        batch["target"]["extrinsics"],
                        batch["target"]["intrinsics"],
                        batch["target"]["near"],
                        batch["target"]["far"],
                        (h, w),
                    )
                    rgbs.append(output_deterministic.color[0])
                    tags.append("deterministic")
                # Compute validation metrics.
                rgb_gt = batch["target"]["image"][0]
                for tag, rgb in zip(tags, rgbs):
                    scores_dict["psnr"][tag].append(
                        compute_psnr(rgb_gt, rgb).mean().item()
                    )
                    scores_dict["lpips"][tag].append(
                        compute_lpips(rgb_gt, rgb).mean().item()
                    )
                    scores_dict["ssim"][tag].append(
                        compute_ssim(rgb_gt, rgb).mean().item()
                    )
        # summarise scores and log to logger
        for score_tag, methods in scores_dict.items():
            for method_tag, cur_scores in methods.items():
                if len(cur_scores) > 0:
                    cur_mean = sum(cur_scores) / len(cur_scores)
                    self.log(f"test/{score_tag}", cur_mean)
        # summarise run time
        for tag, times in self.benchmarker.execution_times.items():
            times = times[int(time_skip_steps_dict[tag]) :]
            print(f"{tag}: {len(times)} calls, avg. {np.mean(times)} seconds per call")
            self.log(f"test/runtime_avg_{tag}", np.mean(times))
        self.benchmarker.clear_history()
        overall_eval_time = time.time() - start_t
        print(f"Eval total time cost: {overall_eval_time:.3f}s")
        self.log("test/runtime_all", overall_eval_time)
@rank_zero_only
def render_video_wobble(self, batch: BatchedExample) -> None:
# Two views are needed to get the wobble radius.
_, v, _, _ = batch["context"]["extrinsics"].shape
if v != 2:
return
def trajectory_fn(t):
origin_a = batch["context"]["extrinsics"][:, 0, :3, 3]
origin_b = batch["context"]["extrinsics"][:, 1, :3, 3]
delta = (origin_a - origin_b).norm(dim=-1)
extrinsics = generate_wobble(
batch["context"]["extrinsics"][:, 0],
delta * 0.25,
t,
)
intrinsics = repeat(
batch["context"]["intrinsics"][:, 0],
"b i j -> b v i j",
v=t.shape[0],
)
return extrinsics, intrinsics
return self.render_video_generic(batch, trajectory_fn, "wobble", num_frames=60)
@rank_zero_only
def render_video_interpolation(self, batch: BatchedExample) -> None:
_, v, _, _ = batch["context"]["extrinsics"].shape
def trajectory_fn(t):
extrinsics = interpolate_extrinsics(
batch["context"]["extrinsics"][0, 0],
(
batch["context"]["extrinsics"][0, 1]
if v == 2
else batch["target"]["extrinsics"][0, 0]
),
t,
)
intrinsics = interpolate_intrinsics(
batch["context"]["intrinsics"][0, 0],
(
batch["context"]["intrinsics"][0, 1]
if v == 2
else batch["target"]["intrinsics"][0, 0]
),
t,
)
return extrinsics[None], intrinsics[None]
return self.render_video_generic(batch, trajectory_fn, "rgb")
    @rank_zero_only
    def render_video_interpolation_exaggerated(self, batch: BatchedExample) -> None:
        """Render an exaggerated video that overlays a wobble transformation
        on an extrapolated camera interpolation.

        Requires exactly two context views (for the wobble radius);
        otherwise this is a no-op.
        """
        # Two views are needed to get the wobble radius.
        _, v, _, _ = batch["context"]["extrinsics"].shape
        if v != 2:
            return

        def trajectory_fn(t):
            origin_a = batch["context"]["extrinsics"][:, 0, :3, 3]
            origin_b = batch["context"]["extrinsics"][:, 1, :3, 3]
            delta = (origin_a - origin_b).norm(dim=-1)
            tf = generate_wobble_transformation(
                delta * 0.5,
                t,
                5,
                scale_radius_with_t=False,
            )
            # `t * 5 - 2` extrapolates well beyond the [0, 1] range.
            extrinsics = interpolate_extrinsics(
                batch["context"]["extrinsics"][0, 0],
                (
                    batch["context"]["extrinsics"][0, 1]
                    if v == 2
                    else batch["target"]["extrinsics"][0, 0]
                ),
                t * 5 - 2,
            )
            intrinsics = interpolate_intrinsics(
                batch["context"]["intrinsics"][0, 0],
                (
                    batch["context"]["intrinsics"][0, 1]
                    if v == 2
                    else batch["target"]["intrinsics"][0, 0]
                ),
                t * 5 - 2,
            )
            # NOTE(review): extrinsics lacks the explicit leading batch axis
            # that intrinsics[None] gets; presumably `extrinsics @ tf`
            # broadcasts against tf's batch dimension — confirm the shapes.
            return extrinsics @ tf, intrinsics[None]

        return self.render_video_generic(
            batch,
            trajectory_fn,
            "interpolation_exagerrated",
            num_frames=300,
            smooth=False,
            loop_reverse=False,
        )
    @rank_zero_only
    def render_video_generic(
        self,
        batch: BatchedExample,
        trajectory_fn: TrajectoryFn,
        name: str,
        num_frames: int = 30,
        smooth: bool = True,
        loop_reverse: bool = True,
    ) -> None:
        """Render an RGB + depth video along the trajectory produced by
        `trajectory_fn`.

        Args:
            batch: Current batch; its context views feed the encoder.
            trajectory_fn: Maps interpolation parameters t in [0, 1] to
                per-frame (extrinsics, intrinsics).
            name: Tag used by the (currently disabled) video logging code.
            num_frames: Number of frames to render.
            smooth: Apply cosine easing to t for smoother motion.
            loop_reverse: Append the reversed frames to form a seamless loop.
        """
        # Render probabilistic estimate of scene.
        gaussians_prob = self.encoder(batch["context"], self.global_step, False)
        # gaussians_det = self.encoder(batch["context"], self.global_step, True)
        if isinstance(gaussians_prob, dict):
            gaussians_prob = gaussians_prob["gaussians"]
        t = torch.linspace(0, 1, num_frames, dtype=torch.float32, device=self.device)
        if smooth:
            # Cosine easing: slow near the endpoints, fast in the middle.
            t = (torch.cos(torch.pi * (t + 1)) + 1) / 2
        extrinsics, intrinsics = trajectory_fn(t)
        _, _, _, h, w = batch["context"]["image"].shape

        # Color-map the result.
        def depth_map(result):
            # Normalize log-depth between (roughly) the 1st and 99th
            # percentiles before applying the turbo colormap.
            near = result[result > 0][:16_000_000].quantile(0.01).log()
            far = result.view(-1)[:16_000_000].quantile(0.99).log()
            result = result.log()
            result = 1 - (result - near) / (far - near)
            return apply_color_map_to_image(result, "turbo")

        near = repeat(batch["context"]["near"][:, 0], "b -> b v", v=num_frames)
        far = repeat(batch["context"]["far"][:, 0], "b -> b v", v=num_frames)
        output_prob = self.decoder.forward(
            gaussians_prob, extrinsics, intrinsics, near, far, (h, w), "depth"
        )
        images_prob = [
            vcat(rgb, depth)
            for rgb, depth in zip(output_prob.color[0], depth_map(output_prob.depth[0]))
        ]
        images = [
            add_border(
                hcat(
                    add_label(image_prob, "Prediction"),
                )
            )
            for image_prob, _ in zip(images_prob, images_prob)
        ]
        video = torch.stack(images)
        video = (video.clip(min=0, max=1) * 255).type(torch.uint8).cpu().numpy()
        if loop_reverse:
            video = pack([video, video[::-1][1:-1]], "* c h w")[0]
        # Video logging is disabled: swanlab does not support wandb.Video.
        # visualizations = {
        #     f"video/{name}": wandb.Video(video[None], fps=30, format="mp4")
        # }
        # Since the PyTorch Lightning doesn't support video logging, log to wandb directly.
        # try:
        #     wandb.log(visualizations)
        # except Exception:
        #     assert isinstance(self.logger, LocalLogger)
        #     for key, value in visualizations.items():
        #         tensor = value._prepare_video(value.data)
        #         clip = mpy.ImageSequenceClip(list(tensor), fps=value._fps)
        #         dir = LOG_PATH / key
        #         dir.mkdir(exist_ok=True, parents=True)
        #         clip.write_videofile(
        #             str(dir / f"{self.global_step:0>6}.mp4"), logger=None
        #         )
def configure_optimizers(self):
    """Build the AdamW optimizer and a per-step OneCycleLR schedule.

    When ``optimizer_cfg.lr_monodepth > 0``, parameters whose names contain
    "pretrained" (the monodepth backbone) get their own, typically smaller,
    learning rate; all remaining parameters use ``optimizer_cfg.lr``.
    Otherwise a single learning rate is applied to every parameter.

    Returns:
        Lightning optimizer dict: {"optimizer": ..., "lr_scheduler": {...}}
        with the scheduler stepped every training step.
    """
    # The two original branches duplicated the optimizer/scheduler setup; the
    # only differences were the parameter grouping and the max_lr argument,
    # so select those first and construct the optimizer/scheduler once.
    if self.optimizer_cfg.lr_monodepth > 0:
        # Split params: pretrained (monodepth) backbone vs. newly added layers.
        pretrained_params = []
        new_params = []
        for name, param in self.named_parameters():
            if "pretrained" in name:
                pretrained_params.append(param)
            else:
                new_params.append(param)
        params = [
            {"params": pretrained_params, "lr": self.optimizer_cfg.lr_monodepth},
            {"params": new_params, "lr": self.optimizer_cfg.lr},
        ]
        # OneCycleLR expects one max_lr per param group.
        max_lr = [self.optimizer_cfg.lr_monodepth, self.optimizer_cfg.lr]
    else:
        params = self.parameters()
        max_lr = self.optimizer_cfg.lr

    optimizer = torch.optim.AdamW(
        params,
        lr=self.optimizer_cfg.lr,  # overridden per group when split above
        weight_decay=self.optimizer_cfg.weight_decay,
    )
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr,
        # Small buffer past max_steps avoids a scheduler overrun on the last step.
        self.trainer.max_steps + 10,
        pct_start=0.01,
        cycle_momentum=False,
        anneal_strategy="cos",
    )
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": scheduler,
            "interval": "step",
            "frequency": 1,
        },
    }
# ----------------- MOD: debug helper functions -----------------
def snapshot_params(self):
    """Return a name -> detached CPU clone mapping of every trainable parameter."""
    snapshot = {}
    for name, param in self.named_parameters():
        if param.requires_grad:
            snapshot[name] = param.detach().cpu().clone()
    return snapshot
def compare_params_snapshot(self, before_snap, tol=0.0, show_n=50):
    """Diff current trainable params against a saved snapshot; report stalls.

    Args:
        before_snap: name -> tensor dict from ``snapshot_params``, or None.
        tol: absolute-sum-difference threshold at or below which a param
            counts as not updated.
        show_n: maximum number of offending names to print.
    """
    if before_snap is None:
        print("[CHECK] no before-snapshot provided")
        return

    zero_updates = []
    small_updates = []
    for name, param in self.named_parameters():
        # Only compare trainable params that were actually snapshotted.
        if not param.requires_grad or name not in before_snap:
            continue
        delta = (param.detach().cpu() - before_snap[name]).abs().sum().item()
        if delta <= tol:
            zero_updates.append((name, delta))
        elif delta < 1e-12:
            small_updates.append((name, delta))

    print(f"[CHECK] zero-updates={len(zero_updates)}, very-small-updates={len(small_updates)}")
    if zero_updates:
        print(" params with (nearly) zero update (first {}):".format(show_n))
        for n, d in zero_updates[:show_n]:
            print(f" {n} diff_sum={d:.3e}")
def list_params_with_no_grad_after_backward(self, tiny_thresh=1e-12, show_n=50):
    """Audit gradients after backward(): report missing, tiny, and NaN/Inf grads.

    Args:
        tiny_thresh: gradient-norm threshold below which a grad is "very small".
        show_n: maximum number of names to print per category.
    """
    none_grad = []
    very_small = []
    nan_inf = []
    for name, param in self.named_parameters():
        if not param.requires_grad:
            continue
        grad = param.grad
        if grad is None:
            none_grad.append(name)
            continue
        if torch.isnan(grad).any() or torch.isinf(grad).any():
            nan_inf.append(name)
        try:
            norm = float(grad.norm().item())
        except Exception:
            # Best effort: treat an unreadable norm as zero.
            norm = 0.0
        if norm < tiny_thresh:
            very_small.append((name, norm))

    print(f"[CHECK] grads None: {len(none_grad)}, very_small (<{tiny_thresh}): {len(very_small)}, nan/inf: {len(nan_inf)}")
    if none_grad:
        print(" grads is None (first {}):".format(show_n))
        for n in none_grad[:show_n]:
            print(" ", n)
    if very_small:
        print(" grads very small (first {}):".format(show_n))
        for n, norm in very_small[:show_n]:
            print(f" {n} norm={norm:.3e}")
    if nan_inf:
        print(" grads have NaN/Inf (first {}):".format(show_n))
        for n in nan_inf[:show_n]:
            print(" ", n)
def list_frozen_params(self, show_n=50):
    """Print the count and first `show_n` names/shapes of frozen parameters."""
    frozen = []
    for name, param in self.named_parameters():
        if not param.requires_grad:
            frozen.append((name, param.shape))
    print(f"[CHECK] frozen params count = {len(frozen)}")
    for name, shape in frozen[:show_n]:
        print(" ", name, shape)
def check_optimizer_coverage(self):
    """Check whether configured optimizer(s) include all trainable params."""
    try:
        opt_or_list = self.optimizers()
    except Exception as e:
        # May be called too early, before trainer.configure_optimizers completes.
        print("[CHECK] cannot access optimizer from here:", e)
        return

    if isinstance(opt_or_list, (list, tuple)):
        opts = opt_or_list
    else:
        opts = [opt_or_list]

    # Identity set of every parameter tracked by any optimizer group.
    covered = {id(p) for o in opts for group in o.param_groups for p in group['params']}
    missing = [
        name
        for name, param in self.named_parameters()
        if param.requires_grad and id(param) not in covered
    ]
    print(f"[CHECK] optimizer missing params count = {len(missing)} (show up to 50):")
    for n in missing[:50]:
        print(" ", n)
    print(" optimizer lrs per opt:", [[g['lr'] for g in o.param_groups] for o in opts])
# ----------------- MOD: hooks -----------------
def on_after_backward(self):
    """Lightning hook after backward(): dump grad statistics when debugging is on."""
    # Skip entirely unless the debug flag has been set on the module.
    if not getattr(self, "_check_param_updates", False):
        return
    try:
        print(f"[on_after_backward] step={self.global_step}")
        # Print a few grad stats.
        self.list_params_with_no_grad_after_backward()
    except Exception as e:
        # Debug helpers must never crash training.
        print("on_after_backward debug error:", e)
def on_train_batch_end(self, outputs, batch, batch_idx, dataloader_idx=0):
    """Lightning hook after optimizer.step(): verify params actually changed."""
    # Only run the (potentially expensive) comparison when debugging is enabled.
    if not getattr(self, "_check_param_updates", False):
        return
    try:
        snapshot = getattr(self, "_before_params_snapshot", None)
        if snapshot is None:
            print("[on_train_batch_end] no before snapshot found")
        else:
            self.compare_params_snapshot(snapshot, tol=0.0)
        # Clear the snapshot for the next step (unconditionally, as before).
        self._before_params_snapshot = None
        # Also check optimizer coverage (finds params missing from the optimizer).
        self.check_optimizer_coverage()
    except Exception as e:
        # Debug helpers must never crash training.
        print("on_train_batch_end debug error:", e)