#!/usr/bin/env python3
"""
repair_seed_variance.py

Targeted repair script for the 5 CRITICAL groups.

Usage:
    python /root/autodl-tmp/SplatAtlas/scripts/repair_seed_variance.py

For every (method, scene, seed) combination listed in TARGETS the script
checks which artifacts already exist (trained PLY, render-complete flag,
metrics JSON) and runs only the missing stages of train -> render -> eval.
"""
import os
import subprocess
import shutil
import json
import time
from pathlib import Path
from typing import Dict, Optional, Tuple

SPLATATLAS = "/root/autodl-tmp/SplatAtlas"
OUTPUTS = f"{SPLATATLAS}/outputs"
LOGS = f"{SPLATATLAS}/logs/repair"
ITERS = 30000

# Opened lazily by the first log() call so that importing this module does
# not create directories or files.
LOG_FILE = None


def _open_log_file():
    """Return the shared log file handle, creating it on first use."""
    global LOG_FILE
    if LOG_FILE is None:
        os.makedirs(LOGS, exist_ok=True)
        # Explicit UTF-8: log messages contain CJK text and symbols that a
        # non-UTF-8 default locale encoding could fail to encode.
        LOG_FILE = open(
            f"{LOGS}/repair_{time.strftime('%Y%m%d_%H%M%S')}.log",
            "a",
            encoding="utf-8",
        )
    return LOG_FILE


def log(msg):
    """Print *msg* with an HH:MM:SS prefix and append it to the log file."""
    ts = time.strftime("%H:%M:%S")
    line = f"[{ts}] {msg}"
    print(line)
    fh = _open_log_file()
    fh.write(line + "\n")
    fh.flush()


def run(cmd, env=None, cwd=SPLATATLAS):
    """Run the shell command string *cmd* and return True on exit code 0.

    The first 120 characters of the command and the resulting return code
    are logged. The command goes through the shell (``shell=True``); every
    caller in this file builds it from module constants and TARGETS entries.
    """
    log(f" CMD: {cmd[:120]}...")
    r = subprocess.run(cmd, shell=True, env=env, cwd=cwd)
    ok = r.returncode == 0
    log(f" -> {'OK' if ok else 'FAILED'} (rc={r.returncode})")
    return ok


def make_env(env_path, extra_py, oom_safe=False) -> Dict[str, str]:
    """Build the subprocess environment: PYTHONPATH plus optional OOM guard.

    Args:
        env_path: Accepted for signature symmetry with python_bin(); not
            used here.
        extra_py: Directory placed first on PYTHONPATH.
        oom_safe: When true, set PYTORCH_CUDA_ALLOC_CONF to
            "max_split_size_mb:128".

    Returns:
        A copy of os.environ with the adjustments applied.
    """
    e = os.environ.copy()
    # Join only non-empty parts so an unset/empty inherited PYTHONPATH does
    # not leave a trailing ":" (an empty path entry).
    parts = [extra_py, SPLATATLAS, e.get("PYTHONPATH", "")]
    e["PYTHONPATH"] = ":".join(p for p in parts if p)
    if oom_safe:
        e["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
    return e


def python_bin(env_path) -> str:
    """Return the interpreter to invoke: "python" for "base", else the env's bin/python."""
    return "python" if env_path == "base" else f"{env_path}/bin/python"


def check_ply(model_dir) -> Tuple[bool, str]:
    """Return (exists, path) for the trained point cloud at iteration ITERS."""
    p = f"{model_dir}/point_cloud/iteration_{ITERS}/point_cloud.ply"
    return os.path.exists(p), p


def check_flag(model_dir) -> bool:
    """Return whether the render-complete flag file for ITERS exists."""
    p = f"{model_dir}/render_complete_{ITERS}.flag"
    return os.path.exists(p)


def check_metrics(model_dir) -> Tuple[bool, str]:
    """Return (exists, path) for the test-metrics JSON at iteration ITERS."""
    p = f"{model_dir}/metrics_test_iter{ITERS}.json"
    return os.path.exists(p), p


def do_train(python, env, method, source, model_dir, res, seed=None):
    """Run main_train.py for one model directory; return True on success.

    ``--seed`` is passed whenever *seed* is not None, so seed 0 is honoured.
    """
    seed_str = f" --seed {seed}" if seed is not None else ""
    cmd = (
        f"{python} {SPLATATLAS}/scripts/main_train.py"
        f" --method {method}"
        f" --source_path {source}"
        f" --model_path {model_dir}"
        f" --iterations {ITERS}"
        f" --resolution {res}"
        # Follows ITERS so the saved checkpoint is the one check_ply() expects.
        f" --save_iterations {ITERS}"
        f"{seed_str}"
    )
    return run(cmd, env)


def do_render(python, env, method, source, model_dir, res):
    """Delete leftover render directories, then run main_render.py.

    Returns True when the render command exits with code 0.
    """
    # Remove leftover render directories from earlier attempts.
    for d in [f"renders_test_{ITERS}", f"gt_test_{ITERS}", f"depths_test_{ITERS}"]:
        p = f"{model_dir}/{d}"
        if os.path.exists(p):
            shutil.rmtree(p)
    cmd = (
        f"{python} {SPLATATLAS}/scripts/main_render.py"
        f" --method {method}"
        f" --source_path {source}"
        f" --model_path {model_dir}"
        f" --iteration {ITERS}"
        f" --resolution {res}"
    )
    return run(cmd, env)


def do_eval(python, env, method, scene, model_dir, source):
    """Run ufd_evalkit/run_eval.py on the renders; return True on success.

    The command is told to write its metrics to
    ``metrics_test_iter{ITERS}.json`` inside *model_dir*.
    """
    ply_path = f"{model_dir}/point_cloud/iteration_{ITERS}/point_cloud.ply"
    render_dir = f"{model_dir}/renders_test_{ITERS}"
    gt_dir = f"{model_dir}/gt_test_{ITERS}"
    depth_dir = f"{model_dir}/depths_test_{ITERS}"
    out_json = f"{model_dir}/metrics_test_iter{ITERS}.json"
    cmd = (
        f"{python} {SPLATATLAS}/ufd_evalkit/run_eval.py"
        f" --method {method}"
        f" --scene {scene}"
        f" --render_dir {render_dir}"
        f" --gt_dir {gt_dir}"
        f" --ply_path {ply_path}"
        f" --output_json {out_json}"
        f" --colmap_dir {source}"
        f" --depth_dir {depth_dir}"
    )
    return run(cmd, env)


# ─────────────────────────────────────────────────────────────────────────────
# Repair target definitions
# ─────────────────────────────────────────────────────────────────────────────
TARGETS = [
    # ── Case 1: vanilla_3dgs bonsai ──────────────────────────────────────────
    # PLY exists and renders exist; only eval hit OOM.
    # Re-run eval with the OOM guard enabled.
    {
        "label": "vanilla_3dgs bonsai (all seeds, eval OOM fix)",
        "method": "vanilla_3dgs",
        "scene": "bonsai",
        "source": "/root/autodl-tmp/dataset/360/bonsai",
        "res": 2,
        "env_path": "base",
        "extra_py": "/root/autodl-tmp/3dgs_official",
        "oom_safe": True,  # key setting: guards against LPIPS OOM
        "seeds": [None, 1, 2],  # None = default run (no seed suffix)
    },
    # ── Case 2: analyticsplatting bonsai seed1/seed2 ─────────────────────────
    # The default run already has metrics; seed1/seed2 lack the PLY → retrain.
    {
        "label": "analyticsplatting bonsai (seed1, seed2 retrain)",
        "method": "analyticsplatting",
        "scene": "bonsai",
        "source": "/root/autodl-tmp/dataset/360/bonsai",
        "res": 2,
        "env_path": "/root/autodl-tmp/envs/ana_splatting",
        "extra_py": "/root/autodl-tmp/Analytic-Splatting_offy",
        "oom_safe": False,
        "seeds": [1, 2],  # default run is done; only fill in seed1/seed2
    },
    # ── Case 3: analyticsplatting Lego seed1/seed2 ───────────────────────────
    {
        "label": "analyticsplatting Lego (seed1, seed2 retrain)",
        "method": "analyticsplatting",
        "scene": "Lego",
        "source": "/root/autodl-tmp/dataset/Synthetic_NeRF_Verified/Synthetic_NeRF/Lego",
        "res": 1,
        "env_path": "/root/autodl-tmp/envs/ana_splatting",
        "extra_py": "/root/autodl-tmp/Analytic-Splatting_offy",
        "oom_safe": False,
        "seeds": [1, 2],
    },
    # ── Case 4: erankgs bonsai ───────────────────────────────────────────────
    # All seeds lack metrics; render crashed because environments were mixed.
    # Run the full train→render→eval pipeline strictly in the erank_gs env.
    {
        "label": "erankgs bonsai (all seeds, env fix)",
        "method": "erankgs",
        "scene": "bonsai",
        "source": "/root/autodl-tmp/dataset/360/bonsai",
        "res": 2,
        "env_path": "/root/autodl-tmp/envs/erank_gs",
        "extra_py": "/root/autodl-tmp/erank_gs",
        "oom_safe": False,
        "seeds": [None, 1, 2],
    },
    # ── Case 5: pgsr bonsai ──────────────────────────────────────────────────
    # Same symptoms as erankgs; strictly use the pgsr_v2 environment.
    {
        "label": "pgsr bonsai (all seeds, env fix)",
        "method": "pgsr",
        "scene": "bonsai",
        "source": "/root/autodl-tmp/dataset/360/bonsai",
        "res": 2,
        "env_path": "/root/autodl-tmp/envs/pgsr_v2",
        "extra_py": "/root/autodl-tmp/pgsr_official",
        "oom_safe": False,
        "seeds": [None, 1, 2],
    },
]


# ─────────────────────────────────────────────────────────────────────────────
# Execution logic
# ─────────────────────────────────────────────────────────────────────────────
def model_dir_for(method, scene, seed) -> str:
    """Return the output directory for a run; a None seed means no suffix."""
    suffix = f"_seed{seed}" if seed is not None else ""
    return f"{OUTPUTS}/{method}{suffix}_{scene}"


def tag_for(method, scene, seed: Optional[int]) -> str:
    """Return the short log label for a run.

    Uses the same ``seed is not None`` rule as model_dir_for() so the label
    always matches the directory name, including for seed 0.
    """
    if seed is not None:
        return f"{method}_seed{seed}_{scene}"
    return f"{method}_{scene}"


def repair(target):
    """Bring every seed of *target* up to date with a metrics JSON.

    For each seed: skip when metrics already exist; otherwise train if the
    PLY is missing, render if the render flag is missing (always after a
    fresh training run), then evaluate. A failed stage is logged and the
    remaining stages for that seed are skipped.
    """
    method = target["method"]
    scene = target["scene"]
    source = target["source"]
    res = target["res"]
    env_path = target["env_path"]
    extra_py = target["extra_py"]
    oom_safe = target["oom_safe"]

    python = python_bin(env_path)
    env = make_env(env_path, extra_py, oom_safe)

    log(f"\n{'='*60}")
    log(f"TARGET: {target['label']}")

    for seed in target["seeds"]:
        mdir = model_dir_for(method, scene, seed)
        tag = tag_for(method, scene, seed)
        log(f"\n -- {tag} --")
        log(f" dir: {mdir}")

        # ── 1. Diagnose the current state ────────────────────────────────────
        ply_ok, _ = check_ply(mdir)
        flag_ok = check_flag(mdir)
        metrics_ok, _ = check_metrics(mdir)

        if metrics_ok:
            log(" metrics 已存在,跳过")
            continue

        log(f" 状态: PLY={'OK' if ply_ok else 'MISSING'} "
            f"render={'OK' if flag_ok else 'MISSING'} "
            f"metrics=MISSING")

        # ── 2. Train (if needed) ─────────────────────────────────────────────
        if not ply_ok:
            log(" → 执行训练")
            if not do_train(python, env, method, source, mdir, res, seed):
                log(f" !! 训练失败,跳过后续步骤: {tag}")
                continue
            flag_ok = False  # a fresh model must be re-rendered

        # ── 3. Render (if needed) ────────────────────────────────────────────
        if not flag_ok:
            log(" → 执行渲染")
            if not do_render(python, env, method, source, mdir, res):
                log(f" !! 渲染失败,跳过 eval: {tag}")
                continue

        # ── 4. Evaluate ──────────────────────────────────────────────────────
        log(" → 执行评估")
        if do_eval(python, env, method, scene, mdir, source):
            log(f" ✓ DONE: {tag}")
        else:
            log(f" !! eval 失败: {tag}")


# ─────────────────────────────────────────────────────────────────────────────
# Wrap-up: status summary
# ─────────────────────────────────────────────────────────────────────────────
def final_report():
    """Log a table showing, per run, whether the PLY and metrics file exist."""
    log("\n" + "="*60)
    log("修复后状态汇总")
    log("="*60)
    header = f"{'Group':<40} {'PLY':>5} {'Metrics':>8}"
    log(header)
    for t in TARGETS:
        for seed in t["seeds"]:
            mdir = model_dir_for(t["method"], t["scene"], seed)
            tag = tag_for(t["method"], t["scene"], seed)
            ply_ok, _ = check_ply(mdir)
            metrics_ok, _ = check_metrics(mdir)
            log(f" {tag:<38} {'✓' if ply_ok else '✗':>5} {'✓' if metrics_ok else '✗':>8}")


def main():
    """Repair every target, print the summary, and always close the log file."""
    try:
        log("====== repair_seed_variance 开始 ======")
        t0 = time.time()
        for target in TARGETS:
            repair(target)
        final_report()
        elapsed = (time.time() - t0) / 3600
        log(f"\n====== 完成,总耗时 {elapsed:.1f}h ======")
    finally:
        # Close the log even when a stage raises.
        if LOG_FILE is not None:
            LOG_FILE.close()


if __name__ == "__main__":
    main()