| |
| """Probe: how much does the TRAINED per-pixel head contribute over a DEFAULT (untrained, |
| zero-residual) head? Renders the same val clips with (a) the trained head and (b) a freshly |
| re-initialized head (the init: opacity logit 2.0, log-scale 0 -> footprint, identity quat). |
| If (a)~(b), the learnable head adds ~nothing and PSNR is set by VGGT-depth + source color + |
| default footprint -> the bottleneck is the representation/backbone, not the head's optimization. |
| """ |
| import argparse, os, copy, sys |
| import numpy as np |
| import torch |
| from mapgs.config import load_config |
| from mapgs.data import UnifiedClipDataset |
| from mapgs.eval.metrics import psnr, ssim |
| from mapvggt import MapVGGT |
| DEV = "cuda" |
| sys.path.insert(0, "/mnt/william") |
| from scripts.train_mapvggt_full import prep, render_scene |
|
|
|
|
def build_val(args):
    """Build the held-out validation dataset: one clip per validation segment.

    The full dataset is loaded from the ``"train"`` split, and the first
    ``args.val_segs`` segment ids (in sorted order) are treated as validation
    segments. For each of them only the first clip encountered in
    ``dataset.clips`` is kept.

    Args:
        args: Parsed CLI namespace; reads ``roots`` (comma-separated string),
            ``height``, ``width`` and ``val_segs``.

    Returns:
        A shallow copy of the full dataset whose ``clips`` attribute is replaced
        by the selected validation clips. Everything else is shared with the
        full dataset object.
    """

    def segment_of(clip_path):
        # Segment id = first two "_"-separated fields of the clip directory name.
        clip_name = os.path.basename(clip_path.rstrip("/"))
        return "_".join(clip_name.split("_")[:2])

    overrides = [
        "data.name=unified",
        f"data.root={args.roots}",
        f"data.height={args.height}",
        f"data.width={args.width}",
        "model.tokens.n_map=2048",
    ]
    cfg = load_config(overrides=overrides)
    dataset = UnifiedClipDataset(
        cfg, roots=args.roots.split(","), split="train", n_sup_views=6
    )

    # NOTE(review): presumably this mirrors the segment hold-out used by the
    # training script -- confirm the two stay in sync.
    all_segments = sorted({segment_of(clip) for clip in dataset.clips})
    held_out = set(all_segments[:args.val_segs])

    # dict preserves insertion order, so the result keeps dataset order while
    # retaining only the first clip seen for each held-out segment.
    first_clip_of = {}
    for clip in dataset.clips:
        segment = segment_of(clip)
        if segment in held_out:
            first_clip_of.setdefault(segment, clip)

    val_ds = copy.copy(dataset)
    val_ds.clips = list(first_clip_of.values())
    return val_ds
|
|
|
|
@torch.no_grad()
def eval_model(model, vds, n_in):
    """Render every clip of ``vds`` with ``model`` and aggregate PSNR / SSIM.

    The model (and its ``vggt`` sub-module) is switched to eval mode and
    ``model.cur_s`` is set to ``model.s_max`` before rendering; neither is
    restored afterwards. Clips whose PSNR is NaN or infinite are skipped for
    both metrics.

    Args:
        model: Model exposing ``eval()``, ``vggt``, ``cur_s`` / ``s_max`` and
            callable on the prepared clip tensors.
        vds: Dataset-like object indexable by clip position, with a ``clips``
            list that defines how many clips are evaluated.
        n_in: Forwarded to ``prep`` (presumably the number of input views --
            confirm against the training script).

    Returns:
        Tuple ``(mean PSNR, mean SSIM, std PSNR)`` over the clips with finite
        PSNR. If no clip qualifies (empty dataset or all skipped), all three
        values are NaN.
    """
    model.eval()
    model.cur_s = model.s_max
    # Redundant if ``vggt`` is a registered sub-module, but kept from the
    # original in case the backbone handles train/eval mode separately.
    model.vggt.eval()

    psnrs, ssims = [], []
    # Iterate by position over ``vds.clips`` (not ``len(vds)``): callers replace
    # ``clips`` on a shallow dataset copy, so this is the authoritative count.
    for i in range(len(vds.clips)):
        d = prep(vds[i], n_in, DEV)
        gsm = model(d["in_img"], d["in_K"], d["in_c2w"], d["ap"], d["at"], d["an"])
        target = d["sup_img"]
        rgb, _ = render_scene(model, gsm, d, *target.shape[-2:], gain=1.0)
        p = float(psnr(rgb, target))
        if not np.isfinite(p):
            # NaN / +-inf PSNR would poison the averages; drop the clip.
            continue
        psnrs.append(p)
        ssims.append(float(ssim(rgb, target)))

    if not psnrs:
        # Explicit NaNs instead of numpy's "Mean of empty slice" warnings.
        nan = np.float64(np.nan)
        return nan, nan, nan
    return np.mean(psnrs), np.mean(ssims), np.std(psnrs)
|
|
|
|
def _reset_head_to_default(head):
    """Overwrite ``head`` in place with the probe's zero-residual default.

    Layers 0, 2 and -1 get zero weights and biases, so the head output is just
    the final bias. Two final-bias channels are then set, matching the default
    described in the module docstring: channel 0 = 2.0 (opacity logit) and
    channel 4 = 1.0 (presumably the real component of an identity quaternion --
    confirm against the head's channel layout).
    """
    import torch.nn as nn

    for layer in (head[0], head[2], head[-1]):
        nn.init.zeros_(layer.weight)
        nn.init.zeros_(layer.bias)
    head[-1].bias.data[0] = 2.0
    head[-1].bias.data[4] = 1.0


def main():
    """Compare rendering quality of the trained head against a default head.

    Parses CLI options, loads the checkpoint into a ``MapVGGT`` model, evaluates
    it on the first ``--clips`` validation clips, then resets ``model.head`` in
    place to the default initialisation and evaluates again. Prints PSNR / SSIM
    for both runs and their difference.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--roots", default="/mnt/william/data/unified/waymo")
    ap.add_argument("--ckpt", default="/mnt/william/runs/abl_full_best.safetensors")
    ap.add_argument("--n-in", type=int, default=8)
    ap.add_argument("--height", type=int, default=256)
    ap.add_argument("--width", type=int, default=448)
    ap.add_argument("--val-segs", type=int, default=40)
    ap.add_argument("--clips", type=int, default=12)
    args = ap.parse_args()

    from safetensors.torch import load_file
    sd = load_file(args.ckpt)
    vds = build_val(args)
    vds.clips = vds.clips[:args.clips]
    print(f"probing {len(vds.clips)} val clips", flush=True)

    # (a) Trained head.
    model = MapVGGT(with_map=True, with_dyn=True, finetune_backbone=False).to(DEV)
    load_result = model.load_state_dict(sd, strict=False)
    # strict=False hides key mismatches. If the head weights did not load, the
    # "TRAINED head" row would silently measure an untrained head, so say so.
    missing_head = [k for k in load_result.missing_keys if k.startswith("head.")]
    if missing_head:
        print(f"WARNING: checkpoint is missing {len(missing_head)} head parameter(s) "
              f"(e.g. {missing_head[0]}); 'TRAINED head' results will not reflect "
              f"a trained head", flush=True)
    if load_result.unexpected_keys:
        print(f"WARNING: {len(load_result.unexpected_keys)} checkpoint key(s) were not "
              f"used by the model (e.g. {load_result.unexpected_keys[0]})", flush=True)
    tp, ts, tsd = eval_model(model, vds, args.n_in)
    print(f"TRAINED head: PSNR {tp:.2f}±{tsd:.2f} SSIM {ts:.3f}", flush=True)

    # (b) Same model, head reset in place to the default initialisation.
    _reset_head_to_default(model.head)
    up, us, usd = eval_model(model, vds, args.n_in)
    print(f"DEFAULT head: PSNR {up:.2f}±{usd:.2f} SSIM {us:.3f}", flush=True)
    print(f"\nHead's learned contribution: PSNR {tp-up:+.2f} dB SSIM {ts-us:+.3f}", flush=True)
|
|
|
|
# Script entry point: run the probe only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
|