code

436b829 verified 16 days ago

16.1 kB

	"""
	Per-claim verification of the LPD implementation against paper.tex.

	Each `check()` block ties one specific paper claim to the code that
	realises it; failures are reported with the missing/incorrect piece. Run as:

	    cd /mnt/sig/pixel-perfect-depth
	    python -m ppd.lpd.tests.verify_paper

	Exits with non-zero status if any check fails. Designed to be a
	single-pass audit, not a unit-test suite — it executes real tensor ops
	on small inputs to confirm shapes, equations, and gradient flow.
	"""
	from __future__ import annotations

	import os
	import sys
	import math
	import inspect
	from typing import Callable

	import torch
	import torch.nn.functional as F


	# Registry of (claim description, zero-argument check callable) pairs,
	# filled in by the `check` decorator in definition order.
	CHECKS: list[tuple[str, Callable[[], None]]] = []


	def check(name: str):
	    """Return a decorator that registers a function in ``CHECKS`` under *name*.

	    The decorated function is appended as ``(name, function)`` and then
	    returned unchanged.
	    """
	    def register(func: Callable[[], None]):
	        entry = (name, func)
	        CHECKS.append(entry)
	        return func

	    return register


	def assert_close(actual: torch.Tensor, expected: torch.Tensor, msg: str, atol: float = 1e-5):
	    """Raise ``AssertionError`` unless ``torch.allclose(actual, expected)`` holds.

	    Only ``atol`` is passed through; ``torch.allclose`` keeps its default
	    ``rtol``. On failure the error text is *msg* followed by the largest
	    absolute element-wise difference.
	    """
	    if torch.allclose(actual, expected, atol=atol):
	        return
	    worst = (actual - expected).abs().max().item()
	    raise AssertionError(f"{msg}: max diff {worst:.3e}")


	# ----------------------------------------------------------------- §3.1 image
	@check("§3.1: sparse-prompt encoder pools at scales {4, 8, 16, 32}")
	def _():
	    """Check that a default ``SparsePromptEncoder`` exposes scales (4, 8, 16, 32)."""
	    from ppd.lpd.prompt_encoder import SparsePromptEncoder

	    encoder = SparsePromptEncoder()
	    expected_scales = (4, 8, 16, 32)
	    assert tuple(encoder.scales) == expected_scales, f"scales={encoder.scales}"


	@check("§3.1: encoder produces both depth and density per scale")
	def _():
	    """Check ``masked_avg_pool`` output shapes, peak density and peak pooled depth.

	    A 32x32 map is sampled at every 8th pixel (16 samples in total), so with
	    ``kernel=4`` a pooled cell contains at most one observation.
	    """
	    from ppd.lpd.prompt_encoder import masked_avg_pool

	    depth = torch.zeros(1, 1, 32, 32)
	    depth[:, :, ::8, ::8] = 1.0
	    mask = (depth > 0).float()
	    pooled, density = masked_avg_pool(depth, mask, kernel=4)
	    assert pooled.shape == density.shape == (1, 1, 8, 8)
	    # A cell that contains a sample has exactly one observation among its
	    # 4x4 pixels, so the peak density is 1/16.
	    assert (density.max() - 1 / 16.0).abs() < 1e-5
	    # The masked average over that single observation is its depth (1.0).
	    # Compared with the same tolerance as the density above rather than
	    # exact float equality, so rounding in the pooled division (for
	    # instance a small epsilon in the denominator, if the implementation
	    # uses one) does not produce a spurious failure.
	    assert (pooled.max() - 1.0).abs() < 1e-5


	@check("§3.1: encoder applies a two-layer CNN + linear projection")
	def _():
	    """Check the first per-scale net has two ``Conv2d`` layers and ``fuse`` is ``Linear``."""
	    from ppd.lpd.prompt_encoder import SparsePromptEncoder, _SmallCNN

	    encoder = SparsePromptEncoder()
	    # _SmallCNN is expected to be Conv → GELU → Conv, i.e. two convolutions.
	    first_scale_layers = encoder.per_scale[0].net
	    conv_layers = [
	        layer for layer in first_scale_layers
	        if isinstance(layer, torch.nn.Conv2d)
	    ]
	    n_convs = len(conv_layers)
	    assert n_convs == 2, f"expect two convs, got {n_convs}"
	    # The final projection must be a linear layer.
	    assert isinstance(encoder.fuse, torch.nn.Linear)


	@check("§3.1: prompt-aware quantile log normalization produces ~[-0.5, 0.5]")
	def _():
	    """Check the output range of ``quantile_log_normalize`` on a fully observed ramp."""
	    from ppd.lpd.prompt_encoder import quantile_log_normalize

	    # 256 depths from 0.5 to 50.0 laid out as a 16x16 map, all observed.
	    depth = torch.linspace(0.5, 50.0, 256).reshape(1, 1, 16, 16)
	    full_mask = torch.ones_like(depth)
	    normalized = quantile_log_normalize(depth, full_mask)
	    lowest = normalized.min().item()
	    assert -0.5 - 1e-3 <= lowest <= 0.05
	    highest = normalized.max().item()
	    assert 0.5 - 0.05 <= highest <= 1.0


	@check("§3.1 Eq.(1): prompt gate computes s_sem + g(p,ρ,t) ⊙ m(s_sem,p,ρ,t)")
	def _():
	    """Check that a freshly constructed ``PromptGate`` returns ``s_sem`` unchanged."""
	    from ppd.lpd.prompt_gate import PromptGate

	    gate = PromptGate(embed_dim=32, timestep_dim=32, hidden=32)
	    batch, tokens, dim = 2, 8, 32
	    s_sem = torch.randn(batch, tokens, dim)
	    prompt = torch.randn(batch, tokens, dim)
	    density = torch.rand(batch, tokens, 1)
	    t_emb = torch.randn(batch, dim)
	    joint = gate(s_sem, prompt, density, t_emb)
	    # With zero-initialised last layers the mixer delta is 0 and the gate
	    # sits at sigmoid(0) = 0.5, so gate * delta = 0 and the joint output
	    # should equal s_sem at initialisation.
	    assert_close(joint, s_sem, "joint should equal s_sem at init", atol=1e-5)


	@check("§3.1: m and g are zero-initialized so model starts as pretrained PPD")
	def _():
	    """Check that the final mixer layer and the gate's pre-sigmoid layer are all zeros."""
	    from ppd.lpd.prompt_gate import PromptGate

	    gate = PromptGate(embed_dim=16, timestep_dim=16, hidden=16)
	    # The last layer of the mixer must be zero.
	    mixer_out = gate.mixer[-1]
	    assert torch.all(mixer_out.weight == 0)
	    assert torch.all(mixer_out.bias == 0)
	    # The gate's pre-sigmoid linear must be zero; it sits at index -2,
	    # directly before the Sigmoid.
	    pre_sigmoid = gate.gate[-2]
	    assert torch.all(pre_sigmoid.weight == 0)
	    assert torch.all(pre_sigmoid.bias == 0)


	@check("§3.1: timestep embedding is projected before entering the gate")
	def _():
	    """Check that ``TimestepEmbedder.mlp`` is a ``Sequential`` with two ``Linear`` layers."""
	    # LPDDiT calls self.t_embedder(timestep), which contains a 2-layer MLP.
	    from ppd.models.dit import TimestepEmbedder

	    embedder = TimestepEmbedder(hidden_size=32)
	    assert isinstance(embedder.mlp, torch.nn.Sequential)
	    linear_layers = [
	        layer for layer in embedder.mlp
	        if isinstance(layer, torch.nn.Linear)
	    ]
	    assert len(linear_layers) == 2


	# ----------------------------------------------------------------- §3.3 score decomp
	@check("§3.3 Eq.(5): LiDAR likelihood gradient = -M ⊙ (x - y) / R")
	def _():
	    """Check ``posterior_project`` against the LiDAR-only update with no temporal prior."""
	    from ppd.lpd.posterior_projection import posterior_project

	    x = torch.full((1, 1, 4, 4), 0.5)
	    y = torch.full((1, 1, 4, 4), 0.0)
	    M = torch.ones_like(x)
	    projected = posterior_project(
	        x,
	        sigma_t=torch.tensor(1.0),
	        sparse_depth=y,
	        sparse_mask=M,
	        R=0.1,
	        mu_prior=None,
	        P_prior=None,
	        alpha=1.0,
	    )
	    # eta = sigma² · alpha = 1 and the Kalman term is absent, so the
	    # expected result is x + 1 · (-M ⊙ (x - y) / R).
	    likelihood_grad = -M * (x - y) / 0.1
	    expected = x + 1.0 * likelihood_grad
	    assert_close(projected, expected, "Eq.(5) projection")


	@check("§3.3 Eq.(6): Kalman temporal-prior gradient = -(x - μ) / P")
	def _():
	    """Check ``posterior_project`` against the prior-only update with an empty mask."""
	    from ppd.lpd.posterior_projection import posterior_project

	    x = torch.full((1, 1, 4, 4), 0.3)
	    mu = torch.full((1, 1, 4, 4), 0.1)
	    P = torch.full((1, 1, 4, 4), 0.5)
	    # An all-zero mask means no LiDAR observation is supplied.
	    no_depth = torch.zeros_like(x)
	    no_mask = torch.zeros_like(x)
	    projected = posterior_project(
	        x,
	        sigma_t=torch.tensor(1.0),
	        sparse_depth=no_depth,
	        sparse_mask=no_mask,
	        R=0.1,
	        mu_prior=mu,
	        P_prior=P,
	        alpha=1.0,
	    )
	    prior_grad = -(x - mu) / P
	    expected = x + 1.0 * prior_grad
	    assert_close(projected, expected, "Eq.(6) Kalman prior gradient")


	@check("§3.3 Eq.(7): η_τ = α · σ_τ²")
	def _():
	    """Check the step size used by ``posterior_project`` for alpha=2, sigma=0.5."""
	    from ppd.lpd.posterior_projection import posterior_project

	    x = torch.full((1, 1, 2, 2), 1.0)
	    y = torch.zeros_like(x)
	    M = torch.ones_like(x)
	    sigma = torch.tensor(0.5)
	    projected = posterior_project(
	        x,
	        sigma,
	        sparse_depth=y,
	        sparse_mask=M,
	        R=1.0,
	        mu_prior=None,
	        P_prior=None,
	        alpha=2.0,
	    )
	    eta = 2.0 * 0.5 ** 2  # alpha · sigma² = 0.5
	    likelihood_grad = -M * (x - y) / 1.0
	    expected = x + eta * likelihood_grad
	    assert_close(projected, expected, "Eq.(7) step-size schedule")


	# ----------------------------------------------------------------- §3.4 KIL
	@check("§3.4 Algorithm 1: Kalman gain K = P / (P + σ²)")
	def _():
	    """Check the gain formula numerically: P=0.5, σ²=0.25 gives K=2/3."""
	    prior_var = torch.tensor(0.5)
	    noise_var = torch.tensor(0.25)
	    gain = prior_var / (prior_var + noise_var)
	    assert (gain - 2 / 3).abs() < 1e-6


	@check("§3.4 Algorithm 1: variance update P_τ = (1-K) P_{τ-1} ⇒ monotone decrease")
	def _():
	    """Check that repeated ``(1 - K) P`` updates never increase the variance."""
	    variance = torch.tensor(1.0)
	    for noise_var in (1.0, 0.5, 0.25, 0.0625):
	        gain = variance / (variance + noise_var)
	        shrunk = (1 - gain) * variance
	        assert shrunk <= variance + 1e-9, "variance must not grow"
	        variance = shrunk


	@check("§3.4: Kalman state μ_τ = μ_{τ-1} + K (x̂_0 - μ_{τ-1})")
	def _():
	    """Check the state update numerically: μ=0, P=σ²=1, x̂_0=1 gives μ=0.5."""
	    prior_mean = torch.tensor(0.0)
	    prior_var = torch.tensor(1.0)
	    estimate = torch.tensor(1.0)
	    noise_var = torch.tensor(1.0)
	    gain = prior_var / (prior_var + noise_var)
	    innovation = estimate - prior_mean
	    posterior_mean = prior_mean + gain * innovation
	    assert (posterior_mean - 0.5).abs() < 1e-6


	@check("§3.4: kalman_in_loop_sample returns (depth, posterior_variance)")
	def _():
	    """Check that ``kalman_in_loop_sample`` accepts ``x_T`` and ``sparse_depth``.

	    Only the signature is inspected; the function is not called here.
	    """
	    from ppd.lpd.kalman_in_loop import kalman_in_loop_sample

	    # Uses the module-level `inspect` import.
	    params = inspect.signature(kalman_in_loop_sample).parameters
	    assert "x_T" in params and "sparse_depth" in params


	# ----------------------------------------------------------------- §3.5 temporal Kalman
	@check("§3.5: predict step warps state and inflates variance by Q")
	def _():
	    """Check that ``predict`` with zero flow raises the variance from 0 to ``Q_base``."""
	    from ppd.lpd.temporal_kalman import TemporalKalmanFilter, TemporalKalmanConfig

	    kf = TemporalKalmanFilter(
	        shape=(1, 1, 8, 8),
	        device=torch.device("cpu"),
	        config=TemporalKalmanConfig(
	            Q_base=0.1, alpha=0.0, P_init=0.0, occ_threshold=999.0,
	        ),
	    )
	    # Seed a known state: unit mean, zero variance.
	    kf.mu.fill_(1.0)
	    kf.P.fill_(0.0)
	    kf.has_state = True
	    zero_flow = torch.zeros(1, 2, 8, 8)  # zero flow ⇒ identity warp
	    kf.predict(flow_fwd=zero_flow, flow_bwd=zero_flow)
	    # With alpha=0 the variance should grow by exactly Q_base.
	    deviation = (kf.P - 0.1).abs().max()
	    assert deviation < 1e-5


	@check("§3.5 Eq.(9): forward-backward error ε = ||p + f_fwd + f_bwd(p+f_fwd)||")
	def _():
	    """Check that ``forward_backward_error`` is ~0 when the backward flow inverts the forward flow.

	    The check name previously spelled the norm bars as ``\\|``, an invalid
	    string escape that triggers a warning on recent Python versions and
	    prints stray backslashes in the report; plain ``||`` is used instead.
	    """
	    from ppd.lpd.temporal_kalman import forward_backward_error

	    f_fwd = torch.zeros(1, 2, 8, 8)
	    f_fwd[:, 0] = 2.0  # uniform +2 in the first flow channel (x)
	    f_bwd = -f_fwd  # exact inverse ⇒ ε ≈ 0
	    eps = forward_backward_error(f_fwd, f_bwd)
	    assert eps.max() < 1e-3, f"ε should be ~0, got {eps.max().item()}"


	@check("§3.5: occluded pixels (ε > τ_occ) reset variance to P_max")
	def _():
	    """Check that an inconsistent flow pair drives the variance up to ``P_max``."""
	    from ppd.lpd.temporal_kalman import TemporalKalmanFilter, TemporalKalmanConfig

	    kf = TemporalKalmanFilter(
	        shape=(1, 1, 8, 8),
	        device=torch.device("cpu"),
	        config=TemporalKalmanConfig(P_max=99.0, occ_threshold=0.5),
	    )
	    kf.mu.fill_(1.0)
	    kf.P.fill_(0.1)
	    kf.has_state = True
	    forward_flow = torch.zeros(1, 2, 8, 8)
	    forward_flow[:, 0] = 5.0  # 5px forward
	    backward_flow = torch.zeros_like(forward_flow)  # no return ⇒ ε = 5
	    kf.predict(forward_flow, backward_flow)
	    peak_variance = kf.P.max()
	    assert peak_variance >= 99.0


	@check("§3.5: update step Kalman gain K = P / (P + R) at observed pixels")
	def _():
	    """Check ``update`` on a fully observed frame against the closed-form gain.

	    The expected values assume the filter starts with mean 0 and variance
	    ``P_init`` (1.0).
	    """
	    from ppd.lpd.temporal_kalman import TemporalKalmanFilter, TemporalKalmanConfig

	    kf = TemporalKalmanFilter(
	        shape=(1, 1, 4, 4),
	        device=torch.device("cpu"),
	        config=TemporalKalmanConfig(R=0.1, P_init=1.0),
	    )
	    observed_depth = torch.full((1, 1, 4, 4), 0.5)
	    observed_mask = torch.ones_like(observed_depth)
	    mu, P = kf.update(observed_depth, observed_mask)
	    gain = 1.0 / (1.0 + 0.1)
	    expected_mu = 0.0 + gain * (0.5 - 0.0)
	    expected_P = (1 - gain) * 1.0
	    mu_error = (mu - expected_mu).abs().max()
	    assert mu_error < 1e-5
	    P_error = (P - expected_P).abs().max()
	    assert P_error < 1e-5


	@check("§3.5: at unobserved pixels (mask=0), state passes through unchanged")
	def _():
	    """Check that ``update`` with an all-zero mask leaves mean and variance untouched."""
	    from ppd.lpd.temporal_kalman import TemporalKalmanFilter, TemporalKalmanConfig

	    kf = TemporalKalmanFilter(
	        shape=(1, 1, 4, 4),
	        device=torch.device("cpu"),
	        config=TemporalKalmanConfig(R=0.1, P_init=0.5),
	    )
	    kf.mu.fill_(0.7)
	    empty_depth = torch.zeros(1, 1, 4, 4)
	    empty_mask = torch.zeros_like(empty_depth)  # nothing observed
	    mu, P = kf.update(empty_depth, empty_mask)
	    mu_drift = (mu - 0.7).abs().max()
	    assert mu_drift < 1e-6
	    P_drift = (P - 0.5).abs().max()
	    assert P_drift < 1e-6


	@check("§3.5: metric uncertainty = exp(sqrt(P)) - 1")
	def _():
	    """Check ``metric_uncertainty`` on a single-pixel filter with ``P_init=0.25``."""
	    from ppd.lpd.temporal_kalman import TemporalKalmanFilter, TemporalKalmanConfig

	    kf = TemporalKalmanFilter(
	        shape=(1, 1, 1, 1),
	        device=torch.device("cpu"),
	        config=TemporalKalmanConfig(P_init=0.25),
	    )
	    std = math.sqrt(0.25)
	    expected = math.exp(std) - 1
	    actual = kf.metric_uncertainty().item()
	    assert abs(actual - expected) < 1e-5


	# ----------------------------------------------------------------- §3.6 modulation
	@check("§3.6 Eq.(8): ρ̃(p) = ρ(p) · (1 + P(p)/max P)")
	def _():
	    """Check ``modulate_density`` for constant ρ=0.5 and P in {0, 0.5, 1, 2}."""
	    from ppd.lpd.uncertainty_modulation import modulate_density

	    rho = torch.full((1, 4, 1), 0.5)
	    P_full = torch.tensor([0.0, 0.5, 1.0, 2.0]).reshape(1, 1, 1, 4)
	    rho_tilde = modulate_density(rho, P_full)
	    # max P = 2.0, so ρ̃ = 0.5 * (1 + P / 2.0), laid out like rho.
	    per_token_P = P_full.squeeze(2).squeeze(1).reshape(1, 4, 1)
	    expected = 0.5 * (1 + per_token_P / 2.0)
	    assert_close(rho_tilde, expected, "Eq.(8) modulation")


	# ----------------------------------------------------------------- §3.7 training
	@check("§3.7: anchor loss is L1(x̂_0 - y) over observed pixels")
	def _():
	    """Check ``anchor_loss`` on a four-pixel example with three observed pixels."""
	    from ppd.lpd.losses import anchor_loss

	    prediction = torch.tensor([[[[0.0, 0.5, 1.0, 0.0]]]]).float()
	    target = torch.tensor([[[[0.5, 0.5, 0.5, 0.0]]]]).float()
	    mask = torch.tensor([[[[1.0, 1.0, 1.0, 0.0]]]])
	    # Observed diffs: |0-0.5| + |0.5-0.5| + |1-0.5| = 1.0; |M| = 3 → 1/3.
	    loss = anchor_loss(prediction, target, mask).item()
	    expected = 1.0 / 3
	    assert abs(loss - expected) < 1e-6


	@check("§3.7: total training loss combines MSE + λ_a anchor + λ_g grad")
	def _():
	    """Check that the source of ``ppd.lpd.lpd_train`` mentions the expected loss terms.

	    This is a textual check on the module source, not an execution of the
	    training loss.
	    """
	    train_module = __import__("ppd.lpd.lpd_train", fromlist=["LiDARPerfectDepth"])
	    src = inspect.getsource(train_module)
	    for needle in ("lambda_anchor", "anchor_loss", "multi_scale_grad_loss"):
	        assert needle in src


	@check("§3.7: backbone freeze leaves only prompt-encoder + gate trainable")
	def _():
	    """Check which parameters stay trainable after ``freeze_backbone()``.

	    Every trainable parameter name must start with ``sparse_prompt_encoder``
	    or ``prompt_gate``, and at least one parameter must remain trainable.
	    Without the second condition the check would pass vacuously if the
	    freeze disabled gradients everywhere.
	    """
	    from ppd.lpd.lpd_dit import LPDDiT

	    model = LPDDiT(hidden_size=128, depth=4, num_heads=4, patch_size=8)
	    model.freeze_backbone()
	    allowed_prefixes = ("sparse_prompt_encoder", "prompt_gate")
	    trainable = [
	        name for name, param in model.named_parameters() if param.requires_grad
	    ]
	    for name in trainable:
	        assert name.startswith(allowed_prefixes), f"unexpected trainable: {name}"
	    assert trainable, "freeze_backbone() left no trainable parameters"


	# ----------------------------------------------------------------- §4.1 sparse simulator
	@check("§4.1: sparse simulator implements random / scan_line / grid / hybrid")
	def _():
	    """Check each ``simulate`` pattern yields observations and zero depth off-mask."""
	    from ppd.lpd.sparse_simulator import simulate

	    dense = torch.ones(1, 1, 32, 32)
	    valid = torch.ones_like(dense, dtype=torch.bool)
	    for pattern in ("random", "scan_line", "grid", "hybrid"):
	        sparse, observed = simulate(
	            dense,
	            valid,
	            pattern=pattern,
	            density=0.05,
	            n_lines=4,
	            line_density=0.5,
	            grid_stride=8,
	            min_points=4,
	        )
	        assert observed.sum() > 0, f"{pattern} produced no observations"
	        # Depth should be zero wherever the mask is false.
	        off_mask_depth = sparse[~observed]
	        assert (off_mask_depth == 0).all().item()


	# ----------------------------------------------------------------- §4.4 implementation
	@check("§4.4: temporal Kalman defaults — R=0.01, Q_base=0.005, α=0.5, P_max=10, τ_occ=2.0")
	def _():
	    """Check the default field values of ``TemporalKalmanConfig``."""
	    from ppd.lpd.temporal_kalman import TemporalKalmanConfig

	    config = TemporalKalmanConfig()
	    expected_defaults = {
	        "R": 0.01,
	        "Q_base": 0.005,
	        "alpha": 0.5,
	        "P_max": 10.0,
	        "occ_threshold": 2.0,
	    }
	    for field, value in expected_defaults.items():
	        assert getattr(config, field) == value


	@check("§4.4: posterior projection R_proj defaults to 0.1")
	def _():
	    """Check that a default ``KalmanInLoopConfig`` has ``R_proj == 0.1``."""
	    from ppd.lpd.kalman_in_loop import KalmanInLoopConfig

	    default_R_proj = KalmanInLoopConfig().R_proj
	    assert default_R_proj == 0.1


	@check("§4.4: PPD weights load with smart partial loading (strict=False)")
	def _():
	    """Check that the source of ``ppd.lpd.lpd_train`` mentions partial weight loading.

	    This is a textual check on the module source; no weights are loaded.
	    """
	    train_module = __import__("ppd.lpd.lpd_train", fromlist=["LiDARPerfectDepth"])
	    src = inspect.getsource(train_module)
	    for needle in ("strict=False", "_load_ppd_weights"):
	        assert needle in src


	# ----------------------------------------------------------------- end-to-end shape sanity
	@check("end-to-end: LPDDiT forward at training resolution returns (B,1,H,W)")
	def _():
	    """Run one ``LPDDiT`` forward pass on a 64x64 input and check the output shape."""
	    from ppd.lpd.lpd_dit import LPDDiT

	    model = LPDDiT(hidden_size=128, depth=4, num_heads=4, patch_size=8)
	    B, H, W = 1, 64, 64
	    x = torch.randn(B, 4, H, W)
	    n_tokens = (H // 16) * (W // 16)
	    sem = torch.randn(B, n_tokens, 1024)
	    t = torch.tensor([100.0])
	    # Sparse prompt: one observation of depth 0.3 at every 8th pixel.
	    sparse_depth = torch.zeros(B, 1, H, W)
	    sparse_depth[:, :, ::8, ::8] = 0.3
	    sparse_mask = sparse_depth > 0
	    out = model(x, sem, t, sparse_depth=sparse_depth, sparse_mask=sparse_mask)
	    assert out.shape == (B, 1, H, W)


	@check("end-to-end: KIL sampler produces depth + variance maps with the right shapes")
	def _():
	    """Run ``kalman_in_loop_sample`` for 4 steps with a stub predictor and check shapes."""
	    from ppd.utils.diffusion.timesteps import Timesteps
	    from ppd.utils.diffusion.schedule import LinearSchedule
	    from ppd.utils.diffusion.sampler import EulerSampler
	    from ppd.lpd.kalman_in_loop import kalman_in_loop_sample, KalmanInLoopConfig

	    schedule = LinearSchedule(T=1000)
	    steps = Timesteps(T=1000, steps=4, device=torch.device("cpu"))
	    sampler = EulerSampler(
	        schedule=schedule, timesteps=steps, prediction_type="velocity",
	    )
	    B, H, W = 1, 32, 32
	    out_shape = (B, 1, H, W)
	    x_T = torch.randn(B, 1, H, W)
	    cond = torch.randn(B, 3, H, W)
	    # No LiDAR observations: empty depth map and all-zero mask.
	    sparse_depth = torch.zeros(B, 1, H, W)
	    sparse_mask = torch.zeros_like(sparse_depth)

	    def predict(x_tau, tau):
	        # Stub predictor: always returns zeros shaped like its input.
	        return torch.zeros_like(x_tau)

	    depth, variance = kalman_in_loop_sample(
	        dit_predict_x0=predict,
	        sampler=sampler,
	        timesteps=list(steps),
	        x_T=x_T,
	        cond=cond,
	        semantics_fn=lambda: None,
	        sparse_depth=sparse_depth,
	        sparse_mask=sparse_mask,
	        config=KalmanInLoopConfig(),
	    )
	    assert depth.shape == out_shape
	    assert variance.shape == out_shape


	# ----------------------------------------------------------------- runner
	def main() -> int:
	    """Run every registered check, print a report, and return an exit code.

	    Checks run in the order they appear in ``CHECKS``. Any ``Exception``
	    raised by a check is reported on its line and counted as a failure;
	    the remaining checks still run.

	    Returns:
	        0 when no check failed, 1 otherwise.
	    """
	    ok = fail = 0
	    # default=0 keeps the runner usable when nothing is registered;
	    # max() over an empty sequence would otherwise raise ValueError.
	    width = max((len(name) for name, _ in CHECKS), default=0)
	    for name, fn in CHECKS:
	        try:
	            fn()
	        except Exception as e:  # audit boundary: report and keep going
	            print(f" ✗ {name.ljust(width)} → {type(e).__name__}: {e}")
	            fail += 1
	        else:
	            print(f" ✓ {name.ljust(width)}")
	            ok += 1
	    print(f"\n{ok} passed, {fail} failed (of {len(CHECKS)})")
	    return 0 if fail == 0 else 1


	if __name__ == "__main__":
	    # Put the current working directory first on the import path before
	    # any check runs (presumably so the lazily imported `ppd` package
	    # resolves when launched from the repository root).
	    sys.path.insert(0, os.getcwd())
	    exit_code = main()
	    sys.exit(exit_code)