phanerozoic committed on
Commit
a103957
·
verified ·
1 Parent(s): 4f1f9bf

7 depth head candidates with shared losses/utils and registry

Browse files
heads/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Depth head registry."""
2
+
3
+ from .linear_probe.head import LinearProbe
4
+ from .cofiber_linear.head import CofiberLinear
5
+ from .cofiber_threshold.head import CofiberThreshold
6
+ from .wavelet.head import Wavelet
7
+ from .log_linear.head import LogLinear
8
+ from .ordinal_regression.head import OrdinalRegression
9
+ from .multiscale_gradient.head import MultiscaleGradient
10
+
11
+ REGISTRY = {
12
+ "linear_probe": LinearProbe,
13
+ "cofiber_linear": CofiberLinear,
14
+ "cofiber_threshold": CofiberThreshold,
15
+ "wavelet": Wavelet,
16
+ "log_linear": LogLinear,
17
+ "ordinal_regression": OrdinalRegression,
18
+ "multiscale_gradient": MultiscaleGradient,
19
+ }
20
+
21
+ ALL_NAMES = list(REGISTRY.keys())
22
+
23
+ def get_head(name):
24
+ if name not in REGISTRY:
25
+ raise ValueError(f"Unknown head: {name}. Available: {ALL_NAMES}")
26
+ return REGISTRY[name]()
heads/cofiber_linear/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/cofiber_linear/head.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cofiber Linear: analytic decomposition + shared depth bin prediction per scale."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ def cofiber_decompose(f, n_scales):
9
+ cofibers = []
10
+ residual = f
11
+ for _ in range(n_scales - 1):
12
+ omega = F.avg_pool2d(residual, 2)
13
+ sigma_omega = F.interpolate(omega, size=residual.shape[2:], mode="bilinear", align_corners=False)
14
+ cofibers.append(residual - sigma_omega)
15
+ residual = omega
16
+ cofibers.append(residual)
17
+ return cofibers
18
+
19
+
20
+ class CofiberLinear(nn.Module):
21
+ name = "cofiber_linear"
22
+ needs_intermediates = False
23
+
24
+ def __init__(self, feat_dim=768, n_bins=256, min_depth=0.001, max_depth=10.0, n_scales=3):
25
+ super().__init__()
26
+ self.n_scales = n_scales
27
+ self.n_bins = n_bins
28
+ self.min_depth = min_depth
29
+ self.max_depth = max_depth
30
+ self.conv = nn.Conv2d(feat_dim, n_bins, 1)
31
+
32
+ def forward(self, spatial, inter=None):
33
+ cofibers = cofiber_decompose(spatial, self.n_scales)
34
+ target_size = spatial.shape[2:]
35
+ logits = None
36
+ for cof in cofibers:
37
+ out = self.conv(cof)
38
+ out = F.interpolate(out, size=target_size, mode="bilinear", align_corners=False)
39
+ logits = out if logits is None else logits + out
40
+ dist = torch.relu(logits) + 0.1
41
+ dist = dist / dist.sum(dim=1, keepdim=True)
42
+ bins = torch.linspace(self.min_depth, self.max_depth, self.n_bins, device=spatial.device)
43
+ return torch.einsum("bkhw,k->bhw", dist, bins).unsqueeze(1)
heads/cofiber_threshold/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/cofiber_threshold/head.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cofiber Threshold: analytic decomposition + per-scale LayerNorm + prototype depth prediction."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ def cofiber_decompose(f, n_scales):
9
+ cofibers = []
10
+ residual = f
11
+ for _ in range(n_scales - 1):
12
+ omega = F.avg_pool2d(residual, 2)
13
+ sigma_omega = F.interpolate(omega, size=residual.shape[2:], mode="bilinear", align_corners=False)
14
+ cofibers.append(residual - sigma_omega)
15
+ residual = omega
16
+ cofibers.append(residual)
17
+ return cofibers
18
+
19
+
20
+ class CofiberThreshold(nn.Module):
21
+ name = "cofiber_threshold"
22
+ needs_intermediates = False
23
+
24
+ def __init__(self, feat_dim=768, n_bins=256, min_depth=0.001, max_depth=10.0, n_scales=3):
25
+ super().__init__()
26
+ self.n_scales = n_scales
27
+ self.n_bins = n_bins
28
+ self.min_depth = min_depth
29
+ self.max_depth = max_depth
30
+ self.scale_norms = nn.ModuleList([nn.LayerNorm(feat_dim) for _ in range(n_scales)])
31
+ self.weight = nn.Parameter(torch.randn(n_bins, feat_dim) * 0.01)
32
+ self.bias = nn.Parameter(torch.zeros(n_bins))
33
+
34
+ def forward(self, spatial, inter=None):
35
+ cofibers = cofiber_decompose(spatial, self.n_scales)
36
+ target_size = spatial.shape[2:]
37
+ logits = None
38
+ for i, cof in enumerate(cofibers):
39
+ B, C, H, W = cof.shape
40
+ f = self.scale_norms[i](cof.permute(0, 2, 3, 1).reshape(-1, C))
41
+ out = (f @ self.weight.T + self.bias).reshape(B, H, W, -1).permute(0, 3, 1, 2)
42
+ out = F.interpolate(out, size=target_size, mode="bilinear", align_corners=False)
43
+ logits = out if logits is None else logits + out
44
+ dist = torch.relu(logits) + 0.1
45
+ dist = dist / dist.sum(dim=1, keepdim=True)
46
+ bins = torch.linspace(self.min_depth, self.max_depth, self.n_bins, device=spatial.device)
47
+ return torch.einsum("bkhw,k->bhw", dist, bins).unsqueeze(1)
heads/linear_probe/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/linear_probe/head.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Linear probe: BatchNorm + 1x1 conv -> 256 depth bins. The EUPE paper baseline."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+
6
+
7
+ class LinearProbe(nn.Module):
8
+ name = "linear_probe"
9
+ needs_intermediates = False
10
+
11
+ def __init__(self, feat_dim=768, n_bins=256, min_depth=0.001, max_depth=10.0):
12
+ super().__init__()
13
+ self.bn = nn.BatchNorm2d(feat_dim)
14
+ self.conv = nn.Conv2d(feat_dim, n_bins, 1)
15
+ self.n_bins = n_bins
16
+ self.min_depth = min_depth
17
+ self.max_depth = max_depth
18
+
19
+ def forward(self, spatial, inter=None):
20
+ logits = self.conv(self.bn(spatial))
21
+ dist = torch.relu(logits) + 0.1
22
+ dist = dist / dist.sum(dim=1, keepdim=True)
23
+ bins = torch.linspace(self.min_depth, self.max_depth, self.n_bins, device=spatial.device)
24
+ return torch.einsum("bkhw,k->bhw", dist, bins).unsqueeze(1)
heads/log_linear/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/log_linear/head.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Log-Linear: predict log-depth with a single linear layer. 769 parameters."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+
6
+
7
+ class LogLinear(nn.Module):
8
+ name = "log_linear"
9
+ needs_intermediates = False
10
+
11
+ def __init__(self, feat_dim=768, min_depth=0.001, max_depth=10.0):
12
+ super().__init__()
13
+ self.conv = nn.Conv2d(feat_dim, 1, 1)
14
+ self.min_depth = min_depth
15
+ self.max_depth = max_depth
16
+
17
+ def forward(self, spatial, inter=None):
18
+ log_depth = self.conv(spatial)
19
+ return log_depth.exp().clamp(self.min_depth, self.max_depth)
heads/multiscale_gradient/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/multiscale_gradient/head.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Multi-scale Gradient: predict depth gradients per cofiber scale, integrate for absolute depth.
2
+ Cofiber features are inherently edge-like; predicting gradients aligns with the feature structure."""
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+
8
+
9
+ def cofiber_decompose(f, n_scales):
10
+ cofibers = []
11
+ residual = f
12
+ for _ in range(n_scales - 1):
13
+ omega = F.avg_pool2d(residual, 2)
14
+ sigma_omega = F.interpolate(omega, size=residual.shape[2:], mode="bilinear", align_corners=False)
15
+ cofibers.append(residual - sigma_omega)
16
+ residual = omega
17
+ cofibers.append(residual)
18
+ return cofibers
19
+
20
+
21
+ class MultiscaleGradient(nn.Module):
22
+ name = "multiscale_gradient"
23
+ needs_intermediates = False
24
+
25
+ def __init__(self, feat_dim=768, n_scales=3, min_depth=0.001, max_depth=10.0):
26
+ super().__init__()
27
+ self.n_scales = n_scales
28
+ self.min_depth = min_depth
29
+ self.max_depth = max_depth
30
+ # Per-scale: predict dx and dy gradients
31
+ self.grad_heads = nn.ModuleList([nn.Conv2d(feat_dim, 2, 1) for _ in range(n_scales)])
32
+ # Base depth from coarsest residual
33
+ self.base_head = nn.Conv2d(feat_dim, 1, 1)
34
+
35
+ def forward(self, spatial, inter=None):
36
+ cofibers = cofiber_decompose(spatial, self.n_scales)
37
+ target_size = spatial.shape[2:]
38
+
39
+ # Base depth from coarsest scale
40
+ base = self.base_head(cofibers[-1])
41
+ base = F.interpolate(base, size=target_size, mode="bilinear", align_corners=False)
42
+ depth = base
43
+
44
+ # Add integrated gradients from each finer scale
45
+ for i in range(self.n_scales - 1):
46
+ grads = self.grad_heads[i](cofibers[i])
47
+ grads = F.interpolate(grads, size=target_size, mode="bilinear", align_corners=False)
48
+ dx, dy = grads[:, 0:1], grads[:, 1:2]
49
+ depth = depth + dx.cumsum(dim=3) + dy.cumsum(dim=2)
50
+
51
+ return depth.clamp(self.min_depth, self.max_depth)
heads/ordinal_regression/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/ordinal_regression/head.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Ordinal Regression: predict 'is this pixel deeper than threshold t?' for K thresholds.
2
+ Each threshold is a 768->1 linear classifier. Depth = sum of positive predictions * bin width."""
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+
7
+
8
+ class OrdinalRegression(nn.Module):
9
+ name = "ordinal_regression"
10
+ needs_intermediates = False
11
+
12
+ def __init__(self, feat_dim=768, n_thresholds=64, min_depth=0.001, max_depth=10.0):
13
+ super().__init__()
14
+ self.conv = nn.Conv2d(feat_dim, n_thresholds, 1)
15
+ self.n_thresholds = n_thresholds
16
+ self.min_depth = min_depth
17
+ self.max_depth = max_depth
18
+
19
+ def forward(self, spatial, inter=None):
20
+ logits = self.conv(spatial)
21
+ probs = torch.sigmoid(logits)
22
+ bin_width = (self.max_depth - self.min_depth) / self.n_thresholds
23
+ depth = self.min_depth + probs.sum(dim=1, keepdim=True) * bin_width
24
+ return depth
heads/wavelet/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/wavelet/head.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Wavelet: Haar decomposition + per-subband depth bin prediction."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ class Wavelet(nn.Module):
9
+ name = "wavelet"
10
+ needs_intermediates = False
11
+
12
+ def __init__(self, feat_dim=768, n_bins=256, min_depth=0.001, max_depth=10.0, n_scales=3):
13
+ super().__init__()
14
+ self.n_scales = n_scales
15
+ self.n_bins = n_bins
16
+ self.min_depth = min_depth
17
+ self.max_depth = max_depth
18
+ self.heads = nn.ModuleList([nn.Conv2d(feat_dim, n_bins, 1) for _ in range(n_scales)])
19
+
20
+ @staticmethod
21
+ def haar_down(x):
22
+ h, w = x.shape[2], x.shape[3]
23
+ x = x[:, :, :h - h % 2, :w - w % 2]
24
+ return (x[:, :, 0::2, 0::2] + x[:, :, 0::2, 1::2] +
25
+ x[:, :, 1::2, 0::2] + x[:, :, 1::2, 1::2]) / 4
26
+
27
+ def forward(self, spatial, inter=None):
28
+ target_size = spatial.shape[2:]
29
+ f = spatial
30
+ logits = None
31
+ for i in range(self.n_scales):
32
+ out = self.heads[i](f)
33
+ out = F.interpolate(out, size=target_size, mode="bilinear", align_corners=False)
34
+ logits = out if logits is None else logits + out
35
+ if i < self.n_scales - 1:
36
+ f = self.haar_down(f)
37
+ dist = torch.relu(logits) + 0.1
38
+ dist = dist / dist.sum(dim=1, keepdim=True)
39
+ bins = torch.linspace(self.min_depth, self.max_depth, self.n_bins, device=spatial.device)
40
+ return torch.einsum("bkhw,k->bhw", dist, bins).unsqueeze(1)
losses/__init__.py ADDED
File without changes
losses/depth.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Depth losses."""
2
+
3
+ import torch
4
+ import torch.nn.functional as F
5
+
6
+
7
+ def silog_loss(pred, target, mask=None, variance_focus=0.85):
8
+ """Scale-invariant logarithmic loss."""
9
+ pred = pred.flatten(1)
10
+ target = target.flatten(1)
11
+ if mask is not None:
12
+ mask = mask.flatten(1).bool()
13
+ pred = pred[mask]
14
+ target = target[mask]
15
+ else:
16
+ pred = pred.reshape(-1)
17
+ target = target.reshape(-1)
18
+ pred = pred.clamp(min=1e-6)
19
+ target = target.clamp(min=1e-6)
20
+ d = torch.log(pred) - torch.log(target)
21
+ return torch.sqrt((d ** 2).mean() - variance_focus * (d.mean() ** 2) + 1e-8)
22
+
23
+
24
+ def l1_depth_loss(pred, target, mask=None):
25
+ """Simple L1 loss on depth values."""
26
+ if mask is not None:
27
+ return F.l1_loss(pred[mask], target[mask])
28
+ return F.l1_loss(pred, target)
utils/__init__.py ADDED
File without changes