phanerozoic committed on
Commit
0e8110e
·
verified ·
1 Parent(s): 714b88b

8 segmentation head candidates with shared losses/utils and registry

Browse files
heads/__init__.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Segmentation head registry."""
2
+
3
+ from .linear_probe.head import LinearProbe
4
+ from .cofiber_linear.head import CofiberLinear
5
+ from .cofiber_threshold.head import CofiberThreshold
6
+ from .prototype_bank.head import PrototypeBank
7
+ from .wavelet.head import Wavelet
8
+ from .patch_attention.head import PatchAttention
9
+ from .graph_crf.head import GraphCRF
10
+ from .hypercolumn_linear.head import HypercolumnLinear
11
+
12
+ REGISTRY = {
13
+ "linear_probe": LinearProbe,
14
+ "cofiber_linear": CofiberLinear,
15
+ "cofiber_threshold": CofiberThreshold,
16
+ "prototype_bank": PrototypeBank,
17
+ "wavelet": Wavelet,
18
+ "patch_attention": PatchAttention,
19
+ "graph_crf": GraphCRF,
20
+ "hypercolumn_linear": HypercolumnLinear,
21
+ }
22
+
23
+ ALL_NAMES = list(REGISTRY.keys())
24
+
25
+ def get_head(name):
26
+ if name not in REGISTRY:
27
+ raise ValueError(f"Unknown head: {name}. Available: {ALL_NAMES}")
28
+ return REGISTRY[name]()
heads/cofiber_linear/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/cofiber_linear/head.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cofiber Linear: analytic multi-scale decomposition + shared 1x1 conv per scale."""
2
+
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+
7
def cofiber_decompose(f, n_scales):
    """Split feature map ``f`` into ``n_scales`` multi-scale components.

    Each step low-passes the current residual with 2x2 average pooling,
    bilinearly upsamples it back, and records the difference (the
    "cofiber"). The final, coarsest residual is appended last. Returns a
    list of length ``n_scales``; the first entry matches ``f``'s spatial
    size, and ``n_scales == 1`` returns ``[f]`` unchanged.
    """
    layers = []
    current = f
    for _ in range(n_scales - 1):
        coarse = F.avg_pool2d(current, 2)
        reconstructed = F.interpolate(
            coarse, size=current.shape[2:], mode="bilinear", align_corners=False
        )
        layers.append(current - reconstructed)
        current = coarse
    layers.append(current)
    return layers
17
+
18
+
19
class CofiberLinear(nn.Module):
    """Shared 1x1 conv applied at every cofiber scale.

    Decomposes the input analytically, classifies each scale with the same
    linear layer, upsamples the per-scale logits back to the input grid,
    and sums them.
    """

    name = "cofiber_linear"
    needs_intermediates = False

    def __init__(self, feat_dim=768, num_classes=150, n_scales=3):
        super().__init__()
        self.n_scales = n_scales
        self.conv = nn.Conv2d(feat_dim, num_classes, 1)

    def forward(self, spatial, inter=None):
        size = spatial.shape[2:]
        per_scale = [
            F.interpolate(self.conv(c), size=size, mode="bilinear", align_corners=False)
            for c in cofiber_decompose(spatial, self.n_scales)
        ]
        return sum(per_scale)
heads/cofiber_threshold/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/cofiber_threshold/head.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cofiber Threshold: analytic decomposition + per-scale LayerNorm + prototype classification."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
def cofiber_decompose(f, n_scales):
    """Analytic multi-scale decomposition of ``f`` into ``n_scales`` parts.

    Repeatedly: low-pass with 2x2 average pooling, upsample back, and keep
    the residual difference as one component. The last element is the
    coarsest low-pass residual. NOTE(review): duplicated in
    cofiber_linear/head.py — a shared util would avoid drift.
    """
    out = []
    res = f
    remaining = n_scales - 1
    while remaining > 0:
        low = F.avg_pool2d(res, 2)
        up = F.interpolate(low, size=res.shape[2:], mode="bilinear", align_corners=False)
        out.append(res - up)
        res = low
        remaining -= 1
    out.append(res)
    return out
18
+
19
+
20
class CofiberThreshold(nn.Module):
    """Per-scale LayerNorm + prototype classification over cofiber scales.

    Each scale is normalized by its own LayerNorm, scored against a shared
    bank of class prototypes (plus bias), and the per-scale logits are
    upsampled to the input grid and summed.
    """

    name = "cofiber_threshold"
    needs_intermediates = False

    def __init__(self, feat_dim=768, num_classes=150, n_scales=3):
        super().__init__()
        self.n_scales = n_scales
        self.scale_norms = nn.ModuleList(nn.LayerNorm(feat_dim) for _ in range(n_scales))
        self.prototypes = nn.Parameter(torch.randn(num_classes, feat_dim) * 0.01)
        self.proto_bias = nn.Parameter(torch.zeros(num_classes))

    def forward(self, spatial, inter=None):
        size = spatial.shape[2:]
        total = None
        # Scales come out of cofiber_decompose in fine-to-coarse order,
        # pairing with the norms in registration order.
        for norm, cof in zip(self.scale_norms, cofiber_decompose(spatial, self.n_scales)):
            b, c, h, w = cof.shape
            flat = norm(cof.permute(0, 2, 3, 1).reshape(-1, c))
            scores = (flat @ self.prototypes.T + self.proto_bias).reshape(b, h, w, -1)
            scores = scores.permute(0, 3, 1, 2)
            scores = F.interpolate(scores, size=size, mode="bilinear", align_corners=False)
            total = scores if total is None else total + scores
        return total
heads/graph_crf/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/graph_crf/head.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Graph CRF: k-NN graph in feature space + message passing + per-node classification."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
class GraphCRF(nn.Module):
    """CRF-style message passing over a k-NN graph in feature space.

    Tokens are projected, a k-NN graph is built once from cosine
    similarity (no gradient through graph construction), then ``rounds``
    of gated mean-aggregation message passing refine the tokens before a
    per-node linear classifier. Note: self-similarity is maximal, so each
    node's neighbor set includes itself.
    """

    name = "graph_crf"
    needs_intermediates = False

    def __init__(self, feat_dim=768, num_classes=150, dim=256, k=8, rounds=2):
        super().__init__()
        self.k = k
        self.rounds = rounds
        self.proj = nn.Linear(feat_dim, dim)
        self.msg_layers = nn.ModuleList()
        for _ in range(rounds):
            self.msg_layers.append(nn.ModuleDict({
                "msg": nn.Linear(dim, dim),
                "gate": nn.Sequential(nn.Linear(dim, dim), nn.Sigmoid()),
                "upd": nn.Linear(dim, dim),
                "norm": nn.LayerNorm(dim),
            }))
        self.cls_head = nn.Linear(dim, num_classes)

    def forward(self, spatial, inter=None):
        B, C, H, W = spatial.shape
        nodes = self.proj(spatial.flatten(2).permute(0, 2, 1))  # [B, N, dim]
        N = nodes.shape[1]
        # Build the neighbor index once from the initial embeddings.
        with torch.no_grad():
            unit = F.normalize(nodes, dim=-1)
            _, nbr_idx = torch.bmm(unit, unit.transpose(1, 2)).topk(self.k, dim=-1)
        # Flattened gather index is loop-invariant, so hoist it.
        flat_idx = nbr_idx.reshape(B, -1, 1).expand(-1, -1, nodes.shape[-1])
        for layer in self.msg_layers:
            nbrs = nodes.gather(1, flat_idx).reshape(B, N, self.k, -1)
            message = layer["msg"](nbrs.mean(dim=2))
            gate = layer["gate"](nodes)
            nodes = layer["norm"](nodes + layer["upd"](gate * message))
        return self.cls_head(nodes).reshape(B, H, W, -1).permute(0, 3, 1, 2)
heads/hypercolumn_linear/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/hypercolumn_linear/head.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Hypercolumn Linear: concatenate features from intermediate blocks, single linear layer."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
# Tokens dropped from the front of each intermediate block's sequence
# (presumably CLS + register tokens — TODO confirm against the backbone).
N_PREFIX = 5


class HypercolumnLinear(nn.Module):
    """Hypercolumn head: concatenate patch features from several
    intermediate transformer blocks and classify with one 1x1 conv.

    Assumes each entry of ``inter`` is a token tensor of shape
    [B, N_PREFIX + H*W, feat_dim] whose patch grid matches ``spatial``.
    """

    name = "hypercolumn_linear"
    needs_intermediates = True

    def __init__(self, feat_dim=768, num_classes=150, n_blocks=4):
        super().__init__()
        self.n_blocks = n_blocks
        self.conv = nn.Conv2d(feat_dim * n_blocks, num_classes, 1)

    def forward(self, spatial, inter=None):
        """Return [B, num_classes, H, W] logits from the hypercolumn.

        Raises:
            ValueError: if ``inter`` is missing or its length does not
                match ``n_blocks`` (would otherwise surface as a cryptic
                conv channel-count mismatch).
        """
        B, C, H, W = spatial.shape
        if inter is None:
            raise ValueError("hypercolumn_linear requires intermediate block features")
        if len(inter) != self.n_blocks:
            raise ValueError(
                f"hypercolumn_linear expects {self.n_blocks} intermediate "
                f"blocks, got {len(inter)}"
            )
        spatials = []
        for feat in inter:
            patches = feat[:, N_PREFIX:, :]  # drop prefix tokens, keep patch grid
            spatials.append(patches.permute(0, 2, 1).reshape(B, C, H, W))
        return self.conv(torch.cat(spatials, dim=1))
heads/linear_probe/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/linear_probe/head.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Linear probe: BatchNorm + 1x1 conv. The EUPE paper baseline."""
2
+
3
+ import torch.nn as nn
4
+
5
class LinearProbe(nn.Module):
    """Linear probe baseline (the EUPE paper): BatchNorm2d then 1x1 conv."""

    name = "linear_probe"
    needs_intermediates = False

    def __init__(self, feat_dim=768, num_classes=150):
        super().__init__()
        self.bn = nn.BatchNorm2d(feat_dim)
        self.conv = nn.Conv2d(feat_dim, num_classes, 1)

    def forward(self, spatial, inter=None):
        normalized = self.bn(spatial)
        return self.conv(normalized)
heads/patch_attention/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/patch_attention/head.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Patch Attention: each patch attends to k nearest neighbors before classifying."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
class PatchAttention(nn.Module):
    """Each patch token attends over its k nearest neighbors, then is
    classified per-node.

    The k-NN index is built from cosine similarity without gradient; since
    self-similarity is maximal, each token's neighbor set includes itself.
    """

    name = "patch_attention"
    needs_intermediates = False

    def __init__(self, feat_dim=768, num_classes=150, dim=256, k=16):
        super().__init__()
        self.k = k
        self.proj = nn.Linear(feat_dim, dim)
        self.attn = nn.MultiheadAttention(dim, 4, batch_first=True)
        self.norm = nn.LayerNorm(dim)
        self.cls_head = nn.Linear(dim, num_classes)

    def forward(self, spatial, inter=None):
        B, C, H, W = spatial.shape
        tokens = self.proj(spatial.flatten(2).permute(0, 2, 1))  # [B, N, dim]
        N = tokens.shape[1]
        D = tokens.shape[-1]
        with torch.no_grad():
            unit = F.normalize(tokens, dim=-1)
            _, nbr_idx = torch.bmm(unit, unit.transpose(1, 2)).topk(self.k, dim=-1)
        # Each token is a single-element query over its k-neighbor KV set.
        neighbours = tokens.gather(1, nbr_idx.reshape(B, -1, 1).expand(-1, -1, D))
        kv = neighbours.reshape(B * N, self.k, D)
        q = tokens.reshape(B * N, 1, D)
        attended, _ = self.attn(q, kv, kv)
        fused = self.norm(tokens + attended.reshape(B, N, D))
        return self.cls_head(fused).reshape(B, H, W, -1).permute(0, 3, 1, 2)
heads/prototype_bank/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/prototype_bank/head.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Prototype Bank: learned class prototypes, per-pixel cosine similarity, argmax. No conv layers."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
class PrototypeBank(nn.Module):
    """Per-pixel scaled cosine similarity against learned class
    prototypes. Contains no convolutional layers."""

    name = "prototype_bank"
    needs_intermediates = False

    def __init__(self, feat_dim=768, num_classes=150):
        super().__init__()
        self.prototypes = nn.Parameter(torch.randn(num_classes, feat_dim) * 0.01)
        # Learned temperature applied to the cosine similarities.
        self.scale = nn.Parameter(torch.ones(1) * 10.0)

    def forward(self, spatial, inter=None):
        B, C, H, W = spatial.shape
        pixels = spatial.permute(0, 2, 3, 1).reshape(-1, C)
        sims = F.normalize(pixels, dim=-1) @ F.normalize(self.prototypes, dim=-1).T
        return (sims * self.scale).reshape(B, H, W, -1).permute(0, 3, 1, 2)
heads/wavelet/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .head import *
heads/wavelet/head.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Wavelet: Haar decomposition + per-subband classification."""
2
+
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+
7
class Wavelet(nn.Module):
    """Haar-style pyramid: classify the feature map at each scale with its
    own 1x1 conv and sum the logits after upsampling to the input grid."""

    name = "wavelet"
    needs_intermediates = False

    def __init__(self, feat_dim=768, num_classes=150, n_scales=3):
        super().__init__()
        self.n_scales = n_scales
        self.heads = nn.ModuleList([nn.Conv2d(feat_dim, num_classes, 1) for _ in range(n_scales)])

    @staticmethod
    def haar_down(x):
        """Average each non-overlapping 2x2 block (the Haar LL subband).

        Fix: odd trailing rows/columns are cropped first. The previous
        version raised a shape-mismatch error for odd H or W (the even and
        odd stride-2 slices had different lengths), e.g. on a 37x37 ViT
        patch grid. Inputs already smaller than 2x2 are returned as-is.
        """
        H, W = x.shape[2], x.shape[3]
        if H < 2 or W < 2:
            return x
        x = x[:, :, : H - H % 2, : W - W % 2]
        return (x[:, :, 0::2, 0::2] + x[:, :, 0::2, 1::2] +
                x[:, :, 1::2, 0::2] + x[:, :, 1::2, 1::2]) / 4

    def forward(self, spatial, inter=None):
        """Return [B, num_classes, H, W] logits summed over all scales."""
        target_size = spatial.shape[2:]
        f = spatial
        logits = None
        for i in range(self.n_scales):
            out = self.heads[i](f)
            out = F.interpolate(out, size=target_size, mode="bilinear", align_corners=False)
            logits = out if logits is None else logits + out
            if i < self.n_scales - 1:
                f = self.haar_down(f)
        return logits
losses/__init__.py ADDED
File without changes
losses/segmentation.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Segmentation losses."""
2
+
3
+ import torch
4
+ import torch.nn.functional as F
5
+
6
+
7
def cross_entropy_loss(logits, targets, ignore_index=255):
    """Standard per-pixel cross entropy.

    Args:
        logits: [B, C, H, W] raw class scores; bilinearly upsampled to the
            label grid when the spatial sizes differ.
        targets: [B, H, W] integer class labels.
        ignore_index: label value excluded from the loss (default 255).

    Returns:
        Scalar loss tensor.
    """
    spatial = targets.shape[1:]
    aligned = (
        logits
        if logits.shape[2:] == spatial
        else F.interpolate(logits, size=spatial, mode="bilinear", align_corners=False)
    )
    return F.cross_entropy(aligned, targets, ignore_index=ignore_index)
utils/__init__.py ADDED
File without changes
utils/decode.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared utilities for segmentation heads."""
2
+
3
+ import torch
4
+ import torch.nn.functional as F
5
+
6
+
7
def upsample_and_argmax(logits, target_size):
    """Resize logits to ``target_size`` (H, W) and return per-pixel class
    indices of shape [B, H, W]."""
    if logits.shape[2:] == target_size:
        return logits.argmax(dim=1)
    resized = F.interpolate(logits, size=target_size, mode="bilinear", align_corners=False)
    return resized.argmax(dim=1)