AbstractPhil committed on
Commit
8fb9dff
·
verified ·
1 Parent(s): 8c3a929

Update cell4_vae_pipeline.py

Browse files
Files changed (1) hide show
  1. cell4_vae_pipeline.py +190 -138
cell4_vae_pipeline.py CHANGED
@@ -1,11 +1,10 @@
1
  """
2
- Cell 4: Multi-Scale Geometric Extraction Pipeline
3
- ===================================================
4
  Run after Cells 1-3. Uses globals from prior cells.
5
 
6
- Updated for PatchCrossAttentionClassifier (no Conv3d).
7
- Defines extraction functions and the MultiScaleExtractor class.
8
- Does NOT execute anything — Cell 5 uses these.
9
  """
10
 
11
  import numpy as np
@@ -20,8 +19,8 @@ import math
20
  class ExtractionConfig:
21
  canonical_shape: Tuple[int, int, int] = (8, 16, 16)
22
  scales: List[Tuple[int, int, int]] = field(default_factory=lambda: [
23
- (32, 64, 64), # L0: full latent
24
- (16, 32, 32), # L1: regional
25
  (8, 16, 16), # L2: native patch
26
  (4, 8, 8), # L3: fine detail
27
  ])
@@ -30,6 +29,7 @@ class ExtractionConfig:
30
  min_occupancy: float = 0.005
31
  binarize_percentiles: List[float] = field(default_factory=lambda: [75, 90, 95])
32
  n_channel_groups: int = 8
 
33
  device: str = 'cuda'
34
 
35
 
@@ -48,53 +48,116 @@ class GeometricAnnotation:
48
  channel_group_pair: Optional[Tuple[int, int]] = None
49
 
50
 
51
- def extract_patches_sliding(volume, patch_size, overlap=0.5):
52
- """Extract overlapping patches from a 3D volume."""
 
 
 
 
 
 
53
  D, H, W = volume.shape
54
  pz, py, px = patch_size
55
 
56
- if D < pz or H < py or W < px:
57
- pad_d = max(pz - D, 0)
58
- pad_h = max(py - H, 0)
59
- pad_w = max(px - W, 0)
 
60
  volume = F.pad(volume, (0, pad_w, 0, pad_h, 0, pad_d))
61
  D, H, W = volume.shape
62
 
63
- stride_z = max(1, int(pz * (1 - overlap)))
64
- stride_y = max(1, int(py * (1 - overlap)))
65
- stride_x = max(1, int(px * (1 - overlap)))
66
 
67
- patches = []
68
- for z in range(0, max(1, D - pz + 1), stride_z):
69
- for y in range(0, max(1, H - py + 1), stride_y):
70
- for x in range(0, max(1, W - px + 1), stride_x):
71
- patch = volume[z:z+pz, y:y+py, x:x+px]
72
- patches.append((patch, (z, y, x)))
73
 
74
- return patches
 
 
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- def resize_to_canonical(patch, target=(8, 16, 16)):
78
- """Resize 3D patch to canonical resolution via trilinear interpolation."""
79
- x = patch.unsqueeze(0).unsqueeze(0).float()
80
- x = F.interpolate(x, size=target, mode='trilinear', align_corners=False)
81
- return x.squeeze(0).squeeze(0)
82
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- def binarize_continuous(patch, percentiles=[75, 90, 95]):
85
- """Binarize continuous patch at multiple percentile thresholds."""
86
- flat = patch.flatten()
87
- nonzero = flat[flat.abs() > 1e-8]
88
- if len(nonzero) < 10:
89
- return [torch.zeros_like(patch)] * len(percentiles)
90
- thresholds = [torch.quantile(nonzero.abs(), p / 100.0).item() for p in percentiles]
91
- return [(patch.abs() >= t).float() for t in thresholds]
92
 
 
93
 
94
  def cluster_channels(latents, n_groups=8):
95
  """
96
  Cluster VAE channels by correlation.
97
- latents: (N, C, H, W) batch
98
  Returns: (groups, corr_matrix)
99
  """
100
  N, C, H, W = latents.shape
@@ -153,164 +216,153 @@ def cluster_channels(latents, n_groups=8):
153
 
154
  def compute_inter_group_deviances(latent, groups):
155
  """
156
- Compute (H, W) deviance maps between channel groups.
157
  latent: (C, H, W), groups: list of channel index lists
158
- Returns: list of ((group_i, group_j), deviance_map)
159
  """
160
- group_means = torch.stack([latent[grp].mean(dim=0) for grp in groups])
161
  n = len(groups)
162
- deviances = []
163
- for i in range(n):
164
- for j in range(i + 1, n):
165
- dev = (group_means[i] - group_means[j]).abs()
166
- deviances.append(((i, j), dev))
167
- return deviances
168
 
169
 
170
- def deviance_maps_to_3d(deviances, n_groups):
171
- """Stack deviance maps into (n_pairs, H, W) volume."""
172
- return torch.stack([dev for (_, dev) in deviances], dim=0)
173
-
174
 
175
  class MultiScaleExtractor:
176
  """
177
- Confidence-cascaded multi-scale geometric extractor.
178
- Uses trained PatchCrossAttentionClassifier (from Cell 2 globals).
179
  """
180
 
181
  def __init__(self, classifier, config=None):
182
  self.classifier = classifier
183
  self.config = config or ExtractionConfig()
184
  self.classifier.eval()
 
185
 
186
  @torch.no_grad()
187
- def classify_patches(self, patches, max_batch=512):
188
- """Classify batch of (B, 8, 16, 16) patches with chunking to avoid OOM."""
189
- device = next(self.classifier.parameters()).device
190
  N = patches.shape[0]
191
-
192
  all_results = []
193
- for start in range(0, N, max_batch):
194
- chunk = patches[start:start+max_batch].to(device)
 
195
  out = self.classifier(chunk)
196
  probs = F.softmax(out["class_logits"], dim=-1)
197
  max_prob, pred_class = probs.max(dim=-1)
198
  top2 = probs.topk(2, dim=-1).values
199
  margin = top2[:, 0] - top2[:, 1]
200
- dim_pred = out["dim_logits"].argmax(dim=-1)
201
- curved_pred = (out["is_curved_pred"].squeeze(-1) > 0.0)
202
- curv_type_pred = out["curv_type_logits"].argmax(dim=-1)
203
 
204
  all_results.append({
205
  "pred_class": pred_class.cpu(),
206
  "confidence": margin.cpu(),
207
  "max_prob": max_prob.cpu(),
208
- "dim_pred": dim_pred.cpu(),
209
- "curved_pred": curved_pred.cpu(),
210
- "curv_type_pred": curv_type_pred.cpu(),
211
- "features": out["features"].cpu(),
212
  })
213
  del chunk, out, probs
214
- torch.cuda.empty_cache()
215
 
 
 
216
  return {k: torch.cat([r[k] for r in all_results], dim=0)
217
  for k in all_results[0]}
218
 
219
  def extract_from_volume(self, volume, min_confidence=None):
220
  """
221
- Extract annotations via confidence cascade.
222
- volume: (D, H, W) continuous or binary tensor
223
  """
224
  conf_thresh = min_confidence or self.config.confidence_threshold
 
225
  annotations = []
226
 
227
- regions_to_process = [(0, 0, 0, 0,
228
- volume.shape[0], volume.shape[1], volume.shape[2])]
229
 
230
  for level, scale in enumerate(self.config.scales):
231
- if not regions_to_process:
232
- break
233
-
234
- next_regions = []
235
  pz, py, px = scale
 
 
 
 
236
 
237
- all_patches = []
238
-
239
- for ridx, (lvl, rz0, ry0, rx0, rz1, ry1, rx1) in enumerate(regions_to_process):
240
- if lvl != level:
241
- next_regions.append((lvl, rz0, ry0, rx0, rz1, ry1, rx1))
242
- continue
243
-
244
- subvol = volume[rz0:rz1, ry0:ry1, rx0:rx1]
245
- patches = extract_patches_sliding(subvol, (pz, py, px), self.config.overlap)
246
-
247
- for patch, (lz, ly, lx) in patches:
248
- binary_patches = binarize_continuous(patch, self.config.binarize_percentiles)
249
- for bp in binary_patches:
250
- occ = bp.mean().item()
251
- if occ < self.config.min_occupancy:
252
- continue
253
- canonical = resize_to_canonical(bp, self.config.canonical_shape)
254
- all_patches.append((
255
- canonical,
256
- (rz0 + lz, ry0 + ly, rx0 + lx),
257
- ridx, scale))
258
-
259
- if not all_patches:
260
- regions_to_process = next_regions
261
  continue
262
 
263
- # Batch classify
264
- batch = torch.stack([p[0] for p in all_patches])
265
- results = self.classify_patches(batch)
266
-
267
- for i, (_, loc, ridx, sc) in enumerate(all_patches):
268
- conf = results["confidence"][i].item()
269
- cls_idx = results["pred_class"][i].item()
270
-
271
- if conf >= conf_thresh:
272
- ann = GeometricAnnotation(
273
- class_name=CLASS_NAMES[cls_idx],
274
- class_idx=cls_idx,
275
- confidence=conf,
276
- scale_level=level,
277
- location=loc,
278
- patch_size=sc,
279
- dimension=results["dim_pred"][i].item(),
280
- is_curved=bool(results["curved_pred"][i].item()),
281
- curvature_type=CURVATURE_NAMES[results["curv_type_pred"][i].item()],
282
- )
283
- annotations.append(ann)
284
- elif level < len(self.config.scales) - 1:
285
- z0, y0, x0 = loc
286
- next_regions.append((
287
- level + 1,
288
- z0, y0, x0,
289
- min(z0 + sc[0], volume.shape[0]),
290
- min(y0 + sc[1], volume.shape[1]),
291
- min(x0 + sc[2], volume.shape[2]),
292
- ))
293
-
294
- regions_to_process = next_regions
295
 
296
  return annotations
297
 
298
  def extract_from_latent(self, latent, channel_groups=None):
299
  """
300
- Full extraction for a single Flux 2 VAE latent.
301
  latent: (C, H, W) tensor
302
  """
303
- raw_annotations = self.extract_from_volume(latent)
 
 
 
304
 
 
305
  deviance_annotations = []
306
  if channel_groups is not None:
307
- deviances = compute_inter_group_deviances(latent, channel_groups)
308
- dev_volume = deviance_maps_to_3d(deviances, len(channel_groups))
309
- deviance_annotations = self.extract_from_volume(dev_volume)
310
  for ann in deviance_annotations:
311
  pair_idx = ann.location[0]
312
- if pair_idx < len(deviances):
313
- ann.channel_group_pair = deviances[pair_idx][0]
314
 
315
  return {
316
  'raw_annotations': raw_annotations,
@@ -320,6 +372,6 @@ class MultiScaleExtractor:
320
  }
321
 
322
 
323
- print("✓ Cell 4: Extraction pipeline defined (PatchCrossAttention)")
324
  print(f" Scales: {ExtractionConfig().scales}")
325
  print(f" Canonical: {ExtractionConfig().canonical_shape}")
 
1
  """
2
+ Cell 4: Multi-Scale Geometric Extraction Pipeline (Vectorized)
3
+ ===============================================================
4
  Run after Cells 1-3. Uses globals from prior cells.
5
 
6
+ Fully vectorized — no Python loops over patches.
7
+ Uses unfold for extraction, batched binarization, batched resize.
 
8
  """
9
 
10
  import numpy as np
 
19
  class ExtractionConfig:
20
  canonical_shape: Tuple[int, int, int] = (8, 16, 16)
21
  scales: List[Tuple[int, int, int]] = field(default_factory=lambda: [
22
+ (16, 64, 64), # L0: full latent
23
+ (8, 32, 32), # L1: regional
24
  (8, 16, 16), # L2: native patch
25
  (4, 8, 8), # L3: fine detail
26
  ])
 
29
  min_occupancy: float = 0.005
30
  binarize_percentiles: List[float] = field(default_factory=lambda: [75, 90, 95])
31
  n_channel_groups: int = 8
32
+ max_classify_batch: int = 512
33
  device: str = 'cuda'
34
 
35
 
 
48
  channel_group_pair: Optional[Tuple[int, int]] = None
49
 
50
 
51
# === Vectorized Extraction ====================================================

def extract_patches_unfold(volume, patch_size, overlap=0.5):
    """
    Extract every sliding patch from a 3D volume in one vectorized gather.

    volume: (D, H, W) tensor
    patch_size: (pz, py, px)
    overlap: fraction of patch extent shared by neighboring patches
    Returns: (patches: (N, pz, py, px), locations: (N, 3) patch origins)
    """
    pz, py, px = patch_size
    D, H, W = volume.shape

    # Zero-pad at the high end of each axis so at least one patch fits.
    pads = (max(px - W, 0), max(py - H, 0), max(pz - D, 0))
    if any(pads):
        volume = F.pad(volume, (0, pads[0], 0, pads[1], 0, pads[2]))
        D, H, W = volume.shape

    # Stride per axis derived from the requested overlap (never below 1).
    strides = [max(1, int(extent * (1 - overlap))) for extent in (pz, py, px)]

    # Number of patch origins along each axis.
    counts = [max(1, (dim - ext) // st + 1)
              for dim, ext, st in zip((D, H, W), (pz, py, px), strides)]

    # Patch origin coordinates per axis, clamped inside the (padded) volume.
    dev = volume.device
    starts = [
        (torch.arange(c, device=dev) * st).clamp(max=dim - ext)
        for c, st, dim, ext in zip(counts, strides, (D, H, W), (pz, py, px))
    ]

    # Cartesian product of per-axis origins → (N, 3) location table.
    gz, gy, gx = torch.meshgrid(*starts, indexing='ij')
    locations = torch.stack([gz.flatten(), gy.flatten(), gx.flatten()], dim=1)
    N = locations.shape[0]

    # Absolute per-voxel indices for every patch: origin + within-patch offset.
    z_idx = (locations[:, 0:1] + torch.arange(pz, device=dev))[:, :, None, None]
    y_idx = (locations[:, 1:2] + torch.arange(py, device=dev))[:, None, :, None]
    x_idx = (locations[:, 2:3] + torch.arange(px, device=dev))[:, None, None, :]

    # Advanced indexing with the three expanded index tensors yields
    # (N, pz, py, px) in a single gather — no Python loop over patches.
    patches = volume[z_idx.expand(N, pz, py, px),
                     y_idx.expand(N, pz, py, px),
                     x_idx.expand(N, pz, py, px)]

    return patches, locations
114
+
115
+
116
def binarize_batch(patches, percentiles=(75, 90, 95)):
    """
    Binarize N patches at multiple per-patch percentile thresholds. Vectorized.

    patches: (N, pz, py, px)
    percentiles: iterable of percentile values in [0, 100]
    Returns:
        stacked: (N * len(percentiles), pz, py, px) binary float tensor,
            ordered threshold-major (all patches at percentiles[0] first).
        repeat_idx: (N * len(percentiles),) long tensor mapping each binary
            patch back to its source row in `patches`.
    """
    N = patches.shape[0]
    abs_flat = patches.reshape(N, -1).abs()

    results = []
    for p in percentiles:
        # Per-patch percentile threshold over |values|.
        thresholds = torch.quantile(abs_flat, p / 100.0, dim=1, keepdim=True)  # (N, 1)
        # Guard against all-zero (or near-zero) patches: a threshold of 0 would
        # binarize them to all-ones, letting empty patches sail through the
        # downstream occupancy filter. Clamping keeps them all-zero instead.
        thresholds = thresholds.clamp_min(1e-8)
        binary = (abs_flat >= thresholds).float().reshape(patches.shape)
        results.append(binary)

    # (n_thresh * N, pz, py, px), threshold-major order.
    stacked = torch.cat(results, dim=0)

    # Each original patch index repeated once per threshold, matching `stacked`.
    repeat_idx = torch.arange(N, device=patches.device).repeat(len(percentiles))

    return stacked, repeat_idx
140
 
 
 
 
 
 
141
 
142
def resize_batch(patches, target=(8, 16, 16)):
    """
    Resize a batch of 3D patches to the canonical shape. Vectorized.

    patches: (N, pz, py, px)
    target: desired (tz, ty, tx)
    Returns: (N, tz, ty, tx); the input tensor itself when already canonical.
    """
    # Already at the target resolution — nothing to do.
    if tuple(patches.shape[1:]) == tuple(target):
        return patches
    # Trilinear interpolation expects (N, C, D, H, W); use a single channel.
    expanded = patches.unsqueeze(1)
    resized = F.interpolate(expanded, size=target,
                            mode='trilinear', align_corners=False)
    return resized.squeeze(1)
153
 
 
 
 
 
 
 
 
 
154
 
155
+ # === Channel Clustering =======================================================
156
 
157
  def cluster_channels(latents, n_groups=8):
158
  """
159
  Cluster VAE channels by correlation.
160
+ latents: (N, C, H, W)
161
  Returns: (groups, corr_matrix)
162
  """
163
  N, C, H, W = latents.shape
 
216
 
217
def compute_inter_group_deviances(latent, groups):
    """
    Compute |mean(group_i) - mean(group_j)| maps for all group pairs. Vectorized.

    latent: (C, H, W) tensor
    groups: list of channel-index lists
    Returns:
        deviances: (n_pairs, H, W) absolute difference maps
        pair_indices: list of (i, j) group-index tuples in upper-triangular order
    """
    # Per-group mean over its member channels → (G, H, W).
    means = torch.stack([latent[members].mean(dim=0) for members in groups])
    count = len(groups)
    # Upper-triangular (i < j) pair indices, then one broadcasted subtraction.
    upper = torch.triu_indices(count, count, offset=1)
    left, right = upper[0], upper[1]
    deviances = (means[left] - means[right]).abs()  # (n_pairs, H, W)
    pair_indices = list(zip(left.tolist(), right.tolist()))
    return deviances, pair_indices
 
230
 
231
 
232
+ # === Extractor ================================================================
 
 
 
233
 
234
  class MultiScaleExtractor:
235
  """
236
+ Vectorized multi-scale geometric extractor.
237
+ No Python loops over individual patches.
238
  """
239
 
240
  def __init__(self, classifier, config=None):
241
  self.classifier = classifier
242
  self.config = config or ExtractionConfig()
243
  self.classifier.eval()
244
+ self.device = next(classifier.parameters()).device
245
 
246
  @torch.no_grad()
247
+ def classify_patches(self, patches):
248
+ """Classify (N, 8, 16, 16) patches in chunks."""
 
249
  N = patches.shape[0]
250
+ max_b = self.config.max_classify_batch
251
  all_results = []
252
+
253
+ for start in range(0, N, max_b):
254
+ chunk = patches[start:start+max_b].to(self.device)
255
  out = self.classifier(chunk)
256
  probs = F.softmax(out["class_logits"], dim=-1)
257
  max_prob, pred_class = probs.max(dim=-1)
258
  top2 = probs.topk(2, dim=-1).values
259
  margin = top2[:, 0] - top2[:, 1]
 
 
 
260
 
261
  all_results.append({
262
  "pred_class": pred_class.cpu(),
263
  "confidence": margin.cpu(),
264
  "max_prob": max_prob.cpu(),
265
+ "dim_pred": out["dim_logits"].argmax(dim=-1).cpu(),
266
+ "curved_pred": (out["is_curved_pred"].squeeze(-1) > 0.0).cpu(),
267
+ "curv_type_pred": out["curv_type_logits"].argmax(dim=-1).cpu(),
 
268
  })
269
  del chunk, out, probs
 
270
 
271
+ if not all_results:
272
+ return None
273
  return {k: torch.cat([r[k] for r in all_results], dim=0)
274
  for k in all_results[0]}
275
 
276
  def extract_from_volume(self, volume, min_confidence=None):
277
  """
278
+ Vectorized extraction over all scales.
279
+ volume: (D, H, W) tensor on any device
280
  """
281
  conf_thresh = min_confidence or self.config.confidence_threshold
282
+ canonical = self.config.canonical_shape
283
  annotations = []
284
 
285
+ volume = volume.float().cpu()
 
286
 
287
  for level, scale in enumerate(self.config.scales):
 
 
 
 
288
  pz, py, px = scale
289
+ D, H, W = volume.shape
290
+
291
+ if D < pz or H < py or W < px:
292
+ continue
293
 
294
+ # 1. Extract all patches — vectorized
295
+ patches, locations = extract_patches_unfold(volume, scale, self.config.overlap)
296
+ # patches: (N, pz, py, px), locations: (N, 3)
297
+
298
+ if patches.shape[0] == 0:
299
+ continue
300
+
301
+ # 2. Binarize at all thresholds — vectorized
302
+ binary, repeat_idx = binarize_batch(patches, self.config.binarize_percentiles)
303
+ # binary: (N*n_thresh, pz, py, px), repeat_idx: (N*n_thresh,)
304
+
305
+ # 3. Filter by occupancy — vectorized
306
+ occ = binary.reshape(binary.shape[0], -1).mean(dim=1)
307
+ keep = occ >= self.config.min_occupancy
308
+ binary = binary[keep]
309
+ loc_idx = repeat_idx[keep]
310
+
311
+ if binary.shape[0] == 0:
 
 
 
 
 
 
312
  continue
313
 
314
+ # 4. Resize to canonical — vectorized
315
+ canonical_patches = resize_batch(binary, canonical)
316
+
317
+ # 5. Classify in chunks
318
+ results = self.classify_patches(canonical_patches)
319
+ if results is None:
320
+ continue
321
+
322
+ # 6. Filter by confidence and build annotations
323
+ conf_mask = results["confidence"] >= conf_thresh
324
+ indices = conf_mask.nonzero(as_tuple=True)[0]
325
+
326
+ for i in indices.tolist():
327
+ orig_idx = loc_idx[i].item()
328
+ loc = locations[orig_idx].tolist()
329
+ ann = GeometricAnnotation(
330
+ class_name=CLASS_NAMES[results["pred_class"][i].item()],
331
+ class_idx=results["pred_class"][i].item(),
332
+ confidence=results["confidence"][i].item(),
333
+ scale_level=level,
334
+ location=tuple(int(x) for x in loc),
335
+ patch_size=scale,
336
+ dimension=results["dim_pred"][i].item(),
337
+ is_curved=bool(results["curved_pred"][i].item()),
338
+ curvature_type=CURVATURE_NAMES[results["curv_type_pred"][i].item()],
339
+ )
340
+ annotations.append(ann)
341
+
342
+ del patches, locations, binary, canonical_patches, results
 
 
 
343
 
344
  return annotations
345
 
346
  def extract_from_latent(self, latent, channel_groups=None):
347
  """
348
+ Full extraction for one Flux 2 VAE latent.
349
  latent: (C, H, W) tensor
350
  """
351
+ latent_cpu = latent.cpu().float()
352
+
353
+ # Raw volume: treat channels as depth
354
+ raw_annotations = self.extract_from_volume(latent_cpu)
355
 
356
+ # Deviance volume
357
  deviance_annotations = []
358
  if channel_groups is not None:
359
+ dev_maps, pair_indices = compute_inter_group_deviances(latent_cpu, channel_groups)
360
+ # dev_maps: (n_pairs, H, W) — treat as (D, H, W)
361
+ deviance_annotations = self.extract_from_volume(dev_maps)
362
  for ann in deviance_annotations:
363
  pair_idx = ann.location[0]
364
+ if pair_idx < len(pair_indices):
365
+ ann.channel_group_pair = pair_indices[pair_idx]
366
 
367
  return {
368
  'raw_annotations': raw_annotations,
 
372
  }
373
 
374
 
375
+ print("✓ Cell 4: Vectorized extraction pipeline defined")
376
  print(f" Scales: {ExtractionConfig().scales}")
377
  print(f" Canonical: {ExtractionConfig().canonical_shape}")