Upload 4 files

Browse files

Files changed (1) hide show

spectral/notebooks/experiment_4_invertible_transforms.ipynb +29 -26

spectral/notebooks/experiment_4_invertible_transforms.ipynb CHANGED Viewed

@@ -2098,45 +2098,48 @@
    "source": [
     "# @title Experiment 4.5 \u2014 Procrustes Alignment\n",
     "class ProcrustessFrontEnd(nn.Module):\n",
-    "    \"\"\"Align image patches to reference templates, use alignment residuals as features.\"\"\"\n",
     "    def __init__(self, n_templates=8, patch_size=4, input_size=32):\n",
     "        super().__init__()\n",
     "        self.patch_size = patch_size\n",
     "        self.n_templates = n_templates\n",
     "        n_patches = (input_size // patch_size) ** 2\n",
     "        self.n_patches = n_patches\n",
-    "        patch_dim = 3 * patch_size * patch_size\n",
-    "        # Fixed reference templates (random orthogonal patches)\n",
-    "        templates = F.normalize(torch.randn(n_templates, n_patches, patch_dim), dim=-1)\n",
     "        self.register_buffer('templates', templates)\n",
-    "        # Features per template: alignment quality (1) + top singular values (min(3,D)) + rotation trace (1)\n",
-    "        self.output_dim = n_templates * (2 + min(3, patch_dim))\n",
-    "        print(f\"[PROCRUSTES] {n_templates} templates, dim={self.output_dim}\")\n",
     "\n",
     "    @torch.amp.custom_fwd(device_type='cuda', cast_inputs=torch.float32)\n",
     "    def forward(self, x):\n",
     "        B, C, H, W = x.shape\n",
     "        ps = self.patch_size\n",
-    "        patches = x.unfold(2, ps, ps).unfold(3, ps, ps)\n",
-    "        patches = patches.contiguous().reshape(B, self.n_patches, -1)\n",
     "        patches_n = F.normalize(patches, dim=-1)\n",
-    "\n",
-    "        results = []\n",
-    "        for t in range(self.n_templates):\n",
-    "            template = self.templates[t]  # (n_patches, patch_dim)\n",
-    "            # Cross-covariance M: (B, D, D) where D = patch_dim\n",
-    "            M = torch.bmm(patches_n.transpose(1, 2),\n",
-    "                          template.unsqueeze(0).expand(B, -1, -1))\n",
-    "            # Direct SVD of cross-covariance: M = U S Vh\n",
-    "            U, S, Vh = torch.linalg.svd(M, full_matrices=False)\n",
-    "            # Optimal Procrustes rotation R = U Vh (= U V^T)\n",
-    "            R_opt = torch.bmm(U, Vh)  # (B, D, D)\n",
-    "            # Features: alignment quality + top singular values + rotation trace\n",
-    "            align_quality = S.sum(dim=-1, keepdim=True)\n",
-    "            top_s = S[:, :min(3, S.shape[1])]\n",
-    "            rot_trace = R_opt.diagonal(dim1=-2, dim2=-1).sum(-1, keepdim=True)  # det proxy\n",
-    "            results.append(torch.cat([align_quality, top_s, rot_trace], dim=-1))\n",
-    "        return torch.cat(results, dim=-1)\n",
     "\n",
     "front = ProcrustessFrontEnd(n_templates=8, patch_size=4).to(device)\n",
     "model_4_5 = SpectralGeoLIPEncoder(\n",

    "source": [
     "# @title Experiment 4.5 \u2014 Procrustes Alignment\n",
     "class ProcrustessFrontEnd(nn.Module):\n",
+    "    \"\"\"Whole-image Procrustes alignment in color space \u2014 3\u00d73 orthogonal maps.\n",
+    "    Cross-covariance of the image's pixel colors (gathered patch by patch) vs template pixel colors\n",
+    "    gives one 3\u00d73 matrix per (image, template) pair. SVD of this 3\u00d73 via fused Triton kernel (or fallback).\n",
+    "    R = U Vh is the optimal orthogonal color map (in SO(3) only if det R = +1). All templates batched in one call.\"\"\"\n",
     "    def __init__(self, n_templates=8, patch_size=4, input_size=32):\n",
     "        super().__init__()\n",
     "        self.patch_size = patch_size\n",
     "        self.n_templates = n_templates\n",
     "        n_patches = (input_size // patch_size) ** 2\n",
     "        self.n_patches = n_patches\n",
+    "        ps2 = patch_size * patch_size\n",
+    "        N = n_patches * ps2  # total pixels per image\n",
+    "        # Templates: (T, N, 3) \u2014 unit-norm color directions per pixel\n",
+    "        templates = F.normalize(torch.randn(n_templates, N, 3), dim=-1)\n",
     "        self.register_buffer('templates', templates)\n",
+    "        # Per template: align_quality(1) + S(3) + rot_trace(1) = 5\n",
+    "        self.output_dim = n_templates * 5\n",
+    "        backend = \"Triton SVD3\" if _HAS_TRITON_SVD3 else \"torch.linalg.svd\"\n",
+    "        print(f\"[PROCRUSTES] {n_templates} templates, color-space SO(3), dim={self.output_dim} ({backend})\")\n",
     "\n",
     "    @torch.amp.custom_fwd(device_type='cuda', cast_inputs=torch.float32)\n",
     "    def forward(self, x):\n",
     "        B, C, H, W = x.shape\n",
+    "        T = self.n_templates\n",
     "        ps = self.patch_size\n",
+    "        # Reshape to (B, N, 3) \u2014 pixels with 3 color channels\n",
+    "        patches = x.unfold(2, ps, ps).unfold(3, ps, ps)      # (B, C, nh, nw, ps, ps)\n",
+    "        patches = patches.permute(0, 2, 3, 4, 5, 1).contiguous()  # (B, nh, nw, ps, ps, C)\n",
+    "        patches = patches.reshape(B, -1, C)                   # (B, N, 3)\n",
     "        patches_n = F.normalize(patches, dim=-1)\n",
+    "        # Expand for all templates \u2014 one batched call, no Python loop\n",
+    "        patches_exp = patches_n.unsqueeze(1).expand(B, T, -1, -1).reshape(B * T, -1, 3)\n",
+    "        templates_exp = self.templates.unsqueeze(0).expand(B, -1, -1, -1).reshape(B * T, -1, 3)\n",
+    "        # Cross-covariance in color space: (B*T, 3, N) @ (B*T, N, 3) = (B*T, 3, 3)\n",
+    "        M = torch.bmm(patches_exp.transpose(1, 2), templates_exp)\n",
+    "        # SVD of 3\u00d73 via Triton kernel (or fallback)\n",
+    "        U, S, Vh = batched_svd3(M)  # M is (B*T, 3, 3)\n",
+    "        R_opt = torch.bmm(U, Vh)    # optimal orthogonal color map; in SO(3) only if det = +1\n",
+    "        align_quality = S.sum(dim=-1, keepdim=True)\n",
+    "        rot_trace = R_opt.diagonal(dim1=-2, dim2=-1).sum(-1, keepdim=True)\n",
+    "        feats = torch.cat([align_quality, S, rot_trace], dim=-1)  # (B*T, 5)\n",
+    "        return feats.reshape(B, T * 5)\n",
     "\n",
     "front = ProcrustessFrontEnd(n_templates=8, patch_size=4).to(device)\n",
     "model_4_5 = SpectralGeoLIPEncoder(\n",