# ---------------------------------------------------------------------------


class IterativeSATModel(nn.Module):
    """Sotaku-style iterative SAT solver.

    Key design (matching sotaku):
    - h_prev carries the full hidden state directly (residual across iterations)
    - pred_proj adds a small correction from detached predictions (not the hidden state)
    - Scratchpad tokens provide extra working memory positions
    - Gradients flow through h_prev, predictions are detached
    """

    def __init__(self, config: SATConfig):
        super().__init__()
        self.config = config
        d = config.d_model
        N = config.max_vars
        S = config.n_scratch
        total_pos = N + S  # variable positions + scratchpad positions

        # Input encoder: clause structure -> initial hidden state (one-time).
        # Features per variable are [membership row, polarity row], hence 2 * max_clauses.
        self.input_proj = nn.Linear(2 * config.max_clauses, d, bias=False)

        # Prediction feedback: small correction from detached predictions.
        # assign(1) + violation(1) -> d_model (like sotaku's pred_proj on softmax preds)
        self.pred_proj = nn.Linear(2, d, bias=False)

        # Scratchpad tokens (extra working memory); only allocated when requested.
        if S > 0:
            self.scratch_embeds = nn.Parameter(torch.randn(S, d) * 0.02)

        # Shared transformer, applied repeatedly across iterations.
        self.layers = nn.ModuleList([
            TransformerBlock(d, config.n_heads, config.d_ff, config.dropout)
            # NOTE(review): this loop clause was hidden in the diff view; a depth
            # attribute on SATConfig is the only consistent reading — TODO confirm name.
            for _ in range(config.n_layers)
        ])
        self.final_norm = nn.RMSNorm(d)

        # Output head (variable positions only)
        self.assign_head = nn.Linear(d, 1, bias=False)

        # RoPE tables cover all positions (variables + scratchpad).
        cos, sin = build_rope_cache(total_pos, d // config.n_heads, config.rope_base)
        # NOTE(review): the rope_cos register line was hidden in the diff view, but
        # forward() reads self.rope_cos, so it must be registered here — TODO confirm.
        self.register_buffer("rope_cos", cos)
        self.register_buffer("rope_sin", sin)

    def forward(self, clause_mask, clause_sign, n_vars_batch=None, n_iters=None):
        """Iteratively refine a SAT assignment.

        Args:
            clause_mask: (B, max_vars, max_clauses) — 1 if variable appears in clause
                (shapes per the pre-diff docstring — TODO confirm against callers).
            clause_sign: (B, max_vars, max_clauses) — polarity (+1/-1, 0 = not present).
            n_vars_batch: unused here; kept for interface compatibility with callers.
            n_iters: number of refinement iterations; defaults to config.train_iters.

        Returns:
            List of assignment logits, one (B, max_vars) tensor per iteration.
        """
        if n_iters is None:
            n_iters = self.config.train_iters

        B = clause_mask.shape[0]
        N = self.config.max_vars
        S = self.config.n_scratch
        device = clause_mask.device

        # One-time encoding (re-added every iteration to prevent forgetting)
        features = torch.cat([clause_mask, clause_sign], dim=-1)
        h_init = self.input_proj(features)  # (B, N, d)

        # Append scratchpad
        if S > 0:
            h_scratch = self.scratch_embeds.unsqueeze(0).expand(B, -1, -1)
            h_init = torch.cat([h_init, h_scratch], dim=1)  # (B, N+S, d)

        h_prev = h_init  # first iteration starts from input encoding

        all_logits = []
        # Initial predictions: uniform assignment probability, zero violation.
        preds = torch.zeros(B, N + S, 2, device=device)
        preds[:, :N, 0] = 0.5
        # violation starts at 0

        for _ in range(n_iters):
            # Clean carry + fresh input + prediction correction
            h = h_prev + h_init + self.pred_proj(preds)

            # Shared transformer
            for layer in self.layers:
                h = layer(h, self.rope_cos, self.rope_sin)
            h = self.final_norm(h)

            # h becomes h_prev for next iteration (direct carry, with gradients)
            h_prev = h

            # Predict assignments from variable positions only
            logits = self.assign_head(h[:, :N, :]).squeeze(-1)  # (B, N)
            all_logits.append(logits)

            # Build detached prediction feedback for next iteration: gradients
            # flow through h_prev, NOT through the prediction pathway.
            assign_prob = torch.sigmoid(logits).detach()
            violation = self._compute_violations(assign_prob, clause_mask, clause_sign)
            preds = torch.zeros(B, N + S, 2, device=device)
            preds[:, :N, 0] = assign_prob
            preds[:, :N, 1] = violation

        return all_logits