CharlesCNorton committed on
Commit e69d4eb · 0 Parent(s)

Initial commit: threshold circuit pruning framework


7 pruning methods: magnitude, batched, zero, quantize, evolutionary, annealing, pareto

Files changed (2)
  1. README.md +59 -0
  2. prune.py +1162 -0
README.md ADDED
@@ -0,0 +1,59 @@
+ # Threshold Pruner
+
+ Multi-method pruning framework for threshold logic circuits.
+
+ ## Methods
+
+ | Method | Flag | Description |
+ |--------|------|-------------|
+ | Magnitude Reduction | `mag` | Reduce weights by 1 toward zero |
+ | Batched Magnitude | `batched` | GPU-parallel magnitude reduction |
+ | Zero Pruning | `zero` | Set weights directly to 0 |
+ | Quantization | `quant` | Force weights to {-1, 0, 1} |
+ | Evolutionary | `evo` | Mutation + selection with parsimony |
+ | Simulated Annealing | `anneal` | Gradual cooling search |
+ | Pareto Search | `pareto` | Correctness vs size tradeoff |
+
+ ## Usage
+
+ ```bash
+ # List available circuits
+ python prune.py --list
+
+ # Prune a circuit with all methods
+ python prune.py threshold-hamming74decoder
+
+ # Specific methods only
+ python prune.py threshold-hamming74decoder --methods mag,zero,evo
+
+ # Batch process
+ python prune.py --all --max-inputs 8
+
+ # Save best result
+ python prune.py threshold-hamming74decoder --save
+ ```
+
+ ## Requirements
+
+ ```
+ torch
+ safetensors
+ ```
+
+ ## Circuit Format
+
+ Each circuit needs:
+ ```
+ threshold-{name}/
+ ├── model.safetensors   # Weights: {layer.weight: [...], layer.bias: [...]}
+ ├── model.py            # Forward function
+ └── config.json         # {inputs, outputs, neurons, layers, parameters}
+ ```
+
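A minimal `config.json` matching the fields listed above (`inputs`, `outputs`, `neurons`, `layers`, `parameters`, plus `name` and an optional `description`) might look like this — the numbers below are illustrative, not taken from a real circuit:

```json
{
  "name": "threshold-hamming74decoder",
  "inputs": 7,
  "outputs": 4,
  "neurons": 11,
  "layers": 2,
  "parameters": 95,
  "description": "Hamming(7,4) decoder"
}
```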
+ ## Related
+
+ - [Threshold Logic Circuits Collection](https://huggingface.co/collections/phanerozoic/threshold-logic-circuits-6972546b096a4384dd9f34ad)
+
+ ## License
+
+ MIT
prune.py ADDED
@@ -0,0 +1,1162 @@
+ """
+ Unified Threshold Circuit Pruning Framework
+ ============================================
+
+ All pruning methods for threshold logic circuits in a single file.
+
+ Methods:
+ 1. Magnitude Reduction (sequential & batched GPU)
+ 2. Zero Pruning (sparsification)
+ 3. Weight Quantization (force to {-1,0,1})
+ 4. Evolutionary Search (mutation + selection)
+ 5. Simulated Annealing (gradual cooling)
+ 6. Pareto Frontier (correctness vs size tradeoff)
+
+ Usage:
+     python prune.py threshold-hamming74decoder
+     python prune.py threshold-hamming74decoder --methods magnitude,zero,evo
+     python prune.py --list
+     python prune.py --all --max-inputs 8
+
+ Author: Pruning framework for phanerozoic/threshold-logic-circuits
+ """
+
+ import torch
+ import torch.jit
+ import json
+ import math
+ import time
+ import random
+ import argparse
+ import importlib.util
+ import sys
+ from pathlib import Path
+ from dataclasses import dataclass, field
+ from typing import Dict, List, Tuple, Optional, Callable, Set
+ from enum import Enum, auto
+ from datetime import datetime
+ from safetensors.torch import load_file, save_file
+
+
+ # =============================================================================
+ # CONFIGURATION
+ # =============================================================================
+
+ CIRCUITS_PATH = Path('D:/threshold-circuits')
+ RESULTS_PATH = CIRCUITS_PATH / 'pruned_results'
+
+
+ @dataclass
+ class Config:
+     """Global configuration for pruning."""
+     device: str = 'cuda'
+     fitness_threshold: float = 0.9999
+     batch_size: int = 80000
+     verbose: bool = True
+
+     # Method toggles
+     run_magnitude: bool = True
+     run_batched_magnitude: bool = True
+     run_zero: bool = True
+     run_quantize: bool = True
+     run_evolutionary: bool = True
+     run_annealing: bool = True
+     run_pareto: bool = True
+
+     # Method-specific
+     magnitude_passes: int = 100
+     evo_generations: int = 1000
+     evo_pop_size: int = 200
+     evo_mutation_rate: float = 0.1
+     evo_parsimony: float = 0.001
+     annealing_iterations: int = 10000
+     annealing_initial_temp: float = 10.0
+     annealing_cooling: float = 0.995
+     quantize_targets: List[float] = field(default_factory=lambda: [-1.0, 0.0, 1.0])
+     pareto_levels: List[float] = field(default_factory=lambda: [1.0, 0.99, 0.95, 0.90])
+
+
+ # =============================================================================
+ # CIRCUIT LOADING
+ # =============================================================================
+
+ @dataclass
+ class CircuitSpec:
+     """Metadata for a threshold circuit."""
+     name: str
+     path: Path
+     inputs: int
+     outputs: int
+     neurons: int
+     layers: int
+     parameters: int
+     description: str = ""
+
+
+ class Circuit:
+     """Threshold logic circuit loaded from safetensors."""
+
+     def __init__(self, path: Path, device: str = 'cuda'):
+         self.path = Path(path)
+         self.device = device
+         self.spec = self._load_spec()
+         self.weights = self._load_weights()
+
+     def _load_spec(self) -> CircuitSpec:
+         with open(self.path / 'config.json') as f:
+             cfg = json.load(f)
+         return CircuitSpec(
+             name=cfg['name'],
+             path=self.path,
+             inputs=cfg['inputs'],
+             outputs=cfg['outputs'],
+             neurons=cfg['neurons'],
+             layers=cfg['layers'],
+             parameters=cfg['parameters'],
+             description=cfg.get('description', '')
+         )
+
+     def _load_weights(self) -> Dict[str, torch.Tensor]:
+         w = load_file(str(self.path / 'model.safetensors'))
+         return {k: v.float().to(self.device) for k, v in w.items()}
+
+     def clone(self) -> Dict[str, torch.Tensor]:
+         return {k: v.clone() for k, v in self.weights.items()}
+
+     def stats(self, weights: Optional[Dict[str, torch.Tensor]] = None) -> Dict:
+         w = weights or self.weights
+         total = sum(t.numel() for t in w.values())
+         nonzero = sum((t != 0).sum().item() for t in w.values())
+         mag = sum(t.abs().sum().item() for t in w.values())
+         maxw = max(t.abs().max().item() for t in w.values())
+         unique = set()
+         for t in w.values():
+             unique.update(t.flatten().tolist())
+         return {
+             'total': total,
+             'nonzero': nonzero,
+             'zeros': total - nonzero,
+             'sparsity': 1 - nonzero/total if total else 0,
+             'magnitude': mag,
+             'max_weight': maxw,
+             'unique_count': len(unique),
+             'unique_values': sorted(unique)
+         }
+
+     def save(self, weights: Dict[str, torch.Tensor], suffix: str = 'pruned'):
+         path = self.path / f'model_{suffix}.safetensors'
+         cpu_w = {k: v.cpu() for k, v in weights.items()}
+         save_file(cpu_w, str(path))
+         return path
+
+
+ def discover_circuits(base: Path = CIRCUITS_PATH) -> List[CircuitSpec]:
+     """Find all circuits in the collection."""
+     circuits = []
+     for d in base.iterdir():
+         if d.is_dir() and (d / 'config.json').exists() and (d / 'model.safetensors').exists():
+             try:
+                 c = Circuit(d, device='cpu')
+                 circuits.append(c.spec)
+             except Exception as e:
+                 print(f"Skip {d.name}: {e}")
+     return sorted(circuits, key=lambda x: (x.inputs, x.neurons))
+
+
+ def load_circuit(name: str, device: str = 'cuda') -> Circuit:
+     """Load circuit by name."""
+     path = CIRCUITS_PATH / name
+     if not path.exists():
+         path = CIRCUITS_PATH / f'threshold-{name}'
+     if not path.exists():
+         raise ValueError(f"Circuit not found: {name}")
+     return Circuit(path, device)
+
+
+ # =============================================================================
+ # GPU UTILITIES
+ # =============================================================================
+
+ def gpu_memory() -> Dict:
+     if torch.cuda.is_available():
+         return {
+             'allocated': torch.cuda.memory_allocated() / 1e9,
+             'reserved': torch.cuda.memory_reserved() / 1e9,
+             'total': torch.cuda.get_device_properties(0).total_memory / 1e9
+         }
+     return {'allocated': 0, 'reserved': 0, 'total': 0}
+
+
+ def create_population(weights: Dict[str, torch.Tensor],
+                       pop_size: int, device: str) -> Dict[str, torch.Tensor]:
+     """Replicate weights for batched evaluation."""
+     return {
+         k: v.unsqueeze(0).expand(pop_size, *v.shape).clone().to(device)
+         for k, v in weights.items()
+     }
+
+
+ # =============================================================================
+ # GENERIC EVALUATOR
+ # =============================================================================
+
+ class Evaluator:
+     """
+     Generic evaluator for any threshold circuit.
+     Builds truth table and tests exhaustively.
+     """
+
+     def __init__(self, circuit: Circuit, forward_fn: Callable):
+         self.circuit = circuit
+         self.forward_fn = forward_fn
+         self.device = circuit.device
+         self.n_inputs = circuit.spec.inputs
+         self.n_cases = 2 ** self.n_inputs
+
+         self._build_inputs()
+         self._build_expected()
+
+     def _build_inputs(self):
+         """Generate all 2^n input combinations."""
+         if self.n_inputs > 20:
+             raise ValueError(f"Input space too large: 2^{self.n_inputs}")
+
+         idx = torch.arange(self.n_cases, device=self.device, dtype=torch.long)
+         bits = torch.arange(self.n_inputs, device=self.device, dtype=torch.long)
+         self.inputs = ((idx.unsqueeze(1) >> bits) & 1).float()
+
+     def _build_expected(self):
+         """Compute expected outputs using original weights."""
+         self.expected = self.forward_fn(self.inputs, self.circuit.weights)
+
+     def evaluate(self, weights: Dict[str, torch.Tensor]) -> float:
+         """Single evaluation: returns fitness 0.0-1.0."""
+         outputs = self.forward_fn(self.inputs, weights)
+         correct = (outputs == self.expected).all(dim=-1).float().sum()
+         return (correct / self.n_cases).item()
+
+     def evaluate_batch(self, population: Dict[str, torch.Tensor]) -> torch.Tensor:
+         """Batch evaluation: returns a [pop_size] fitness tensor."""
+         pop_size = next(iter(population.values())).shape[0]
+         fitness = torch.zeros(pop_size, device=self.device)
+
+         for i in range(pop_size):
+             w = {k: v[i] for k, v in population.items()}
+             outputs = self.forward_fn(self.inputs, w)
+             correct = (outputs == self.expected).all(dim=-1).float().sum()
+             fitness[i] = correct / self.n_cases
+
+         return fitness
+
+
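`_build_inputs` enumerates all 2^n cases by broadcasting a right-shift over the bit positions. The same indexing in plain Python (a torch-free sketch, not part of the framework):

```python
def all_input_rows(n_inputs):
    # Row i is the binary expansion of i, least-significant bit first,
    # mirroring ((idx.unsqueeze(1) >> bits) & 1) in Evaluator._build_inputs.
    return [[(i >> b) & 1 for b in range(n_inputs)] for i in range(2 ** n_inputs)]
```

Each row pairs with the matching row of the expected-output table, so fitness is simply the fraction of rows where all outputs agree.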
+ # =============================================================================
+ # PRUNING METHODS
+ # =============================================================================
+
+ @dataclass
+ class PruneResult:
+     """Result from a pruning method."""
+     method: str
+     original_stats: Dict
+     final_stats: Dict
+     final_weights: Dict[str, torch.Tensor]
+     fitness: float
+     reductions: int
+     time_seconds: float
+     history: List[Dict] = field(default_factory=list)
+
+
+ def get_candidates(weights: Dict[str, torch.Tensor]) -> List[Tuple[str, int, tuple, float]]:
+     """Get all non-zero weight positions."""
+     candidates = []
+     for name, tensor in weights.items():
+         flat = tensor.flatten()
+         for i in range(len(flat)):
+             val = flat[i].item()
+             if val != 0:
+                 candidates.append((name, i, tensor.shape, val))
+     return candidates
+
+
+ def apply_reduction(weights: Dict[str, torch.Tensor],
+                     name: str, idx: int, shape: tuple, old_val: float):
+     """Apply magnitude reduction: move weight 1 step toward zero."""
+     new_val = old_val - 1 if old_val > 0 else old_val + 1
+     flat = weights[name].flatten()
+     flat[idx] = new_val
+     weights[name] = flat.view(shape)
+
+
+ def revert_reduction(weights: Dict[str, torch.Tensor],
+                      name: str, idx: int, shape: tuple, old_val: float):
+     """Revert a reduction."""
+     flat = weights[name].flatten()
+     flat[idx] = old_val
+     weights[name] = flat.view(shape)
+
+
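The one-step "toward zero" move in `apply_reduction` is the primitive all magnitude methods share; isolated as a plain function (a sketch):

```python
def reduce_toward_zero(val):
    # Shrink an integer weight by one unit of magnitude, as apply_reduction does;
    # a weight of +1 or -1 becomes 0, which get_candidates then skips.
    return val - 1 if val > 0 else val + 1
```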
+ # -----------------------------------------------------------------------------
+ # Method 1: Sequential Magnitude Reduction
+ # -----------------------------------------------------------------------------
+
+ def prune_magnitude(weights: Dict[str, torch.Tensor],
+                     eval_fn: Callable[[Dict], float],
+                     cfg: Config) -> PruneResult:
+     """Reduce weight magnitudes one at a time."""
+     start = time.perf_counter()
+     weights = {k: v.clone() for k, v in weights.items()}
+     original = _stats(weights)
+     reductions = 0
+     history = []
+
+     if cfg.verbose:
+         print("  Starting magnitude reduction...")
+         print(f"  Original: mag={original['magnitude']:.0f}, nonzero={original['nonzero']}")
+
+     for pass_num in range(cfg.magnitude_passes):
+         candidates = get_candidates(weights)
+         if not candidates:
+             if cfg.verbose:
+                 print(f"  No candidates remaining at pass {pass_num}")
+             break
+
+         if cfg.verbose:
+             print(f"  Pass {pass_num}: testing {len(candidates)} candidates...")
+
+         pass_reductions = 0
+         tested = 0
+         for name, idx, shape, old_val in candidates:
+             apply_reduction(weights, name, idx, shape, old_val)
+             tested += 1
+
+             fitness = eval_fn(weights)
+             if fitness >= cfg.fitness_threshold:
+                 pass_reductions += 1
+                 reductions += 1
+                 if cfg.verbose:
+                     new_val = old_val - 1 if old_val > 0 else old_val + 1
+                     print(f"    ✓ {name}[{idx}]: {old_val} -> {new_val}")
+             else:
+                 revert_reduction(weights, name, idx, shape, old_val)
+
+             if cfg.verbose and tested % 50 == 0:
+                 s = _stats(weights)
+                 print(f"    Progress: {tested}/{len(candidates)}, reductions={pass_reductions}, mag={s['magnitude']:.0f}")
+
+         history.append({'pass': pass_num, 'reductions': pass_reductions})
+
+         s = _stats(weights)
+         if cfg.verbose:
+             print(f"  Pass {pass_num} complete: +{pass_reductions} reductions, mag={s['magnitude']:.0f}, nonzero={s['nonzero']}")
+
+         if pass_reductions == 0:
+             if cfg.verbose:
+                 print(f"  No progress at pass {pass_num}, stopping.")
+             break
+
+     return PruneResult(
+         method='magnitude',
+         original_stats=original,
+         final_stats=_stats(weights),
+         final_weights=weights,
+         fitness=eval_fn(weights),
+         reductions=reductions,
+         time_seconds=time.perf_counter() - start,
+         history=history
+     )
+
+
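The accept-or-revert loop is easiest to see on a toy neuron. Below, a single two-input threshold gate (a hypothetical example, not a circuit from the collection) is pruned greedily while its truth table is held fixed:

```python
def gate(w, b, x):
    # Threshold neuron: fires when the weighted sum reaches threshold b.
    return 1 if w[0] * x[0] + w[1] * x[1] >= b else 0

def truth_table(w, b):
    return [gate(w, b, (x0, x1)) for x0 in (0, 1) for x1 in (0, 1)]

def greedy_magnitude(w, b, target):
    # Accept a one-step reduction only if the full truth table survives,
    # mirroring the apply/revert pattern in prune_magnitude.
    changed = True
    while changed:
        changed = False
        for i, v in enumerate(w):
            if v == 0:
                continue
            w[i] = v - 1 if v > 0 else v + 1
            if truth_table(w, b) == target:
                changed = True
            else:
                w[i] = v  # revert
    return w

target = truth_table([3, 3], 2)          # an OR gate: [0, 1, 1, 1]
pruned = greedy_magnitude([3, 3], 2, target)
```

The loop halts at [2, 2]: any further reduction breaks a row of the truth table, so every remaining candidate is reverted and the pass makes no progress.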
+ # -----------------------------------------------------------------------------
+ # Method 2: Batched GPU Magnitude Reduction
+ # -----------------------------------------------------------------------------
+
+ def prune_magnitude_batched(weights: Dict[str, torch.Tensor],
+                             eval_fn: Callable[[Dict], float],
+                             batch_eval_fn: Callable[[Dict], torch.Tensor],
+                             cfg: Config) -> PruneResult:
+     """GPU-batched magnitude reduction."""
+     start = time.perf_counter()
+     weights = {k: v.clone() for k, v in weights.items()}
+     original = _stats(weights)
+     device = cfg.device
+     reductions = 0
+     history = []
+
+     for pass_num in range(cfg.magnitude_passes):
+         candidates = get_candidates(weights)
+         if not candidates:
+             break
+
+         # Phase 1: batch-test all candidates
+         successful = []
+         n = len(candidates)
+
+         for batch_start in range(0, n, cfg.batch_size):
+             batch = candidates[batch_start:batch_start + cfg.batch_size]
+             batch_len = len(batch)
+
+             pop = {name: tensor.unsqueeze(0).expand(batch_len, *tensor.shape).clone().to(device)
+                    for name, tensor in weights.items()}
+
+             for pop_idx, (name, flat_idx, shape, old_val) in enumerate(batch):
+                 new_val = old_val - 1 if old_val > 0 else old_val + 1
+                 flat_view = pop[name][pop_idx].flatten()
+                 flat_view[flat_idx] = new_val
+
+             fitness = batch_eval_fn(pop)
+
+             for pop_idx, cand in enumerate(batch):
+                 if fitness[pop_idx].item() >= cfg.fitness_threshold:
+                     successful.append(cand)
+
+         # Phase 2: apply with conflict resolution
+         pass_reductions = 0
+         for name, idx, shape, old_val in successful:
+             current_val = weights[name].flatten()[idx].item()
+             if current_val == old_val:
+                 apply_reduction(weights, name, idx, shape, old_val)
+                 if eval_fn(weights) >= cfg.fitness_threshold:
+                     pass_reductions += 1
+                     reductions += 1
+                 else:
+                     revert_reduction(weights, name, idx, shape, old_val)
+
+         history.append({'pass': pass_num, 'reductions': pass_reductions, 'candidates': len(successful)})
+
+         if cfg.verbose:
+             s = _stats(weights)
+             print(f"  Pass {pass_num}: {pass_reductions}/{len(successful)} applied, mag={s['magnitude']:.0f}")
+
+         if pass_reductions == 0:
+             break
+
+     return PruneResult(
+         method='batched_magnitude',
+         original_stats=original,
+         final_stats=_stats(weights),
+         final_weights=weights,
+         fitness=eval_fn(weights),
+         reductions=reductions,
+         time_seconds=time.perf_counter() - start,
+         history=history
+     )
+
+
+ # -----------------------------------------------------------------------------
+ # Method 3: Zero Pruning
+ # -----------------------------------------------------------------------------
+
+ def prune_zero(weights: Dict[str, torch.Tensor],
+                eval_fn: Callable[[Dict], float],
+                cfg: Config) -> PruneResult:
+     """Try setting weights directly to zero."""
+     start = time.perf_counter()
+     weights = {k: v.clone() for k, v in weights.items()}
+     original = _stats(weights)
+
+     candidates = get_candidates(weights)
+     random.shuffle(candidates)
+
+     if cfg.verbose:
+         print("  Starting zero pruning...")
+         print(f"  Original: mag={original['magnitude']:.0f}, nonzero={original['nonzero']}")
+         print(f"  Testing {len(candidates)} candidates (random order)...")
+
+     reductions = 0
+     tested = 0
+     for name, idx, shape, old_val in candidates:
+         flat = weights[name].flatten()
+         flat[idx] = 0
+         weights[name] = flat.view(shape)
+         tested += 1
+
+         if eval_fn(weights) >= cfg.fitness_threshold:
+             reductions += 1
+             if cfg.verbose:
+                 print(f"    ✓ {name}[{idx}]: {old_val} -> 0 (zeroed)")
+         else:
+             flat = weights[name].flatten()
+             flat[idx] = old_val
+             weights[name] = flat.view(shape)
+
+         if cfg.verbose and tested % 50 == 0:
+             s = _stats(weights)
+             print(f"    Progress: {tested}/{len(candidates)}, zeroed={reductions}, mag={s['magnitude']:.0f}")
+
+     if cfg.verbose:
+         s = _stats(weights)
+         print(f"  Zero pruning complete: {reductions} weights zeroed")
+         print(f"  Final: mag={s['magnitude']:.0f}, nonzero={s['nonzero']}")
+
+     return PruneResult(
+         method='zero',
+         original_stats=original,
+         final_stats=_stats(weights),
+         final_weights=weights,
+         fitness=eval_fn(weights),
+         reductions=reductions,
+         time_seconds=time.perf_counter() - start
+     )
+
+
+ # -----------------------------------------------------------------------------
+ # Method 4: Quantization
+ # -----------------------------------------------------------------------------
+
+ def prune_quantize(weights: Dict[str, torch.Tensor],
+                    eval_fn: Callable[[Dict], float],
+                    cfg: Config) -> PruneResult:
+     """Force weights to a target set (default: {-1, 0, 1})."""
+     start = time.perf_counter()
+     weights = {k: v.clone() for k, v in weights.items()}
+     original = _stats(weights)
+     target = torch.tensor(cfg.quantize_targets, device=weights[next(iter(weights))].device)
+     target_set = set(cfg.quantize_targets)
+
+     if cfg.verbose:
+         print("  Starting quantization...")
+         print(f"  Target values: {sorted(cfg.quantize_targets)}")
+         print(f"  Original unique values: {original.get('unique_count', len(set(v.item() for t in weights.values() for v in t.flatten())))}")
+         print(f"  Original: mag={original['magnitude']:.0f}, nonzero={original['nonzero']}")
+
+     # Count how many weights need quantizing
+     needs_quant = sum(1 for t in weights.values() for v in t.flatten() if v.item() not in target_set)
+     if cfg.verbose:
+         print(f"  Weights needing quantization: {needs_quant}")
+
+     reductions = 0
+     tested = 0
+     for name, tensor in list(weights.items()):
+         flat = tensor.flatten()
+         for i in range(len(flat)):
+             old_val = flat[i].item()
+             if old_val not in target_set:
+                 distances = (target - old_val).abs()
+                 closest = target[distances.argmin()].item()
+
+                 flat[i] = closest
+                 weights[name] = flat.view(tensor.shape)
+                 tested += 1
+
+                 if eval_fn(weights) >= cfg.fitness_threshold:
+                     reductions += 1
+                     if cfg.verbose:
+                         print(f"    ✓ {name}[{i}]: {old_val} -> {closest}")
+                 else:
+                     flat[i] = old_val
+                     weights[name] = flat.view(tensor.shape)
+
+                 if cfg.verbose and tested % 20 == 0:
+                     print(f"    Progress: {tested}/{needs_quant}, quantized={reductions}")
+
+     if cfg.verbose:
+         s = _stats(weights)
+         unique_now = len(set(v.item() for t in weights.values() for v in t.flatten()))
+         print(f"  Quantization complete: {reductions}/{tested} quantized")
+         print(f"  Final unique values: {unique_now}")
+         print(f"  Final: mag={s['magnitude']:.0f}, nonzero={s['nonzero']}")
+
+     return PruneResult(
+         method='quantize',
+         original_stats=original,
+         final_stats=_stats(weights),
+         final_weights=weights,
+         fitness=eval_fn(weights),
+         reductions=reductions,
+         time_seconds=time.perf_counter() - start
+     )
+
+
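The nearest-target rule above is an argmin over absolute distances; the scalar equivalent (a sketch of the same selection, not framework code):

```python
def nearest_target(val, targets=(-1.0, 0.0, 1.0)):
    # Pick the member of the target set closest to val,
    # as (target - old_val).abs().argmin() does on the tensor side.
    return min(targets, key=lambda t: abs(t - val))
```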
+ # -----------------------------------------------------------------------------
+ # Method 5: Evolutionary Search
+ # -----------------------------------------------------------------------------
+
+ def prune_evolutionary(weights: Dict[str, torch.Tensor],
+                        batch_eval_fn: Callable[[Dict], torch.Tensor],
+                        cfg: Config) -> PruneResult:
+     """Evolutionary search with parsimony pressure."""
+     start = time.perf_counter()
+     original = _stats(weights)
+     device = cfg.device
+     pop_size = cfg.evo_pop_size
+
+     if cfg.verbose:
+         print("  Starting evolutionary search...")
+         print(f"  Population: {pop_size}, Generations: {cfg.evo_generations}")
+         print(f"  Mutation rate: {cfg.evo_mutation_rate}, Parsimony: {cfg.evo_parsimony}")
+         print(f"  Original: mag={original['magnitude']:.0f}, nonzero={original['nonzero']}")
+
+     # Initialize population
+     pop = {k: v.unsqueeze(0).expand(pop_size, *v.shape).clone().to(device)
+            for k, v in weights.items()}
+
+     best_weights = {k: v.clone() for k, v in weights.items()}
+     best_score = -float('inf')
+     best_fitness = 1.0  # the original weights reproduce their own truth table
+     history = []
+     improved_at = 0
+
+     for gen in range(cfg.evo_generations):
+         # Evaluate
+         fitness = batch_eval_fn(pop)
+
+         # Compute magnitude penalty
+         mags = torch.stack([
+             sum(pop[name][i].abs().sum() for name in pop)
+             for i in range(pop_size)
+         ])
+         adjusted = fitness - cfg.evo_parsimony * mags
+
+         # Track best
+         best_idx = adjusted.argmax().item()
+         gen_best_fitness = fitness[best_idx].item()
+         gen_best_adj = adjusted[best_idx].item()
+         gen_best_mag = mags[best_idx].item()
+
+         if gen_best_fitness >= cfg.fitness_threshold:
+             if gen_best_adj > best_score:
+                 best_score = gen_best_adj
+                 best_fitness = gen_best_fitness
+                 best_weights = {k: v[best_idx].clone() for k, v in pop.items()}
+                 improved_at = gen
+                 if cfg.verbose:
+                     s = _stats(best_weights)
+                     print(f"  Gen {gen}: NEW BEST! score={best_score:.4f}, fitness={best_fitness:.4f}, mag={s['magnitude']:.0f}")
+
+         # Stats for this generation
+         valid_mask = fitness >= cfg.fitness_threshold
+         n_valid = valid_mask.sum().item()
+         avg_fitness = fitness.mean().item()
+         avg_mag = mags.mean().item()
+
+         if gen % 50 == 0:
+             s = _stats(best_weights)
+             if cfg.verbose:
+                 print(f"  Gen {gen}: valid={n_valid}/{pop_size}, avg_fit={avg_fitness:.4f}, avg_mag={avg_mag:.0f}, best_mag={s['magnitude']:.0f}")
+             history.append({'gen': gen, 'score': best_score, 'mag': s['magnitude'], 'n_valid': n_valid})
+
+         # Selection + mutation
+         probs = torch.softmax(adjusted, dim=0)
+         indices = torch.multinomial(probs, pop_size, replacement=True)
+
+         new_pop = {}
+         for name, tensor in pop.items():
+             selected = tensor[indices].clone()
+             mask = torch.rand_like(selected) < cfg.evo_mutation_rate
+             mutations = torch.randint(-1, 2, selected.shape, device=device).float()
+             selected = selected + mask.float() * mutations
+             new_pop[name] = selected
+         pop = new_pop
+
+     # Final report
+     final_stats = _stats(best_weights)
+     elapsed = time.perf_counter() - start
+
+     if cfg.verbose:
+         print(f"  Evolution complete in {elapsed:.1f}s")
+         print(f"  Best found at generation {improved_at}")
+         print(f"  Final: mag={final_stats['magnitude']:.0f}, nonzero={final_stats['nonzero']}")
+         reduction_pct = 100 * (1 - final_stats['magnitude'] / original['magnitude'])
+         print(f"  Magnitude reduction: {reduction_pct:.1f}%")
+
+     return PruneResult(
+         method='evolutionary',
+         original_stats=original,
+         final_stats=final_stats,
+         final_weights=best_weights,
+         fitness=best_fitness,
+         reductions=int(original['magnitude'] - final_stats['magnitude']),
+         time_seconds=elapsed,
+         history=history
+     )
+
+
+ # -----------------------------------------------------------------------------
+ # Method 6: Simulated Annealing
+ # -----------------------------------------------------------------------------
+
+ def prune_annealing(weights: Dict[str, torch.Tensor],
+                     eval_fn: Callable[[Dict], float],
+                     cfg: Config) -> PruneResult:
+     """Simulated annealing for circuit minimization."""
+     start = time.perf_counter()
+     weights = {k: v.clone() for k, v in weights.items()}
+     original = _stats(weights)
+
+     current = weights
+     current_energy = _energy(current, eval_fn, cfg)
+     best = {k: v.clone() for k, v in current.items()}
+     best_energy = current_energy
+
+     temp = cfg.annealing_initial_temp
+     history = []
+
+     for i in range(cfg.annealing_iterations):
+         # Perturb a single random weight
+         neighbor = {k: v.clone() for k, v in current.items()}
+         name = random.choice(list(neighbor.keys()))
+         flat = neighbor[name].flatten()
+         idx = random.randint(0, len(flat) - 1)
+         mutation = random.choice([-1, 1, 0])
+         if mutation == 0:
+             flat[idx] = 0
+         else:
+             flat[idx] = flat[idx] + mutation
+         neighbor[name] = flat.view(neighbor[name].shape)
+
+         neighbor_energy = _energy(neighbor, eval_fn, cfg)
+         delta = neighbor_energy - current_energy
+
+         if delta < 0 or random.random() < math.exp(-delta / max(temp, 1e-10)):
+             current = neighbor
+             current_energy = neighbor_energy
+
+             if current_energy < best_energy:
+                 if eval_fn(current) >= cfg.fitness_threshold:
+                     best = {k: v.clone() for k, v in current.items()}
+                     best_energy = current_energy
+
+         temp *= cfg.annealing_cooling
+
+         if i % 1000 == 0:
+             s = _stats(best)
+             if cfg.verbose:
+                 print(f"  Iter {i}: temp={temp:.4f}, mag={s['magnitude']:.0f}")
+             history.append({'iter': i, 'temp': temp, 'mag': s['magnitude']})
+
+     return PruneResult(
+         method='annealing',
+         original_stats=original,
+         final_stats=_stats(best),
+         final_weights=best,
+         fitness=eval_fn(best),
+         reductions=int(original['magnitude'] - _stats(best)['magnitude']),
+         time_seconds=time.perf_counter() - start,
+         history=history
+     )
+
+
+ def _energy(weights, eval_fn, cfg):
+     """Energy = total magnitude, with a large penalty below the fitness threshold."""
+     fitness = eval_fn(weights)
+     mag = sum(t.abs().sum().item() for t in weights.values())
+     if fitness < cfg.fitness_threshold:
+         return 1e6 + mag
+     return mag
+
+
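The acceptance test inside the loop is the standard Metropolis rule: energy drops always pass, energy rises pass with probability exp(-delta/T). Factored out for illustration (a sketch; the `rng` parameter is a hypothetical hook added here for determinism, not part of the framework):

```python
import math
import random

def accept(delta, temp, rng=random.random):
    # Downhill moves always pass; uphill moves pass with Boltzmann
    # probability, with the temperature floored as in prune_annealing.
    return delta < 0 or rng() < math.exp(-delta / max(temp, 1e-10))
```

Since `temp` decays by `annealing_cooling` every iteration, uphill moves become increasingly rare and the search freezes into a local minimum of `_energy`.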
+ # -----------------------------------------------------------------------------
+ # Method 7: Pareto Frontier
+ # -----------------------------------------------------------------------------
+
+ def prune_pareto(weights: Dict[str, torch.Tensor],
+                  eval_fn: Callable[[Dict], float],
+                  cfg: Config) -> PruneResult:
+     """Search the Pareto frontier of correctness vs size."""
+     start = time.perf_counter()
+     original = _stats(weights)
+     frontier = []
+
+     for target in cfg.pareto_levels:
+         if cfg.verbose:
+             print(f"  Target fitness >= {target}")
+
+         relaxed_cfg = Config(
+             device=cfg.device,
+             fitness_threshold=target,
+             magnitude_passes=50,
+             verbose=False
+         )
+
+         result = prune_magnitude({k: v.clone() for k, v in weights.items()}, eval_fn, relaxed_cfg)
+
+         frontier.append({
+             'target': target,
+             'actual': result.fitness,
+             'magnitude': result.final_stats['magnitude'],
+             'nonzero': result.final_stats['nonzero']
+         })
+
+         if cfg.verbose:
+             print(f"    -> fitness={result.fitness:.4f}, mag={result.final_stats['magnitude']:.0f}")
+
+     return PruneResult(
+         method='pareto',
+         original_stats=original,
+         final_stats=frontier[-1] if frontier else original,
+         final_weights=weights,
+         fitness=frontier[0]['actual'] if frontier else 1.0,
+         reductions=len(frontier),
+         time_seconds=time.perf_counter() - start,
+         history=frontier
+     )
+
+
# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------

def _stats(weights: Dict[str, torch.Tensor]) -> Dict:
    """Summarize a weight dict: parameter count, non-zero count, L1 magnitude, max |w|."""
    total = sum(t.numel() for t in weights.values())
    nonzero = sum((t != 0).sum().item() for t in weights.values())
    mag = sum(t.abs().sum().item() for t in weights.values())
    maxw = max(t.abs().max().item() for t in weights.values()) if weights else 0
    return {'total': total, 'nonzero': nonzero, 'magnitude': mag, 'max': maxw}


import math

# =============================================================================
# CIRCUIT-SPECIFIC FORWARD FUNCTIONS
# =============================================================================

def make_hamming_decoder_forward(device='cuda'):
    """Create forward function for Hamming(7,4) decoder."""

    def forward(inputs, weights):
        """
        Forward pass for the Hamming decoder, evaluated case by case.
        inputs: [n_cases, 7]
        weights: dict of weight tensors
        Returns: [n_cases, 4]
        """
        n_cases = inputs.shape[0]
        w = weights
        outputs = []

        for case_idx in range(n_cases):
            c = [inputs[case_idx, i].item() for i in range(7)]

            def xor2(a, b, prefix):
                inp = torch.tensor([float(a), float(b)], device=device)
                or_out = float((inp * w[f'{prefix}.layer1.or.weight'].flatten()[:2]).sum() +
                               w[f'{prefix}.layer1.or.bias'].squeeze() >= 0)
                nand_out = float((inp * w[f'{prefix}.layer1.nand.weight'].flatten()[:2]).sum() +
                                 w[f'{prefix}.layer1.nand.bias'].squeeze() >= 0)
                l1 = torch.tensor([or_out, nand_out], device=device)
                return int((l1 * w[f'{prefix}.layer2.weight'].flatten()).sum() +
                           w[f'{prefix}.layer2.bias'].squeeze() >= 0)

            def xor4(indices, prefix):
                i0, i1, i2, i3 = indices
                inp = torch.tensor([float(c[i]) for i in range(7)], device=device)

                or_out = float((inp * w[f'{prefix}.xor_{i0}{i1}.layer1.or.weight'].flatten()).sum() +
                               w[f'{prefix}.xor_{i0}{i1}.layer1.or.bias'].squeeze() >= 0)
                nand_out = float((inp * w[f'{prefix}.xor_{i0}{i1}.layer1.nand.weight'].flatten()).sum() +
                                 w[f'{prefix}.xor_{i0}{i1}.layer1.nand.bias'].squeeze() >= 0)
                xor_ab = int((torch.tensor([or_out, nand_out], device=device) *
                              w[f'{prefix}.xor_{i0}{i1}.layer2.weight'].flatten()).sum() +
                             w[f'{prefix}.xor_{i0}{i1}.layer2.bias'].squeeze() >= 0)

                or_out = float((inp * w[f'{prefix}.xor_{i2}{i3}.layer1.or.weight'].flatten()).sum() +
                               w[f'{prefix}.xor_{i2}{i3}.layer1.or.bias'].squeeze() >= 0)
                nand_out = float((inp * w[f'{prefix}.xor_{i2}{i3}.layer1.nand.weight'].flatten()).sum() +
                                 w[f'{prefix}.xor_{i2}{i3}.layer1.nand.bias'].squeeze() >= 0)
                xor_cd = int((torch.tensor([or_out, nand_out], device=device) *
                              w[f'{prefix}.xor_{i2}{i3}.layer2.weight'].flatten()).sum() +
                             w[f'{prefix}.xor_{i2}{i3}.layer2.bias'].squeeze() >= 0)

                inp2 = torch.tensor([float(xor_ab), float(xor_cd)], device=device)
                or_out = float((inp2 * w[f'{prefix}.xor_final.layer1.or.weight'].flatten()).sum() +
                               w[f'{prefix}.xor_final.layer1.or.bias'].squeeze() >= 0)
                nand_out = float((inp2 * w[f'{prefix}.xor_final.layer1.nand.weight'].flatten()).sum() +
                                 w[f'{prefix}.xor_final.layer1.nand.bias'].squeeze() >= 0)
                return int((torch.tensor([or_out, nand_out], device=device) *
                            w[f'{prefix}.xor_final.layer2.weight'].flatten()).sum() +
                           w[f'{prefix}.xor_final.layer2.bias'].squeeze() >= 0)

            s1 = xor4([0, 2, 4, 6], 's1')
            s2 = xor4([1, 2, 5, 6], 's2')
            s3 = xor4([3, 4, 5, 6], 's3')

            syndrome = torch.tensor([float(s1), float(s2), float(s3)], device=device)

            flip3 = int((syndrome * w['flip3.weight'].flatten()).sum() + w['flip3.bias'].squeeze() >= 0)
            flip5 = int((syndrome * w['flip5.weight'].flatten()).sum() + w['flip5.bias'].squeeze() >= 0)
            flip6 = int((syndrome * w['flip6.weight'].flatten()).sum() + w['flip6.bias'].squeeze() >= 0)
            flip7 = int((syndrome * w['flip7.weight'].flatten()).sum() + w['flip7.bias'].squeeze() >= 0)

            d1 = xor2(c[2], flip3, 'd1.xor')
            d2 = xor2(c[4], flip5, 'd2.xor')
            d3 = xor2(c[5], flip6, 'd3.xor')
            d4 = xor2(c[6], flip7, 'd4.xor')

            outputs.append([d1, d2, d3, d4])

        return torch.tensor(outputs, device=device, dtype=torch.float32)

    # Build test cases with error injection: all 16 clean codewords plus
    # every single-bit corruption of each.
    def hamming_encode(data):
        d1, d2, d3, d4 = (data >> 0) & 1, (data >> 1) & 1, (data >> 2) & 1, (data >> 3) & 1
        p1, p2, p3 = d1 ^ d2 ^ d4, d1 ^ d3 ^ d4, d2 ^ d3 ^ d4
        return (p1 << 0) | (p2 << 1) | (d1 << 2) | (p3 << 3) | (d2 << 4) | (d3 << 5) | (d4 << 6)

    inputs_list, expected_list = [], []
    for data in range(16):
        cw = hamming_encode(data)
        inputs_list.append([(cw >> i) & 1 for i in range(7)])
        expected_list.append([(data >> i) & 1 for i in range(4)])
    for data in range(16):
        cw = hamming_encode(data)
        for flip in range(7):
            corrupted = cw ^ (1 << flip)
            inputs_list.append([(corrupted >> i) & 1 for i in range(7)])
            expected_list.append([(data >> i) & 1 for i in range(4)])

    test_inputs = torch.tensor(inputs_list, device=device, dtype=torch.float32)
    test_expected = torch.tensor(expected_list, device=device, dtype=torch.float32)

    return forward, test_inputs, test_expected

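The expected outputs above assume the classic Hamming(7,4) property: the three syndrome bits, read as a binary number, give the 1-based position of a single flipped bit. A pure-Python reference for the same bit layout (p1 p2 d1 p3 d2 d3 d4, matching `hamming_encode`; the `encode`/`decode` helpers here are illustrative, not part of prune.py) makes this easy to sanity-check independently of the threshold weights:

```python
def encode(data):
    # Same layout as hamming_encode in prune.py: bits 0..6 = p1 p2 d1 p3 d2 d3 d4.
    d1, d2, d3, d4 = (data >> 0) & 1, (data >> 1) & 1, (data >> 2) & 1, (data >> 3) & 1
    p1, p2, p3 = d1 ^ d2 ^ d4, d1 ^ d3 ^ d4, d2 ^ d3 ^ d4
    return (p1 << 0) | (p2 << 1) | (d1 << 2) | (p3 << 3) | (d2 << 4) | (d3 << 5) | (d4 << 6)

def decode(cw):
    bit = lambda i: (cw >> i) & 1
    # Same index groups as the xor4 syndrome computations above.
    s1 = bit(0) ^ bit(2) ^ bit(4) ^ bit(6)
    s2 = bit(1) ^ bit(2) ^ bit(5) ^ bit(6)
    s3 = bit(3) ^ bit(4) ^ bit(5) ^ bit(6)
    syndrome = s1 | (s2 << 1) | (s3 << 2)  # 1-based error position, 0 if clean
    if syndrome:
        cw ^= 1 << (syndrome - 1)
    bit = lambda i: (cw >> i) & 1
    return bit(2) | (bit(4) << 1) | (bit(5) << 2) | (bit(6) << 3)

# Every single-bit corruption of every codeword decodes back to the data word.
ok = all(decode(encode(d) ^ (1 << f)) == d for d in range(16) for f in range(7))
```

This mirrors exactly the 16 + 112 test cases the function builds: 16 clean codewords and 112 single-bit corruptions, all of which the circuit must decode correctly.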
def make_generic_forward(circuit: Circuit):
    """Create generic forward by loading model.py dynamically."""
    model_py = circuit.path / 'model.py'
    if not model_py.exists():
        return None, None, None

    spec = importlib.util.spec_from_file_location("circuit_model", model_py)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    # Find the main function
    fn_names = [circuit.spec.name.replace('threshold-', '').replace('-', '_'),
                'forward', 'evaluate', 'run']

    main_fn = None
    for name in dir(module):
        if name.lower() in [n.lower() for n in fn_names] and callable(getattr(module, name)):
            main_fn = getattr(module, name)
            break

    if main_fn is None:
        return None, None, None

    # Build inputs
    n = circuit.spec.inputs
    n_cases = 2 ** n
    device = circuit.device

    idx = torch.arange(n_cases, device=device, dtype=torch.long)
    bits = torch.arange(n, device=device, dtype=torch.long)
    inputs = ((idx.unsqueeze(1) >> bits) & 1).float()

    # Compute expected
    outputs = []
    for i in range(n_cases):
        args = [int(inputs[i, j].item()) for j in range(n)]
        result = main_fn(*args, circuit.weights)
        if isinstance(result, (list, tuple)):
            outputs.append([float(x) for x in result])
        else:
            outputs.append([float(result)])
    expected = torch.tensor(outputs, device=device, dtype=torch.float32)

    def forward(inp, weights):
        out = []
        for i in range(inp.shape[0]):
            args = [int(inp[i, j].item()) for j in range(n)]
            result = main_fn(*args, weights)
            if isinstance(result, (list, tuple)):
                out.append([float(x) for x in result])
            else:
                out.append([float(result)])
        return torch.tensor(out, device=device, dtype=torch.float32)

    return forward, inputs, expected

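The `((idx.unsqueeze(1) >> bits) & 1)` expression enumerates the full truth table: row i of `inputs` is the little-endian bit pattern of i, so 2**n rows cover every input combination. A pure-Python equivalent (illustrative helper, not part of prune.py) makes the row layout explicit:

```python
def truth_table_inputs(n):
    # Row i holds the little-endian bits of i: bit b of row i is (i >> b) & 1.
    return [[(i >> b) & 1 for b in range(n)] for i in range(2 ** n)]

table = truth_table_inputs(3)
# 8 rows; e.g. row 5 (binary 101, little-endian) is [1, 0, 1]
```

Exhaustive enumeration is what makes a fitness of 1.0 a proof of functional equivalence for these small circuits, which is also why the `--max-inputs` flag caps batch runs: the table doubles with every extra input.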
968
+
969
+ # =============================================================================
970
+ # MAIN ORCHESTRATOR
971
+ # =============================================================================
972
+
973
+ def run_all_methods(circuit: Circuit, cfg: Config) -> Dict[str, PruneResult]:
974
+ """Run all enabled pruning methods on a circuit."""
975
+
976
+ print(f"\n{'='*70}")
977
+ print(f" PRUNING: {circuit.spec.name}")
978
+ print(f"{'='*70}")
979
+
980
+ original = circuit.stats()
981
+ print(f" Inputs: {circuit.spec.inputs}, Outputs: {circuit.spec.outputs}")
982
+ print(f" Neurons: {circuit.spec.neurons}, Layers: {circuit.spec.layers}")
983
+ print(f" Parameters: {original['total']}, Non-zero: {original['nonzero']}")
984
+ print(f" Magnitude: {original['magnitude']:.0f}")
985
+ print(f"{'='*70}")
986
+
987
+ # Get forward function
988
+ if 'hamming74decoder' in circuit.spec.name:
989
+ forward_fn, test_inputs, test_expected = make_hamming_decoder_forward(cfg.device)
990
+ else:
991
+ forward_fn, test_inputs, test_expected = make_generic_forward(circuit)
992
+
993
+ if forward_fn is None:
994
+ print("ERROR: Could not create forward function")
995
+ return {}
996
+
997
+ # Create evaluators
998
+ def eval_fn(weights):
999
+ outputs = forward_fn(test_inputs, weights)
1000
+ correct = (outputs == test_expected).all(dim=-1).float().sum()
1001
+ return (correct / test_inputs.shape[0]).item()
1002
+
1003
+ def batch_eval_fn(population):
1004
+ pop_size = next(iter(population.values())).shape[0]
1005
+ fitness = torch.zeros(pop_size, device=cfg.device)
1006
+ for i in range(pop_size):
1007
+ w = {k: v[i] for k, v in population.items()}
1008
+ outputs = forward_fn(test_inputs, w)
1009
+ correct = (outputs == test_expected).all(dim=-1).float().sum()
1010
+ fitness[i] = correct / test_inputs.shape[0]
1011
+ return fitness
1012
+
1013
+ # Verify initial
1014
+ initial = eval_fn(circuit.weights)
1015
+ print(f"\n Initial fitness: {initial:.6f}")
1016
+ if initial < cfg.fitness_threshold:
1017
+ print(" ERROR: Circuit doesn't pass baseline!")
1018
+ return {}
1019
+
1020
+ results = {}
1021
+
1022
+ # Run methods
1023
+ if cfg.run_magnitude:
1024
+ print(f"\n[1] MAGNITUDE REDUCTION (sequential)")
1025
+ results['magnitude'] = prune_magnitude(circuit.clone(), eval_fn, cfg)
1026
+ _print_result(results['magnitude'])
1027
+
1028
+ if cfg.run_batched_magnitude:
1029
+ print(f"\n[2] MAGNITUDE REDUCTION (batched GPU)")
1030
+ results['batched'] = prune_magnitude_batched(circuit.clone(), eval_fn, batch_eval_fn, cfg)
1031
+ _print_result(results['batched'])
1032
+
1033
+ if cfg.run_zero:
1034
+ print(f"\n[3] ZERO PRUNING")
1035
+ results['zero'] = prune_zero(circuit.clone(), eval_fn, cfg)
1036
+ _print_result(results['zero'])
1037
+
1038
+ if cfg.run_quantize:
1039
+ print(f"\n[4] QUANTIZATION")
1040
+ results['quantize'] = prune_quantize(circuit.clone(), eval_fn, cfg)
1041
+ _print_result(results['quantize'])
1042
+
1043
+ if cfg.run_evolutionary:
1044
+ print(f"\n[5] EVOLUTIONARY")
1045
+ results['evolutionary'] = prune_evolutionary(circuit.clone(), batch_eval_fn, cfg)
1046
+ _print_result(results['evolutionary'])
1047
+
1048
+ if cfg.run_annealing:
1049
+ print(f"\n[6] SIMULATED ANNEALING")
1050
+ results['annealing'] = prune_annealing(circuit.clone(), eval_fn, cfg)
1051
+ _print_result(results['annealing'])
1052
+
1053
+ if cfg.run_pareto:
1054
+ print(f"\n[7] PARETO FRONTIER")
1055
+ results['pareto'] = prune_pareto(circuit.clone(), eval_fn, cfg)
1056
+ _print_result(results['pareto'])
1057
+
1058
+ # Summary
1059
+ print(f"\n{'='*70}")
1060
+ print(" SUMMARY")
1061
+ print(f"{'='*70}")
1062
+ print(f"\n{'Method':<20} {'Fitness':<10} {'Magnitude':<12} {'Nonzero':<10} {'Time':<10}")
1063
+ print("-" * 70)
1064
+ print(f"{'Original':<20} {'1.0000':<10} {original['magnitude']:<12.0f} {original['nonzero']:<10} {'-':<10}")
1065
+
1066
+ best_method, best_mag = None, float('inf')
1067
+ for name, r in sorted(results.items(), key=lambda x: x[1].final_stats.get('magnitude', float('inf'))):
1068
+ mag = r.final_stats.get('magnitude', 0)
1069
+ nz = r.final_stats.get('nonzero', 0)
1070
+ print(f"{name:<20} {r.fitness:<10.4f} {mag:<12.0f} {nz:<10} {r.time_seconds:<10.1f}s")
1071
+ if r.fitness >= cfg.fitness_threshold and mag < best_mag:
1072
+ best_mag = mag
1073
+ best_method = name
1074
+
1075
+ if best_method:
1076
+ reduction = 1 - best_mag / original['magnitude']
1077
+ print(f"\n BEST: {best_method} ({reduction*100:.1f}% magnitude reduction)")
1078
+
1079
+ return results
1080
+
1081
+
1082
+ def _print_result(r: PruneResult):
1083
+ print(f" Fitness: {r.fitness:.6f}")
1084
+ print(f" Magnitude: {r.final_stats.get('magnitude', 0):.0f}")
1085
+ print(f" Nonzero: {r.final_stats.get('nonzero', 0)}")
1086
+ print(f" Time: {r.time_seconds:.1f}s")
1087
+
1088
+
1089
+ # =============================================================================
1090
+ # CLI
1091
+ # =============================================================================
1092
+
1093
+ def main():
1094
+ parser = argparse.ArgumentParser(description='Prune threshold circuits')
1095
+ parser.add_argument('circuit', nargs='?', help='Circuit name')
1096
+ parser.add_argument('--list', action='store_true', help='List available circuits')
1097
+ parser.add_argument('--all', action='store_true', help='Run on all circuits')
1098
+ parser.add_argument('--max-inputs', type=int, default=10, help='Max inputs for --all')
1099
+ parser.add_argument('--device', default='cuda', help='cuda or cpu')
1100
+ parser.add_argument('--batch-size', type=int, default=80000)
1101
+ parser.add_argument('--methods', type=str, help='Comma-separated methods')
1102
+ parser.add_argument('--fitness', type=float, default=0.9999)
1103
+ parser.add_argument('--quiet', action='store_true')
1104
+ parser.add_argument('--save', action='store_true', help='Save best result')
1105
+
1106
+ args = parser.parse_args()
1107
+
1108
+ if args.list:
1109
+ specs = discover_circuits()
1110
+ print(f"\nAvailable circuits ({len(specs)}):\n")
1111
+ for s in specs:
1112
+ print(f" {s.name:<40} {s.inputs}in/{s.outputs}out {s.neurons}N {s.layers}L")
1113
+ return
1114
+
1115
+ cfg = Config(
1116
+ device=args.device,
1117
+ batch_size=args.batch_size,
1118
+ fitness_threshold=args.fitness,
1119
+ verbose=not args.quiet
1120
+ )
1121
+
1122
+ if args.methods:
1123
+ methods = args.methods.lower().split(',')
1124
+ cfg.run_magnitude = 'magnitude' in methods or 'mag' in methods
1125
+ cfg.run_batched_magnitude = 'batched' in methods or 'batch' in methods
1126
+ cfg.run_zero = 'zero' in methods
1127
+ cfg.run_quantize = 'quantize' in methods or 'quant' in methods
1128
+ cfg.run_evolutionary = 'evo' in methods or 'evolutionary' in methods
1129
+ cfg.run_annealing = 'anneal' in methods or 'sa' in methods
1130
+ cfg.run_pareto = 'pareto' in methods
1131
+
1132
+ RESULTS_PATH.mkdir(exist_ok=True)
1133
+
1134
+ if args.all:
1135
+ specs = [s for s in discover_circuits() if s.inputs <= args.max_inputs]
1136
+ print(f"\nRunning on {len(specs)} circuits...")
1137
+ for spec in specs:
1138
+ try:
1139
+ circuit = Circuit(spec.path, cfg.device)
1140
+ results = run_all_methods(circuit, cfg)
1141
+ except Exception as e:
1142
+ print(f"ERROR on {spec.name}: {e}")
1143
+ elif args.circuit:
1144
+ circuit = load_circuit(args.circuit, cfg.device)
1145
+ results = run_all_methods(circuit, cfg)
1146
+
1147
+ if args.save and results:
1148
+ best = min(results.values(), key=lambda r: r.final_stats.get('magnitude', float('inf')))
1149
+ if best.fitness >= cfg.fitness_threshold:
1150
+ path = circuit.save(best.final_weights, f'pruned_{best.method}')
1151
+ print(f"\nSaved to: {path}")
1152
+ else:
1153
+ parser.print_help()
1154
+ print("\n\nExamples:")
1155
+ print(" python prune.py --list")
1156
+ print(" python prune.py threshold-hamming74decoder")
1157
+ print(" python prune.py threshold-hamming74decoder --methods mag,zero,evo")
1158
+ print(" python prune.py --all --max-inputs 8")
1159
+
1160
+
1161
+ if __name__ == '__main__':
1162
+ main()