CharlesCNorton committed on
Commit
43d198d
·
1 Parent(s): ff1ca32

Add magnitude-ordered exhaustive search with GPU-vectorized evaluation

Browse files

- Exhaustive search now searches by magnitude level (0, 1, 2, ...) and stops
at first valid magnitude, finding ALL solutions at that level
- Add matrix-style weight detection for circuits using layer1.weight [H,I] format
- Add GPU-vectorized evaluation for matrix-style circuits (massive speedup)
- Add MUX/DEMUX native forward function detection
- Report full metrics for solutions: magnitude, nonzero count, max|w|, sparsity
- Fix CIRCUITS_PATH to point to threshold-logic-circuits

Files changed (1) hide show
  1. prune.py +226 -65
prune.py CHANGED
@@ -47,7 +47,7 @@ except ImportError:
47
 
48
  warnings.filterwarnings('ignore')
49
 
50
- CIRCUITS_PATH = Path('D:/threshold-circuits')
51
  RESULTS_PATH = CIRCUITS_PATH / 'pruned_results'
52
 
53
 
@@ -534,6 +534,17 @@ class AdaptiveCircuit:
534
  model = cls(weights)
535
  return list(model(int(inputs[0]), int(inputs[1])))
536
  return wrapper
 
 
 
 
 
 
 
 
 
 
 
537
 
538
  for attr_name in dir(module):
539
  if attr_name.startswith('_'):
@@ -914,6 +925,11 @@ class BatchedEvaluator:
914
  self.n_inputs = circuit.spec.inputs
915
  self.n_outputs = circuit.spec.outputs
916
 
 
 
 
 
 
917
  self.use_vmap = VMAP_AVAILABLE and cfg.device == 'cuda' and not circuit.has_native
918
  self.vmap_forward = None
919
  self.vmapped_fn = None
@@ -935,7 +951,87 @@ class BatchedEvaluator:
935
  batched_status = "yes" if self.batched_ready and not self.use_native_eval else "no"
936
  native_status = "forced" if self.use_native_eval else ("available" if circuit.has_native else "none")
937
  seq_deps = "yes" if self.has_sequential_deps else "no"
938
- print(f" [EVAL] Evaluator ready: batch={self.max_batch:,}, vmap={vmap_status}, batched={batched_status}, native={native_status}, seq_deps={seq_deps}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
939
 
940
  def _detect_sequential_dependencies(self) -> bool:
941
  """Detect if circuit has neurons that depend on other neurons' outputs."""
@@ -1086,6 +1182,13 @@ class BatchedEvaluator:
1086
  if pop_size > self.max_batch:
1087
  return self._evaluate_chunked(population)
1088
 
 
 
 
 
 
 
 
1089
  if self.use_native_eval:
1090
  return self._evaluate_native_parallel(population)
1091
 
@@ -2656,26 +2759,55 @@ def prune_fanin(circuit: AdaptiveCircuit, evaluator: BatchedEvaluator, cfg: Conf
2656
  )
2657
 
2658
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2659
  def prune_exhaustive(circuit: AdaptiveCircuit, evaluator: BatchedEvaluator, cfg: Config) -> PruneResult:
2660
  """
2661
- Exhaustive search over all integer weight combinations.
2662
 
2663
- Unlike evolutionary/annealing methods that perturb existing weights,
2664
- this searches ALL possible combinations within a range. Can find
2665
- globally optimal solutions but only feasible for small circuits.
2666
 
2667
- Complexity: O((2*range+1)^n_params) - exponential in parameter count.
2668
  """
2669
  start = time.perf_counter()
2670
  original = circuit.stats()
2671
 
2672
  n_params = original['total']
2673
- search_range = cfg.exhaustive_range
2674
 
2675
  if n_params > cfg.exhaustive_max_params:
2676
  if cfg.verbose:
2677
  print(f" [EXHAUSTIVE] Skipping: {n_params} params exceeds max {cfg.exhaustive_max_params}")
2678
- print(f" [EXHAUSTIVE] Search space would be {(2*search_range+1)**n_params:,} combinations")
2679
  return PruneResult(
2680
  method='exhaustive',
2681
  original_stats=original,
@@ -2686,28 +2818,15 @@ def prune_exhaustive(circuit: AdaptiveCircuit, evaluator: BatchedEvaluator, cfg:
2686
  metadata={'skipped': True, 'reason': 'too_many_params'}
2687
  )
2688
 
2689
- search_space = (2 * search_range + 1) ** n_params
2690
-
2691
  if cfg.verbose:
2692
  print(f" [EXHAUSTIVE] Parameters: {n_params}")
2693
- print(f" [EXHAUSTIVE] Range: [{-search_range}, {search_range}]")
2694
- print(f" [EXHAUSTIVE] Search space: {search_space:,} combinations")
2695
 
2696
  weight_keys = list(circuit.weights.keys())
2697
  weight_shapes = {k: circuit.weights[k].shape for k in weight_keys}
2698
  weight_sizes = {k: circuit.weights[k].numel() for k in weight_keys}
2699
 
2700
- best_weights = circuit.clone_weights()
2701
- best_mag = original['magnitude']
2702
- best_fitness = evaluator.evaluate_single(best_weights)
2703
-
2704
- values = list(range(-search_range, search_range + 1))
2705
-
2706
- tested = 0
2707
- valid_found = 0
2708
- report_interval = max(1, search_space // 100)
2709
- last_report_time = start
2710
-
2711
  def vector_to_weights(vec):
2712
  weights = {}
2713
  idx = 0
@@ -2717,50 +2836,92 @@ def prune_exhaustive(circuit: AdaptiveCircuit, evaluator: BatchedEvaluator, cfg:
2717
  idx += size
2718
  return weights
2719
 
2720
- if cfg.verbose:
2721
- print(f" [EXHAUSTIVE] Starting search...")
2722
- print(f" [EXHAUSTIVE] Progress updates every 1%")
2723
 
2724
- for combo in product(values, repeat=n_params):
2725
- tested += 1
 
 
2726
 
2727
- weights = vector_to_weights(combo)
2728
- fitness = evaluator.evaluate_single(weights)
2729
 
2730
- if fitness >= cfg.fitness_threshold:
2731
- valid_found += 1
2732
- mag = sum(abs(v) for v in combo)
2733
 
2734
- if mag < best_mag:
2735
- best_mag = mag
2736
- best_weights = {k: v.clone() for k, v in weights.items()}
2737
- best_fitness = fitness
2738
 
2739
- if cfg.verbose:
2740
- elapsed = time.perf_counter() - start
2741
- print(f" [{elapsed:6.1f}s] NEW BEST: magnitude={mag}, weights={combo}")
2742
-
2743
- if cfg.verbose and tested % report_interval == 0:
2744
- now = time.perf_counter()
2745
- elapsed = now - start
2746
- interval_time = now - last_report_time
2747
- rate = report_interval / interval_time if interval_time > 0 else 0
2748
- overall_rate = tested / elapsed if elapsed > 0 else 0
2749
- eta = (search_space - tested) / overall_rate if overall_rate > 0 else 0
2750
- pct = 100 * tested / search_space
2751
- print(f" [{elapsed:6.1f}s] {pct:5.1f}% | {tested:,}/{search_space:,} | "
2752
- f"valid: {valid_found:,} | best: {best_mag:.0f} | "
2753
- f"{rate:,.0f}/s (avg {overall_rate:,.0f}/s) | ETA: {eta:.0f}s")
2754
- last_report_time = now
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2755
 
2756
  if cfg.verbose:
2757
- elapsed = time.perf_counter() - start
2758
  print(f" [EXHAUSTIVE COMPLETE]")
2759
- print(f" - Combinations tested: {tested:,}")
2760
- print(f" - Valid solutions found: {valid_found:,}")
2761
- print(f" - Best magnitude: {best_mag:.0f} (original: {original['magnitude']:.0f})")
2762
- print(f" - Reduction: {(1 - best_mag/original['magnitude'])*100:.1f}%")
2763
- print(f" - Time: {elapsed:.1f}s ({tested/elapsed:.0f} combos/s)")
2764
 
2765
  return PruneResult(
2766
  method='exhaustive',
@@ -2768,12 +2929,12 @@ def prune_exhaustive(circuit: AdaptiveCircuit, evaluator: BatchedEvaluator, cfg:
2768
  final_stats=circuit.stats(best_weights),
2769
  final_weights=best_weights,
2770
  fitness=best_fitness,
2771
- time_seconds=time.perf_counter() - start,
2772
  metadata={
2773
- 'search_space': search_space,
2774
- 'tested': tested,
2775
- 'valid_found': valid_found,
2776
- 'search_range': search_range
2777
  }
2778
  )
2779
 
 
47
 
48
  warnings.filterwarnings('ignore')
49
 
50
+ CIRCUITS_PATH = Path('D:/threshold-logic-circuits')
51
  RESULTS_PATH = CIRCUITS_PATH / 'pruned_results'
52
 
53
 
 
534
  model = cls(weights)
535
  return list(model(int(inputs[0]), int(inputs[1])))
536
  return wrapper
537
+ elif 'mux' in name and hasattr(module, 'mux'):
538
+ mux_fn = module.mux
539
+ def wrapper(inputs, weights):
540
+ return [mux_fn(int(inputs[0]), int(inputs[1]), int(inputs[2]), weights)]
541
+ return wrapper
542
+ elif 'demux' in name and hasattr(module, 'demux'):
543
+ demux_fn = module.demux
544
+ def wrapper(inputs, weights):
545
+ result = demux_fn(int(inputs[0]), int(inputs[1]), weights)
546
+ return list(result) if isinstance(result, (list, tuple)) else [result]
547
+ return wrapper
548
 
549
  for attr_name in dir(module):
550
  if attr_name.startswith('_'):
 
925
  self.n_inputs = circuit.spec.inputs
926
  self.n_outputs = circuit.spec.outputs
927
 
928
+ self.matrix_style = self._detect_matrix_style()
929
+ self.matrix_layout = None
930
+ if self.matrix_style:
931
+ self.matrix_layout = self._build_matrix_layout()
932
+
933
  self.use_vmap = VMAP_AVAILABLE and cfg.device == 'cuda' and not circuit.has_native
934
  self.vmap_forward = None
935
  self.vmapped_fn = None
 
951
  batched_status = "yes" if self.batched_ready and not self.use_native_eval else "no"
952
  native_status = "forced" if self.use_native_eval else ("available" if circuit.has_native else "none")
953
  seq_deps = "yes" if self.has_sequential_deps else "no"
954
+ matrix_status = "yes" if self.matrix_style else "no"
955
+ print(f" [EVAL] Evaluator ready: batch={self.max_batch:,}, vmap={vmap_status}, matrix={matrix_status}, batched={batched_status}, native={native_status}, seq_deps={seq_deps}")
956
+
957
+ def _detect_matrix_style(self) -> bool:
958
+ """Detect if circuit uses matrix-style weights (layer1.weight with multiple rows)."""
959
+ weights = self.circuit.weights
960
+ for key, tensor in weights.items():
961
+ if 'layer1.weight' in key or key == 'layer1.weight':
962
+ if tensor.dim() == 2 and tensor.shape[0] > 1:
963
+ return True
964
+ return False
965
+
966
+ def _build_matrix_layout(self) -> dict:
967
+ """Build layout info for matrix-style weight extraction.
968
+ MUST match the exact dict order used by circuit.weights_to_vector."""
969
+ weights = self.circuit.weights
970
+ weight_keys = list(weights.keys())
971
+ layout = {'layers': {}, 'total_params': 0, 'key_order': weight_keys}
972
+
973
+ idx = 0
974
+ for key in weight_keys:
975
+ tensor = weights[key]
976
+ size = tensor.numel()
977
+
978
+ if '.weight' in key or key.endswith('weight'):
979
+ layer_name = key.replace('.weight', '')
980
+ layout['layers'].setdefault(layer_name, {})
981
+ layout['layers'][layer_name]['w_start'] = idx
982
+ layout['layers'][layer_name]['w_end'] = idx + size
983
+ layout['layers'][layer_name]['w_shape'] = tuple(tensor.shape)
984
+ elif '.bias' in key or key.endswith('bias'):
985
+ layer_name = key.replace('.bias', '')
986
+ layout['layers'].setdefault(layer_name, {})
987
+ layout['layers'][layer_name]['b_start'] = idx
988
+ layout['layers'][layer_name]['b_end'] = idx + size
989
+ layout['layers'][layer_name]['b_shape'] = tuple(tensor.shape)
990
+
991
+ idx += size
992
+
993
+ layout['total_params'] = idx
994
+ layout['layer_order'] = sorted(layout['layers'].keys())
995
+ return layout
996
+
997
+ def _evaluate_matrix_vectorized(self, population: torch.Tensor) -> torch.Tensor:
998
+ """
999
+ Fully vectorized evaluation for matrix-style weights.
1000
+ Handles circuits with layer1.weight [H, I], layer1.bias [H], layer2.weight [O, H], layer2.bias [O].
1001
+ """
1002
+ pop_size = population.shape[0]
1003
+ device = population.device
1004
+
1005
+ layers = self.matrix_layout['layers']
1006
+ layer_order = self.matrix_layout['layer_order']
1007
+
1008
+ if len(layer_order) < 2:
1009
+ raise ValueError("Matrix-style eval requires at least 2 layers")
1010
+
1011
+ with torch.no_grad():
1012
+ l1_name = layer_order[0]
1013
+ l1 = layers[l1_name]
1014
+ l1_w = population[:, l1['w_start']:l1['w_end']].view(pop_size, *l1['w_shape'])
1015
+ l1_b = population[:, l1['b_start']:l1['b_end']].view(pop_size, *l1['b_shape'])
1016
+
1017
+ l2_name = layer_order[1]
1018
+ l2 = layers[l2_name]
1019
+ l2_w = population[:, l2['w_start']:l2['w_end']].view(pop_size, *l2['w_shape'])
1020
+ l2_b = population[:, l2['b_start']:l2['b_end']].view(pop_size, *l2['b_shape'])
1021
+
1022
+ inp = self.test_inputs
1023
+
1024
+ hidden = torch.einsum('ti,bhi->bth', inp, l1_w) + l1_b.unsqueeze(1)
1025
+ hidden = (hidden >= 0).float()
1026
+
1027
+ output = torch.einsum('bth,boh->bto', hidden, l2_w) + l2_b.unsqueeze(1)
1028
+ output = (output >= 0).float()
1029
+
1030
+ expected = self.test_expected.unsqueeze(0).expand(pop_size, -1, -1)
1031
+ correct = (output == expected).all(dim=-1).float().sum(dim=-1)
1032
+ fitness = correct / self.n_cases
1033
+
1034
+ return fitness
1035
 
1036
  def _detect_sequential_dependencies(self) -> bool:
1037
  """Detect if circuit has neurons that depend on other neurons' outputs."""
 
1182
  if pop_size > self.max_batch:
1183
  return self._evaluate_chunked(population)
1184
 
1185
+ if self.matrix_style and self.matrix_layout:
1186
+ try:
1187
+ return self._evaluate_matrix_vectorized(population)
1188
+ except Exception as e:
1189
+ if self.cfg.verbose:
1190
+ print(f" Matrix vectorized eval failed ({e}), trying other methods")
1191
+
1192
  if self.use_native_eval:
1193
  return self._evaluate_native_parallel(population)
1194
 
 
2759
  )
2760
 
2761
 
2762
+ def _partitions(total: int, n: int, max_val: int):
2763
+ """Generate all ways to partition 'total' into 'n' non-negative integers <= max_val."""
2764
+ if n == 0:
2765
+ if total == 0:
2766
+ yield []
2767
+ return
2768
+ for i in range(min(total, max_val) + 1):
2769
+ for rest in _partitions(total - i, n - 1, max_val):
2770
+ yield [i] + rest
2771
+
2772
+
2773
+ def _all_signs(abs_vals: list):
2774
+ """Generate all sign combinations for absolute values."""
2775
+ if not abs_vals:
2776
+ yield []
2777
+ return
2778
+ for rest in _all_signs(abs_vals[1:]):
2779
+ if abs_vals[0] == 0:
2780
+ yield [0] + rest
2781
+ else:
2782
+ yield [abs_vals[0]] + rest
2783
+ yield [-abs_vals[0]] + rest
2784
+
2785
+
2786
+ def _configs_at_magnitude(mag: int, n_params: int):
2787
+ """Generate all n_params-length configs with given total magnitude."""
2788
+ for partition in _partitions(mag, n_params, mag):
2789
+ for signed in _all_signs(partition):
2790
+ yield tuple(signed)
2791
+
2792
+
2793
  def prune_exhaustive(circuit: AdaptiveCircuit, evaluator: BatchedEvaluator, cfg: Config) -> PruneResult:
2794
  """
2795
+ Exhaustive search by magnitude level - finds provably optimal solutions.
2796
 
2797
+ Searches magnitude 0, then 1, then 2, ... until valid solutions found.
2798
+ Returns ALL valid solutions at the minimum magnitude (to discover families).
 
2799
 
2800
+ Much faster than arbitrary-order search since it stops at first valid magnitude.
2801
  """
2802
  start = time.perf_counter()
2803
  original = circuit.stats()
2804
 
2805
  n_params = original['total']
2806
+ max_mag = int(original['magnitude'])
2807
 
2808
  if n_params > cfg.exhaustive_max_params:
2809
  if cfg.verbose:
2810
  print(f" [EXHAUSTIVE] Skipping: {n_params} params exceeds max {cfg.exhaustive_max_params}")
 
2811
  return PruneResult(
2812
  method='exhaustive',
2813
  original_stats=original,
 
2818
  metadata={'skipped': True, 'reason': 'too_many_params'}
2819
  )
2820
 
 
 
2821
  if cfg.verbose:
2822
  print(f" [EXHAUSTIVE] Parameters: {n_params}")
2823
+ print(f" [EXHAUSTIVE] Original magnitude: {max_mag}")
2824
+ print(f" [EXHAUSTIVE] Searching by magnitude level (0, 1, 2, ...)")
2825
 
2826
  weight_keys = list(circuit.weights.keys())
2827
  weight_shapes = {k: circuit.weights[k].shape for k in weight_keys}
2828
  weight_sizes = {k: circuit.weights[k].numel() for k in weight_keys}
2829
 
 
 
 
 
 
 
 
 
 
 
 
2830
  def vector_to_weights(vec):
2831
  weights = {}
2832
  idx = 0
 
2836
  idx += size
2837
  return weights
2838
 
2839
+ total_tested = 0
2840
+ all_solutions = []
2841
+ optimal_mag = None
2842
 
2843
+ for mag in range(0, max_mag + 1):
2844
+ mag_start = time.perf_counter()
2845
+ configs = list(_configs_at_magnitude(mag, n_params))
2846
+ n_configs = len(configs)
2847
 
2848
+ if n_configs == 0:
2849
+ continue
2850
 
2851
+ if cfg.verbose:
2852
+ print(f" Magnitude {mag}: {n_configs:,} configurations...", end=" ", flush=True)
 
2853
 
2854
+ valid_at_mag = []
 
 
 
2855
 
2856
+ batch_size = min(100000, n_configs)
2857
+ for batch_start in range(0, n_configs, batch_size):
2858
+ batch_end = min(batch_start + batch_size, n_configs)
2859
+ batch_configs = configs[batch_start:batch_end]
2860
+
2861
+ population = torch.tensor(batch_configs, dtype=torch.float32, device=cfg.device)
2862
+
2863
+ try:
2864
+ fitness_batch = evaluator.evaluate_population(population)
2865
+ except:
2866
+ fitness_batch = torch.tensor([
2867
+ evaluator.evaluate_single(vector_to_weights(c))
2868
+ for c in batch_configs
2869
+ ], device=cfg.device)
2870
+
2871
+ valid_mask = fitness_batch >= cfg.fitness_threshold
2872
+
2873
+ for i, is_valid in enumerate(valid_mask.tolist()):
2874
+ if is_valid:
2875
+ valid_at_mag.append(batch_configs[i])
2876
+
2877
+ total_tested += n_configs
2878
+ mag_time = time.perf_counter() - mag_start
2879
+
2880
+ if valid_at_mag:
2881
+ if cfg.verbose:
2882
+ print(f"FOUND {len(valid_at_mag)} solutions! ({mag_time:.2f}s)")
2883
+
2884
+ optimal_mag = mag
2885
+ all_solutions = valid_at_mag
2886
+
2887
+ if cfg.verbose:
2888
+ print(f" [EXHAUSTIVE] Optimal magnitude: {optimal_mag}")
2889
+ print(f" [EXHAUSTIVE] Solutions found: {len(all_solutions)}")
2890
+ print(f" [EXHAUSTIVE] Solution analysis:")
2891
+ print(f" {'#':<3} {'Mag':<5} {'NZ':<4} {'Max|w|':<7} {'Sparse%':<8} {'Weights'}")
2892
+ print(f" {'-'*60}")
2893
+ for i, sol in enumerate(all_solutions[:20]):
2894
+ mag = sum(abs(v) for v in sol)
2895
+ nz = sum(1 for v in sol if v != 0)
2896
+ max_w = max(abs(v) for v in sol)
2897
+ sparsity = 100 * (len(sol) - nz) / len(sol)
2898
+ print(f" {i+1:<3} {mag:<5} {nz:<4} {max_w:<7} {sparsity:<8.1f} {sol}")
2899
+ if len(all_solutions) > 20:
2900
+ print(f" ... and {len(all_solutions) - 20} more")
2901
+
2902
+ break
2903
+ else:
2904
+ if cfg.verbose:
2905
+ print(f"none ({mag_time:.2f}s)")
2906
+
2907
+ elapsed = time.perf_counter() - start
2908
+
2909
+ if all_solutions:
2910
+ best_combo = all_solutions[0]
2911
+ best_weights = vector_to_weights(best_combo)
2912
+ best_fitness = evaluator.evaluate_single(best_weights)
2913
+ else:
2914
+ best_weights = circuit.clone_weights()
2915
+ best_fitness = evaluator.evaluate_single(best_weights)
2916
+ optimal_mag = max_mag
2917
 
2918
  if cfg.verbose:
 
2919
  print(f" [EXHAUSTIVE COMPLETE]")
2920
+ print(f" - Configurations tested: {total_tested:,}")
2921
+ print(f" - Optimal magnitude: {optimal_mag} (original: {max_mag})")
2922
+ print(f" - Total solutions at optimal: {len(all_solutions)}")
2923
+ print(f" - Reduction: {(1 - optimal_mag/max_mag)*100:.1f}%")
2924
+ print(f" - Time: {elapsed:.1f}s")
2925
 
2926
  return PruneResult(
2927
  method='exhaustive',
 
2929
  final_stats=circuit.stats(best_weights),
2930
  final_weights=best_weights,
2931
  fitness=best_fitness,
2932
+ time_seconds=elapsed,
2933
  metadata={
2934
+ 'optimal_magnitude': optimal_mag,
2935
+ 'total_tested': total_tested,
2936
+ 'solutions_count': len(all_solutions),
2937
+ 'all_solutions': all_solutions[:100]
2938
  }
2939
  )
2940