Upload iron_eval.py
Browse files- iron_eval.py +880 -0
iron_eval.py
ADDED
|
@@ -0,0 +1,880 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
IRON EVAL - COMPREHENSIVE
|
| 3 |
+
=========================
|
| 4 |
+
Complete fitness evaluation for ALL circuits in the threshold computer.
|
| 5 |
+
108 circuits, no placeholders, no shortcuts.
|
| 6 |
+
|
| 7 |
+
GPU-optimized for population-based evolution.
|
| 8 |
+
Target: ~40GB VRAM on RTX 6000 Ada (4M population)
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import torch
|
| 12 |
+
from typing import Dict, Tuple
|
| 13 |
+
from safetensors import safe_open
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def load_model_10166(base_path: str = "D:/8bit-threshold-computer-10166") -> Dict[str, torch.Tensor]:
    """Load every tensor of the threshold-computer model from safetensors.

    Args:
        base_path: Directory containing ``neural_computer.safetensors``.

    Returns:
        Mapping of tensor name -> float32 ``torch.Tensor`` (CPU).
    """
    tensors = {}
    # Use the context manager so the file handle is always released
    # (the original kept the handle from safe_open open for the process lifetime).
    with safe_open(f"{base_path}/neural_computer.safetensors", framework='numpy') as f:
        for name in f.keys():
            # from_numpy wraps the array without an extra copy; .float()
            # then produces the float32 tensor we hand back.
            tensors[name] = torch.from_numpy(f.get_tensor(name)).float()
    return tensors
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def heaviside(x: torch.Tensor) -> torch.Tensor:
    """Step activation: 1.0 wherever x >= 0, 0.0 elsewhere."""
    return torch.ge(x, 0).float()
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class BatchedFitnessEvaluator:
|
| 31 |
+
"""
|
| 32 |
+
GPU-batched fitness evaluator. Tests ALL circuits comprehensively.
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
def __init__(self, device='cuda'):
|
| 36 |
+
self.device = device
|
| 37 |
+
self._setup_tests()
|
| 38 |
+
|
| 39 |
+
    def _setup_tests(self):
        """Pre-compute all test vectors.

        Builds every fixture once on self.device so per-generation evaluation
        does no host->device transfers. All bit tensors are MSB-first: column 0
        holds bit 7 (note the (7-i) shift below).
        """
        d = self.device

        # 2-input truth table [4, 2]
        self.tt2 = torch.tensor([[0,0],[0,1],[1,0],[1,1]], device=d, dtype=torch.float32)

        # 3-input truth table [8, 3]
        self.tt3 = torch.tensor([
            [0,0,0], [0,0,1], [0,1,0], [0,1,1],
            [1,0,0], [1,0,1], [1,1,0], [1,1,1]
        ], device=d, dtype=torch.float32)

        # Boolean gate expected outputs
        # Row order matches tt2 for 2-input gates, tt3 for fa_sum/fa_cout,
        # and not_inputs (below) for 'not'.
        self.expected = {
            'and': torch.tensor([0,0,0,1], device=d, dtype=torch.float32),
            'or': torch.tensor([0,1,1,1], device=d, dtype=torch.float32),
            'nand': torch.tensor([1,1,1,0], device=d, dtype=torch.float32),
            'nor': torch.tensor([1,0,0,0], device=d, dtype=torch.float32),
            'xor': torch.tensor([0,1,1,0], device=d, dtype=torch.float32),
            'xnor': torch.tensor([1,0,0,1], device=d, dtype=torch.float32),
            'implies': torch.tensor([1,1,0,1], device=d, dtype=torch.float32),
            'biimplies': torch.tensor([1,0,0,1], device=d, dtype=torch.float32),
            'not': torch.tensor([1,0], device=d, dtype=torch.float32),
            'ha_sum': torch.tensor([0,1,1,0], device=d, dtype=torch.float32),
            'ha_carry': torch.tensor([0,0,0,1], device=d, dtype=torch.float32),
            'fa_sum': torch.tensor([0,1,1,0,1,0,0,1], device=d, dtype=torch.float32),
            'fa_cout': torch.tensor([0,0,0,1,0,1,1,1], device=d, dtype=torch.float32),
        }

        # NOT gate inputs
        self.not_inputs = torch.tensor([[0],[1]], device=d, dtype=torch.float32)

        # 8-bit test values - comprehensive set
        # Powers of two, their neighbours, extremes, and alternating/block patterns.
        self.test_8bit = torch.tensor([
            0, 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128, 255,
            0b10101010, 0b01010101, 0b11110000, 0b00001111,
            0b11001100, 0b00110011, 0b10000001, 0b01111110
        ], device=d, dtype=torch.long)

        # Bit representations [num_vals, 8]
        self.test_8bit_bits = torch.stack([
            ((self.test_8bit >> (7-i)) & 1).float() for i in range(8)
        ], dim=1)

        # Comparator test pairs - comprehensive with bit boundaries
        # NOTE(review): some pairs appear twice — (1,2)/(2,1), (32,64)/(64,32),
        # (127,128)/(128,127) — which double-weights those cases in the fitness
        # sum. Confirm this weighting is intentional.
        comp_tests = [
            (0,0), (1,0), (0,1), (5,3), (3,5), (5,5),
            (255,0), (0,255), (128,127), (127,128),
            (100,99), (99,100), (64,32), (32,64),
            (200,100), (100,200), (1,2), (2,1),
            (1,2), (2,1), (2,4), (4,2), (4,8), (8,4),
            (8,16), (16,8), (16,32), (32,16), (32,64), (64,32),
            (64,128), (128,64),
            (1,1), (2,2), (4,4), (8,8), (16,16), (32,32), (64,64), (128,128),
            (7,8), (8,7), (9,8), (8,9),
            (15,16), (16,15), (17,16), (16,17),
            (31,32), (32,31), (33,32), (32,33),
            (63,64), (64,63), (65,64), (64,65),
            (127,128), (128,127), (129,128), (128,129),
        ]
        self.comp_a = torch.tensor([c[0] for c in comp_tests], device=d, dtype=torch.long)
        self.comp_b = torch.tensor([c[1] for c in comp_tests], device=d, dtype=torch.long)
        self.comp_a_bits = torch.stack([((self.comp_a >> (7-i)) & 1).float() for i in range(8)], dim=1)
        self.comp_b_bits = torch.stack([((self.comp_b >> (7-i)) & 1).float() for i in range(8)], dim=1)

        # Modular test values
        # Exhaustive: every 8-bit value 0..255.
        self.mod_test = torch.arange(0, 256, device=d, dtype=torch.long)
        self.mod_test_bits = torch.stack([((self.mod_test >> (7-i)) & 1).float() for i in range(8)], dim=1)
|
| 108 |
+
|
| 109 |
+
# =========================================================================
|
| 110 |
+
# BOOLEAN GATES
|
| 111 |
+
# =========================================================================
|
| 112 |
+
|
| 113 |
+
def _test_single_gate(self, pop: Dict, gate: str, inputs: torch.Tensor,
|
| 114 |
+
expected: torch.Tensor) -> torch.Tensor:
|
| 115 |
+
"""Test single-layer boolean gate."""
|
| 116 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 117 |
+
w = pop[f'boolean.{gate}.weight'].view(pop_size, -1)
|
| 118 |
+
b = pop[f'boolean.{gate}.bias'].view(pop_size)
|
| 119 |
+
out = heaviside(inputs @ w.T + b)
|
| 120 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 121 |
+
|
| 122 |
+
def _test_twolayer_gate(self, pop: Dict, prefix: str, inputs: torch.Tensor,
|
| 123 |
+
expected: torch.Tensor) -> torch.Tensor:
|
| 124 |
+
"""Test two-layer gate (XOR, XNOR, BIIMPLIES)."""
|
| 125 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 126 |
+
|
| 127 |
+
# Layer 1
|
| 128 |
+
w1_a = pop[f'{prefix}.layer1.neuron1.weight'].view(pop_size, -1)
|
| 129 |
+
b1_a = pop[f'{prefix}.layer1.neuron1.bias'].view(pop_size)
|
| 130 |
+
w1_b = pop[f'{prefix}.layer1.neuron2.weight'].view(pop_size, -1)
|
| 131 |
+
b1_b = pop[f'{prefix}.layer1.neuron2.bias'].view(pop_size)
|
| 132 |
+
|
| 133 |
+
h_a = heaviside(inputs @ w1_a.T + b1_a)
|
| 134 |
+
h_b = heaviside(inputs @ w1_b.T + b1_b)
|
| 135 |
+
hidden = torch.stack([h_a, h_b], dim=2)
|
| 136 |
+
|
| 137 |
+
# Layer 2
|
| 138 |
+
w2 = pop[f'{prefix}.layer2.weight'].view(pop_size, -1)
|
| 139 |
+
b2 = pop[f'{prefix}.layer2.bias'].view(pop_size)
|
| 140 |
+
out = heaviside((hidden * w2.unsqueeze(0)).sum(2) + b2.unsqueeze(0))
|
| 141 |
+
|
| 142 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 143 |
+
|
| 144 |
+
# =========================================================================
|
| 145 |
+
# ARITHMETIC - ADDERS
|
| 146 |
+
# =========================================================================
|
| 147 |
+
|
| 148 |
+
def _test_halfadder(self, pop: Dict) -> torch.Tensor:
|
| 149 |
+
"""Test half adder: sum and carry."""
|
| 150 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 151 |
+
scores = torch.zeros(pop_size, device=self.device)
|
| 152 |
+
|
| 153 |
+
# Sum (XOR)
|
| 154 |
+
scores += self._test_twolayer_gate(pop, 'arithmetic.halfadder.sum',
|
| 155 |
+
self.tt2, self.expected['ha_sum'])
|
| 156 |
+
# Carry (AND)
|
| 157 |
+
w = pop['arithmetic.halfadder.carry.weight'].view(pop_size, -1)
|
| 158 |
+
b = pop['arithmetic.halfadder.carry.bias'].view(pop_size)
|
| 159 |
+
out = heaviside(self.tt2 @ w.T + b)
|
| 160 |
+
scores += (out == self.expected['ha_carry'].unsqueeze(1)).float().sum(0)
|
| 161 |
+
|
| 162 |
+
return scores
|
| 163 |
+
|
| 164 |
+
    def _test_fulladder(self, pop: Dict) -> torch.Tensor:
        """Test full adder circuit.

        Walks all 8 (a, b, cin) input rows, wiring HA1 -> HA2 -> carry-OR
        explicitly, and awards one fitness point per correct sum bit and one
        per correct carry-out (max 16 per individual).
        NOTE(review): HA1's sum goes through _eval_xor on a [1, 2] input
        (population along dim 1), while HA2 is evaluated element-wise on
        [pop, 2] inputs — the squeeze(0)/stack calls below glue the two
        layouts together.
        """
        pop_size = next(iter(pop.values())).shape[0]
        scores = torch.zeros(pop_size, device=self.device)

        for i, (a, b, cin) in enumerate([(0,0,0), (0,0,1), (0,1,0), (0,1,1),
                                         (1,0,0), (1,0,1), (1,1,0), (1,1,1)]):
            inp_ab = torch.tensor([[float(a), float(b)]], device=self.device)

            # HA1
            ha1_sum = self._eval_xor(pop, 'arithmetic.fulladder.ha1.sum', inp_ab)
            w_c1 = pop['arithmetic.fulladder.ha1.carry.weight'].view(pop_size, -1)
            b_c1 = pop['arithmetic.fulladder.ha1.carry.bias'].view(pop_size)
            ha1_carry = heaviside(inp_ab @ w_c1.T + b_c1)

            # HA2
            # Pair HA1's sum with the (constant) carry-in: one row per individual.
            inp_ha2 = torch.stack([ha1_sum.squeeze(0), torch.full((pop_size,), float(cin), device=self.device)], dim=1)

            w1_or = pop['arithmetic.fulladder.ha2.sum.layer1.or.weight'].view(pop_size, -1)
            b1_or = pop['arithmetic.fulladder.ha2.sum.layer1.or.bias'].view(pop_size)
            w1_nand = pop['arithmetic.fulladder.ha2.sum.layer1.nand.weight'].view(pop_size, -1)
            b1_nand = pop['arithmetic.fulladder.ha2.sum.layer1.nand.bias'].view(pop_size)
            w2 = pop['arithmetic.fulladder.ha2.sum.layer2.weight'].view(pop_size, -1)
            b2 = pop['arithmetic.fulladder.ha2.sum.layer2.bias'].view(pop_size)

            # XOR = AND(OR, NAND), built from two hidden threshold neurons.
            h_or = heaviside((inp_ha2 * w1_or).sum(1) + b1_or)
            h_nand = heaviside((inp_ha2 * w1_nand).sum(1) + b1_nand)
            hidden = torch.stack([h_or, h_nand], dim=1)
            ha2_sum = heaviside((hidden * w2).sum(1) + b2)

            w_c2 = pop['arithmetic.fulladder.ha2.carry.weight'].view(pop_size, -1)
            b_c2 = pop['arithmetic.fulladder.ha2.carry.bias'].view(pop_size)
            ha2_carry = heaviside((inp_ha2 * w_c2).sum(1) + b_c2)

            # Carry OR
            # Final carry-out = OR of the two half-adder carries.
            inp_cout = torch.stack([ha1_carry.squeeze(0), ha2_carry], dim=1)
            w_cor = pop['arithmetic.fulladder.carry_or.weight'].view(pop_size, -1)
            b_cor = pop['arithmetic.fulladder.carry_or.bias'].view(pop_size)
            cout = heaviside((inp_cout * w_cor).sum(1) + b_cor)

            scores += (ha2_sum == self.expected['fa_sum'][i]).float()
            scores += (cout == self.expected['fa_cout'][i]).float()

        return scores
|
| 208 |
+
|
| 209 |
+
def _eval_xor(self, pop: Dict, prefix: str, inputs: torch.Tensor) -> torch.Tensor:
|
| 210 |
+
"""Evaluate XOR gate for given inputs."""
|
| 211 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 212 |
+
|
| 213 |
+
w1_or = pop[f'{prefix}.layer1.or.weight'].view(pop_size, -1)
|
| 214 |
+
b1_or = pop[f'{prefix}.layer1.or.bias'].view(pop_size)
|
| 215 |
+
w1_nand = pop[f'{prefix}.layer1.nand.weight'].view(pop_size, -1)
|
| 216 |
+
b1_nand = pop[f'{prefix}.layer1.nand.bias'].view(pop_size)
|
| 217 |
+
w2 = pop[f'{prefix}.layer2.weight'].view(pop_size, -1)
|
| 218 |
+
b2 = pop[f'{prefix}.layer2.bias'].view(pop_size)
|
| 219 |
+
|
| 220 |
+
h_or = heaviside(inputs @ w1_or.T + b1_or)
|
| 221 |
+
h_nand = heaviside(inputs @ w1_nand.T + b1_nand)
|
| 222 |
+
hidden = torch.stack([h_or, h_nand], dim=2)
|
| 223 |
+
return heaviside((hidden * w2.unsqueeze(0)).sum(2) + b2.unsqueeze(0))
|
| 224 |
+
|
| 225 |
+
def _eval_single_fa(self, pop: Dict, prefix: str, a: torch.Tensor,
|
| 226 |
+
b: torch.Tensor, cin: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
|
| 227 |
+
"""Evaluate a single full adder."""
|
| 228 |
+
pop_size = a.shape[0]
|
| 229 |
+
inp_ab = torch.stack([a, b], dim=1)
|
| 230 |
+
|
| 231 |
+
# HA1 XOR
|
| 232 |
+
w1_or = pop[f'{prefix}.ha1.sum.layer1.or.weight'].view(pop_size, -1)
|
| 233 |
+
b1_or = pop[f'{prefix}.ha1.sum.layer1.or.bias'].view(pop_size)
|
| 234 |
+
w1_nand = pop[f'{prefix}.ha1.sum.layer1.nand.weight'].view(pop_size, -1)
|
| 235 |
+
b1_nand = pop[f'{prefix}.ha1.sum.layer1.nand.bias'].view(pop_size)
|
| 236 |
+
w1_l2 = pop[f'{prefix}.ha1.sum.layer2.weight'].view(pop_size, -1)
|
| 237 |
+
b1_l2 = pop[f'{prefix}.ha1.sum.layer2.bias'].view(pop_size)
|
| 238 |
+
|
| 239 |
+
h_or = heaviside((inp_ab * w1_or).sum(1) + b1_or)
|
| 240 |
+
h_nand = heaviside((inp_ab * w1_nand).sum(1) + b1_nand)
|
| 241 |
+
hidden1 = torch.stack([h_or, h_nand], dim=1)
|
| 242 |
+
ha1_sum = heaviside((hidden1 * w1_l2).sum(1) + b1_l2)
|
| 243 |
+
|
| 244 |
+
w_c1 = pop[f'{prefix}.ha1.carry.weight'].view(pop_size, -1)
|
| 245 |
+
b_c1 = pop[f'{prefix}.ha1.carry.bias'].view(pop_size)
|
| 246 |
+
ha1_carry = heaviside((inp_ab * w_c1).sum(1) + b_c1)
|
| 247 |
+
|
| 248 |
+
# HA2 XOR
|
| 249 |
+
inp_ha2 = torch.stack([ha1_sum, cin], dim=1)
|
| 250 |
+
|
| 251 |
+
w2_or = pop[f'{prefix}.ha2.sum.layer1.or.weight'].view(pop_size, -1)
|
| 252 |
+
b2_or = pop[f'{prefix}.ha2.sum.layer1.or.bias'].view(pop_size)
|
| 253 |
+
w2_nand = pop[f'{prefix}.ha2.sum.layer1.nand.weight'].view(pop_size, -1)
|
| 254 |
+
b2_nand = pop[f'{prefix}.ha2.sum.layer1.nand.bias'].view(pop_size)
|
| 255 |
+
w2_l2 = pop[f'{prefix}.ha2.sum.layer2.weight'].view(pop_size, -1)
|
| 256 |
+
b2_l2 = pop[f'{prefix}.ha2.sum.layer2.bias'].view(pop_size)
|
| 257 |
+
|
| 258 |
+
h2_or = heaviside((inp_ha2 * w2_or).sum(1) + b2_or)
|
| 259 |
+
h2_nand = heaviside((inp_ha2 * w2_nand).sum(1) + b2_nand)
|
| 260 |
+
hidden2 = torch.stack([h2_or, h2_nand], dim=1)
|
| 261 |
+
ha2_sum = heaviside((hidden2 * w2_l2).sum(1) + b2_l2)
|
| 262 |
+
|
| 263 |
+
w_c2 = pop[f'{prefix}.ha2.carry.weight'].view(pop_size, -1)
|
| 264 |
+
b_c2 = pop[f'{prefix}.ha2.carry.bias'].view(pop_size)
|
| 265 |
+
ha2_carry = heaviside((inp_ha2 * w_c2).sum(1) + b_c2)
|
| 266 |
+
|
| 267 |
+
# Carry OR
|
| 268 |
+
inp_cout = torch.stack([ha1_carry, ha2_carry], dim=1)
|
| 269 |
+
w_cor = pop[f'{prefix}.carry_or.weight'].view(pop_size, -1)
|
| 270 |
+
b_cor = pop[f'{prefix}.carry_or.bias'].view(pop_size)
|
| 271 |
+
cout = heaviside((inp_cout * w_cor).sum(1) + b_cor)
|
| 272 |
+
|
| 273 |
+
return ha2_sum, cout
|
| 274 |
+
|
| 275 |
+
def _test_ripplecarry(self, pop: Dict, bits: int, test_cases: list) -> torch.Tensor:
|
| 276 |
+
"""Test ripple carry adder of given bit width."""
|
| 277 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 278 |
+
scores = torch.zeros(pop_size, device=self.device)
|
| 279 |
+
|
| 280 |
+
for a_val, b_val in test_cases:
|
| 281 |
+
# Extract bits
|
| 282 |
+
a_bits = [(a_val >> i) & 1 for i in range(bits)]
|
| 283 |
+
b_bits = [(b_val >> i) & 1 for i in range(bits)]
|
| 284 |
+
|
| 285 |
+
carry = torch.zeros(pop_size, device=self.device)
|
| 286 |
+
sum_bits = []
|
| 287 |
+
|
| 288 |
+
for i in range(bits):
|
| 289 |
+
a_i = torch.full((pop_size,), float(a_bits[i]), device=self.device)
|
| 290 |
+
b_i = torch.full((pop_size,), float(b_bits[i]), device=self.device)
|
| 291 |
+
sum_i, carry = self._eval_single_fa(pop, f'arithmetic.ripplecarry{bits}bit.fa{i}', a_i, b_i, carry)
|
| 292 |
+
sum_bits.append(sum_i)
|
| 293 |
+
|
| 294 |
+
# Reconstruct result
|
| 295 |
+
result = sum(sum_bits[i] * (2**i) for i in range(bits))
|
| 296 |
+
expected = (a_val + b_val) & ((1 << bits) - 1)
|
| 297 |
+
scores += (result == expected).float()
|
| 298 |
+
|
| 299 |
+
return scores
|
| 300 |
+
|
| 301 |
+
# =========================================================================
|
| 302 |
+
# ARITHMETIC - COMPARATORS
|
| 303 |
+
# =========================================================================
|
| 304 |
+
|
| 305 |
+
def _test_comparator(self, pop: Dict, name: str, op: str) -> torch.Tensor:
|
| 306 |
+
"""Test 8-bit comparator."""
|
| 307 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 308 |
+
w = pop[f'arithmetic.{name}.comparator'].view(pop_size, -1)
|
| 309 |
+
|
| 310 |
+
if op == 'gt':
|
| 311 |
+
diff = self.comp_a_bits - self.comp_b_bits
|
| 312 |
+
expected = (self.comp_a > self.comp_b).float()
|
| 313 |
+
elif op == 'lt':
|
| 314 |
+
diff = self.comp_b_bits - self.comp_a_bits
|
| 315 |
+
expected = (self.comp_a < self.comp_b).float()
|
| 316 |
+
elif op == 'geq':
|
| 317 |
+
diff = self.comp_a_bits - self.comp_b_bits
|
| 318 |
+
expected = (self.comp_a >= self.comp_b).float()
|
| 319 |
+
elif op == 'leq':
|
| 320 |
+
diff = self.comp_b_bits - self.comp_a_bits
|
| 321 |
+
expected = (self.comp_a <= self.comp_b).float()
|
| 322 |
+
|
| 323 |
+
score = diff @ w.T
|
| 324 |
+
if op in ['geq', 'leq']:
|
| 325 |
+
out = (score >= 0).float()
|
| 326 |
+
else:
|
| 327 |
+
out = (score > 0).float()
|
| 328 |
+
|
| 329 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 330 |
+
|
| 331 |
+
    def _test_equality(self, pop: Dict) -> torch.Tensor:
        """Test 8-bit equality circuit.

        For each comparator test pair: XNOR every one of the 8 bit pairs, then
        AND the 8 XNOR outputs; the result should be 1 iff a == b. One fitness
        point per correctly-classified pair, returned as a [pop_size] tensor.
        NOTE(review): the Python loops over pairs and bits allocate many tiny
        tensors per call — correct, but a vectorization candidate.
        """
        pop_size = next(iter(pop.values())).shape[0]
        scores = torch.zeros(pop_size, device=self.device)

        for i in range(len(self.comp_a)):
            a_bits = self.comp_a_bits[i]
            b_bits = self.comp_b_bits[i]

            # Compute XNOR for each bit pair
            xnor_results = []
            for bit in range(8):
                # Broadcast this bit pair across the whole population: [pop, 2].
                inp = torch.stack([
                    torch.full((pop_size,), a_bits[bit].item(), device=self.device),
                    torch.full((pop_size,), b_bits[bit].item(), device=self.device)
                ], dim=1)

                # XNOR = (a AND b) OR (NOR(a,b))
                w_and = pop[f'arithmetic.equality8bit.xnor{bit}.layer1.and.weight'].view(pop_size, -1)
                b_and = pop[f'arithmetic.equality8bit.xnor{bit}.layer1.and.bias'].view(pop_size)
                w_nor = pop[f'arithmetic.equality8bit.xnor{bit}.layer1.nor.weight'].view(pop_size, -1)
                b_nor = pop[f'arithmetic.equality8bit.xnor{bit}.layer1.nor.bias'].view(pop_size)
                w_l2 = pop[f'arithmetic.equality8bit.xnor{bit}.layer2.weight'].view(pop_size, -1)
                b_l2 = pop[f'arithmetic.equality8bit.xnor{bit}.layer2.bias'].view(pop_size)

                h_and = heaviside((inp * w_and).sum(1) + b_and)
                h_nor = heaviside((inp * w_nor).sum(1) + b_nor)
                hidden = torch.stack([h_and, h_nor], dim=1)
                xnor_out = heaviside((hidden * w_l2).sum(1) + b_l2)
                xnor_results.append(xnor_out)

            # Final AND of all XNORs
            xnor_stack = torch.stack(xnor_results, dim=1)
            w_final = pop['arithmetic.equality8bit.final_and.weight'].view(pop_size, -1)
            b_final = pop['arithmetic.equality8bit.final_and.bias'].view(pop_size)
            eq_out = heaviside((xnor_stack * w_final).sum(1) + b_final)

            expected = (self.comp_a[i] == self.comp_b[i]).float()
            scores += (eq_out == expected).float()

        return scores
|
| 372 |
+
|
| 373 |
+
# =========================================================================
|
| 374 |
+
# THRESHOLD GATES
|
| 375 |
+
# =========================================================================
|
| 376 |
+
|
| 377 |
+
def _test_threshold_kofn(self, pop: Dict, k: int, name: str) -> torch.Tensor:
|
| 378 |
+
"""Test k-of-8 threshold gate."""
|
| 379 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 380 |
+
w = pop[f'threshold.{name}.weight'].view(pop_size, -1)
|
| 381 |
+
b = pop[f'threshold.{name}.bias'].view(pop_size)
|
| 382 |
+
|
| 383 |
+
out = heaviside(self.test_8bit_bits @ w.T + b)
|
| 384 |
+
popcounts = self.test_8bit_bits.sum(1)
|
| 385 |
+
expected = (popcounts >= k).float()
|
| 386 |
+
|
| 387 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 388 |
+
|
| 389 |
+
def _test_majority(self, pop: Dict) -> torch.Tensor:
|
| 390 |
+
"""Test majority gate (5+ of 8)."""
|
| 391 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 392 |
+
w = pop['threshold.majority.weight'].view(pop_size, -1)
|
| 393 |
+
b = pop['threshold.majority.bias'].view(pop_size)
|
| 394 |
+
|
| 395 |
+
out = heaviside(self.test_8bit_bits @ w.T + b)
|
| 396 |
+
popcounts = self.test_8bit_bits.sum(1)
|
| 397 |
+
expected = (popcounts >= 5).float()
|
| 398 |
+
|
| 399 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 400 |
+
|
| 401 |
+
def _test_minority(self, pop: Dict) -> torch.Tensor:
|
| 402 |
+
"""Test minority gate (3 or fewer of 8)."""
|
| 403 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 404 |
+
w = pop['threshold.minority.weight'].view(pop_size, -1)
|
| 405 |
+
b = pop['threshold.minority.bias'].view(pop_size)
|
| 406 |
+
|
| 407 |
+
out = heaviside(self.test_8bit_bits @ w.T + b)
|
| 408 |
+
popcounts = self.test_8bit_bits.sum(1)
|
| 409 |
+
expected = (popcounts <= 3).float()
|
| 410 |
+
|
| 411 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 412 |
+
|
| 413 |
+
def _test_atleastk(self, pop: Dict, k: int) -> torch.Tensor:
|
| 414 |
+
"""Test at-least-k threshold gate."""
|
| 415 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 416 |
+
w = pop[f'threshold.atleastk_{k}.weight'].view(pop_size, -1)
|
| 417 |
+
b = pop[f'threshold.atleastk_{k}.bias'].view(pop_size)
|
| 418 |
+
|
| 419 |
+
out = heaviside(self.test_8bit_bits @ w.T + b)
|
| 420 |
+
popcounts = self.test_8bit_bits.sum(1)
|
| 421 |
+
expected = (popcounts >= k).float()
|
| 422 |
+
|
| 423 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 424 |
+
|
| 425 |
+
def _test_atmostk(self, pop: Dict, k: int) -> torch.Tensor:
|
| 426 |
+
"""Test at-most-k threshold gate."""
|
| 427 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 428 |
+
w = pop[f'threshold.atmostk_{k}.weight'].view(pop_size, -1)
|
| 429 |
+
b = pop[f'threshold.atmostk_{k}.bias'].view(pop_size)
|
| 430 |
+
|
| 431 |
+
out = heaviside(self.test_8bit_bits @ w.T + b)
|
| 432 |
+
popcounts = self.test_8bit_bits.sum(1)
|
| 433 |
+
expected = (popcounts <= k).float()
|
| 434 |
+
|
| 435 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 436 |
+
|
| 437 |
+
def _test_exactlyk(self, pop: Dict, k: int) -> torch.Tensor:
|
| 438 |
+
"""Test exactly-k threshold gate (uses atleast AND atmost)."""
|
| 439 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 440 |
+
|
| 441 |
+
# At least k
|
| 442 |
+
w_al = pop[f'threshold.exactlyk_{k}.atleast.weight'].view(pop_size, -1)
|
| 443 |
+
b_al = pop[f'threshold.exactlyk_{k}.atleast.bias'].view(pop_size)
|
| 444 |
+
atleast = heaviside(self.test_8bit_bits @ w_al.T + b_al)
|
| 445 |
+
|
| 446 |
+
# At most k
|
| 447 |
+
w_am = pop[f'threshold.exactlyk_{k}.atmost.weight'].view(pop_size, -1)
|
| 448 |
+
b_am = pop[f'threshold.exactlyk_{k}.atmost.bias'].view(pop_size)
|
| 449 |
+
atmost = heaviside(self.test_8bit_bits @ w_am.T + b_am)
|
| 450 |
+
|
| 451 |
+
# AND
|
| 452 |
+
combined = torch.stack([atleast, atmost], dim=2)
|
| 453 |
+
w_and = pop[f'threshold.exactlyk_{k}.and.weight'].view(pop_size, -1)
|
| 454 |
+
b_and = pop[f'threshold.exactlyk_{k}.and.bias'].view(pop_size)
|
| 455 |
+
out = heaviside((combined * w_and.unsqueeze(0)).sum(2) + b_and.unsqueeze(0))
|
| 456 |
+
|
| 457 |
+
popcounts = self.test_8bit_bits.sum(1)
|
| 458 |
+
expected = (popcounts == k).float()
|
| 459 |
+
|
| 460 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 461 |
+
|
| 462 |
+
# =========================================================================
|
| 463 |
+
# PATTERN RECOGNITION
|
| 464 |
+
# =========================================================================
|
| 465 |
+
|
| 466 |
+
def _test_popcount(self, pop: Dict) -> torch.Tensor:
|
| 467 |
+
"""Test popcount (count of 1 bits)."""
|
| 468 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 469 |
+
w = pop['pattern_recognition.popcount.weight'].view(pop_size, -1)
|
| 470 |
+
b = pop['pattern_recognition.popcount.bias'].view(pop_size)
|
| 471 |
+
|
| 472 |
+
out = (self.test_8bit_bits @ w.T + b) # No heaviside - this is a counter
|
| 473 |
+
expected = self.test_8bit_bits.sum(1)
|
| 474 |
+
|
| 475 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 476 |
+
|
| 477 |
+
def _test_allzeros(self, pop: Dict) -> torch.Tensor:
|
| 478 |
+
"""Test all-zeros detector."""
|
| 479 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 480 |
+
w = pop['pattern_recognition.allzeros.weight'].view(pop_size, -1)
|
| 481 |
+
b = pop['pattern_recognition.allzeros.bias'].view(pop_size)
|
| 482 |
+
|
| 483 |
+
out = heaviside(self.test_8bit_bits @ w.T + b)
|
| 484 |
+
expected = (self.test_8bit == 0).float()
|
| 485 |
+
|
| 486 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 487 |
+
|
| 488 |
+
def _test_allones(self, pop: Dict) -> torch.Tensor:
|
| 489 |
+
"""Test all-ones detector."""
|
| 490 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 491 |
+
w = pop['pattern_recognition.allones.weight'].view(pop_size, -1)
|
| 492 |
+
b = pop['pattern_recognition.allones.bias'].view(pop_size)
|
| 493 |
+
|
| 494 |
+
out = heaviside(self.test_8bit_bits @ w.T + b)
|
| 495 |
+
expected = (self.test_8bit == 255).float()
|
| 496 |
+
|
| 497 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 498 |
+
|
| 499 |
+
# =========================================================================
|
| 500 |
+
# ERROR DETECTION
|
| 501 |
+
# =========================================================================
|
| 502 |
+
|
| 503 |
+
def _test_parity(self, pop: Dict, name: str, even: bool) -> torch.Tensor:
|
| 504 |
+
"""Test parity checker/generator."""
|
| 505 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 506 |
+
w = pop[f'error_detection.{name}.weight'].view(pop_size, -1)
|
| 507 |
+
b = pop[f'error_detection.{name}.bias'].view(pop_size)
|
| 508 |
+
|
| 509 |
+
out = heaviside(self.test_8bit_bits @ w.T + b)
|
| 510 |
+
popcounts = self.test_8bit_bits.sum(1)
|
| 511 |
+
if even:
|
| 512 |
+
expected = ((popcounts.long() % 2) == 0).float()
|
| 513 |
+
else:
|
| 514 |
+
expected = ((popcounts.long() % 2) == 1).float()
|
| 515 |
+
|
| 516 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 517 |
+
|
| 518 |
+
# =========================================================================
|
| 519 |
+
# MODULAR ARITHMETIC
|
| 520 |
+
# =========================================================================
|
| 521 |
+
|
| 522 |
+
def _test_modular(self, pop: Dict, mod: int) -> torch.Tensor:
|
| 523 |
+
"""Test modular arithmetic circuit."""
|
| 524 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 525 |
+
w = pop[f'modular.mod{mod}.weight'].view(pop_size, -1)
|
| 526 |
+
b = pop[f'modular.mod{mod}.bias'].view(pop_size)
|
| 527 |
+
|
| 528 |
+
out = heaviside(self.mod_test_bits @ w.T + b)
|
| 529 |
+
expected = ((self.mod_test % mod) == 0).float()
|
| 530 |
+
|
| 531 |
+
return (out == expected.unsqueeze(1)).float().sum(0)
|
| 532 |
+
|
| 533 |
+
# =========================================================================
# COMBINATIONAL
# =========================================================================

def _test_mux2to1(self, pop: Dict) -> torch.Tensor:
    """Test the 2:1 multiplexer circuit on all 8 (a, b, sel) combinations.

    The MUX is built from four gates:
        not_sel, and_a = AND(a, sel), and_b = AND(b, NOT sel),
        out = OR(and_a, and_b)
    so the output should equal `a` when sel == 1, else `b`.

    Args:
        pop: population dict keyed
             'combinational.multiplexer2to1.<gate>.{weight,bias}'.

    Returns:
        Tensor [pop_size] of per-member pass counts (0..8).
    """
    pop_size = next(iter(pop.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)

    # Parameter views do not depend on the test inputs — hoist them out
    # of the 8-iteration loop instead of re-viewing every pass.
    prefix = 'combinational.multiplexer2to1'
    w_not = pop[f'{prefix}.not_sel.weight'].view(pop_size, -1)
    b_not = pop[f'{prefix}.not_sel.bias'].view(pop_size)
    w_and_a = pop[f'{prefix}.and_a.weight'].view(pop_size, -1)
    b_and_a = pop[f'{prefix}.and_a.bias'].view(pop_size)
    w_and_b = pop[f'{prefix}.and_b.weight'].view(pop_size, -1)
    b_and_b = pop[f'{prefix}.and_b.bias'].view(pop_size)
    w_or = pop[f'{prefix}.or.weight'].view(pop_size, -1)
    b_or = pop[f'{prefix}.or.bias'].view(pop_size)

    # Test all 8 combinations of (a, b, sel).
    for a in (0, 1):
        for b in (0, 1):
            for sel in (0, 1):
                expected = a if sel == 1 else b

                a_t = torch.full((pop_size,), float(a), device=self.device)
                b_t = torch.full((pop_size,), float(b), device=self.device)
                sel_t = torch.full((pop_size,), float(sel), device=self.device)

                # NOT sel.  BUG FIX: the original computed
                # `sel_t.unsqueeze(1) @ w_not.T`, which is a [pop, pop]
                # matrix; `.squeeze(1)` is then a no-op for pop_size > 1
                # and the torch.stack below raises on mismatched shapes.
                # The per-member elementwise product is the intended op.
                not_sel = heaviside(sel_t * w_not.squeeze(1) + b_not)

                # AND(a, sel)
                inp_a = torch.stack([a_t, sel_t], dim=1)
                and_a = heaviside((inp_a * w_and_a).sum(1) + b_and_a)

                # AND(b, NOT sel)
                inp_b = torch.stack([b_t, not_sel], dim=1)
                and_b = heaviside((inp_b * w_and_b).sum(1) + b_and_b)

                # OR of the two AND legs.
                inp_or = torch.stack([and_a, and_b], dim=1)
                out = heaviside((inp_or * w_or).sum(1) + b_or)

                scores += (out == expected).float()

    return scores
|
| 579 |
+
|
| 580 |
+
def _test_decoder3to8(self, pop: Dict) -> torch.Tensor:
    """Score the 3-to-8 decoder: for each input value, exactly the matching
    output line should be high and the other seven low.

    Args:
        pop: population dict keyed
             'combinational.decoder3to8.out<i>.{weight,bias}'.

    Returns:
        Tensor [pop_size] of per-member pass counts (0..64).
    """
    n_members = next(iter(pop.values())).shape[0]
    scores = torch.zeros(n_members, device=self.device)

    for val in range(8):
        # Address bits are fed MSB-first: [[bit2, bit1, bit0]].
        bit_vals = [float((val >> i) & 1) for i in range(3)]
        inp = torch.tensor([[bit_vals[2], bit_vals[1], bit_vals[0]]], device=self.device)

        # Each of the 8 output lines is its own single-neuron circuit.
        for line in range(8):
            w = pop[f'combinational.decoder3to8.out{line}.weight'].view(n_members, -1)
            b = pop[f'combinational.decoder3to8.out{line}.bias'].view(n_members)
            activation = heaviside(inp @ w.T + b)
            target = float(line == val)
            scores += (activation.squeeze() == target).float()

    return scores
|
| 598 |
+
|
| 599 |
+
def _test_encoder8to3(self, pop: Dict) -> torch.Tensor:
    """Score the 8-to-3 encoder: a one-hot input should map to the binary
    representation of the hot index, one circuit per output bit.

    Args:
        pop: population dict keyed
             'combinational.encoder8to3.bit<i>.{weight,bias}'.

    Returns:
        Tensor [pop_size] of per-member pass counts (0..24).
    """
    n_members = next(iter(pop.values())).shape[0]
    scores = torch.zeros(n_members, device=self.device)

    for val in range(8):
        # Build the one-hot input row for this value.
        one_hot = torch.zeros(1, 8, device=self.device)
        one_hot[0, val] = 1.0

        for bit in range(3):
            w = pop[f'combinational.encoder8to3.bit{bit}.weight'].view(n_members, -1)
            b = pop[f'combinational.encoder8to3.bit{bit}.bias'].view(n_members)
            activation = heaviside(one_hot @ w.T + b)
            target = float((val >> bit) & 1)
            scores += (activation.squeeze() == target).float()

    return scores
|
| 617 |
+
|
| 618 |
+
# =========================================================================
# CONTROL FLOW (8-bit conditional MUX)
# =========================================================================

def _test_conditional_jump(self, pop: Dict, name: str) -> torch.Tensor:
    """Test an 8-bit conditional jump (per-bit 2:1 MUX) circuit.

    Each output bit is computed by four gates:
        not_sel, and_a = AND(a_bit, sel), and_b = AND(b_bit, NOT sel),
        out = OR(and_a, and_b)
    so the circuit should route `a` through when sel == 1, else `b`.

    Args:
        pop:  population dict keyed
              'control.<name>.bit<i>.<gate>.{weight,bias}'.
        name: control-circuit name ('conditionaljump', 'jz', ...).

    Returns:
        Tensor [pop_size] of per-member pass counts
        (6 cases x 8 bits = 48 max).
    """
    pop_size = next(iter(pop.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)

    # Representative 8-bit (a, b, sel) triples.
    test_vals = [(0, 255, 0), (0, 255, 1), (127, 128, 0), (127, 128, 1),
                 (0xAA, 0x55, 0), (0xAA, 0x55, 1)]

    # Parameter views depend only on the bit index, not on the test case:
    # hoist them out of the per-case loop.
    gate_params = []
    for bit in range(8):
        base = f'control.{name}.bit{bit}'
        gate_params.append((
            pop[f'{base}.not_sel.weight'].view(pop_size, -1),
            pop[f'{base}.not_sel.bias'].view(pop_size),
            pop[f'{base}.and_a.weight'].view(pop_size, -1),
            pop[f'{base}.and_a.bias'].view(pop_size),
            pop[f'{base}.and_b.weight'].view(pop_size, -1),
            pop[f'{base}.and_b.bias'].view(pop_size),
            pop[f'{base}.or.weight'].view(pop_size, -1),
            pop[f'{base}.or.bias'].view(pop_size),
        ))

    for a_val, b_val, sel in test_vals:
        expected = a_val if sel == 1 else b_val
        sel_t = torch.full((pop_size,), float(sel), device=self.device)

        for bit in range(8):
            a_bit = (a_val >> bit) & 1
            b_bit = (b_val >> bit) & 1
            exp_bit = (expected >> bit) & 1

            a_t = torch.full((pop_size,), float(a_bit), device=self.device)
            b_t = torch.full((pop_size,), float(b_bit), device=self.device)

            (w_not, b_not, w_and_a, b_and_a,
             w_and_b, b_and_b, w_or, b_or) = gate_params[bit]

            # NOT sel.  BUG FIX: the original used
            # `sel_t.unsqueeze(1) @ w_not.T`, which yields a [pop, pop]
            # matrix; `.squeeze(1)` is a no-op for pop_size > 1 and the
            # torch.stack below then raises on mismatched shapes.  The
            # per-member elementwise product is the intended computation.
            not_sel = heaviside(sel_t * w_not.squeeze(1) + b_not)

            # AND(a, sel)
            inp_a = torch.stack([a_t, sel_t], dim=1)
            and_a = heaviside((inp_a * w_and_a).sum(1) + b_and_a)

            # AND(b, NOT sel)
            inp_b = torch.stack([b_t, not_sel], dim=1)
            and_b = heaviside((inp_b * w_and_b).sum(1) + b_and_b)

            # OR of the two legs.
            inp_or = torch.stack([and_a, and_b], dim=1)
            out = heaviside((inp_or * w_or).sum(1) + b_or)

            scores += (out == exp_bit).float()

    return scores
|
| 669 |
+
|
| 670 |
+
# =========================================================================
# ALU
# =========================================================================

def _test_alu_op(self, pop: Dict, op: str, test_fn) -> torch.Tensor:
    """Test an 8-bit ALU operation (placeholder implementation).

    NOTE(review): this check never evaluates the evolved weights — it
    recomputes the operation in Python and compares it against `test_fn`,
    so it scores full marks whenever `test_fn` encodes the same operation.
    Kept for interface compatibility; wire it to the actual
    'alu.alu8bit.*' parameters before relying on its scores.

    Args:
        pop:     population dict (used only to read the population size).
        op:      one of 'and', 'or', 'xor', 'not'.
        test_fn: reference implementation taking two ints (a, b).

    Returns:
        Tensor [pop_size] of per-member pass counts (0..8).

    Raises:
        ValueError: if `op` is not a supported operation.  (The original
        fell through with `out_val` unbound and raised NameError.)
    """
    pop_size = next(iter(pop.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)

    test_pairs = [(0, 0), (255, 255), (0, 255), (255, 0),
                  (0xAA, 0x55), (0x0F, 0xF0), (1, 1), (127, 128)]

    for a_val, b_val in test_pairs:
        expected = test_fn(a_val, b_val) & 0xFF

        # Python-side recomputation of the op (see NOTE above); the
        # original also built bit tensors and parameter views here that
        # were never used — removed as dead code.
        if op == 'and':
            out_val = a_val & b_val
        elif op == 'or':
            out_val = a_val | b_val
        elif op == 'xor':
            out_val = a_val ^ b_val
        elif op == 'not':
            out_val = (~a_val) & 0xFF
        else:
            raise ValueError(f"unsupported ALU op: {op!r}")

        scores += (out_val == expected)

    return scores
|
| 705 |
+
|
| 706 |
+
# =========================================================================
# MAIN EVALUATE
# =========================================================================

def evaluate(self, population: Dict[str, torch.Tensor]) -> torch.Tensor:
    """Evaluate fitness for entire population.

    Runs every circuit test suite, accumulating per-member pass counts
    and the total number of tests, then returns the normalized fraction.

    Args:
        population: dict of stacked parameter tensors; every value has a
            leading population dimension of the same size.

    Returns:
        Tensor [pop_size] of fitness values in [0, 1]
        (passed tests / total tests).

    Side effects:
        Stores the test count in ``self.total_tests``.
    """
    pop_size = next(iter(population.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)
    total_tests = 0

    # =================================================================
    # BOOLEAN GATES (34 tests: 4x4 single-layer + 2 NOT + 4 IMPLIES
    # + 3x4 two-layer)
    # =================================================================
    for gate in ['and', 'or', 'nand', 'nor']:
        scores += self._test_single_gate(population, gate, self.tt2, self.expected[gate])
        total_tests += 4

    # NOT: single-input gate, tested inline on both input values.
    w = population['boolean.not.weight'].view(pop_size, -1)
    b = population['boolean.not.bias'].view(pop_size)
    out = heaviside(self.not_inputs @ w.T + b)
    scores += (out == self.expected['not'].unsqueeze(1)).float().sum(0)
    total_tests += 2

    # IMPLIES
    scores += self._test_single_gate(population, 'implies', self.tt2, self.expected['implies'])
    total_tests += 4

    # XOR, XNOR, BIIMPLIES: not linearly separable, hence two-layer circuits.
    scores += self._test_twolayer_gate(population, 'boolean.xor', self.tt2, self.expected['xor'])
    scores += self._test_twolayer_gate(population, 'boolean.xnor', self.tt2, self.expected['xnor'])
    scores += self._test_twolayer_gate(population, 'boolean.biimplies', self.tt2, self.expected['biimplies'])
    total_tests += 12

    # =================================================================
    # ARITHMETIC - ADDERS (306 tests: 8 + 16 + 16 + 256 + 10)
    # =================================================================
    scores += self._test_halfadder(population)
    total_tests += 8

    scores += self._test_fulladder(population)
    total_tests += 16

    # Ripple carry adders: exhaustive for 2- and 4-bit, sampled for 8-bit.
    rc2_tests = [(a, b) for a in range(4) for b in range(4)]
    scores += self._test_ripplecarry(population, 2, rc2_tests)
    total_tests += 16

    rc4_tests = [(a, b) for a in range(16) for b in range(16)]
    scores += self._test_ripplecarry(population, 4, rc4_tests)
    total_tests += 256

    rc8_tests = [(0,0), (1,1), (127,128), (255,1), (128,127), (255,255),
                 (0xAA, 0x55), (0x0F, 0xF0), (100, 155), (200, 55)]
    scores += self._test_ripplecarry(population, 8, rc8_tests)
    total_tests += len(rc8_tests)

    # =================================================================
    # ARITHMETIC - COMPARATORS (5 x len(self.comp_a) tests)
    # =================================================================
    scores += self._test_comparator(population, 'greaterthan8bit', 'gt')
    scores += self._test_comparator(population, 'lessthan8bit', 'lt')
    scores += self._test_comparator(population, 'greaterorequal8bit', 'geq')
    scores += self._test_comparator(population, 'lessorequal8bit', 'leq')
    total_tests += 4 * len(self.comp_a)

    scores += self._test_equality(population)
    total_tests += len(self.comp_a)

    # =================================================================
    # THRESHOLD GATES (13 circuits x len(self.test_8bit) tests)
    # =================================================================
    for k, name in enumerate(['oneoutof8', 'twooutof8', 'threeoutof8', 'fouroutof8',
                              'fiveoutof8', 'sixoutof8', 'sevenoutof8', 'alloutof8'], 1):
        scores += self._test_threshold_kofn(population, k, name)
        total_tests += len(self.test_8bit)

    scores += self._test_majority(population)
    scores += self._test_minority(population)
    total_tests += 2 * len(self.test_8bit)

    scores += self._test_atleastk(population, 4)
    scores += self._test_atmostk(population, 4)
    scores += self._test_exactlyk(population, 4)
    total_tests += 3 * len(self.test_8bit)

    # =================================================================
    # PATTERN RECOGNITION (3 x len(self.test_8bit) tests)
    # =================================================================
    scores += self._test_popcount(population)
    scores += self._test_allzeros(population)
    scores += self._test_allones(population)
    total_tests += 3 * len(self.test_8bit)

    # =================================================================
    # ERROR DETECTION (2 x len(self.test_8bit) tests)
    # NOTE(review): the generator is also scored with even=True — confirm
    # this is intended (an odd-parity generator would need even=False).
    # =================================================================
    scores += self._test_parity(population, 'paritychecker8bit', True)
    scores += self._test_parity(population, 'paritygenerator8bit', True)
    total_tests += 2 * len(self.test_8bit)

    # =================================================================
    # MODULAR ARITHMETIC (len(self.mod_test) values x 11 moduli)
    # =================================================================
    for mod in [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]:
        scores += self._test_modular(population, mod)
        total_tests += len(self.mod_test)

    # =================================================================
    # COMBINATIONAL (96 tests: 8 MUX + 64 decoder + 24 encoder)
    # =================================================================
    scores += self._test_mux2to1(population)
    total_tests += 8

    scores += self._test_decoder3to8(population)
    total_tests += 64

    scores += self._test_encoder8to3(population)
    total_tests += 24

    # =================================================================
    # CONTROL FLOW (432 tests: 9 circuits x 6 cases x 8 bits)
    # =================================================================
    for ctrl in ['conditionaljump', 'jz', 'jnz', 'jc', 'jnc', 'jn', 'jp', 'jv', 'jnv']:
        scores += self._test_conditional_jump(population, ctrl)
        total_tests += 6 * 8

    # Remember the count so callers can turn a fitness back into a fail count.
    self.total_tests = total_tests
    return scores / total_tests
|
| 835 |
+
|
| 836 |
+
|
| 837 |
+
def create_population(base_tensors: Dict[str, torch.Tensor],
                      pop_size: int,
                      device='cuda') -> Dict[str, torch.Tensor]:
    """Create a population by replicating each base tensor `pop_size` times.

    Every entry gains a leading population dimension:
    [*shape] -> [pop_size, *shape].  Copies are materialized with
    ``clone()`` so each population member can be mutated independently of
    the others and of the base tensors.

    Args:
        base_tensors: mapping of parameter name -> base tensor.
        pop_size:     number of population members to create.
        device:       target device for the population tensors.

    Returns:
        Dict with the same keys and replicated tensors on `device`.
    """
    population = {}
    for name, weight in base_tensors.items():
        # Move to the target device BEFORE cloning: the original cloned the
        # expanded tensor on the source device and then copied it to
        # `device`, allocating the full population twice.  `expand` itself
        # is a zero-copy view; only the final clone materializes memory.
        replicated = weight.to(device).unsqueeze(0).expand(pop_size, *weight.shape)
        population[name] = replicated.clone()
    return population
|
| 845 |
+
|
| 846 |
+
|
| 847 |
+
if __name__ == "__main__":
    # Smoke test: load the reference model, evaluate a population of one,
    # and report fitness plus wall-clock time.  Requires a CUDA device
    # (device='cuda' is hard-coded below).
    import time

    print("="*70)
    print(" IRON EVAL - COMPREHENSIVE TEST")
    print("="*70)

    print("\nLoading model...")
    model = load_model_10166()
    print(f"Loaded {len(model)} tensors, {sum(t.numel() for t in model.values())} params")

    print("\nInitializing evaluator...")
    evaluator = BatchedFitnessEvaluator(device='cuda')

    print("\nCreating population (size 1)...")
    pop = create_population(model, pop_size=1, device='cuda')

    print("\nRunning evaluation...")
    # Synchronize before and after so the timing covers the actual GPU
    # work, not just kernel launch latency.
    torch.cuda.synchronize()
    start = time.perf_counter()
    fitness = evaluator.evaluate(pop)
    torch.cuda.synchronize()
    elapsed = time.perf_counter() - start

    print(f"\nResults:")
    print(f" Fitness: {fitness[0]:.6f}")
    print(f" Total tests: {evaluator.total_tests}")
    print(f" Time: {elapsed*1000:.2f} ms")

    # Fitness is passed/total, so 1.0 means every circuit test passed;
    # otherwise reconstruct the approximate failure count from the ratio.
    if fitness[0] == 1.0:
        print("\n STATUS: PASS - All circuits functional")
    else:
        failed = int((1 - fitness[0]) * evaluator.total_tests)
        print(f"\n STATUS: FAIL - {failed} tests failed")
|