CharlesCNorton committed on
Commit
df088a9
·
1 Parent(s): a5b1777

Add 3-operand adder circuit (arithmetic.add3_8bit)

Browse files

- build.py: add_full_adder() and add_add3() functions
- build.py: infer_add3_inputs() for routing metadata
- eval.py: _test_add3() with 240 test cases including 15+27+33=75
- Fitness 1.000000, all tests pass

Files changed (4) hide show
  1. README.md +7 -5
  2. build.py +106 -1
  3. eval.py +110 -0
  4. neural_computer.safetensors +2 -2
README.md CHANGED
@@ -457,15 +457,17 @@ The interface generalizes to **all** 65,536 8-bit additions once trained—no me
457
 
458
  ### Extension Roadmap
459
 
460
- 1. **Multi-operand expressions (15 + 27 + 33)** — Accumulator pattern: result = 0; for each operand, result = ADD(result, operand). Router must fire multiple times per input sequence. Requires stateful dispatch or unrolled circuit.
461
 
462
- 2. **Order of operations (5 + 3 × 2 = 11)** — Parse expression into tree, evaluate depth-first. MUL before ADD. Requires either: (a) expression parser producing evaluation order, or (b) learned routing that implicitly respects precedence.
463
 
464
- 3. **Parenthetical expressions ((5 + 3) × 2 = 16)** Explicit grouping overrides precedence. Parser must recognize parens and build correct tree. Evaluation proceeds innermost-out. Adds complexity to extraction layer.
465
 
466
- 4. **16-bit operations (0-65535)** — Chain two 8-bit circuits with carry propagation. ADD16: low = ADD8(A_lo, B_lo), high = ADD8(A_hi, B_hi, carry_out). MUL16: four partial products + shift-add. Doubles operand extraction width.
467
 
468
- 5. **Floating point arithmetic** — IEEE 754-style with separate circuits for mantissa and exponent. ADD: align exponents, add mantissas, renormalize. MUL: add exponents, multiply mantissas. Requires sign handling, overflow detection, and rounding logic.
 
 
469
 
470
  ---
471
 
 
457
 
458
  ### Extension Roadmap
459
 
460
+ 1. **Order of operations (5 + 3 × 2 = 11)** — Parse expression into tree, evaluate depth-first. MUL before ADD. Requires either: (a) expression parser producing evaluation order, or (b) learned routing that implicitly respects precedence.
461
 
462
+ 2. **Parenthetical expressions ((5 + 3) × 2 = 16)** — Explicit grouping overrides precedence. Parser must recognize parens and build correct tree. Evaluation proceeds innermost-out. Adds complexity to extraction layer.
463
 
464
+ 3. **16-bit operations (0-65535)** — Chain two 8-bit circuits with carry propagation. ADD16: low = ADD8(A_lo, B_lo), high = ADD8(A_hi, B_hi, carry_out). MUL16: four partial products + shift-add. Doubles operand extraction width.
465
 
466
+ 4. **Floating point arithmetic** — IEEE 754-style with separate circuits for mantissa and exponent. ADD: align exponents, add mantissas, renormalize. MUL: add exponents, multiply mantissas. Requires sign handling, overflow detection, and rounding logic.
467
 
468
+ ### Completed Extensions
469
+
470
+ - **3-operand addition (15 + 27 + 33 = 75)** — `arithmetic.add3_8bit` chains two 8-bit ripple carry stages. 16 full adders, 144 gates, 240 test cases verified.
471
 
472
  ---
473
 
build.py CHANGED
@@ -235,6 +235,51 @@ def add_fetch_load_store_buffers(tensors: Dict[str, torch.Tensor], addr_bits: in
235
  add_gate(tensors, f"control.mem_addr.bit{bit}", [1.0], [-1.0])
236
 
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  def add_shl_shr(tensors: Dict[str, torch.Tensor]) -> None:
239
  """Add SHL (shift left) and SHR (shift right) circuits.
240
 
@@ -604,6 +649,58 @@ def infer_ripplecarry_inputs(gate: str, prefix: str, bits: int, reg: SignalRegis
604
  return []
605
 
606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  def infer_adcsbc_inputs(gate: str, prefix: str, is_sub: bool, reg: SignalRegistry) -> List[int]:
608
  for i in range(8):
609
  reg.register(f"{prefix}.$a[{i}]")
@@ -1080,6 +1177,8 @@ def infer_inputs_for_gate(gate: str, reg: SignalRegistry, tensors: Dict[str, tor
1080
  return infer_ripplecarry_inputs(gate, "arithmetic.ripplecarry4bit", 4, reg)
1081
  if 'ripplecarry8bit' in gate:
1082
  return infer_ripplecarry_inputs(gate, "arithmetic.ripplecarry8bit", 8, reg)
 
 
1083
  if 'adc8bit' in gate:
1084
  return infer_adcsbc_inputs(gate, "arithmetic.adc8bit", False, reg)
1085
  if 'sbc8bit' in gate:
@@ -1305,7 +1404,7 @@ def cmd_alu(args) -> None:
1305
  "alu.alu8bit.neg.", "alu.alu8bit.rol.", "alu.alu8bit.ror.",
1306
  "arithmetic.greaterthan8bit.", "arithmetic.lessthan8bit.",
1307
  "arithmetic.greaterorequal8bit.", "arithmetic.lessorequal8bit.",
1308
- "arithmetic.equality8bit.",
1309
  "control.push.", "control.pop.", "control.ret.",
1310
  "combinational.barrelshifter.", "combinational.priorityencoder.",
1311
  ])
@@ -1370,6 +1469,12 @@ def cmd_alu(args) -> None:
1370
  print(" Added GT, GE, LT, LE (single-layer), EQ (two-layer)")
1371
  except ValueError as e:
1372
  print(f" Comparators already exist: {e}")
 
 
 
 
 
 
1373
  if args.apply:
1374
  print(f"\nSaving: {args.model}")
1375
  save_file(tensors, str(args.model))
 
235
  add_gate(tensors, f"control.mem_addr.bit{bit}", [1.0], [-1.0])
236
 
237
 
238
def add_full_adder(tensors: Dict[str, torch.Tensor], prefix: str) -> None:
    """Add the nine gates of a single full adder under the given prefix.

    Structure (two half adders plus a carry OR):
      - ha1: sum = A XOR B,        carry = A AND B
      - ha2: sum = ha1.sum XOR Cin, carry = ha1.sum AND Cin
      - carry_or: Cout = ha1.carry OR ha2.carry

    Each XOR is built in two layers (an OR gate and a NAND gate feeding
    an AND gate), so one full adder costs 9 gates in total.
    """
    for ha in ("ha1", "ha2"):
        # Two-layer XOR for the half adder's sum: layer1 produces OR and
        # NAND of the inputs, layer2 ANDs them together.
        add_gate(tensors, f"{prefix}.{ha}.sum.layer1.or", [1.0, 1.0], [-1.0])
        add_gate(tensors, f"{prefix}.{ha}.sum.layer1.nand", [-1.0, -1.0], [1.0])
        add_gate(tensors, f"{prefix}.{ha}.sum.layer2", [1.0, 1.0], [-2.0])
        # AND gate for this half adder's carry.
        add_gate(tensors, f"{prefix}.{ha}.carry", [1.0, 1.0], [-2.0])
    # OR of the two half-adder carries gives the final carry out.
    add_gate(tensors, f"{prefix}.carry_or", [1.0, 1.0], [-1.0])
260
+
261
+
262
def add_add3(tensors: Dict[str, torch.Tensor]) -> None:
    """Add the 3-operand 8-bit adder circuit (``arithmetic.add3_8bit``).

    Computes A + B + C with two chained ripple-carry stages:
      - stage1: temp   = A + B      (8 full adders)
      - stage2: result = temp + C   (8 full adders)

    Inputs:  $a[0-7], $b[0-7], $c[0-7] (MSB-first)
    Outputs: stage2.fa0-7.ha2.sum.layer2 (result bits),
             stage2.fa7.carry_or (overflow)

    Total: 16 full adders = 144 gates.
    """
    base = "arithmetic.add3_8bit"
    # Stage 1 (A + B) first, then stage 2 (temp + C); fa0 is the LSB adder.
    for stage in ("stage1", "stage2"):
        for bit in range(8):
            add_full_adder(tensors, f"{base}.{stage}.fa{bit}")
281
+
282
+
283
  def add_shl_shr(tensors: Dict[str, torch.Tensor]) -> None:
284
  """Add SHL (shift left) and SHR (shift right) circuits.
285
 
 
649
  return []
650
 
651
 
652
def infer_add3_inputs(gate: str, reg: SignalRegistry) -> List[int]:
    """Infer input wiring for the 3-operand adder (A + B + C).

    Maps each gate of ``arithmetic.add3_8bit`` to the signal ids of its
    two inputs: operand bits, the previous adder's carry, or internal
    half-adder signals. Returns [] for gate names that don't match the
    circuit layout.

    Args:
        gate: Fully qualified gate name within arithmetic.add3_8bit.
        reg: Signal registry used to register/look up signal ids.

    Returns:
        List of input signal ids for the gate (always 2 ids, or empty).
    """
    prefix = "arithmetic.add3_8bit"
    # Register all operand inputs (MSB-first, indices 0-7).
    # NOTE(review): unlike infer_adcsbc_inputs, these are registered
    # WITHOUT the circuit prefix — confirm this matches the registry
    # convention expected by the evaluator/router.
    for i in range(8):
        reg.register(f"$a[{i}]")
        reg.register(f"$b[{i}]")
        reg.register(f"$c[{i}]")

    # Parse the full-adder index once; fa index counts from the LSB.
    m = re.search(r'\.fa(\d+)\.', gate)
    if not m:
        return []
    bit = int(m.group(1))

    if '.stage1.' in gate:
        stage = "stage1"
        # Stage 1 adds A and B; operand bits are MSB-first, so LSB is index 7.
        a_bit = reg.get_id(f"$a[{7 - bit}]")
        b_bit = reg.get_id(f"$b[{7 - bit}]")
    elif '.stage2.' in gate:
        stage = "stage2"
        # Stage 2 adds the stage-1 sum bit and operand C.
        a_bit = reg.register(f"{prefix}.stage1.fa{bit}.ha2.sum.layer2")
        b_bit = reg.get_id(f"$c[{7 - bit}]")
    else:
        return []

    # Carry-in: constant 0 for the LSB adder, otherwise the previous
    # full adder's carry out within the same stage.
    cin = reg.get_id("#0") if bit == 0 else reg.register(f"{prefix}.{stage}.fa{bit - 1}.carry_or")
    fa_prefix = f"{prefix}.{stage}.fa{bit}"

    if '.ha1.sum.layer1' in gate:
        return [a_bit, b_bit]
    if '.ha1.sum.layer2' in gate:
        return [reg.register(f"{fa_prefix}.ha1.sum.layer1.or"),
                reg.register(f"{fa_prefix}.ha1.sum.layer1.nand")]
    if '.ha1.carry' in gate and '.layer' not in gate:
        return [a_bit, b_bit]
    if '.ha2.sum.layer1' in gate:
        return [reg.register(f"{fa_prefix}.ha1.sum.layer2"), cin]
    if '.ha2.sum.layer2' in gate:
        return [reg.register(f"{fa_prefix}.ha2.sum.layer1.or"),
                reg.register(f"{fa_prefix}.ha2.sum.layer1.nand")]
    if '.ha2.carry' in gate and '.layer' not in gate:
        return [reg.register(f"{fa_prefix}.ha1.sum.layer2"), cin]
    if '.carry_or' in gate:
        return [reg.register(f"{fa_prefix}.ha1.carry"), reg.register(f"{fa_prefix}.ha2.carry")]
    return []
+
703
+
704
  def infer_adcsbc_inputs(gate: str, prefix: str, is_sub: bool, reg: SignalRegistry) -> List[int]:
705
  for i in range(8):
706
  reg.register(f"{prefix}.$a[{i}]")
 
1177
  return infer_ripplecarry_inputs(gate, "arithmetic.ripplecarry4bit", 4, reg)
1178
  if 'ripplecarry8bit' in gate:
1179
  return infer_ripplecarry_inputs(gate, "arithmetic.ripplecarry8bit", 8, reg)
1180
+ if 'add3_8bit' in gate:
1181
+ return infer_add3_inputs(gate, reg)
1182
  if 'adc8bit' in gate:
1183
  return infer_adcsbc_inputs(gate, "arithmetic.adc8bit", False, reg)
1184
  if 'sbc8bit' in gate:
 
1404
  "alu.alu8bit.neg.", "alu.alu8bit.rol.", "alu.alu8bit.ror.",
1405
  "arithmetic.greaterthan8bit.", "arithmetic.lessthan8bit.",
1406
  "arithmetic.greaterorequal8bit.", "arithmetic.lessorequal8bit.",
1407
+ "arithmetic.equality8bit.", "arithmetic.add3_8bit.",
1408
  "control.push.", "control.pop.", "control.ret.",
1409
  "combinational.barrelshifter.", "combinational.priorityencoder.",
1410
  ])
 
1469
  print(" Added GT, GE, LT, LE (single-layer), EQ (two-layer)")
1470
  except ValueError as e:
1471
  print(f" Comparators already exist: {e}")
1472
+ print("\nGenerating 3-operand adder circuit...")
1473
+ try:
1474
+ add_add3(tensors)
1475
+ print(" Added ADD3 (16 full adders = 144 gates)")
1476
+ except ValueError as e:
1477
+ print(f" ADD3 already exists: {e}")
1478
  if args.apply:
1479
  print(f"\nSaving: {args.model}")
1480
  save_file(tensors, str(args.model))
eval.py CHANGED
@@ -527,6 +527,110 @@ class BatchedFitnessEvaluator:
527
 
528
  return correct, num_tests
529
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
  # =========================================================================
531
  # COMPARATORS
532
  # =========================================================================
@@ -2340,6 +2444,12 @@ class BatchedFitnessEvaluator:
2340
  total_tests += t
2341
  self.category_scores[f'ripplecarry{bits}'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
2342
 
 
 
 
 
 
 
2343
  # Comparators
2344
  s, t = self._test_comparators(population, debug)
2345
  scores += s
 
527
 
528
  return correct, num_tests
529
 
530
+ # =========================================================================
531
+ # 3-OPERAND ADDER
532
+ # =========================================================================
533
+
534
+ def _test_add3(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
535
+ """Test 3-operand 8-bit adder (A + B + C)."""
536
+ pop_size = next(iter(pop.values())).shape[0]
537
+
538
+ if debug:
539
+ print(f"\n=== 3-OPERAND ADDER ===")
540
+
541
+ prefix = 'arithmetic.add3_8bit'
542
+ bits = 8
543
+
544
+ # Strategic test cases for 3-operand addition
545
+ # Include edge cases and overflow scenarios
546
+ test_cases = []
547
+ # Small values
548
+ for a in [0, 1, 2]:
549
+ for b in [0, 1, 2]:
550
+ for c in [0, 1, 2]:
551
+ test_cases.append((a, b, c))
552
+ # Edge values
553
+ edge = [0, 1, 127, 128, 254, 255]
554
+ for a in edge:
555
+ for b in edge:
556
+ for c in edge:
557
+ test_cases.append((a, b, c))
558
+ # Specific multi-operand expression tests
559
+ test_cases.extend([
560
+ (15, 27, 33), # Example from roadmap: 15 + 27 + 33 = 75
561
+ (100, 100, 55), # = 255 (exact fit)
562
+ (100, 100, 56), # = 256 -> 0 (overflow)
563
+ (85, 85, 85), # = 255 (exact fit)
564
+ (86, 85, 85), # = 256 -> 0 (overflow)
565
+ ])
566
+ test_cases = list(set(test_cases))
567
+
568
+ a_vals = torch.tensor([t[0] for t in test_cases], device=self.device)
569
+ b_vals = torch.tensor([t[1] for t in test_cases], device=self.device)
570
+ c_vals = torch.tensor([t[2] for t in test_cases], device=self.device)
571
+ num_tests = len(test_cases)
572
+
573
+ # Convert to bits [num_tests, bits] MSB-first
574
+ a_bits = torch.stack([((a_vals >> (bits - 1 - i)) & 1).float() for i in range(bits)], dim=1)
575
+ b_bits = torch.stack([((b_vals >> (bits - 1 - i)) & 1).float() for i in range(bits)], dim=1)
576
+ c_bits = torch.stack([((c_vals >> (bits - 1 - i)) & 1).float() for i in range(bits)], dim=1)
577
+
578
+ # Stage 1: A + B
579
+ carry1 = torch.zeros(num_tests, pop_size, device=self.device)
580
+ stage1_bits = []
581
+ for bit in range(bits):
582
+ bit_idx = bits - 1 - bit # LSB first
583
+ s, carry1 = self._eval_single_fa(
584
+ pop, f'{prefix}.stage1.fa{bit}',
585
+ a_bits[:, bit_idx].unsqueeze(1).expand(-1, pop_size),
586
+ b_bits[:, bit_idx].unsqueeze(1).expand(-1, pop_size),
587
+ carry1
588
+ )
589
+ stage1_bits.append(s)
590
+
591
+ # Stage 2: stage1_result + C
592
+ carry2 = torch.zeros(num_tests, pop_size, device=self.device)
593
+ result_bits = []
594
+ for bit in range(bits):
595
+ bit_idx = bits - 1 - bit # LSB first
596
+ s, carry2 = self._eval_single_fa(
597
+ pop, f'{prefix}.stage2.fa{bit}',
598
+ stage1_bits[bit], # Already [num_tests, pop_size]
599
+ c_bits[:, bit_idx].unsqueeze(1).expand(-1, pop_size),
600
+ carry2
601
+ )
602
+ result_bits.append(s)
603
+
604
+ # Reconstruct result (bits are in LSB-first order, need to reverse for MSB-first)
605
+ result_bits = torch.stack(result_bits[::-1], dim=-1) # MSB first
606
+ result = torch.zeros(num_tests, pop_size, device=self.device)
607
+ for i in range(bits):
608
+ result += result_bits[:, :, i] * (1 << (bits - 1 - i))
609
+
610
+ # Expected (8-bit wrap)
611
+ expected = ((a_vals + b_vals + c_vals) & 0xFF).unsqueeze(1).expand(-1, pop_size).float()
612
+ correct = (result == expected).float().sum(0)
613
+
614
+ failures = []
615
+ if pop_size == 1:
616
+ for i in range(min(num_tests, 100)):
617
+ if result[i, 0].item() != expected[i, 0].item():
618
+ failures.append((
619
+ [int(a_vals[i].item()), int(b_vals[i].item()), int(c_vals[i].item())],
620
+ int(expected[i, 0].item()),
621
+ int(result[i, 0].item())
622
+ ))
623
+
624
+ self._record(prefix, int(correct[0].item()), num_tests, failures)
625
+ if debug:
626
+ r = self.results[-1]
627
+ print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
628
+ if failures:
629
+ for inp, exp, got in failures[:5]:
630
+ print(f" FAIL: {inp[0]} + {inp[1]} + {inp[2]} = {exp}, got {got}")
631
+
632
+ return correct, num_tests
633
+
634
  # =========================================================================
635
  # COMPARATORS
636
  # =========================================================================
 
2444
  total_tests += t
2445
  self.category_scores[f'ripplecarry{bits}'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
2446
 
2447
+ # 3-operand adder
2448
+ s, t = self._test_add3(population, debug)
2449
+ scores += s
2450
+ total_tests += t
2451
+ self.category_scores['add3'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
2452
+
2453
  # Comparators
2454
  s, t = self._test_comparators(population, debug)
2455
  scores += s
neural_computer.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8f97c127018647da3a788ee40cbe498ee583d2031bbec04e9347894b1fb5c19
3
- size 34491396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270309b1ac57e808827cee555b6f6f9e3f14c37abe23fa21069db4ff251a0b72
3
+ size 34552948