PortfolioAI commited on
Commit
b8ed073
·
1 Parent(s): a4326d5

Add CLZ16BIT, fix README claims, update TODO

Browse files

- arithmetic.clz16bit: 16-bit count leading zeros (63 gates)
- Remove "formally verified" claim (exhaustively tested, not formally proven)
- Mark evaluator improvements complete
- WIP: float16.normalize scaffolding

Files changed (5) hide show
  1. README.md +1 -1
  2. TODO.md +4 -4
  3. arithmetic.safetensors +2 -2
  4. convert_to_explicit_inputs.py +391 -0
  5. eval.py +51 -0
README.md CHANGED
@@ -16,7 +16,7 @@ pipeline_tag: other
16
 
17
  **Verified arithmetic circuits as frozen neural network weights.**
18
 
19
- This repository contains a complete, formally verified arithmetic core implemented as threshold logic gates stored in safetensors format. Every tensor in this model represents a neural network weight or bias that, when combined with a Heaviside step activation function, computes exact arithmetic operations with 100% correctness across all possible inputs.
20
 
21
  ---
22
 
 
16
 
17
  **Verified arithmetic circuits as frozen neural network weights.**
18
 
19
+ This repository contains an arithmetic core implemented as threshold logic gates stored in safetensors format. Every tensor represents a neural network weight or bias that, when combined with a Heaviside step activation function, computes exact arithmetic operations. Circuits are tested against reference implementations — exhaustively over all possible inputs where the input space permits, and with seeded random sampling for wider inputs (100% pass rate).
20
 
21
  ---
22
 
TODO.md CHANGED
@@ -18,7 +18,7 @@
18
 
19
  ### Supporting Infrastructure
20
  - [x] `arithmetic.clz8bit` -- count leading zeros (needed for float normalization)
21
- - [ ] `arithmetic.clz16bit` -- 16-bit count leading zeros
22
 
23
  ## Medium Priority
24
 
@@ -31,9 +31,9 @@
31
  - [ ] `arithmetic.lcm8bit` -- least common multiple
32
 
33
  ### Evaluator Improvements
34
- - [ ] Full circuit evaluation using .inputs topology
35
- - [ ] Exhaustive testing for all circuits (not just comparators/thresholds)
36
- - [ ] Automatic topological sort from signal registry
37
 
38
  ## Low Priority
39
 
 
18
 
19
  ### Supporting Infrastructure
20
  - [x] `arithmetic.clz8bit` -- count leading zeros (needed for float normalization)
21
+ - [x] `arithmetic.clz16bit` -- 16-bit count leading zeros
22
 
23
  ## Medium Priority
24
 
 
31
  - [ ] `arithmetic.lcm8bit` -- least common multiple
32
 
33
  ### Evaluator Improvements
34
+ - [x] Full circuit evaluation using .inputs topology
35
+ - [x] Exhaustive testing for boolean, threshold, CLZ, float16, comparator circuits
36
+ - [x] Automatic topological sort from signal registry
37
 
38
  ## Low Priority
39
 
arithmetic.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4272c22035d7c264fd8f6bcb22c129f01cd033fb4061b77f94b4f93555a2e823
3
- size 1084844
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebe8e155f964f27d26a8a35750f6af361556a65c1178a1c96e4dd5eea95a66c4
3
+ size 1111188
convert_to_explicit_inputs.py CHANGED
@@ -694,6 +694,105 @@ def infer_minmax_inputs(gate: str, registry: SignalRegistry) -> List[int]:
694
  return inputs
695
 
696
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
  def infer_clz8bit_inputs(gate: str, registry: SignalRegistry) -> List[int]:
698
  """Infer inputs for CLZ8BIT (count leading zeros)."""
699
  prefix = "arithmetic.clz8bit"
@@ -938,6 +1037,8 @@ def infer_inputs_for_gate(gate: str, registry: SignalRegistry, routing: dict) ->
938
  return infer_comparator_inputs(gate, registry)
939
 
940
  # CLZ (count leading zeros)
 
 
941
  if 'clz8bit' in gate:
942
  return infer_clz8bit_inputs(gate, registry)
943
 
@@ -949,11 +1050,125 @@ def infer_inputs_for_gate(gate: str, registry: SignalRegistry, routing: dict) ->
949
  return infer_float16_pack_inputs(gate, registry)
950
  if 'cmp' in gate:
951
  return infer_float16_cmp_inputs(gate, registry)
 
 
952
 
953
  # Default: couldn't infer, return empty (will need manual fix or routing)
954
  return []
955
 
956
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
957
  def infer_float16_cmp_inputs(gate: str, registry: SignalRegistry) -> List[int]:
958
  """Infer inputs for float16.cmp circuit."""
959
  prefix = "float16.cmp"
@@ -1115,6 +1330,94 @@ def infer_float16_unpack_inputs(gate: str, registry: SignalRegistry) -> List[int
1115
  return []
1116
 
1117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1118
  def build_float16_cmp_tensors() -> Dict[str, torch.Tensor]:
1119
  """Build tensors for float16.cmp circuit.
1120
 
@@ -1255,6 +1558,90 @@ def build_float16_unpack_tensors() -> Dict[str, torch.Tensor]:
1255
  return tensors
1256
 
1257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1258
  def build_clz8bit_tensors() -> Dict[str, torch.Tensor]:
1259
  """Build tensors for arithmetic.clz8bit circuit.
1260
 
@@ -1330,6 +1717,10 @@ def main():
1330
  tensors.update(clz_tensors)
1331
  print(f" CLZ8BIT: {len(clz_tensors)} tensors")
1332
 
 
 
 
 
1333
  unpack_tensors = build_float16_unpack_tensors()
1334
  tensors.update(unpack_tensors)
1335
  print(f" float16.unpack: {len(unpack_tensors)} tensors")
 
694
  return inputs
695
 
696
 
697
def infer_clz16bit_inputs(gate: str, registry: SignalRegistry) -> List[int]:
    """Infer inputs for CLZ16BIT (count leading zeros, 16-bit).

    Mirrors the CLZ8BIT topology: pz gates NOR the top-k bits, ge gates
    threshold the pz layer, and AND/OR gates convert the thermometer code
    into a 5-bit binary count.  Signals are registered in dependency order
    before any consumer asks for their ids.
    """
    prefix = "arithmetic.clz16bit"
    sid = registry.get_id

    # Primary input: the 16-bit word being counted.
    for bit in range(16):
        registry.register(f"{prefix}.$x[{bit}]")

    # pz{k}: NOR over the top k bits of x (x[15] is the MSB).
    if '.pz' in gate:
        m = re.search(r'\.pz(\d+)', gate)
        if m:
            width = int(m.group(1))
            return [sid(f"{prefix}.$x[{15 - j}]") for j in range(width)]

    for idx in range(1, 17):
        registry.register(f"{prefix}.pz{idx}")
    pz_layer = [sid(f"{prefix}.pz{idx}") for idx in range(1, 17)]

    # ge{k}: threshold over the entire pz layer (every ge gate sees all 16).
    if '.ge' in gate and '.not_ge' not in gate:
        if re.search(r'\.ge(\d+)', gate):
            return pz_layer

    for idx in range(1, 17):
        registry.register(f"{prefix}.ge{idx}")

    # not_ge{k}: inverter fed by the matching ge gate.
    if '.not_ge' in gate:
        m = re.search(r'\.not_ge(\d+)', gate)
        if m:
            return [sid(f"{prefix}.ge{int(m.group(1))}")]

    for idx in (2, 4, 6, 8, 10, 12, 14, 16):
        registry.register(f"{prefix}.not_ge{idx}")

    # Range detectors: each AND pairs a lower ge bound with an upper not_ge bound.
    range_pairs = (
        ('and_8_15', 'ge8', 'not_ge16'),
        ('and_4_7', 'ge4', 'not_ge8'),
        ('and_12_15', 'ge12', 'not_ge16'),
        ('and_2_3', 'ge2', 'not_ge4'),
        ('and_6_7', 'ge6', 'not_ge8'),
        ('and_10_11', 'ge10', 'not_ge12'),
        ('and_14_15', 'ge14', 'not_ge16'),
    )
    for name, lo, hi in range_pairs:
        if f'.{name}' in gate:
            return [sid(f"{prefix}.{lo}"), sid(f"{prefix}.{hi}")]

    # Odd single-value ANDs; the $-anchored regex keeps .and_1 from
    # matching .and_15 (and never fires for the and_a_b range names).
    m = re.search(r'\.and_(\d+)$', gate)
    if m:
        val = int(m.group(1))
        if val in (1, 3, 5, 7, 9, 11, 13, 15):
            return [sid(f"{prefix}.ge{val}"), sid(f"{prefix}.not_ge{val + 1}")]

    for name, _, _ in range_pairs:
        registry.register(f"{prefix}.{name}")
    for val in (1, 3, 5, 7, 9, 11, 13, 15):
        registry.register(f"{prefix}.and_{val}")

    # OR layers produce the low three bits of the binary count.
    if '.or_bit2' in gate:
        return [sid(f"{prefix}.and_4_7"), sid(f"{prefix}.and_12_15")]
    if '.or_bit1' in gate:
        return [sid(f"{prefix}.{n}") for n in ('and_2_3', 'and_6_7', 'and_10_11', 'and_14_15')]
    if '.or_bit0' in gate:
        return [sid(f"{prefix}.and_{val}") for val in (1, 3, 5, 7, 9, 11, 13, 15)]

    for n in ('or_bit2', 'or_bit1', 'or_bit0'):
        registry.register(f"{prefix}.{n}")

    # Output pass-throughs for the five result bits.
    out_sources = (
        ('out4', 'ge16'),
        ('out3', 'and_8_15'),
        ('out2', 'or_bit2'),
        ('out1', 'or_bit1'),
        ('out0', 'or_bit0'),
    )
    for name, src in out_sources:
        if f'.{name}' in gate:
            return [sid(f"{prefix}.{src}")]

    return []
796
  def infer_clz8bit_inputs(gate: str, registry: SignalRegistry) -> List[int]:
797
  """Infer inputs for CLZ8BIT (count leading zeros)."""
798
  prefix = "arithmetic.clz8bit"
 
1037
  return infer_comparator_inputs(gate, registry)
1038
 
1039
  # CLZ (count leading zeros)
1040
+ if 'clz16bit' in gate:
1041
+ return infer_clz16bit_inputs(gate, registry)
1042
  if 'clz8bit' in gate:
1043
  return infer_clz8bit_inputs(gate, registry)
1044
 
 
1050
  return infer_float16_pack_inputs(gate, registry)
1051
  if 'cmp' in gate:
1052
  return infer_float16_cmp_inputs(gate, registry)
1053
+ if 'normalize' in gate:
1054
+ return infer_float16_normalize_inputs(gate, registry)
1055
 
1056
  # Default: couldn't infer, return empty (will need manual fix or routing)
1057
  return []
1058
 
1059
 
1060
def infer_float16_normalize_inputs(gate: str, registry: SignalRegistry) -> List[int]:
    """Infer inputs for float16.normalize circuit.

    Given a 13-bit extended mantissa ($m[12:0]) the circuit exposes:
    - overflow / not_overflow: $m[12] set means a right shift is needed
    - is_zero: NOR of all 13 mantissa bits
    - shift{3..0}: binary encoding of min(CLZ(m[11:0]), 12)
    - out_shift{3..0}: shift bits masked by not_overflow

    Signals are registered in dependency order; registration must happen
    before any gate that consumes the signal asks for its id.
    """
    prefix = "float16.normalize"

    # Register 13-bit mantissa input
    for i in range(13):
        registry.register(f"{prefix}.$m[{i}]")

    # Overflow detection (bit 12)
    if '.overflow' in gate and '.not_overflow' not in gate:
        return [registry.get_id(f"{prefix}.$m[12]")]

    registry.register(f"{prefix}.overflow")

    # is_zero (NOR of all mantissa bits)
    if '.is_zero' in gate:
        return [registry.get_id(f"{prefix}.$m[{i}]") for i in range(13)]

    # pz gates (CLZ on bits 11:0)
    if '.pz' in gate:
        match = re.search(r'\.pz(\d+)', gate)
        if match:
            k = int(match.group(1))
            # Check top k bits of m[11:0]
            return [registry.get_id(f"{prefix}.$m[{11-i}]") for i in range(k)]

    # Register pz outputs
    for i in range(1, 13):
        registry.register(f"{prefix}.pz{i}")

    pz_ids = [registry.get_id(f"{prefix}.pz{i}") for i in range(1, 13)]

    # ge gates: every ge gate thresholds the whole pz layer
    if '.ge' in gate and '.not_ge' not in gate:
        match = re.search(r'\.ge(\d+)', gate)
        if match:
            return pz_ids

    # Register ge outputs
    for k in range(1, 13):
        registry.register(f"{prefix}.ge{k}")

    # NOT gates
    if '.not_ge' in gate:
        match = re.search(r'\.not_ge(\d+)', gate)
        if match:
            k = int(match.group(1))
            return [registry.get_id(f"{prefix}.ge{k}")]

    # Register every even not_ge used by the range/odd AND gates below.
    # (Fixed: previously only [2, 4, 8] were registered here and the rest
    # were registered lazily inside the odd-AND branch, which left
    # not_ge12 unavailable to and_10_11.  build_float16_normalize_tensors
    # emits not_ge for this same set.)
    for k in [2, 4, 6, 8, 10, 12]:
        registry.register(f"{prefix}.not_ge{k}")

    # AND gates for ranges
    if '.and_4_7' in gate:
        return [registry.get_id(f"{prefix}.ge4"), registry.get_id(f"{prefix}.not_ge8")]
    if '.and_2_3' in gate:
        return [registry.get_id(f"{prefix}.ge2"), registry.get_id(f"{prefix}.not_ge4")]
    if '.and_6_7' in gate:
        return [registry.get_id(f"{prefix}.ge6"), registry.get_id(f"{prefix}.not_ge8")]
    if '.and_10_11' in gate:
        # CLZ in {10, 11} = ge10 AND NOT ge12.  (Fixed: previously wired
        # to [ge10, ge12], which made the AND fire only for CLZ >= 12.)
        return [registry.get_id(f"{prefix}.ge10"), registry.get_id(f"{prefix}.not_ge12")]

    # Odd AND gates: CLZ == i  <=>  ge{i} AND NOT ge{i+1}
    match = re.search(r'\.and_(\d+)$', gate)
    if match:
        i = int(match.group(1))
        if i in [1, 3, 5, 7, 9, 11]:
            return [registry.get_id(f"{prefix}.ge{i}"), registry.get_id(f"{prefix}.not_ge{i + 1}")]

    # Register AND outputs
    for name in ['and_4_7', 'and_2_3', 'and_6_7', 'and_10_11']:
        registry.register(f"{prefix}.{name}")
    for i in [1, 3, 5, 7, 9, 11]:
        registry.register(f"{prefix}.and_{i}")

    # Shift bit gates
    if '.shift3' in gate:
        return [registry.get_id(f"{prefix}.ge8")]
    if '.shift2' in gate:
        # (ge4 AND NOT ge8) OR ge12 -- CLZ in {4-7, 12}
        return [registry.get_id(f"{prefix}.and_4_7"), registry.get_id(f"{prefix}.ge12")]
    if '.shift1' in gate:
        return [registry.get_id(f"{prefix}.and_2_3"), registry.get_id(f"{prefix}.and_6_7"),
                registry.get_id(f"{prefix}.and_10_11")]
    if '.shift0' in gate:
        return [registry.get_id(f"{prefix}.and_{i}") for i in [1, 3, 5, 7, 9, 11]]

    for i in range(4):
        registry.register(f"{prefix}.shift{i}")

    # not_overflow
    if '.not_overflow' in gate:
        return [registry.get_id(f"{prefix}.overflow")]

    registry.register(f"{prefix}.not_overflow")

    # Output shift bits (masked by not_overflow)
    if '.out_shift' in gate:
        match = re.search(r'\.out_shift(\d+)', gate)
        if match:
            i = int(match.group(1))
            return [registry.get_id(f"{prefix}.shift{i}"), registry.get_id(f"{prefix}.not_overflow")]

    return []
1172
  def infer_float16_cmp_inputs(gate: str, registry: SignalRegistry) -> List[int]:
1173
  """Infer inputs for float16.cmp circuit."""
1174
  prefix = "float16.cmp"
 
1330
  return []
1331
 
1332
 
1333
def build_float16_normalize_tensors() -> Dict[str, torch.Tensor]:
    """Build tensors for float16.normalize circuit.

    Normalizes an extended mantissa by locating the leading 1 so the
    caller can shift it into place (used after float16 add/subtract).

    Inputs:
    - 13-bit extended mantissa ($m[12:0], where $m[12] is overflow bit)
    - 8-bit raw exponent ($e[7:0])
    - 1-bit sign ($sign)

    Outputs:
    - shift_amt[3:0]: how many positions to shift left (0-12)
    - is_zero: mantissa is all zeros
    - overflow: mantissa bit 12 is set (need right shift)

    The actual shifting and exponent adjustment are done externally
    since a full barrel shifter is complex.
    """
    p = "float16.normalize"
    t: Dict[str, torch.Tensor] = {}

    def emit(name, weights, bias):
        # One threshold neuron: Heaviside(w . x + b).
        t[f"{p}.{name}.weight"] = torch.tensor(weights)
        t[f"{p}.{name}.bias"] = torch.tensor([bias])

    # overflow: pass-through of $m[12] (needs right shift, not left)
    emit("overflow", [1.0], -0.5)

    # is_zero: NOR of all 13 mantissa bits
    emit("is_zero", [-1.0] * 13, 0.0)

    # CLZ ladder over m[11:0] (overflow bit excluded):
    # pz{k} fires when the top k bits are all zero ...
    for k in range(1, 13):
        emit(f"pz{k}", [-1.0] * k, 0.0)
    # ... and ge{k} fires when at least k pz gates fire (CLZ >= k).
    for k in range(1, 13):
        emit(f"ge{k}", [1.0] * 12, -float(k))

    # Inverters for the even upper bounds used by the AND gates below.
    for k in (2, 4, 6, 8, 10, 12):
        emit(f"not_ge{k}", [-1.0], 0.0)

    # Shift amount is min(CLZ, 12) in 4 bits.
    # bit3: CLZ >= 8 (pass-through of ge8)
    emit("shift3", [1.0], -0.5)

    # bit2: CLZ in {4-7, 12} = (ge4 AND NOT ge8) OR ge12
    emit("and_4_7", [1.0, 1.0], -2.0)
    emit("shift2", [1.0, 1.0], -1.0)

    # bit1: CLZ in {2,3,6,7,10,11}
    for name in ("and_2_3", "and_6_7", "and_10_11"):
        emit(name, [1.0, 1.0], -2.0)
    emit("shift1", [1.0, 1.0, 1.0], -1.0)

    # bit0: CLZ is odd {1,3,5,7,9,11}
    for i in (1, 3, 5, 7, 9, 11):
        emit(f"and_{i}", [1.0, 1.0], -2.0)
    emit("shift0", [1.0] * 6, -1.0)

    # When overflow is set, the shift amount must read as 0 (the caller
    # right-shifts by 1 externally), so each shift bit is ANDed with
    # NOT overflow.
    emit("not_overflow", [-1.0], 0.0)
    for i in range(4):
        emit(f"out_shift{i}", [1.0, 1.0], -2.0)

    return t
1421
  def build_float16_cmp_tensors() -> Dict[str, torch.Tensor]:
1422
  """Build tensors for float16.cmp circuit.
1423
 
 
1558
  return tensors
1559
 
1560
 
1561
def build_clz16bit_tensors() -> Dict[str, torch.Tensor]:
    """Build tensors for arithmetic.clz16bit circuit.

    Counts leading zeros of a 16-bit word; the result (0-16) needs five
    output bits.  Same construction as CLZ8BIT:
      1. pz{k}: NOR over the top k bits (fires when they are all zero)
      2. ge{k}: threshold over the pz layer (sum of pz >= k)
      3. AND/OR gates turning the thermometer code into binary
    """
    p = "arithmetic.clz16bit"
    t: Dict[str, torch.Tensor] = {}

    def emit(name, weights, bias):
        # One threshold neuron: Heaviside(w . x + b).
        t[f"{p}.{name}.weight"] = torch.tensor(weights)
        t[f"{p}.{name}.bias"] = torch.tensor([bias])

    # Prefix-zero detectors (NOR of top k bits).
    for k in range(1, 17):
        emit(f"pz{k}", [-1.0] * k, 0.0)

    # Thermometer thresholds: ge{k} fires when at least k pz gates fire.
    for k in range(1, 17):
        emit(f"ge{k}", [1.0] * 16, -float(k))

    # Inverters for the upper bounds of the range detectors.
    for k in (2, 4, 6, 8, 10, 12, 14, 16):
        emit(f"not_ge{k}", [-1.0], 0.0)

    # bit3 (8's place): CLZ in {8-15} = ge8 AND NOT ge16.
    # (bit4, the 16's place, is just ge16 -- see out4 below.)
    emit("and_8_15", [1.0, 1.0], -2.0)

    # bit2 (4's place): CLZ in {4-7, 12-15}
    # = (ge4 AND NOT ge8) OR (ge12 AND NOT ge16)
    emit("and_4_7", [1.0, 1.0], -2.0)
    emit("and_12_15", [1.0, 1.0], -2.0)
    emit("or_bit2", [1.0, 1.0], -1.0)

    # bit1 (2's place): CLZ in {2,3,6,7,10,11,14,15}
    for name in ("and_2_3", "and_6_7", "and_10_11", "and_14_15"):
        emit(name, [1.0, 1.0], -2.0)
    emit("or_bit1", [1.0] * 4, -1.0)

    # bit0 (1's place): CLZ is odd {1,3,5,7,9,11,13,15}
    for i in (1, 3, 5, 7, 9, 11, 13, 15):
        emit(f"and_{i}", [1.0, 1.0], -2.0)
    emit("or_bit0", [1.0] * 8, -1.0)

    # Output pass-throughs (bias -0.5 makes each an identity gate on
    # its single 0/1 input): out4=ge16, out3=and_8_15, out2..0=or_bit2..0.
    for name in ("out4", "out3", "out2", "out1", "out0"):
        emit(name, [1.0], -0.5)

    return t
1645
  def build_clz8bit_tensors() -> Dict[str, torch.Tensor]:
1646
  """Build tensors for arithmetic.clz8bit circuit.
1647
 
 
1717
  tensors.update(clz_tensors)
1718
  print(f" CLZ8BIT: {len(clz_tensors)} tensors")
1719
 
1720
+ clz16_tensors = build_clz16bit_tensors()
1721
+ tensors.update(clz16_tensors)
1722
+ print(f" CLZ16BIT: {len(clz16_tensors)} tensors")
1723
+
1724
  unpack_tensors = build_float16_unpack_tensors()
1725
  tensors.update(unpack_tensors)
1726
  print(f" float16.unpack: {len(unpack_tensors)} tensors")
eval.py CHANGED
@@ -291,6 +291,52 @@ class CircuitEvaluator:
291
 
292
  return TestResult('arithmetic.clz8bit', passed, 256, failures)
293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  # =========================================================================
295
  # FLOAT16 TESTS
296
  # =========================================================================
@@ -623,6 +669,11 @@ class Evaluator:
623
  self.results.append(result)
624
  if verbose:
625
  self._print_result(result)
 
 
 
 
 
626
 
627
  # Float16
628
  if verbose:
 
291
 
292
  return TestResult('arithmetic.clz8bit', passed, 256, failures)
293
 
294
def test_clz16bit(self) -> TestResult:
    """Test 16-bit count leading zeros.

    Checks zero, every power of two, and 200 seeded random 16-bit
    values.  Sampled rather than exhaustive: 65536 full-circuit
    evaluations would dominate the suite's runtime.
    """
    prefix = 'arithmetic.clz16bit'
    failures = []
    passed = 0

    # Edge cases first: 0 (CLZ == 16) and each single-bit word.
    test_values = [0] + [1 << i for i in range(16)]

    # Use a local RNG instance: calling random.seed(42) here would
    # clobber the process-wide RNG state for every later consumer.
    # (Random(42).randint yields the same sequence as the old code.)
    import random
    rng = random.Random(42)
    for _ in range(200):
        test_values.append(rng.randint(0, 0xFFFF))

    for val in test_values:
        # Reference model: CLZ == 16 - bit_length (bit_length(0) == 0,
        # so val == 0 correctly yields 16).
        expected = 16 - val.bit_length()

        # Drive inputs: $x[15] = MSB, $x[0] = LSB.
        ext = {f'{prefix}.$x[{i}]': float((val >> i) & 1) for i in range(16)}

        values = self.eval_circuit(prefix, ext)

        # Reassemble the 5-bit result from the output gates
        # (missing outputs read as 0).
        result = 0
        for bit, name in enumerate(['out0', 'out1', 'out2', 'out3', 'out4']):
            result += int(values.get(f'{prefix}.{name}', 0)) << bit

        if result == expected:
            passed += 1
        elif len(failures) < 10:
            # Cap recorded failures so a broken circuit doesn't flood the report.
            failures.append((val, expected, result))

    return TestResult('arithmetic.clz16bit', passed, len(test_values), failures)
340
  # =========================================================================
341
  # FLOAT16 TESTS
342
  # =========================================================================
 
669
  self.results.append(result)
670
  if verbose:
671
  self._print_result(result)
672
+ if 'arithmetic.clz16bit.pz1.weight' in self.eval.tensors:
673
+ result = self.eval.test_clz16bit()
674
+ self.results.append(result)
675
+ if verbose:
676
+ self._print_result(result)
677
 
678
  # Float16
679
  if verbose: