# phanerozoic / 8bit-threshold-computer

"""
TEST #4: Adversarial Weight Perturbation
=========================================
Flip one weight in one gate. Prove exactly which tests fail and why.
Show that failure is localized and predictable, not catastrophic.

A skeptic would demand: "Prove your system fails gracefully. Show me that
perturbing one weight breaks only what it should break."
"""
import copy
import math
import sys

import torch
from safetensors.torch import load_file
# Load the circuit tensors once at import time. The experiments below never
# mutate this mapping; they clone it before applying a perturbation.
original_model = load_file("neural_computer.safetensors")
def heaviside(x):
    """Elementwise unit step: 1.0 where ``x >= 0``, otherwise 0.0 (float32)."""
    fired = x >= 0
    return fired.to(torch.float32)
def eval_gate(model, prefix, a, b):
    """Evaluate a 2-input, single-neuron threshold gate.

    Reads ``{prefix}.weight`` and ``{prefix}.bias`` from ``model`` and returns
    1 if ``a*w[0] + b*w[1] + bias >= 0``, else 0.
    """
    inputs = torch.tensor([float(a), float(b)])
    weight = model[f'{prefix}.weight']
    bias = model[f'{prefix}.bias']
    pre_activation = inputs @ weight + bias
    fired = heaviside(pre_activation)
    return int(fired.item())
def eval_xor(model, a, b):
    """Evaluate the two-layer XOR circuit stored under ``boolean.xor``.

    Two hidden threshold neurons (``layer1.neuron1`` and ``layer1.neuron2``)
    see the raw inputs; their binary outputs feed the single ``layer2``
    neuron. Returns that neuron's output as an int (0 or 1).
    """
    inputs = torch.tensor([float(a), float(b)])

    hidden_values = []
    for neuron in ('neuron1', 'neuron2'):
        weight = model[f'boolean.xor.layer1.{neuron}.weight']
        bias = model[f'boolean.xor.layer1.{neuron}.bias']
        hidden_values.append(heaviside(inputs @ weight + bias).item())

    out_weight = model['boolean.xor.layer2.weight']
    out_bias = model['boolean.xor.layer2.bias']
    hidden = torch.tensor(hidden_values)
    return int(heaviside(hidden @ out_weight + out_bias).item())
def eval_full_adder(model, a, b, cin, prefix):
    """Evaluate one full adder built from two half adders and a carry OR.

    Args:
        model: mapping from tensor name to tensor.
        a, b: the two operand bits (numeric 0/1).
        cin: carry-in bit (numeric 0/1).
        prefix: tensor-name prefix of this adder; the sub-circuits read are
            ``ha1.sum``, ``ha1.carry``, ``ha2.sum``, ``ha2.carry`` and
            ``carry_or``.

    Returns:
        ``(sum_bit, carry_out)`` as ints.
    """
    def threshold(inputs, name):
        # One threshold neuron: step(inputs . weight + bias) as a Python float.
        weight = model[f'{name}.weight']
        bias = model[f'{name}.bias']
        return heaviside(inputs @ weight + bias).item()

    def xor_gate(inputs, xor_prefix):
        # Two-layer XOR: "or" and "nand" hidden neurons feeding "layer2".
        hidden = torch.tensor([
            threshold(inputs, f'{xor_prefix}.layer1.or'),
            threshold(inputs, f'{xor_prefix}.layer1.nand'),
        ])
        return threshold(hidden, f'{xor_prefix}.layer2')

    # First half adder: a + b.
    first_inputs = torch.tensor([a, b], dtype=torch.float32)
    first_sum = xor_gate(first_inputs, f'{prefix}.ha1.sum')
    first_carry = threshold(first_inputs, f'{prefix}.ha1.carry')

    # Second half adder: (a xor b) + cin.
    second_inputs = torch.tensor([first_sum, cin], dtype=torch.float32)
    second_sum = xor_gate(second_inputs, f'{prefix}.ha2.sum')
    second_carry = threshold(second_inputs, f'{prefix}.ha2.carry')

    # Carry out combines the two half-adder carries.
    carry_inputs = torch.tensor([first_carry, second_carry], dtype=torch.float32)
    carry_out = threshold(carry_inputs, f'{prefix}.carry_or')

    return int(second_sum), int(carry_out)
def add_8bit(model, a, b):
    """Add two integers bit by bit through the 8-stage ripple-carry adder.

    Only the low 8 bits of ``a`` and ``b`` are used. Returns
    ``(result, carry_out)`` where ``result`` is assembled from the eight sum
    bits (least significant first) and ``carry_out`` is the final carry.
    """
    carry = 0.0
    total = 0
    for bit in range(8):
        sum_bit, carry = eval_full_adder(
            model,
            float((a >> bit) & 1),
            float((b >> bit) & 1),
            carry,
            f'arithmetic.ripplecarry8bit.fa{bit}',
        )
        # sum_bit is 0 or 1, so shifting places it at its binary weight.
        total += sum_bit << bit
    return total, int(carry)
def test_boolean_gates(model):
    """Check AND, OR, NAND, NOR and XOR against their truth tables.

    Returns:
        ``(passed, failed, failures)`` where each failure is a tuple
        ``(gate_name, a, b, expected, got)``, listed in gate order
        AND, OR, NAND, NOR, XOR.
    """
    input_pairs = ((0, 0), (0, 1), (1, 0), (1, 1))
    # (label, expected outputs aligned with input_pairs, evaluator)
    gate_specs = (
        ('AND', (0, 0, 0, 1), lambda a, b: eval_gate(model, 'boolean.and', a, b)),
        ('OR', (0, 1, 1, 1), lambda a, b: eval_gate(model, 'boolean.or', a, b)),
        ('NAND', (1, 1, 1, 0), lambda a, b: eval_gate(model, 'boolean.nand', a, b)),
        ('NOR', (1, 0, 0, 0), lambda a, b: eval_gate(model, 'boolean.nor', a, b)),
        ('XOR', (0, 1, 1, 0), lambda a, b: eval_xor(model, a, b)),
    )

    failures = []
    for label, expected_outputs, evaluate in gate_specs:
        for (a, b), exp in zip(input_pairs, expected_outputs):
            got = evaluate(a, b)
            if got != exp:
                failures.append((label, a, b, exp, got))

    # 5 gates x 4 input combinations = 20 cases.
    total = len(gate_specs) * len(input_pairs)
    n_failed = len(failures)
    return total - n_failed, n_failed, failures
def test_addition_sample(model, n=100):
    """Test 8-bit addition on an evenly spaced grid of operand pairs.

    The grid has ``side = isqrt(n)`` operand values per axis (clamped to
    1..256), spread evenly over 0..255, so ``side * side`` pairs are checked.
    With the default ``n=100`` that is exactly 100 additions.

    Args:
        model: mapping from tensor name to tensor, as consumed by ``add_8bit``.
        n: requested number of samples; the largest square not exceeding it
            is what actually runs.

    Returns:
        ``(passed, failed, failures)`` where each failure is a tuple
        ``(a, b, expected, result)`` and ``passed + failed`` equals the number
        of additions actually performed.
    """
    # Clamp so that at least one pair is tested and no operand repeats.
    side = min(256, math.isqrt(max(int(n), 1)))
    operands = [i * 256 // side for i in range(side)]

    failures = []
    for a in operands:
        for b in operands:
            result, _ = add_8bit(model, a, b)
            expected = (a + b) % 256
            if result != expected:
                failures.append((a, b, expected, result))

    # Count what was really run instead of assuming a fixed 100.
    total = side * side
    return total - len(failures), len(failures), failures
def perturb_weight(model, tensor_name, index, delta):
    """Return a deep-cloned model with one scalar entry shifted by ``delta``.

    Args:
        model: mapping from tensor name to tensor; left untouched.
        tensor_name: key of the tensor to perturb.
        index: position of the entry in the flattened tensor.
        delta: amount added to that entry.

    Returns:
        ``(perturbed_model, old_value, new_value)`` with
        ``new_value = old_value + delta`` computed in Python floats.
    """
    perturbed = {}
    for key, tensor in model.items():
        perturbed[key] = tensor.clone()

    target = perturbed[tensor_name].flatten()
    before = target[index].item()
    after = before + delta
    target[index] = after
    # Restore the original shape in case flatten() changed it.
    perturbed[tensor_name] = target.view(model[tensor_name].shape)
    return perturbed, before, after
+# =============================================================================
+# PERTURBATION EXPERIMENTS
+# =============================================================================
def experiment_perturb_and_gate():
    """
    Perturb the AND gate's first weight by -1 (1 -> 0 for the assumed w=[1,1]).

    Expected (assuming the stock bias of -2, as the printed analysis does):
    the gate can never reach its threshold, so it outputs 0 for every input
    and only AND(1,1) is wrong.

    Also runs the sampled addition test on the perturbed model. The adders
    read their own ``arithmetic.*`` tensors rather than ``boolean.and``, so
    this serves as a check that the damage does not spread.

    Returns:
        ``(failure_count, failures)`` for the AND truth table, each failure
        being ``(a, b, expected, got)``.
    """
    print("\n[EXPERIMENT 1] Perturb AND gate: w[0] = 1 -> 0")
    print("-" * 60)
    perturbed, _, _ = perturb_weight(original_model, 'boolean.and.weight', 0, -1)
    print(f"  Original: w={original_model['boolean.and.weight'].tolist()}, b={original_model['boolean.and.bias'].item()}")
    print(f"  Perturbed: w={perturbed['boolean.and.weight'].tolist()}, b={perturbed['boolean.and.bias'].item()}")
    print()
    # Test AND gate directly
    print("  AND gate truth table after perturbation:")
    print("    Input    Expected  Got")
    failures = []
    expected_and = {(0, 0): 0, (0, 1): 0, (1, 0): 0, (1, 1): 1}
    for (a, b), exp in expected_and.items():
        got = eval_gate(perturbed, 'boolean.and', a, b)
        status = "OK" if got == exp else "FAIL"
        print(f"    ({a},{b})      {exp}        {got}    [{status}]")
        if got != exp:
            failures.append((a, b, exp, got))
    print()
    print("  Analysis: With w=[0,1], b=-2, gate fires when 0*a + 1*b >= 2")
    print("            This is NEVER true (max sum = 1), so output is always 0")
    print("            AND(1,1) now incorrectly returns 0")
    print()
    # The ripple-carry adder evaluates its own carry tensors, so perturbing
    # boolean.and should leave addition intact; report what actually happens.
    print("  Cascade check on arithmetic (adder carries have their own AND weights):")
    add_passed, add_fails, add_details = test_addition_sample(perturbed)
    # Use the real sample size rather than a hard-coded denominator.
    print(f"    Addition failures: {add_fails}/{add_passed + add_fails} sampled")
    if add_fails > 0:
        print(f"    Sample failures: {add_details[:3]}")
    return len(failures), failures
def experiment_perturb_or_gate():
    """
    Replace the OR gate's bias with -2 and print the resulting truth table.

    The printed analysis assumes the stock gate is w=[1,1], b=-1, in which
    case the perturbed gate computes AND instead of OR.

    Returns:
        ``(failure_count, failures)``, each failure being
        ``(a, b, expected, got)``.
    """
    print("\n[EXPERIMENT 2] Perturb OR gate: bias = -1 -> -2")
    print("-" * 60)

    # Work on a clone so the shared baseline model stays pristine.
    perturbed = {}
    for name, tensor in original_model.items():
        perturbed[name] = tensor.clone()
    perturbed['boolean.or.bias'] = torch.tensor([-2.0])

    weights_before = original_model['boolean.or.weight'].tolist()
    bias_before = original_model['boolean.or.bias'].item()
    weights_after = perturbed['boolean.or.weight'].tolist()
    bias_after = perturbed['boolean.or.bias'].item()
    print(f"  Original: w={weights_before}, b={bias_before}")
    print(f"  Perturbed: w={weights_after}, b={bias_after}")
    print()

    print("  OR gate truth table after perturbation:")
    print("    Input    Expected  Got")
    truth_table = (((0, 0), 0), ((0, 1), 1), ((1, 0), 1), ((1, 1), 1))
    failures = []
    for (a, b), exp in truth_table:
        got = eval_gate(perturbed, 'boolean.or', a, b)
        if got == exp:
            status = "OK"
        else:
            status = "FAIL"
            failures.append((a, b, exp, got))
        print(f"    ({a},{b})      {exp}        {got}    [{status}]")
    print()

    print("  Analysis: With w=[1,1], b=-2, gate fires when a + b >= 2")
    print("            This is AND, not OR. OR(0,1) and OR(1,0) now return 0")
    print()
    return len(failures), failures
def experiment_perturb_xor_hidden():
    """
    Replace the bias of XOR's first hidden neuron with -2 and print the
    resulting XOR truth table.

    The printed analysis treats that neuron as an OR gate that becomes an
    AND gate, which makes the whole circuit output a constant 0.

    Returns:
        ``(failure_count, failures)``, each failure being
        ``(a, b, expected, got)``.
    """
    print("\n[EXPERIMENT 3] Perturb XOR's hidden OR neuron: bias -1 -> -2")
    print("-" * 60)

    # Clone first so the baseline tensors are never modified.
    perturbed = {}
    for name, tensor in original_model.items():
        perturbed[name] = tensor.clone()
    perturbed['boolean.xor.layer1.neuron1.bias'] = torch.tensor([-2.0])

    hidden_weight = original_model['boolean.xor.layer1.neuron1.weight'].tolist()
    hidden_bias = original_model['boolean.xor.layer1.neuron1.bias'].item()
    print(f"  Original XOR hidden1 (OR): w={hidden_weight}, b={hidden_bias}")
    print("  Perturbed: bias = -2 (now behaves as AND)")
    print()

    print("  XOR truth table after perturbation:")
    print("    Input    Expected  Got")
    truth_table = (((0, 0), 0), ((0, 1), 1), ((1, 0), 1), ((1, 1), 0))
    failures = []
    for (a, b), exp in truth_table:
        got = eval_xor(perturbed, a, b)
        if got == exp:
            status = "OK"
        else:
            status = "FAIL"
            failures.append((a, b, exp, got))
        print(f"    ({a},{b})      {exp}        {got}    [{status}]")
    print()

    print("  Analysis: XOR = AND(OR(a,b), NAND(a,b))")
    print("            With OR->AND: XOR = AND(AND(a,b), NAND(a,b))")
    print("            AND(a,b)=1 only when a=b=1, but NAND(1,1)=0")
    print("            So AND(AND, NAND) = 0 for all inputs -> constant 0")
    print()
    return len(failures), failures
def experiment_perturb_fa0_carry():
    """
    Replace the bias of the first full adder's carry_or gate with -2.

    The printed analysis expects this to turn the carry OR into an AND, so a
    carry out of bit 0 is produced only when both half-adder carries fire.
    The original bias is read from the model and reported as-is rather than
    being assumed.

    Returns:
        ``(failure_count, failures)`` over the carry-critical test cases,
        each failure being ``(a, b, expected, result)``.
    """
    bias_key = 'arithmetic.ripplecarry8bit.fa0.carry_or.bias'
    old_bias = original_model[bias_key].item()
    new_bias = -2.0

    # Report the real starting bias so the banner and the detail line agree.
    print(f"\n[EXPERIMENT 4] Perturb FA0 carry_or: bias {old_bias:g} -> {new_bias:g} (OR -> AND)")
    print("-" * 60)
    perturbed = {k: v.clone() for k, v in original_model.items()}
    perturbed[bias_key] = torch.tensor([new_bias])
    print(f"  Perturbation: FA0.carry_or bias changed from {old_bias:g} to {new_bias:g}")
    print("  Effect: OR gate becomes AND gate in carry chain")
    print()
    # Test specific carry-critical cases
    test_cases = [
        (1, 1, 2),      # 1+1=2, needs carry from bit 0
        (3, 1, 4),      # 11+01=100, needs carry
        (127, 1, 128),  # Carry through multiple bits
        (255, 1, 0),    # Full carry chain
        (128, 128, 0),  # High bit carry
    ]
    print("  Critical carry test cases:")
    failures = []
    for a, b, expected in test_cases:
        result, _ = add_8bit(perturbed, a, b)
        status = "OK" if result == expected else "FAIL"
        print(f"    {a:3d} + {b:3d} = {result:3d} (expected {expected:3d}) [{status}]")
        if result != expected:
            failures.append((a, b, expected, result))
    print()
    print("  Analysis: FA0.carry_or computes c_out = ha1_carry OR ha2_carry")
    print("            With OR->AND, carry only propagates when BOTH internal carries fire")
    print("            This breaks 1+1 (ha1_carry=1, ha2_carry=0 -> AND gives 0)")
    print()
    return len(failures), failures
def experiment_sign_flip():
    """
    Shift the AND gate's first weight by -2 (a sign flip when it starts at 1)
    and print the resulting truth table.

    The per-row "sum" column and the printed analysis assume the perturbed
    gate is w=[-1,1], b=-2, in which case the gate never fires.

    Returns:
        ``(failure_count, failures)``, each failure being
        ``(a, b, expected, got)``.
    """
    print("\n[EXPERIMENT 5] Sign flip: AND w[0] = 1 -> -1")
    print("-" * 60)
    perturbed, _old, _new = perturb_weight(original_model, 'boolean.and.weight', 0, -2)

    weights_before = original_model['boolean.and.weight'].tolist()
    bias_before = original_model['boolean.and.bias'].item()
    weights_after = perturbed['boolean.and.weight'].tolist()
    bias_after = perturbed['boolean.and.bias'].item()
    print(f"  Original: w={weights_before}, b={bias_before}")
    print(f"  Perturbed: w={weights_after}, b={bias_after}")
    print()

    print("  AND gate truth table after sign flip:")
    print("    Input    Expected  Got       Analysis")
    truth_table = (((0, 0), 0), ((0, 1), 0), ((1, 0), 0), ((1, 1), 1))
    failures = []
    for (a, b), exp in truth_table:
        got = eval_gate(perturbed, 'boolean.and', a, b)
        # Pre-activation under the assumed weights [-1, 1] and bias -2.
        weighted_sum = b - a - 2
        if got == exp:
            status = "OK"
        else:
            status = "FAIL"
            failures.append((a, b, exp, got))
        print(f"    ({a},{b})      {exp}        {got}        sum = -1*{a} + 1*{b} - 2 = {weighted_sum} [{status}]")
    print()

    print("  Analysis: With w=[-1,1], b=-2, fires when -a + b >= 2")
    print("            Max value is -0 + 1 - 2 = -1, never >= 0")
    print("            Gate becomes constant 0")
    print()
    return len(failures), failures
def experiment_localization():
    """
    Perturb the AND gate and verify that no other Boolean gate is affected.

    The AND weights are overwritten with [0, 1]; AND, OR, NAND, NOR and XOR
    are then checked against their reference truth tables.

    Returns:
        True only if the perturbed AND gate is broken AND every other gate
        still matches its truth table.
    """
    print("\n[EXPERIMENT 6] Failure Localization Test")
    print("-" * 60)
    # Perturb AND gate
    perturbed = {k: v.clone() for k, v in original_model.items()}
    perturbed['boolean.and.weight'] = torch.tensor([0.0, 1.0])
    print("  Perturbation: AND gate w=[1,1] -> [0,1]")
    print()

    target_gate = 'AND'
    # gate name -> (evaluator on the perturbed model, reference Boolean function)
    checks = {
        'AND': (lambda a, b: eval_gate(perturbed, 'boolean.and', a, b),
                lambda a, b: a & b),
        'OR': (lambda a, b: eval_gate(perturbed, 'boolean.or', a, b),
               lambda a, b: a | b),
        'NAND': (lambda a, b: eval_gate(perturbed, 'boolean.nand', a, b),
                 lambda a, b: 1 - (a & b)),
        'NOR': (lambda a, b: eval_gate(perturbed, 'boolean.nor', a, b),
                lambda a, b: 1 - (a | b)),
        # XOR reads its own boolean.xor.* tensors, not boolean.and.
        'XOR': (lambda a, b: eval_xor(perturbed, a, b),
                lambda a, b: a ^ b),
    }

    gates_status = {}
    for gate, (evaluate, reference) in checks.items():
        mismatches = [
            (a, b)
            for a in (0, 1)
            for b in (0, 1)
            if evaluate(a, b) != reference(a, b)
        ]
        gates_status[gate] = 'BROKEN' if mismatches else 'OK'

    print("  Gate status after AND perturbation:")
    for gate, status in gates_status.items():
        indicator = "X" if status == 'BROKEN' else " "
        print(f"    [{indicator}] {gate:6s} {status}")
    print()

    broken_count = sum(1 for s in gates_status.values() if s == 'BROKEN')
    # Exactly one broken gate is not enough: it must be the one we perturbed.
    localized = broken_count == 1 and gates_status[target_gate] == 'BROKEN'
    print(f"  Result: {broken_count}/{len(gates_status)} gates affected")
    print(f"  Localization: {'PASSED' if localized else 'FAILED'} - only perturbed gate broke")
    return localized
+# =============================================================================
+# MAIN
+# =============================================================================
if __name__ == "__main__":
    # Script entry point: verify the unperturbed model, run every perturbation
    # experiment, then print a summary table.
    print("=" * 70)
    print(" TEST #4: ADVERSARIAL WEIGHT PERTURBATION")
    print(" Single-weight changes, localized and predictable failures")
    print("=" * 70)

    # The experiments are only meaningful if the baseline model is correct.
    print("\n[BASELINE] Verifying original model...")
    bool_passed, bool_failed, _ = test_boolean_gates(original_model)
    add_passed, add_failed, _ = test_addition_sample(original_model)
    print(f"  Boolean gates: {bool_passed}/{bool_passed + bool_failed} passed")
    print(f"  Addition sample: {add_passed}/{add_passed + add_failed} passed")
    if bool_failed > 0 or add_failed > 0:
        print("  ERROR: Original model has failures!")
        # sys.exit rather than the interactive-only exit() helper, which is
        # injected by the site module and may be missing (e.g. python -S).
        sys.exit(1)
    print("  Original model verified OK")

    # Each entry is (label, passed, description). A perturbation experiment
    # "passes" when it produced at least one failure, i.e. the perturbation
    # had an observable effect.
    results = []
    n, _ = experiment_perturb_and_gate()
    results.append(("AND w[0]: 1->0", n > 0, "Breaks AND(1,1)"))
    n, _ = experiment_perturb_or_gate()
    results.append(("OR bias: -1->-2", n > 0, "OR becomes AND"))
    n, _ = experiment_perturb_xor_hidden()
    results.append(("XOR hidden OR->AND", n > 0, "XOR becomes const 0"))
    n, _ = experiment_perturb_fa0_carry()
    results.append(("FA0 carry_or OR->AND", n > 0, "Carry chain breaks"))
    n, _ = experiment_sign_flip()
    results.append(("AND w[0] sign flip", n > 0, "AND becomes const 0"))
    localized = experiment_localization()
    results.append(("Failure localization", localized, "Only target gate breaks"))

    print("\n" + "=" * 70)
    print(" SUMMARY")
    print("=" * 70)
    for name, passed, desc in results:
        status = "PASS" if passed else "FAIL"
        print(f"  {name:25s} [{status}] - {desc}")
    print()
    all_passed = all(passed for _, passed, _ in results)
    if all_passed:
        print("  STATUS: ALL PERTURBATIONS CAUSED PREDICTABLE, LOCALIZED FAILURES")
    else:
        print("  STATUS: SOME PERTURBATIONS DID NOT BEHAVE AS EXPECTED")
    print("=" * 70)