phanerozoic
/

8bit-threshold-computer

@@ -1,480 +0,0 @@
-"""
-TEST #4: Adversarial Weight Perturbation
-=========================================
-Flip one weight in one gate. Prove exactly which tests fail and why.
-Show failure is localized and predictable, not catastrophic.
-A skeptic would demand: "Prove your system fails gracefully. Show me that
-perturbing one weight breaks only what it should break."
-"""
-import torch
-from safetensors.torch import load_file
-import copy
-# Load the circuit tensors once at import time. The path is relative, so the
-# script must run from the directory holding the .safetensors file. Every
-# experiment below clones this dict instead of mutating it.
-original_model = load_file('neural_computer.safetensors')
-def heaviside(x):
-    """Step activation: 1.0 where x >= 0 (zero included), 0.0 elsewhere."""
-    fired = x >= 0
-    return fired.float()
-def eval_gate(model, prefix, a, b):
-    """Run one 2-input threshold neuron and return its output as 0 or 1.
-    Reads `{prefix}.weight` and `{prefix}.bias` from `model`; the neuron
-    fires when inputs @ weight + bias >= 0.
-    """
-    inputs = torch.tensor([float(a), float(b)])
-    weight = model[f'{prefix}.weight']
-    offset = model[f'{prefix}.bias']
-    pre_activation = inputs @ weight + offset
-    return int(heaviside(pre_activation).item())
-def eval_xor(model, a, b):
-    """Evaluate the two-layer `boolean.xor` circuit and return 0 or 1.
-    Two hidden threshold neurons (layer1.neuron1, layer1.neuron2) see the
-    inputs; their outputs feed the single layer2 neuron.
-    """
-    inputs = torch.tensor([float(a), float(b)])
-    hidden_params = (
-        (model['boolean.xor.layer1.neuron1.weight'], model['boolean.xor.layer1.neuron1.bias']),
-        (model['boolean.xor.layer1.neuron2.weight'], model['boolean.xor.layer1.neuron2.bias']),
-    )
-    out_weight = model['boolean.xor.layer2.weight']
-    out_bias = model['boolean.xor.layer2.bias']
-    # Hidden activations are pulled out as Python floats and re-packed, so the
-    # output neuron always sees a flat 2-element vector.
-    hidden = torch.tensor([heaviside(inputs @ w + c).item() for w, c in hidden_params])
-    return int(heaviside(hidden @ out_weight + out_bias).item())
-def eval_full_adder(model, a, b, cin, prefix):
-    """Evaluate one full-adder stage and return (sum_bit, carry_out) as ints.
-    The stage is two half adders plus a final gate: ha1 takes (a, b), ha2
-    takes (ha1 sum, cin), and `carry_or` combines the two half-adder carries.
-    All tensors are read from `model` under `prefix`.
-    """
-    def fire(inputs, gate):
-        # One threshold neuron: 1.0 when inputs @ weight + bias >= 0, else 0.0.
-        return heaviside(inputs @ model[f'{gate}.weight'] + model[f'{gate}.bias']).item()
-    def xor_stage(inputs, xor_prefix):
-        # Two-layer XOR: the "or" and "nand" hidden neurons feed `layer2`.
-        hidden = torch.tensor([
-            fire(inputs, f'{xor_prefix}.layer1.or'),
-            fire(inputs, f'{xor_prefix}.layer1.nand'),
-        ])
-        return fire(hidden, f'{xor_prefix}.layer2')
-    # First half adder: a and b.
-    first_pair = torch.tensor([a, b], dtype=torch.float32)
-    partial_sum = xor_stage(first_pair, f'{prefix}.ha1.sum')
-    carry_ab = fire(first_pair, f'{prefix}.ha1.carry')
-    # Second half adder: partial sum and the incoming carry.
-    second_pair = torch.tensor([partial_sum, cin], dtype=torch.float32)
-    sum_bit = xor_stage(second_pair, f'{prefix}.ha2.sum')
-    carry_cin = fire(second_pair, f'{prefix}.ha2.carry')
-    # Carry out is produced by the stage's own carry_or neuron.
-    carry_pair = torch.tensor([carry_ab, carry_cin], dtype=torch.float32)
-    carry_out = fire(carry_pair, f'{prefix}.carry_or')
-    return int(sum_bit), int(carry_out)
-def add_8bit(model, a, b):
-    """Add two integers with the 8-stage ripple-carry adder stored in `model`.
-    Only bits 0-7 of `a` and `b` are used. Returns (sum assembled from the
-    eight sum bits, carry out of the last stage as an int).
-    """
-    carry = 0.0
-    total = 0
-    for bit in range(8):
-        stage = f'arithmetic.ripplecarry8bit.fa{bit}'
-        a_bit = float((a >> bit) & 1)
-        b_bit = float((b >> bit) & 1)
-        sum_bit, carry = eval_full_adder(model, a_bit, b_bit, carry, stage)
-        # Weight each sum bit by its place value as it is produced.
-        total += sum_bit * (1 << bit)
-    return total, int(carry)
-def test_boolean_gates(model):
-    """Check AND, OR, NAND, NOR and XOR against their truth tables.
-    Returns (passed, failed, failures); each failure is a tuple
-    (gate_name, a, b, expected, got). 20 cases are checked in total.
-    """
-    input_pairs = ((0, 0), (0, 1), (1, 0), (1, 1))
-    # (label, tensor prefix or None for the 2-layer XOR, expected outputs in
-    # the same order as input_pairs)
-    truth_tables = (
-        ('AND', 'boolean.and', (0, 0, 0, 1)),
-        ('OR', 'boolean.or', (0, 1, 1, 1)),
-        ('NAND', 'boolean.nand', (1, 1, 1, 0)),
-        ('NOR', 'boolean.nor', (1, 0, 0, 0)),
-        ('XOR', None, (0, 1, 1, 0)),
-    )
-    failures = []
-    for label, gate_prefix, outputs in truth_tables:
-        for (a, b), want in zip(input_pairs, outputs):
-            if gate_prefix is None:
-                got = eval_xor(model, a, b)
-            else:
-                got = eval_gate(model, gate_prefix, a, b)
-            if got != want:
-                failures.append((label, a, b, want, got))
-    n_failed = len(failures)
-    return 20 - n_failed, n_failed, failures
-def test_addition_sample(model, n=100):
-    """Check 8-bit addition on an evenly spaced grid of operand pairs.
-    `n` is the approximate number of pairs wanted; the grid step per operand
-    is 256 // sqrt(n), so the default n=100 samples 0, 25, ..., 250 on each
-    axis (11 x 11 = 121 pairs, because the grid includes 0).
-    Returns (passed, failed, failures) where each failure is
-    (a, b, expected, result) and passed + failed is the number of pairs that
-    were actually run.
-    """
-    # Clamp so that n <= 0 or a very large n still yields a valid range step.
-    per_axis = max(1, int(max(n, 1) ** 0.5))
-    step = max(1, 256 // per_axis)
-    operands = range(0, 256, step)
-    failures = []
-    for a in operands:
-        for b in operands:
-            result, _ = add_8bit(model, a, b)
-            expected = (a + b) % 256
-            if result != expected:
-                failures.append((a, b, expected, result))
-    # Derive the pass count from the pairs really tested, not a nominal 100.
-    total = len(operands) ** 2
-    return total - len(failures), len(failures), failures
-def perturb_weight(model, tensor_name, index, delta):
-    """Return a deep-cloned model with one scalar entry shifted by `delta`.
-    `index` addresses the flattened form of `model[tensor_name]`. The input
-    model is left untouched. Returns (perturbed_model, old_value, new_value).
-    """
-    perturbed = {}
-    for key, tensor in model.items():
-        perturbed[key] = tensor.clone()
-    values = perturbed[tensor_name].flatten()
-    before = values[index].item()
-    after = before + delta
-    values[index] = after
-    # Restore the original shape before storing the tensor back.
-    perturbed[tensor_name] = values.view(model[tensor_name].shape)
-    return perturbed, before, after
-# =============================================================================
-# PERTURBATION EXPERIMENTS
-# =============================================================================
-def experiment_perturb_and_gate():
-    """
-    Perturb the AND gate's first weight from 1 to 0.
-    Expected (taking the stored gate to be w=[1,1], b=-2, as the printed
-    analysis does): with w=[0,1], b=-2 the weighted sum never reaches the
-    threshold, so the gate becomes constant 0 and only AND(1,1) is wrong.
-    Returns (failure_count, failures); each failure is (a, b, expected, got).
-    """
-    print("\n[EXPERIMENT 1] Perturb AND gate: w[0] = 1 -> 0")
-    print("-" * 60)
-    perturbed, _, _ = perturb_weight(original_model, 'boolean.and.weight', 0, -1)
-    print(f"  Original: w={original_model['boolean.and.weight'].tolist()}, b={original_model['boolean.and.bias'].item()}")
-    print(f"  Perturbed: w={perturbed['boolean.and.weight'].tolist()}, b={perturbed['boolean.and.bias'].item()}")
-    print()
-    # Test AND gate directly
-    print("  AND gate truth table after perturbation:")
-    print("    Input    Expected  Got")
-    failures = []
-    expected_and = {(0,0):0, (0,1):0, (1,0):0, (1,1):1}
-    for (a,b), exp in expected_and.items():
-        got = eval_gate(perturbed, 'boolean.and', a, b)
-        status = "OK" if got == exp else "FAIL"
-        print(f"    ({a},{b})      {exp}        {got}    [{status}]")
-        if got != exp:
-            failures.append((a, b, exp, got))
-    print()
-    print("  Analysis: With w=[0,1], b=-2, gate fires when 0*a + 1*b >= 2")
-    print("            This is NEVER true (max sum = 1), so output is always 0")
-    print("            AND(1,1) now incorrectly returns 0")
-    print()
-    # add_8bit reads only the 'arithmetic.ripplecarry8bit.*' tensors, so this
-    # run shows whether the boolean.and perturbation leaks into the adders.
-    print("  Cascade effect on arithmetic (adders carry their own AND weights):")
-    add_passed, add_fails, add_details = test_addition_sample(perturbed)
-    # Report against the number of additions actually sampled.
-    print(f"    Addition failures: {add_fails}/{add_passed + add_fails} sampled")
-    if add_fails > 0:
-        print(f"    Sample failures: {add_details[:3]}")
-    return len(failures), failures
-def experiment_perturb_or_gate():
-    """
-    Replace the OR gate's bias with -2 and re-run its truth table.
-    Expected: the gate then needs both inputs high, i.e. it acts as AND.
-    Returns (failure_count, failures); each failure is (a, b, expected, got).
-    """
-    print("\n[EXPERIMENT 2] Perturb OR gate: bias = -1 -> -2")
-    print("-" * 60)
-    perturbed = {}
-    for name, tensor in original_model.items():
-        perturbed[name] = tensor.clone()
-    perturbed['boolean.or.bias'] = torch.tensor([-2.0])
-    for label, weights in (("Original", original_model), ("Perturbed", perturbed)):
-        print(f"  {label}: w={weights['boolean.or.weight'].tolist()}, b={weights['boolean.or.bias'].item()}")
-    print()
-    print("  OR gate truth table after perturbation:")
-    print("    Input    Expected  Got")
-    failures = []
-    for a in (0, 1):
-        for b in (0, 1):
-            exp = a | b
-            got = eval_gate(perturbed, 'boolean.or', a, b)
-            if got == exp:
-                status = "OK"
-            else:
-                status = "FAIL"
-                failures.append((a, b, exp, got))
-            print(f"    ({a},{b})      {exp}        {got}    [{status}]")
-    print()
-    print("  Analysis: With w=[1,1], b=-2, gate fires when a + b >= 2")
-    print("            This is AND, not OR. OR(0,1) and OR(1,0) now return 0")
-    print()
-    return len(failures), failures
-def experiment_perturb_xor_hidden():
-    """
-    Set the bias of XOR's first hidden neuron to -2 and re-run XOR.
-    Expected: with that neuron acting as AND, XOR collapses to constant 0.
-    Returns (failure_count, failures); each failure is (a, b, expected, got).
-    """
-    hidden_weight_key = 'boolean.xor.layer1.neuron1.weight'
-    hidden_bias_key = 'boolean.xor.layer1.neuron1.bias'
-    print("\n[EXPERIMENT 3] Perturb XOR's hidden OR neuron: bias -1 -> -2")
-    print("-" * 60)
-    perturbed = dict((name, tensor.clone()) for name, tensor in original_model.items())
-    perturbed[hidden_bias_key] = torch.tensor([-2.0])
-    print(f"  Original XOR hidden1 (OR): w={original_model[hidden_weight_key].tolist()}, b={original_model[hidden_bias_key].item()}")
-    print("  Perturbed: bias = -2 (now behaves as AND)")
-    print()
-    print("  XOR truth table after perturbation:")
-    print("    Input    Expected  Got")
-    failures = []
-    for a in (0, 1):
-        for b in (0, 1):
-            exp = a ^ b
-            got = eval_xor(perturbed, a, b)
-            matches = got == exp
-            print(f"    ({a},{b})      {exp}        {got}    [{'OK' if matches else 'FAIL'}]")
-            if not matches:
-                failures.append((a, b, exp, got))
-    print()
-    for line in (
-        "  Analysis: XOR = AND(OR(a,b), NAND(a,b))",
-        "            With OR->AND: XOR = AND(AND(a,b), NAND(a,b))",
-        "            AND(a,b)=1 only when a=b=1, but NAND(1,1)=0",
-        "            So AND(AND, NAND) = 0 for all inputs -> constant 0",
-    ):
-        print(line)
-    print()
-    return len(failures), failures
-def experiment_perturb_fa0_carry():
-    """
-    Perturb the first full adder's carry_or gate by setting its bias to -2.
-    Expected: the carry out of bit 0 is lost whenever only one of the two
-    half-adder carries fires, so additions that carry out of bit 0 break.
-    Returns (failure_count, failures); each failure is
-    (a, b, expected, result).
-    """
-    # The banner now states the same -1 -> -2 change as the lines below.
-    print("\n[EXPERIMENT 4] Perturb FA0 carry_or: bias -1 -> -2 (OR -> AND)")
-    print("-" * 60)
-    perturbed = {k: v.clone() for k, v in original_model.items()}
-    # Change carry_or from OR (b=-1) to AND (b=-2)
-    perturbed['arithmetic.ripplecarry8bit.fa0.carry_or.bias'] = torch.tensor([-2.0])
-    print("  Perturbation: FA0.carry_or bias changed from -1 to -2")
-    print("  Effect: OR gate becomes AND gate in carry chain")
-    print()
-    # Test specific carry-critical cases
-    test_cases = [
-        (1, 1, 2),      # 1+1=2, needs carry from bit 0
-        (3, 1, 4),      # 11+01=100, needs carry
-        (127, 1, 128),  # Carry through multiple bits
-        (255, 1, 0),    # Full carry chain
-        (128, 128, 0),  # High bit carry
-    ]
-    print("  Critical carry test cases:")
-    failures = []
-    for a, b, expected in test_cases:
-        result, _ = add_8bit(perturbed, a, b)
-        status = "OK" if result == expected else "FAIL"
-        print(f"    {a:3d} + {b:3d} = {result:3d} (expected {expected:3d}) [{status}]")
-        if result != expected:
-            failures.append((a, b, expected, result))
-    print()
-    print("  Analysis: FA0.carry_or computes c_out = ha1_carry OR ha2_carry")
-    print("            With OR->AND, carry only propagates when BOTH internal carries fire")
-    print("            This breaks 1+1 (ha1_carry=1, ha2_carry=0 -> AND gives 0)")
-    print()
-    return len(failures), failures
-def experiment_sign_flip():
-    """
-    Shift the AND gate's first weight by -2 (1 -> -1, a sign flip).
-    Expected: the first input now pushes the sum down, and the gate can no
-    longer reach its threshold.
-    Returns (failure_count, failures); each failure is (a, b, expected, got).
-    """
-    print("\n[EXPERIMENT 5] Sign flip: AND w[0] = 1 -> -1")
-    print("-" * 60)
-    perturbed, _, _ = perturb_weight(original_model, 'boolean.and.weight', 0, -2)
-    for label, weights in (("Original", original_model), ("Perturbed", perturbed)):
-        print(f"  {label}: w={weights['boolean.and.weight'].tolist()}, b={weights['boolean.and.bias'].item()}")
-    print()
-    print("  AND gate truth table after sign flip:")
-    print("    Input    Expected  Got       Analysis")
-    failures = []
-    for a in (0, 1):
-        for b in (0, 1):
-            exp = a & b
-            got = eval_gate(perturbed, 'boolean.and', a, b)
-            # Hand-computed pre-activation for the textbook w=[-1,1], b=-2.
-            weighted_sum = b - a - 2
-            if got == exp:
-                status = "OK"
-            else:
-                status = "FAIL"
-                failures.append((a, b, exp, got))
-            print(f"    ({a},{b})      {exp}        {got}        sum = -1*{a} + 1*{b} - 2 = {weighted_sum} [{status}]")
-    print()
-    print("  Analysis: With w=[-1,1], b=-2, fires when -a + b >= 2")
-    print("            Max value is -0 + 1 - 2 = -1, never >= 0")
-    print("            Gate becomes constant 0")
-    print()
-    return len(failures), failures
-def experiment_localization():
-    """
-    Overwrite the AND gate's weights, then re-check all five Boolean gates.
-    Returns True when exactly one of AND, OR, NAND, NOR, XOR mismatches its
-    truth table.
-    """
-    print("\n[EXPERIMENT 6] Failure Localization Test")
-    print("-" * 60)
-    perturbed = dict((name, tensor.clone()) for name, tensor in original_model.items())
-    perturbed['boolean.and.weight'] = torch.tensor([0.0, 1.0])
-    print("  Perturbation: AND gate w=[1,1] -> [0,1]")
-    print()
-    # (label, evaluator on the perturbed model, reference Boolean function).
-    # Only AND was touched; the rest are expected to stay intact.
-    checks = (
-        ('AND', lambda a, b: eval_gate(perturbed, 'boolean.and', a, b), lambda a, b: a & b),
-        ('OR', lambda a, b: eval_gate(perturbed, 'boolean.or', a, b), lambda a, b: a | b),
-        ('NAND', lambda a, b: eval_gate(perturbed, 'boolean.nand', a, b), lambda a, b: 1 - (a & b)),
-        ('NOR', lambda a, b: eval_gate(perturbed, 'boolean.nor', a, b), lambda a, b: 1 - (a | b)),
-        ('XOR', lambda a, b: eval_xor(perturbed, a, b), lambda a, b: a ^ b),
-    )
-    input_pairs = [(a, b) for a in (0, 1) for b in (0, 1)]
-    gates_status = {}
-    for label, evaluate, reference in checks:
-        mismatches = [pair for pair in input_pairs if evaluate(*pair) != reference(*pair)]
-        gates_status[label] = 'BROKEN' if mismatches else 'OK'
-    print("  Gate status after AND perturbation:")
-    broken_count = 0
-    for gate, status in gates_status.items():
-        if status == 'BROKEN':
-            indicator = "X"
-            broken_count += 1
-        else:
-            indicator = " "
-        print(f"    [{indicator}] {gate:6s} {status}")
-    print()
-    only_one_broken = broken_count == 1
-    print(f"  Result: {broken_count}/5 gates affected")
-    print(f"  Localization: {'PASSED' if only_one_broken else 'FAILED'} - only perturbed gate broke")
-    return only_one_broken
-# =============================================================================
-# MAIN
-# =============================================================================
-if __name__ == "__main__":
-    # Script entry point: verify the unmodified model, run the six
-    # perturbation experiments, then print a pass/fail summary.
-    print("=" * 70)
-    print(" TEST #4: ADVERSARIAL WEIGHT PERTURBATION")
-    print(" Single-weight changes, localized and predictable failures")
-    print("=" * 70)
-    # First verify original model works
-    print("\n[BASELINE] Verifying original model...")
-    bool_passed, bool_failed, _ = test_boolean_gates(original_model)
-    add_passed, add_failed, _ = test_addition_sample(original_model)
-    print(f"  Boolean gates: {bool_passed}/{bool_passed + bool_failed} passed")
-    print(f"  Addition sample: {add_passed}/{add_passed + add_failed} passed")
-    if bool_failed > 0 or add_failed > 0:
-        print("  ERROR: Original model has failures!")
-        # `exit` is injected by the `site` module and is not meant for
-        # programs; SystemExit is always available and exits with status 1.
-        raise SystemExit(1)
-    print("  Original model verified OK")
-    # Run experiments. Each entry is (experiment, summary label, expected
-    # effect); it passes when the perturbation caused at least one failure.
-    failure_experiments = (
-        (experiment_perturb_and_gate, "AND w[0]: 1->0", "Breaks AND(1,1)"),
-        (experiment_perturb_or_gate, "OR bias: -1->-2", "OR becomes AND"),
-        (experiment_perturb_xor_hidden, "XOR hidden OR->AND", "XOR becomes const 0"),
-        (experiment_perturb_fa0_carry, "FA0 carry_or OR->AND", "Carry chain breaks"),
-        (experiment_sign_flip, "AND w[0] sign flip", "AND becomes const 0"),
-    )
-    results = []
-    for run_experiment, label, effect in failure_experiments:
-        n_failures, _ = run_experiment()
-        results.append((label, n_failures > 0, effect))
-    # The localization experiment reports its own boolean verdict.
-    localized = experiment_localization()
-    results.append(("Failure localization", localized, "Only target gate breaks"))
-    print("\n" + "=" * 70)
-    print(" SUMMARY")
-    print("=" * 70)
-    for name, passed, desc in results:
-        status = "PASS" if passed else "FAIL"
-        print(f"  {name:25s} [{status}] - {desc}")
-    print()
-    if all(entry[1] for entry in results):
-        print("  STATUS: ALL PERTURBATIONS CAUSED PREDICTABLE, LOCALIZED FAILURES")
-    else:
-        print("  STATUS: SOME PERTURBATIONS DID NOT BEHAVE AS EXPECTED")
-    print("=" * 70)