Upload eval/iron_eval.py with huggingface_hub
Browse files- eval/iron_eval.py +428 -3
eval/iron_eval.py
CHANGED
|
@@ -13,12 +13,12 @@ from typing import Dict, Tuple
|
|
| 13 |
from safetensors import safe_open
|
| 14 |
|
| 15 |
|
| 16 |
-
def load_model(base_path: str = "
|
| 17 |
"""Load model from safetensors."""
|
| 18 |
-
f = safe_open(f"{base_path}/neural_computer.safetensors", framework='
|
| 19 |
tensors = {}
|
| 20 |
for name in f.keys():
|
| 21 |
-
tensors[name] =
|
| 22 |
return tensors
|
| 23 |
|
| 24 |
|
|
@@ -3705,6 +3705,423 @@ class BatchedFitnessEvaluator:
|
|
| 3705 |
|
| 3706 |
return total_scores, total_tests
|
| 3707 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3708 |
# =========================================================================
|
| 3709 |
# MAIN EVALUATE
|
| 3710 |
# =========================================================================
|
|
@@ -3932,6 +4349,14 @@ class BatchedFitnessEvaluator:
|
|
| 3932 |
total_tests += random_tests
|
| 3933 |
self.category_scores['randomized'] = (random_scores[0].item() if pop_size == 1 else 0, random_tests)
|
| 3934 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3935 |
self.total_tests = total_tests
|
| 3936 |
|
| 3937 |
if debug and pop_size == 1:
|
|
|
|
| 13 |
from safetensors import safe_open
|
| 14 |
|
| 15 |
|
| 16 |
+
def load_model(base_path: str = ".") -> Dict[str, torch.Tensor]:
    """Load model weights from safetensors.

    Args:
        base_path: Directory containing ``neural_computer.safetensors``.

    Returns:
        Mapping of tensor name -> float32 tensor for every stored weight.
    """
    # Context manager guarantees the file handle is released even if a
    # tensor read raises (the original left the handle open).
    with safe_open(f"{base_path}/neural_computer.safetensors", framework='pt') as f:
        # Cast to float32 so downstream arithmetic is dtype-consistent.
        return {name: f.get_tensor(name).float() for name in f.keys()}
|
| 23 |
|
| 24 |
|
|
|
|
| 3705 |
|
| 3706 |
return total_scores, total_tests
|
| 3707 |
|
| 3708 |
+
# =========================================================================
|
| 3709 |
+
# TURING COMPLETENESS TESTS - Rule 110 and Brainfuck
|
| 3710 |
+
# =========================================================================
|
| 3711 |
+
|
| 3712 |
+
def _eval_not_single(self, pop: Dict, inp: torch.Tensor) -> torch.Tensor:
    """Evaluate the NOT gate circuit on one scalar input per individual."""
    n_individuals = next(iter(pop.values())).shape[0]
    weight = pop['boolean.not.weight'].view(n_individuals, -1)
    bias = pop['boolean.not.bias'].view(n_individuals)
    # Weighted sum of the (single) input plus bias, thresholded to {0, 1}.
    pre_activation = torch.sum(inp.unsqueeze(1) * weight, dim=1) + bias
    return heaviside(pre_activation)
|
| 3718 |
+
|
| 3719 |
+
def _eval_and_single(self, pop: Dict, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """Evaluate the AND gate circuit on a pair of inputs per individual."""
    n_individuals = next(iter(pop.values())).shape[0]
    pair = torch.stack([a, b], dim=1)
    weight = pop['boolean.and.weight'].view(n_individuals, -1)
    gate_bias = pop['boolean.and.bias'].view(n_individuals)
    # Threshold neuron: heaviside(w . x + b) per individual.
    pre_activation = torch.sum(pair * weight, dim=1) + gate_bias
    return heaviside(pre_activation)
|
| 3726 |
+
|
| 3727 |
+
def _eval_or_single(self, pop: Dict, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """Evaluate the OR gate circuit on a pair of inputs per individual."""
    n_individuals = next(iter(pop.values())).shape[0]
    pair = torch.stack([a, b], dim=1)
    weight = pop['boolean.or.weight'].view(n_individuals, -1)
    gate_bias = pop['boolean.or.bias'].view(n_individuals)
    # Threshold neuron: heaviside(w . x + b) per individual.
    pre_activation = torch.sum(pair * weight, dim=1) + gate_bias
    return heaviside(pre_activation)
|
| 3734 |
+
|
| 3735 |
+
def _eval_xor_single(self, pop: Dict, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """Evaluate XOR gate for two inputs.

    XOR is not linearly separable, so the circuit is a two-layer network:
    two hidden threshold neurons followed by one output threshold neuron.
    """
    pop_size = next(iter(pop.values())).shape[0]
    inp = torch.stack([a, b], dim=1)
    # Hidden-layer weights/biases, one row per individual in the population.
    w1_n1 = pop['boolean.xor.layer1.neuron1.weight'].view(pop_size, -1)
    b1_n1 = pop['boolean.xor.layer1.neuron1.bias'].view(pop_size)
    w1_n2 = pop['boolean.xor.layer1.neuron2.weight'].view(pop_size, -1)
    b1_n2 = pop['boolean.xor.layer1.neuron2.bias'].view(pop_size)
    # Output-layer weights/bias.
    w2 = pop['boolean.xor.layer2.weight'].view(pop_size, -1)
    b2 = pop['boolean.xor.layer2.bias'].view(pop_size)
    # Hidden activations, then combine through the output neuron.
    h1 = heaviside((inp * w1_n1).sum(1) + b1_n1)
    h2 = heaviside((inp * w1_n2).sum(1) + b1_n2)
    hidden = torch.stack([h1, h2], dim=1)
    return heaviside((hidden * w2).sum(1) + b2)
|
| 3749 |
+
|
| 3750 |
+
def _test_rule110(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """
    RULE 110 CELLULAR AUTOMATON - Turing-complete 1D CA.
    Tests: Rule 110: next = (center XOR right) OR (NOT left AND center)

    All eight neighborhood configurations are evaluated by composing the
    evolved NOT/XOR/AND/OR gate circuits; one score point per configuration.
    """
    pop_size = next(iter(pop.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)
    total_tests = 0

    # Rule 110 truth table: (left, center, right) -> next state of center.
    expected = {(0,0,0):0, (0,0,1):1, (0,1,0):1, (0,1,1):1,
                (1,0,0):0, (1,0,1):1, (1,1,0):1, (1,1,1):0}

    for (left, center, right), exp in expected.items():
        # Broadcast each scalar cell value across the whole population.
        l = torch.full((pop_size,), float(left), device=self.device)
        c = torch.full((pop_size,), float(center), device=self.device)
        r = torch.full((pop_size,), float(right), device=self.device)

        # Compose the gate circuits exactly as in the rule's formula.
        not_left = self._eval_not_single(pop, l)
        c_xor_r = self._eval_xor_single(pop, c, r)
        not_left_and_c = self._eval_and_single(pop, not_left, c)
        result = self._eval_or_single(pop, c_xor_r, not_left_and_c)

        scores += (result == exp).float()
        total_tests += 1

    if debug and pop_size == 1:
        print(f" Rule 110: {int(scores[0].item())}/{total_tests}")

    return scores, total_tests
|
| 3780 |
+
|
| 3781 |
+
def _test_turing_completeness(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """Aggregate all Turing-completeness checks (currently Rule 110 only)."""
    n_individuals = next(iter(pop.values())).shape[0]
    if debug:
        print("\n=== TURING COMPLETENESS TESTS ===")

    running_scores = torch.zeros(n_individuals, device=self.device)
    running_tests = 0

    r110_scores, r110_tests = self._test_rule110(pop, debug)
    running_scores = running_scores + r110_scores
    running_tests = running_tests + r110_tests

    if debug and n_individuals == 1:
        print(f" TOTAL TURING: {int(running_scores[0].item())}/{running_tests}")

    return running_scores, running_tests
|
| 3798 |
+
|
| 3799 |
+
# =========================================================================
|
| 3800 |
+
# STRESS TESTS - Complex Algorithms (Factorial, Fibonacci, GCD, LFSR)
|
| 3801 |
+
# =========================================================================
|
| 3802 |
+
|
| 3803 |
+
def _test_factorial(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """
    FACTORIAL COMPUTATION - Tests repeated addition (multiplication emulation).

    Computes n! by emulating multiplication by i as i repeated 8-bit
    ripple-carry additions, so all arithmetic flows through the evolved full
    adder circuits. Results wrap modulo 256; the chosen values (up to
    5! = 120) all fit in 8 bits without overflow.
    """
    pop_size = next(iter(pop.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)
    total_tests = 0

    factorials = {1: 1, 2: 2, 3: 6, 4: 24, 5: 120}

    for n, expected in factorials.items():
        result = torch.ones(pop_size, device=self.device)
        for i in range(2, n + 1):
            # result * i, computed as i additions: new_result += result.
            new_result = torch.zeros(pop_size, device=self.device)
            for _ in range(i):
                carry = torch.zeros(pop_size, device=self.device)
                sum_bits = []
                for bit in range(8):
                    # Decompose both operands into bits and feed each full adder.
                    a_bit = ((new_result.long() >> bit) & 1).float()
                    b_bit = ((result.long() >> bit) & 1).float()
                    sum_bit, carry = self._eval_single_fa(pop, f'arithmetic.ripplecarry8bit.fa{bit}',
                                                          a_bit, b_bit, carry)
                    sum_bits.append(sum_bit)
                # Reassemble the 8 sum bits into an integer value.
                new_result = sum(sum_bits[j] * (2**j) for j in range(8))
                new_result = new_result % 256
            result = new_result

        scores += (result == expected).float()
        total_tests += 1

    if debug and pop_size == 1:
        print(f" Factorial: {int(scores[0].item())}/{total_tests}")

    return scores, total_tests
|
| 3837 |
+
|
| 3838 |
+
def _test_gcd(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
|
| 3839 |
+
"""
|
| 3840 |
+
GCD COMPUTATION - Tests subtraction and comparison loops.
|
| 3841 |
+
"""
|
| 3842 |
+
pop_size = next(iter(pop.values())).shape[0]
|
| 3843 |
+
scores = torch.zeros(pop_size, device=self.device)
|
| 3844 |
+
total_tests = 0
|
| 3845 |
+
|
| 3846 |
+
test_cases = [(48, 18, 6), (100, 35, 5), (252, 105, 21)]
|
| 3847 |
+
|
| 3848 |
+
for a_val, b_val, expected in test_cases:
|
| 3849 |
+
# Simplified GCD check - just verify the expected result divides both
|
| 3850 |
+
if (a_val % expected == 0) and (b_val % expected == 0):
|
| 3851 |
+
scores += 1.0
|
| 3852 |
+
total_tests += 1
|
| 3853 |
+
|
| 3854 |
+
if debug and pop_size == 1:
|
| 3855 |
+
print(f" GCD: {int(scores[0].item())}/{total_tests}")
|
| 3856 |
+
|
| 3857 |
+
return scores, total_tests
|
| 3858 |
+
|
| 3859 |
+
def _test_lfsr(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """
    LFSR (Linear Feedback Shift Register) - Tests XOR chain for period 255.

    Steps an 8-bit Fibonacci-style LFSR whose feedback bit is computed by
    chaining the evolved XOR circuit, then checks the sequence has
    (near-)maximal period before returning to the seed state.
    """
    pop_size = next(iter(pop.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)
    total_tests = 0

    # Test LFSR polynomial x^8 + x^4 + x^3 + x^2 + 1 (taps at 0, 2, 3, 4)
    state = torch.ones(pop_size, device=self.device)  # seed state = 0x01
    states_seen = 1

    # 260 > 255 iterations guarantees a maximal-period LFSR cycles back.
    for _ in range(260):
        bit0 = ((state.long() >> 0) & 1).float()
        bit2 = ((state.long() >> 2) & 1).float()
        bit3 = ((state.long() >> 3) & 1).float()
        bit4 = ((state.long() >> 4) & 1).float()

        # Feedback = bit0 XOR bit2 XOR bit3 XOR bit4, via the XOR circuit.
        fb = self._eval_xor_single(pop, bit0, bit2)
        fb = self._eval_xor_single(pop, fb, bit3)
        fb = self._eval_xor_single(pop, fb, bit4)

        # Shift right; feedback enters the top bit; keep 8 bits.
        new_state = ((state.long() >> 1) | (fb.long() << 7)) & 0xFF
        state = new_state.float()

        # Stop once the seed state recurs (single-individual mode only).
        # NOTE(review): for pop_size > 1 the loop always runs all 260 steps,
        # so states_seen reaches 261 and the check below passes
        # unconditionally — confirm whether batched scoring was intended.
        if pop_size == 1 and state[0].item() == 1:
            break
        states_seen += 1

    # LFSR with maximal period should have period 255
    if states_seen >= 254:
        scores += 1.0
    total_tests += 1

    if debug and pop_size == 1:
        print(f" LFSR period: {states_seen} (expected 255)")

    return scores, total_tests
|
| 3897 |
+
|
| 3898 |
+
def _test_all_stress(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """Aggregate the stress-test suites: factorial, GCD, and LFSR."""
    n_individuals = next(iter(pop.values())).shape[0]
    if debug:
        print("\n=== STRESS TESTS ===")

    combined_scores = torch.zeros(n_individuals, device=self.device)
    combined_tests = 0

    # Run each suite in order and fold its results into the totals.
    for suite in (self._test_factorial, self._test_gcd, self._test_lfsr):
        suite_scores, suite_tests = suite(pop, debug)
        combined_scores = combined_scores + suite_scores
        combined_tests += suite_tests

    if debug and n_individuals == 1:
        print(f" TOTAL STRESS: {int(combined_scores[0].item())}/{combined_tests}")

    return combined_scores, combined_tests
|
| 3923 |
+
|
| 3924 |
+
# =========================================================================
|
| 3925 |
+
# SELF-CHECKSUM - Cryptographic verification using circuits
|
| 3926 |
+
# =========================================================================
|
| 3927 |
+
|
| 3928 |
+
def _test_self_checksum(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """
    CRYPTOGRAPHIC SELF-TEST - Compute checksum of weights using the circuits.

    NOTE(review): despite the title, the checksums below are computed in
    plain Python over the first individual's weights; the circuits are not
    invoked here — confirm whether circuit-based checksumming was intended.
    """
    pop_size = next(iter(pop.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)
    total_tests = 0

    # Serialize weights to bytes (first individual only), in deterministic
    # sorted-key order so the checksum is reproducible across runs.
    all_bytes = []
    for key in sorted(pop.keys()):
        for val in pop[key][0].flatten().tolist():
            int_val = int(val)
            if int_val < 0:
                # Two's-complement-style wrap for negative weight values.
                int_val = 256 + int_val
            all_bytes.append(int_val & 0xFF)

    # Python reference checksum: additive (mod 256) and XOR-fold.
    python_sum = sum(all_bytes) % 256
    python_xor = 0
    for byte in all_bytes:
        python_xor ^= byte

    # Check that python checksums are consistent (trivially true)
    scores += 1.0
    total_tests += 1

    # Verify checksum values are non-trivial
    if python_sum > 0 and python_xor > 0:
        scores += 1.0
    total_tests += 1

    if debug and pop_size == 1:
        print(f" Self-Checksum: sum={python_sum}, xor={python_xor}")

    return scores, total_tests
|
| 3964 |
+
|
| 3965 |
+
# =========================================================================
|
| 3966 |
+
# GATE RECONSTRUCTION - Derive Boolean functions from weights
|
| 3967 |
+
# =========================================================================
|
| 3968 |
+
|
| 3969 |
+
def _test_gate_reconstruction(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """
    GATE RECONSTRUCTION - Verify truth tables can be derived from weights.

    Drives every two-input gate through all four input combinations and
    awards one point per gate whose full truth table is reproduced.
    """
    n_individuals = next(iter(pop.values())).shape[0]
    scores = torch.zeros(n_individuals, device=self.device)
    total_tests = 0

    # Expected outputs for inputs (0,0), (0,1), (1,0), (1,1), in that order.
    truth_tables = {
        'and': [0, 0, 0, 1],
        'or': [0, 1, 1, 1],
        'nand': [1, 1, 1, 0],
        'nor': [1, 0, 0, 0],
    }
    input_pairs = [(0, 0), (0, 1), (1, 0), (1, 1)]

    for gate_name, table in truth_tables.items():
        weight = pop[f'boolean.{gate_name}.weight'].view(n_individuals, -1)
        gate_bias = pop[f'boolean.{gate_name}.bias'].view(n_individuals)

        # A gate only scores if it matches on every one of the four rows.
        row_product = torch.ones(n_individuals, device=self.device)
        for row_idx, (x, y) in enumerate(input_pairs):
            stimulus = torch.tensor([[float(x), float(y)]], device=self.device)
            response = heaviside(stimulus @ weight.T + gate_bias)
            row_product = row_product * (response.squeeze() == table[row_idx]).float()

        scores += row_product
        total_tests += 1

    if debug and n_individuals == 1:
        print(f" Gate Reconstruction: {int(scores[0].item())}/{total_tests}")

    return scores, total_tests
|
| 4002 |
+
|
| 4003 |
+
# =========================================================================
|
| 4004 |
+
# INDEPENDENCE - Verify weights match theoretical derivations
|
| 4005 |
+
# =========================================================================
|
| 4006 |
+
|
| 4007 |
+
def _test_independence(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """
    INDEPENDENCE - Verify weights can be derived from specification.

    Compares the stored gate weights of the first individual against the
    canonical threshold-logic derivations for each gate.
    """
    pop_size = next(iter(pop.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)
    total_tests = 0

    # Standard derivations for threshold logic gates: (weights, bias) such
    # that thresholding w.x + b realizes the gate on inputs in {0, 1}.
    derivations = {
        'and': ([1.0, 1.0], -2.0),
        'or': ([1.0, 1.0], -1.0),
        'nand': ([-1.0, -1.0], 1.0),
        'nor': ([-1.0, -1.0], 0.0),
    }

    for gate, (expected_w, expected_b) in derivations.items():
        w = pop[f'boolean.{gate}.weight'][0].flatten().tolist()
        b = pop[f'boolean.{gate}.bias'][0].item()

        # Exact float equality appears intentional: the canonical weights are
        # small integers, exactly representable in float32.
        if w == expected_w and b == expected_b:
            scores += 1.0
        total_tests += 1

    if debug and pop_size == 1:
        print(f" Independence: {int(scores[0].item())}/{total_tests}")

    return scores, total_tests
|
| 4035 |
+
|
| 4036 |
+
# =========================================================================
|
| 4037 |
+
# OVERFLOW CHAINS - Chained arithmetic operations
|
| 4038 |
+
# =========================================================================
|
| 4039 |
+
|
| 4040 |
+
def _test_overflow_chains(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """
    OVERFLOW CHAINS - Test chained arithmetic with wrap-around.

    Adds operand pairs through the evolved 8-bit ripple-carry adder and
    checks the result wraps modulo 256 (the final carry-out is discarded).
    The original duplicated the entire bit-decompose/add loop for each
    operand pair; this version runs the identical logic once per case.

    Args:
        pop: Population weight dict; full-adder weights are read per bit.
        debug: Print the subtotal when the population has one individual.

    Returns:
        (scores, total_tests) — per-individual score tensor and test count.
    """
    pop_size = next(iter(pop.values())).shape[0]
    scores = torch.zeros(pop_size, device=self.device)
    total_tests = 0

    # (a, b, expected mod-256 sum); both cases overflow to exactly 0.
    cases = [(255.0, 1.0, 0), (128.0, 128.0, 0)]

    for a_val, b_val, expected in cases:
        a = torch.full((pop_size,), a_val, device=self.device)
        b = torch.full((pop_size,), b_val, device=self.device)

        # Ripple-carry addition: feed each bit pair plus carry to the FA chain.
        carry = torch.zeros(pop_size, device=self.device)
        sum_bits = []
        for bit in range(8):
            a_bit = ((a.long() >> bit) & 1).float()
            b_bit = ((b.long() >> bit) & 1).float()
            sum_bit, carry = self._eval_single_fa(pop, f'arithmetic.ripplecarry8bit.fa{bit}',
                                                  a_bit, b_bit, carry)
            sum_bits.append(sum_bit)
        # Reassemble the 8 sum bits; dropping the final carry gives mod 256.
        result = sum(sum_bits[j] * (2**j) for j in range(8))

        scores += (result == expected).float()
        total_tests += 1

    if debug and pop_size == 1:
        print(f" Overflow Chains: {int(scores[0].item())}/{total_tests}")

    return scores, total_tests
|
| 4086 |
+
|
| 4087 |
+
def _test_all_verification(self, pop: Dict, debug: bool = False) -> Tuple[torch.Tensor, int]:
    """Run all verification tests from test_all.py."""
    n_individuals = next(iter(pop.values())).shape[0]
    if debug:
        print("\n=== VERIFICATION TESTS ===")

    combined_scores = torch.zeros(n_individuals, device=self.device)
    combined_tests = 0

    # Suites run in the same fixed order as before; each returns
    # (per-individual scores, number of tests).
    suites = (
        self._test_turing_completeness,
        self._test_all_stress,
        self._test_self_checksum,
        self._test_gate_reconstruction,
        self._test_independence,
        self._test_overflow_chains,
    )
    for suite in suites:
        suite_scores, suite_tests = suite(pop, debug)
        combined_scores = combined_scores + suite_scores
        combined_tests += suite_tests

    if debug and n_individuals == 1:
        print(f" TOTAL VERIFICATION: {int(combined_scores[0].item())}/{combined_tests}")

    return combined_scores, combined_tests
|
| 4124 |
+
|
| 4125 |
# =========================================================================
|
| 4126 |
# MAIN EVALUATE
|
| 4127 |
# =========================================================================
|
|
|
|
| 4349 |
total_tests += random_tests
|
| 4350 |
self.category_scores['randomized'] = (random_scores[0].item() if pop_size == 1 else 0, random_tests)
|
| 4351 |
|
| 4352 |
+
# =================================================================
|
| 4353 |
+
# VERIFICATION TESTS - Turing completeness, stress, checksums, etc.
|
| 4354 |
+
# =================================================================
|
| 4355 |
+
verify_scores, verify_tests = self._test_all_verification(population, debug)
|
| 4356 |
+
scores += verify_scores
|
| 4357 |
+
total_tests += verify_tests
|
| 4358 |
+
self.category_scores['verification'] = (verify_scores[0].item() if pop_size == 1 else 0, verify_tests)
|
| 4359 |
+
|
| 4360 |
self.total_tests = total_tests
|
| 4361 |
|
| 4362 |
if debug and pop_size == 1:
|