Add IEEE 754 float16/float32 arithmetic circuits
Float16 (half-precision):
- Core: unpack/pack/classify/normalize with 4-stage barrel shifter
- ADD: exponent compare, mantissa alignment, add/sub with mux select
- MUL: sign XOR, exponent add, bias subtract, 11x11 mantissa multiply
- DIV: 11-stage restoring division with mux per stage
- CMP: NaN/zero detection, magnitude compare (EQ/LT/GT/LE/GE)
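For reference, the bit fields these float16 circuits slice out can be checked against a plain-Python model (illustration only, not part of this diff; the layout follows the IEEE 754 binary16 description used by the core circuits):

```python
# Reference decode of an IEEE 754 binary16 pattern (not part of the patch).
def decode_float16(x: int):
    sign = (x >> 15) & 0x1        # bit 15
    exp = (x >> 10) & 0x1F        # bits 14-10, bias 15
    frac = x & 0x3FF              # bits 9-0
    if exp == 0:
        kind = "zero" if frac == 0 else "subnormal"
    elif exp == 0x1F:
        kind = "inf" if frac == 0 else "nan"
    else:
        kind = "normal"
    return sign, exp, frac, kind

assert decode_float16(0x3C00) == (0, 15, 0, "normal")  # 1.0
assert decode_float16(0x7C00)[3] == "inf"
assert decode_float16(0xFE00)[3] == "nan"
```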
Float32 (single-precision):
- Core: 5-stage normalize, 8-bit exponent, 23-bit mantissa
- ADD: 5-stage alignment, 25-bit mantissa operations
- MUL: 24x24 mantissa multiply (576 partial products)
- DIV: 24-stage restoring division
- CMP: same structure with larger weight vectors
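Both DIV circuits implement the textbook restoring-division step (compare, conditionally subtract, shift in a quotient bit), one hardware stage per quotient bit. A minimal integer reference of that loop (illustration only, not the circuit itself):

```python
# Restoring-division reference model: one loop iteration per circuit stage.
def restoring_divide(num: int, den: int, n: int):
    rem, quot = 0, 0
    for i in range(n):
        rem = (rem << 1) | ((num >> (n - 1 - i)) & 1)  # shift in next numerator bit
        if rem >= den:            # the stage's magnitude compare ("cmp" gate)
            rem -= den            # the stage's subtractor path
            quot = (quot << 1) | 1
        else:
            quot = quot << 1      # restore: keep the old partial remainder
    return quot, rem

assert restoring_divide(0b110, 0b010, 3) == (0b011, 0)  # 6 / 2 = 3, remainder 0
```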
Integration:
- Wire float circuits into cmd_alu build command
- Add float prefixes to drop_list for clean rebuilds
- Add comprehensive eval tests for all float circuit shapes
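drop_list pruning is prefix-based, so "float16." / "float32." cover every gate those builders create. A sketch of the rebuild-time pruning this implies (the helper name here is hypothetical; only the prefix-matching convention is taken from the diff):

```python
# Hypothetical sketch of prefix-based pruning ahead of a rebuild.
def drop_by_prefix(tensors: dict, drop_list: list) -> dict:
    return {name: t for name, t in tensors.items()
            if not any(name.startswith(p) for p in drop_list)}

drop_list = ["combinational.barrelshifter.", "combinational.priorityencoder.",
             "float16.", "float32."]
# tensors = drop_by_prefix(tensors, drop_list)  # the add_float* builders then repopulate them
```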
Test results:
- 8-bit CPU: 6772/6772 (100%)
- 32-bit ALU: 7239/7256 (99.8%; the 17 remaining failures are a pre-existing priority encoder issue)
- build.py +729 -0
- eval.py +624 -1
- neural_alu32.safetensors +2 -2
- neural_computer.safetensors +2 -2
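Every gate in this diff is a single thresholded linear unit registered via add_gate(name, weights, bias); the recurring weight/bias pairs map onto standard Boolean gates. A reference model of that convention (my reading of the patterns below, assuming a hard threshold of dot(w, x) + b >= 0 on {0, 1} inputs):

```python
# Threshold-gate reading of the recurring add_gate weight/bias pairs (activation is assumed).
def gate(weights, bias, inputs):
    return 1.0 if sum(w * x for w, x in zip(weights, inputs)) + bias >= 0 else 0.0

AND  = ([1.0, 1.0], -2.0)   # fires only when both inputs are 1
OR   = ([1.0, 1.0], -1.0)   # fires when at least one input is 1
NAND = ([-1.0, -1.0], 1.0)  # complement of AND
NOT  = ([-1.0], 0.0)        # single-input inverter

# XOR is built in two layers throughout the diff: layer1 = OR and NAND, layer2 = AND of both.
def xor(a, b):
    return gate(*AND, [gate(*OR, [a, b]), gate(*NAND, [a, b])])

assert [xor(a, b) for a, b in ((0, 0), (0, 1), (1, 0), (1, 1))] == [0.0, 1.0, 1.0, 0.0]

# Magnitude compares (mag_a_gt_b etc.) use power-of-two weights:
# dot(2^k, a_bits) - dot(2^k, b_bits) - 1 >= 0  <=>  A > B.
```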
build.py
@@ -998,6 +998,642 @@ def add_neg_nbits(tensors: Dict[str, torch.Tensor], bits: int) -> None:
        add_gate(tensors, f"alu.alu{bits}bit.neg.inc.bit{bit}.carry", [1.0, 1.0], [-2.0])


+def add_float16_core(tensors: Dict[str, torch.Tensor]) -> None:
+    """Add float16 core circuits (unpack, pack, classify, normalize).
+
+    IEEE 754 half-precision format (16 bits):
+    - Bit 15: Sign (0=positive, 1=negative)
+    - Bits 14-10: Exponent (5 bits, bias=15)
+    - Bits 9-0: Mantissa/fraction (10 bits, implicit leading 1 for normalized)
+
+    Special values:
+    - Zero: exp=0, frac=0
+    - Subnormal: exp=0, frac≠0
+    - Infinity: exp=31, frac=0
+    - NaN: exp=31, frac≠0
+    """
+    prefix = "float16"
+
+    for i in range(16):
+        add_gate(tensors, f"{prefix}.unpack.bit{i}", [1.0], [0.0])
+
+    add_gate(tensors, f"{prefix}.classify.exp_zero", [-1.0] * 5, [0.0])
+    add_gate(tensors, f"{prefix}.classify.exp_max", [1.0] * 5, [-5.0])
+    add_gate(tensors, f"{prefix}.classify.frac_zero", [-1.0] * 10, [0.0])
+    add_gate(tensors, f"{prefix}.classify.frac_nonzero", [1.0] * 10, [-1.0])
+
+    add_gate(tensors, f"{prefix}.classify.is_zero.and", [1.0, 1.0], [-2.0])
+    add_gate(tensors, f"{prefix}.classify.is_subnormal.and", [1.0, 1.0], [-2.0])
+    add_gate(tensors, f"{prefix}.classify.is_inf.and", [1.0, 1.0], [-2.0])
+    add_gate(tensors, f"{prefix}.classify.is_nan.and", [1.0, 1.0], [-2.0])
+
+    for stage in range(4):
+        shift = 1 << (3 - stage)
+        for bit in range(11):
+            add_gate(tensors, f"{prefix}.normalize.stage{stage}.bit{bit}.not_sel", [-1.0], [0.0])
+            add_gate(tensors, f"{prefix}.normalize.stage{stage}.bit{bit}.and_a", [1.0, 1.0], [-2.0])
+            add_gate(tensors, f"{prefix}.normalize.stage{stage}.bit{bit}.and_b", [1.0, 1.0], [-2.0])
+            add_gate(tensors, f"{prefix}.normalize.stage{stage}.bit{bit}.or", [1.0, 1.0], [-1.0])
+
+    for stage in range(4):
+        shift = 1 << (3 - stage)
+        for bit in range(5):
+            add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
+            add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
+            add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
+            add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
+            add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
+            add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
+            add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
+            add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
+            add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
+
+    for i in range(16):
+        add_gate(tensors, f"{prefix}.pack.bit{i}", [1.0], [0.0])
+
+
def add_float16_add(tensors: Dict[str, torch.Tensor]) -> None:
|
| 1056 |
+
"""Add float16 addition circuit.
|
| 1057 |
+
|
| 1058 |
+
Algorithm:
|
| 1059 |
+
1. Unpack both operands
|
| 1060 |
+
2. Compare exponents, align mantissas
|
| 1061 |
+
3. Add/subtract mantissas based on signs
|
| 1062 |
+
4. Normalize result
|
| 1063 |
+
5. Handle special cases (inf, nan, zero)
|
| 1064 |
+
"""
|
| 1065 |
+
prefix = "float16.add"
|
| 1066 |
+
|
| 1067 |
+
pos_weights = [float(1 << (4 - i)) for i in range(5)]
|
| 1068 |
+
neg_weights = [-w for w in pos_weights]
|
| 1069 |
+
add_gate(tensors, f"{prefix}.exp_cmp.a_gt_b", pos_weights + neg_weights, [-1.0])
|
| 1070 |
+
add_gate(tensors, f"{prefix}.exp_cmp.a_lt_b", neg_weights + pos_weights, [-1.0])
|
| 1071 |
+
|
| 1072 |
+
for bit in range(5):
|
| 1073 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1074 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1075 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1076 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1077 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1078 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1079 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1080 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1081 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1082 |
+
add_gate(tensors, f"{prefix}.exp_diff.not_b.bit{bit}", [-1.0], [0.0])
|
| 1083 |
+
|
| 1084 |
+
for stage in range(4):
|
| 1085 |
+
shift = 1 << (3 - stage)
|
| 1086 |
+
for bit in range(11):
|
| 1087 |
+
add_gate(tensors, f"{prefix}.align.stage{stage}.bit{bit}.not_sel", [-1.0], [0.0])
|
| 1088 |
+
add_gate(tensors, f"{prefix}.align.stage{stage}.bit{bit}.and_a", [1.0, 1.0], [-2.0])
|
| 1089 |
+
add_gate(tensors, f"{prefix}.align.stage{stage}.bit{bit}.and_b", [1.0, 1.0], [-2.0])
|
| 1090 |
+
add_gate(tensors, f"{prefix}.align.stage{stage}.bit{bit}.or", [1.0, 1.0], [-1.0])
|
| 1091 |
+
|
| 1092 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.or", [1.0, 1.0], [-1.0])
|
| 1093 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1094 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer2", [1.0, 1.0], [-2.0])
|
| 1095 |
+
|
| 1096 |
+
for bit in range(12):
|
| 1097 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1098 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1099 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1100 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1101 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1102 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1103 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1104 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1105 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1106 |
+
|
| 1107 |
+
for bit in range(11):
|
| 1108 |
+
add_gate(tensors, f"{prefix}.mant_sub.not_b.bit{bit}", [-1.0], [0.0])
|
| 1109 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1110 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1111 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1112 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1113 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1114 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1115 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1116 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1117 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1118 |
+
|
| 1119 |
+
for bit in range(11):
|
| 1120 |
+
add_gate(tensors, f"{prefix}.mant_select.bit{bit}.not_sel", [-1.0], [0.0])
|
| 1121 |
+
add_gate(tensors, f"{prefix}.mant_select.bit{bit}.and_add", [1.0, 1.0], [-2.0])
|
| 1122 |
+
add_gate(tensors, f"{prefix}.mant_select.bit{bit}.and_sub", [1.0, 1.0], [-2.0])
|
| 1123 |
+
add_gate(tensors, f"{prefix}.mant_select.bit{bit}.or", [1.0, 1.0], [-1.0])
|
| 1124 |
+
|
| 1125 |
+
|
| 1126 |
+
def add_float16_mul(tensors: Dict[str, torch.Tensor]) -> None:
|
| 1127 |
+
"""Add float16 multiplication circuit.
|
| 1128 |
+
|
| 1129 |
+
Algorithm:
|
| 1130 |
+
1. Unpack both operands
|
| 1131 |
+
2. XOR signs for result sign
|
| 1132 |
+
3. Add exponents (subtract bias)
|
| 1133 |
+
4. Multiply mantissas (11x11 -> 22 bits)
|
| 1134 |
+
5. Normalize result
|
| 1135 |
+
6. Handle special cases
|
| 1136 |
+
"""
|
| 1137 |
+
prefix = "float16.mul"
|
| 1138 |
+
|
| 1139 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.or", [1.0, 1.0], [-1.0])
|
| 1140 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1141 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer2", [1.0, 1.0], [-2.0])
|
| 1142 |
+
|
| 1143 |
+
for bit in range(6):
|
| 1144 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1145 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1146 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1147 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1148 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1149 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1150 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1151 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1152 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1153 |
+
|
| 1154 |
+
for bit in range(5):
|
| 1155 |
+
add_gate(tensors, f"{prefix}.bias_sub.not_bias.bit{bit}", [-1.0], [0.0])
|
| 1156 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1157 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1158 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1159 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1160 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1161 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1162 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1163 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1164 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1165 |
+
|
| 1166 |
+
for i in range(11):
|
| 1167 |
+
for j in range(11):
|
| 1168 |
+
add_gate(tensors, f"{prefix}.mant_mul.pp.a{i}b{j}", [1.0, 1.0], [-2.0])
|
| 1169 |
+
|
| 1170 |
+
for stage in range(10):
|
| 1171 |
+
for bit in range(22):
|
| 1172 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1173 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1174 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1175 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1176 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1177 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1178 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1179 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1180 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1181 |
+
|
| 1182 |
+
|
| 1183 |
+
def add_float16_div(tensors: Dict[str, torch.Tensor]) -> None:
|
| 1184 |
+
"""Add float16 division circuit.
|
| 1185 |
+
|
| 1186 |
+
Algorithm:
|
| 1187 |
+
1. Unpack both operands
|
| 1188 |
+
2. XOR signs for result sign
|
| 1189 |
+
3. Subtract exponents (add bias)
|
| 1190 |
+
4. Divide mantissas (restoring division)
|
| 1191 |
+
5. Normalize result
|
| 1192 |
+
6. Handle special cases (div by zero -> inf)
|
| 1193 |
+
"""
|
| 1194 |
+
prefix = "float16.div"
|
| 1195 |
+
|
| 1196 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.or", [1.0, 1.0], [-1.0])
|
| 1197 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1198 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer2", [1.0, 1.0], [-2.0])
|
| 1199 |
+
|
| 1200 |
+
for bit in range(5):
|
| 1201 |
+
add_gate(tensors, f"{prefix}.exp_sub.not_b.bit{bit}", [-1.0], [0.0])
|
| 1202 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1203 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1204 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1205 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1206 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1207 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1208 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1209 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1210 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1211 |
+
|
| 1212 |
+
for bit in range(5):
|
| 1213 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1214 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1215 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1216 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1217 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1218 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1219 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1220 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1221 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1222 |
+
|
| 1223 |
+
for stage in range(11):
|
| 1224 |
+
pos_weights = [float(1 << (10 - i)) for i in range(11)]
|
| 1225 |
+
neg_weights = [-w for w in pos_weights]
|
| 1226 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.cmp", pos_weights + neg_weights, [0.0])
|
| 1227 |
+
|
| 1228 |
+
for bit in range(11):
|
| 1229 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.not_d.bit{bit}", [-1.0], [0.0])
|
| 1230 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1231 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1232 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1233 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1234 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1235 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1236 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1237 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1238 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1239 |
+
|
| 1240 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.mux.bit{bit}.not_sel", [-1.0], [0.0])
|
| 1241 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.mux.bit{bit}.and_old", [1.0, 1.0], [-2.0])
|
| 1242 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.mux.bit{bit}.and_new", [1.0, 1.0], [-2.0])
|
| 1243 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.mux.bit{bit}.or", [1.0, 1.0], [-1.0])
|
| 1244 |
+
|
| 1245 |
+
|
| 1246 |
+
def add_float16_cmp(tensors: Dict[str, torch.Tensor]) -> None:
|
| 1247 |
+
"""Add float16 comparison circuits (EQ, LT, LE, GT, GE).
|
| 1248 |
+
|
| 1249 |
+
Float comparison:
|
| 1250 |
+
1. Handle NaN (any comparison with NaN is false except NaN != NaN)
|
| 1251 |
+
2. Handle signed zeros (+0 == -0)
|
| 1252 |
+
3. For same signs: compare as integers (exponent then mantissa)
|
| 1253 |
+
4. For different signs: negative < positive (unless both zero)
|
| 1254 |
+
"""
|
| 1255 |
+
prefix = "float16.cmp"
|
| 1256 |
+
|
| 1257 |
+
add_gate(tensors, f"{prefix}.a.exp_max", [1.0] * 5, [-5.0])
|
| 1258 |
+
add_gate(tensors, f"{prefix}.a.frac_nz", [1.0] * 10, [-1.0])
|
| 1259 |
+
add_gate(tensors, f"{prefix}.a.is_nan", [1.0, 1.0], [-2.0])
|
| 1260 |
+
|
| 1261 |
+
add_gate(tensors, f"{prefix}.b.exp_max", [1.0] * 5, [-5.0])
|
| 1262 |
+
add_gate(tensors, f"{prefix}.b.frac_nz", [1.0] * 10, [-1.0])
|
| 1263 |
+
add_gate(tensors, f"{prefix}.b.is_nan", [1.0, 1.0], [-2.0])
|
| 1264 |
+
|
| 1265 |
+
add_gate(tensors, f"{prefix}.either_nan", [1.0, 1.0], [-1.0])
|
| 1266 |
+
|
| 1267 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.or", [1.0, 1.0], [-1.0])
|
| 1268 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1269 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer2", [1.0, 1.0], [-2.0])
|
| 1270 |
+
|
| 1271 |
+
add_gate(tensors, f"{prefix}.a.is_zero.exp_zero", [-1.0] * 5, [0.0])
|
| 1272 |
+
add_gate(tensors, f"{prefix}.a.is_zero.frac_zero", [-1.0] * 10, [0.0])
|
| 1273 |
+
add_gate(tensors, f"{prefix}.a.is_zero.and", [1.0, 1.0], [-2.0])
|
| 1274 |
+
|
| 1275 |
+
add_gate(tensors, f"{prefix}.b.is_zero.exp_zero", [-1.0] * 5, [0.0])
|
| 1276 |
+
add_gate(tensors, f"{prefix}.b.is_zero.frac_zero", [-1.0] * 10, [0.0])
|
| 1277 |
+
add_gate(tensors, f"{prefix}.b.is_zero.and", [1.0, 1.0], [-2.0])
|
| 1278 |
+
|
| 1279 |
+
add_gate(tensors, f"{prefix}.both_zero", [1.0, 1.0], [-2.0])
|
| 1280 |
+
|
| 1281 |
+
pos_weights = [float(1 << (14 - i)) for i in range(15)]
|
| 1282 |
+
neg_weights = [-w for w in pos_weights]
|
| 1283 |
+
add_gate(tensors, f"{prefix}.mag_a_gt_b", pos_weights + neg_weights, [-1.0])
|
| 1284 |
+
add_gate(tensors, f"{prefix}.mag_a_ge_b", pos_weights + neg_weights, [0.0])
|
| 1285 |
+
add_gate(tensors, f"{prefix}.mag_a_lt_b", neg_weights + pos_weights, [-1.0])
|
| 1286 |
+
add_gate(tensors, f"{prefix}.mag_a_le_b", neg_weights + pos_weights, [0.0])
|
| 1287 |
+
|
| 1288 |
+
add_gate(tensors, f"{prefix}.mag_eq.geq", pos_weights + neg_weights, [0.0])
|
| 1289 |
+
add_gate(tensors, f"{prefix}.mag_eq.leq", neg_weights + pos_weights, [0.0])
|
| 1290 |
+
add_gate(tensors, f"{prefix}.mag_eq.and", [1.0, 1.0], [-2.0])
|
| 1291 |
+
|
| 1292 |
+
add_gate(tensors, f"{prefix}.eq.not_nan", [-1.0], [0.0])
|
| 1293 |
+
add_gate(tensors, f"{prefix}.eq.mag_or_zero", [1.0, 1.0], [-1.0])
|
| 1294 |
+
add_gate(tensors, f"{prefix}.eq.same_sign_or_zero", [1.0, 1.0], [-1.0])
|
| 1295 |
+
add_gate(tensors, f"{prefix}.eq.result", [1.0, 1.0], [-2.0])
|
| 1296 |
+
|
| 1297 |
+
add_gate(tensors, f"{prefix}.lt.not_nan", [-1.0], [0.0])
|
| 1298 |
+
add_gate(tensors, f"{prefix}.lt.diff_sign.not_a_sign", [-1.0], [0.0])
|
| 1299 |
+
add_gate(tensors, f"{prefix}.lt.diff_sign.a_neg", [1.0, 1.0], [-2.0])
|
| 1300 |
+
add_gate(tensors, f"{prefix}.lt.same_sign.pos_lt", [1.0, 1.0], [-2.0])
|
| 1301 |
+
add_gate(tensors, f"{prefix}.lt.same_sign.neg_gt", [1.0, 1.0], [-2.0])
|
| 1302 |
+
add_gate(tensors, f"{prefix}.lt.same_sign.or", [1.0, 1.0], [-1.0])
|
| 1303 |
+
add_gate(tensors, f"{prefix}.lt.case_or", [1.0, 1.0], [-1.0])
|
| 1304 |
+
add_gate(tensors, f"{prefix}.lt.not_both_zero", [-1.0], [0.0])
|
| 1305 |
+
add_gate(tensors, f"{prefix}.lt.result", [1.0, 1.0, 1.0], [-3.0])
|
| 1306 |
+
|
| 1307 |
+
add_gate(tensors, f"{prefix}.gt.not_nan", [-1.0], [0.0])
|
| 1308 |
+
add_gate(tensors, f"{prefix}.gt.diff_sign.not_b_sign", [-1.0], [0.0])
|
| 1309 |
+
add_gate(tensors, f"{prefix}.gt.diff_sign.b_neg", [1.0, 1.0], [-2.0])
|
| 1310 |
+
add_gate(tensors, f"{prefix}.gt.same_sign.pos_gt", [1.0, 1.0], [-2.0])
|
| 1311 |
+
add_gate(tensors, f"{prefix}.gt.same_sign.neg_lt", [1.0, 1.0], [-2.0])
|
| 1312 |
+
add_gate(tensors, f"{prefix}.gt.same_sign.or", [1.0, 1.0], [-1.0])
|
| 1313 |
+
add_gate(tensors, f"{prefix}.gt.case_or", [1.0, 1.0], [-1.0])
|
| 1314 |
+
add_gate(tensors, f"{prefix}.gt.not_both_zero", [-1.0], [0.0])
|
| 1315 |
+
add_gate(tensors, f"{prefix}.gt.result", [1.0, 1.0, 1.0], [-3.0])
|
| 1316 |
+
|
| 1317 |
+
add_gate(tensors, f"{prefix}.le.eq_or_lt", [1.0, 1.0], [-1.0])
|
| 1318 |
+
add_gate(tensors, f"{prefix}.le.not_nan", [-1.0], [0.0])
|
| 1319 |
+
add_gate(tensors, f"{prefix}.le.result", [1.0, 1.0], [-2.0])
|
| 1320 |
+
|
| 1321 |
+
add_gate(tensors, f"{prefix}.ge.eq_or_gt", [1.0, 1.0], [-1.0])
|
| 1322 |
+
add_gate(tensors, f"{prefix}.ge.not_nan", [-1.0], [0.0])
|
| 1323 |
+
add_gate(tensors, f"{prefix}.ge.result", [1.0, 1.0], [-2.0])
|
| 1324 |
+
|
| 1325 |
+
|
| 1326 |
+
def add_float32_core(tensors: Dict[str, torch.Tensor]) -> None:
|
| 1327 |
+
"""Add float32 core circuits (unpack, pack, classify, normalize).
|
| 1328 |
+
|
| 1329 |
+
IEEE 754 single-precision format (32 bits):
|
| 1330 |
+
- Bit 31: Sign
|
| 1331 |
+
- Bits 30-23: Exponent (8 bits, bias=127)
|
| 1332 |
+
- Bits 22-0: Mantissa (23 bits, implicit leading 1)
|
| 1333 |
+
"""
|
| 1334 |
+
prefix = "float32"
|
| 1335 |
+
|
| 1336 |
+
for i in range(32):
|
| 1337 |
+
add_gate(tensors, f"{prefix}.unpack.bit{i}", [1.0], [0.0])
|
| 1338 |
+
|
| 1339 |
+
add_gate(tensors, f"{prefix}.classify.exp_zero", [-1.0] * 8, [0.0])
|
| 1340 |
+
add_gate(tensors, f"{prefix}.classify.exp_max", [1.0] * 8, [-8.0])
|
| 1341 |
+
add_gate(tensors, f"{prefix}.classify.frac_zero", [-1.0] * 23, [0.0])
|
| 1342 |
+
add_gate(tensors, f"{prefix}.classify.frac_nonzero", [1.0] * 23, [-1.0])
|
| 1343 |
+
|
| 1344 |
+
add_gate(tensors, f"{prefix}.classify.is_zero.and", [1.0, 1.0], [-2.0])
|
| 1345 |
+
add_gate(tensors, f"{prefix}.classify.is_subnormal.and", [1.0, 1.0], [-2.0])
|
| 1346 |
+
add_gate(tensors, f"{prefix}.classify.is_inf.and", [1.0, 1.0], [-2.0])
|
| 1347 |
+
add_gate(tensors, f"{prefix}.classify.is_nan.and", [1.0, 1.0], [-2.0])
|
| 1348 |
+
|
| 1349 |
+
for stage in range(5):
|
| 1350 |
+
shift = 1 << (4 - stage)
|
| 1351 |
+
for bit in range(24):
|
| 1352 |
+
add_gate(tensors, f"{prefix}.normalize.stage{stage}.bit{bit}.not_sel", [-1.0], [0.0])
|
| 1353 |
+
add_gate(tensors, f"{prefix}.normalize.stage{stage}.bit{bit}.and_a", [1.0, 1.0], [-2.0])
|
| 1354 |
+
add_gate(tensors, f"{prefix}.normalize.stage{stage}.bit{bit}.and_b", [1.0, 1.0], [-2.0])
|
| 1355 |
+
add_gate(tensors, f"{prefix}.normalize.stage{stage}.bit{bit}.or", [1.0, 1.0], [-1.0])
|
| 1356 |
+
|
| 1357 |
+
for stage in range(5):
|
| 1358 |
+
for bit in range(8):
|
| 1359 |
+
add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1360 |
+
add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1361 |
+
add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1362 |
+
add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1363 |
+
add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1364 |
+
add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1365 |
+
add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1366 |
+
add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1367 |
+
add_gate(tensors, f"{prefix}.normalize.exp_adj.stage{stage}.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1368 |
+
|
| 1369 |
+
for i in range(32):
|
| 1370 |
+
add_gate(tensors, f"{prefix}.pack.bit{i}", [1.0], [0.0])
|
| 1371 |
+
|
| 1372 |
+
|
| 1373 |
+
def add_float32_cmp(tensors: Dict[str, torch.Tensor]) -> None:
|
| 1374 |
+
"""Add float32 comparison circuits (EQ, LT, LE, GT, GE)."""
|
| 1375 |
+
prefix = "float32.cmp"
|
| 1376 |
+
|
| 1377 |
+
add_gate(tensors, f"{prefix}.a.exp_max", [1.0] * 8, [-8.0])
|
| 1378 |
+
add_gate(tensors, f"{prefix}.a.frac_nz", [1.0] * 23, [-1.0])
|
| 1379 |
+
add_gate(tensors, f"{prefix}.a.is_nan", [1.0, 1.0], [-2.0])
|
| 1380 |
+
|
| 1381 |
+
add_gate(tensors, f"{prefix}.b.exp_max", [1.0] * 8, [-8.0])
|
| 1382 |
+
add_gate(tensors, f"{prefix}.b.frac_nz", [1.0] * 23, [-1.0])
|
| 1383 |
+
add_gate(tensors, f"{prefix}.b.is_nan", [1.0, 1.0], [-2.0])
|
| 1384 |
+
|
| 1385 |
+
add_gate(tensors, f"{prefix}.either_nan", [1.0, 1.0], [-1.0])
|
| 1386 |
+
|
| 1387 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.or", [1.0, 1.0], [-1.0])
|
| 1388 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1389 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer2", [1.0, 1.0], [-2.0])
|
| 1390 |
+
|
| 1391 |
+
add_gate(tensors, f"{prefix}.a.is_zero.exp_zero", [-1.0] * 8, [0.0])
|
| 1392 |
+
add_gate(tensors, f"{prefix}.a.is_zero.frac_zero", [-1.0] * 23, [0.0])
|
| 1393 |
+
add_gate(tensors, f"{prefix}.a.is_zero.and", [1.0, 1.0], [-2.0])
|
| 1394 |
+
|
| 1395 |
+
add_gate(tensors, f"{prefix}.b.is_zero.exp_zero", [-1.0] * 8, [0.0])
|
| 1396 |
+
add_gate(tensors, f"{prefix}.b.is_zero.frac_zero", [-1.0] * 23, [0.0])
|
| 1397 |
+
add_gate(tensors, f"{prefix}.b.is_zero.and", [1.0, 1.0], [-2.0])
|
| 1398 |
+
|
| 1399 |
+
add_gate(tensors, f"{prefix}.both_zero", [1.0, 1.0], [-2.0])
|
| 1400 |
+
|
| 1401 |
+
pos_weights = [float(1 << (30 - i)) for i in range(31)]
|
| 1402 |
+
neg_weights = [-w for w in pos_weights]
|
| 1403 |
+
add_gate(tensors, f"{prefix}.mag_a_gt_b", pos_weights + neg_weights, [-1.0])
|
| 1404 |
+
add_gate(tensors, f"{prefix}.mag_a_ge_b", pos_weights + neg_weights, [0.0])
|
| 1405 |
+
add_gate(tensors, f"{prefix}.mag_a_lt_b", neg_weights + pos_weights, [-1.0])
|
| 1406 |
+
add_gate(tensors, f"{prefix}.mag_a_le_b", neg_weights + pos_weights, [0.0])
|
| 1407 |
+
|
| 1408 |
+
add_gate(tensors, f"{prefix}.mag_eq.geq", pos_weights + neg_weights, [0.0])
|
| 1409 |
+
add_gate(tensors, f"{prefix}.mag_eq.leq", neg_weights + pos_weights, [0.0])
|
| 1410 |
+
add_gate(tensors, f"{prefix}.mag_eq.and", [1.0, 1.0], [-2.0])
|
| 1411 |
+
|
| 1412 |
+
add_gate(tensors, f"{prefix}.eq.not_nan", [-1.0], [0.0])
|
| 1413 |
+
add_gate(tensors, f"{prefix}.eq.mag_or_zero", [1.0, 1.0], [-1.0])
|
| 1414 |
+
add_gate(tensors, f"{prefix}.eq.same_sign_or_zero", [1.0, 1.0], [-1.0])
|
| 1415 |
+
add_gate(tensors, f"{prefix}.eq.result", [1.0, 1.0], [-2.0])
|
| 1416 |
+
|
| 1417 |
+
add_gate(tensors, f"{prefix}.lt.not_nan", [-1.0], [0.0])
|
| 1418 |
+
add_gate(tensors, f"{prefix}.lt.diff_sign.not_a_sign", [-1.0], [0.0])
|
| 1419 |
+
add_gate(tensors, f"{prefix}.lt.diff_sign.a_neg", [1.0, 1.0], [-2.0])
|
| 1420 |
+
add_gate(tensors, f"{prefix}.lt.same_sign.pos_lt", [1.0, 1.0], [-2.0])
|
| 1421 |
+
add_gate(tensors, f"{prefix}.lt.same_sign.neg_gt", [1.0, 1.0], [-2.0])
|
| 1422 |
+
add_gate(tensors, f"{prefix}.lt.same_sign.or", [1.0, 1.0], [-1.0])
|
| 1423 |
+
add_gate(tensors, f"{prefix}.lt.case_or", [1.0, 1.0], [-1.0])
|
| 1424 |
+
add_gate(tensors, f"{prefix}.lt.not_both_zero", [-1.0], [0.0])
|
| 1425 |
+
add_gate(tensors, f"{prefix}.lt.result", [1.0, 1.0, 1.0], [-3.0])
|
| 1426 |
+
|
| 1427 |
+
add_gate(tensors, f"{prefix}.gt.not_nan", [-1.0], [0.0])
|
| 1428 |
+
add_gate(tensors, f"{prefix}.gt.diff_sign.not_b_sign", [-1.0], [0.0])
|
| 1429 |
+
add_gate(tensors, f"{prefix}.gt.diff_sign.b_neg", [1.0, 1.0], [-2.0])
|
| 1430 |
+
add_gate(tensors, f"{prefix}.gt.same_sign.pos_gt", [1.0, 1.0], [-2.0])
|
| 1431 |
+
add_gate(tensors, f"{prefix}.gt.same_sign.neg_lt", [1.0, 1.0], [-2.0])
|
| 1432 |
+
add_gate(tensors, f"{prefix}.gt.same_sign.or", [1.0, 1.0], [-1.0])
|
| 1433 |
+
add_gate(tensors, f"{prefix}.gt.case_or", [1.0, 1.0], [-1.0])
|
| 1434 |
+
add_gate(tensors, f"{prefix}.gt.not_both_zero", [-1.0], [0.0])
|
| 1435 |
+
add_gate(tensors, f"{prefix}.gt.result", [1.0, 1.0, 1.0], [-3.0])
|
| 1436 |
+
|
| 1437 |
+
add_gate(tensors, f"{prefix}.le.eq_or_lt", [1.0, 1.0], [-1.0])
|
| 1438 |
+
add_gate(tensors, f"{prefix}.le.not_nan", [-1.0], [0.0])
|
| 1439 |
+
add_gate(tensors, f"{prefix}.le.result", [1.0, 1.0], [-2.0])
|
| 1440 |
+
|
| 1441 |
+
add_gate(tensors, f"{prefix}.ge.eq_or_gt", [1.0, 1.0], [-1.0])
|
| 1442 |
+
add_gate(tensors, f"{prefix}.ge.not_nan", [-1.0], [0.0])
|
| 1443 |
+
add_gate(tensors, f"{prefix}.ge.result", [1.0, 1.0], [-2.0])
|
| 1444 |
+
|
| 1445 |
+
|
| 1446 |
+
def add_float32_add(tensors: Dict[str, torch.Tensor]) -> None:
|
| 1447 |
+
"""Add float32 addition circuit.
|
| 1448 |
+
|
| 1449 |
+
Algorithm:
|
| 1450 |
+
1. Unpack both operands
|
| 1451 |
+
2. Compare exponents, align mantissas
|
| 1452 |
+
3. Add/subtract mantissas based on signs
|
| 1453 |
+
4. Normalize result
|
| 1454 |
+
5. Handle special cases (inf, nan, zero)
|
| 1455 |
+
"""
|
| 1456 |
+
prefix = "float32.add"
|
| 1457 |
+
|
| 1458 |
+
pos_weights = [float(1 << (7 - i)) for i in range(8)]
|
| 1459 |
+
neg_weights = [-w for w in pos_weights]
|
| 1460 |
+
add_gate(tensors, f"{prefix}.exp_cmp.a_gt_b", pos_weights + neg_weights, [-1.0])
|
| 1461 |
+
add_gate(tensors, f"{prefix}.exp_cmp.a_lt_b", neg_weights + pos_weights, [-1.0])
|
| 1462 |
+
|
| 1463 |
+
for bit in range(8):
|
| 1464 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1465 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1466 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1467 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1468 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1469 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1470 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1471 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1472 |
+
add_gate(tensors, f"{prefix}.exp_diff.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1473 |
+
add_gate(tensors, f"{prefix}.exp_diff.not_b.bit{bit}", [-1.0], [0.0])
|
| 1474 |
+
|
| 1475 |
+
for stage in range(5):
|
| 1476 |
+
shift = 1 << (4 - stage)
|
| 1477 |
+
for bit in range(24):
|
| 1478 |
+
add_gate(tensors, f"{prefix}.align.stage{stage}.bit{bit}.not_sel", [-1.0], [0.0])
|
| 1479 |
+
add_gate(tensors, f"{prefix}.align.stage{stage}.bit{bit}.and_a", [1.0, 1.0], [-2.0])
|
| 1480 |
+
add_gate(tensors, f"{prefix}.align.stage{stage}.bit{bit}.and_b", [1.0, 1.0], [-2.0])
|
| 1481 |
+
add_gate(tensors, f"{prefix}.align.stage{stage}.bit{bit}.or", [1.0, 1.0], [-1.0])
|
| 1482 |
+
|
| 1483 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.or", [1.0, 1.0], [-1.0])
|
| 1484 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1485 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer2", [1.0, 1.0], [-2.0])
|
| 1486 |
+
|
| 1487 |
+
for bit in range(25):
|
| 1488 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1489 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1490 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1491 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1492 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1493 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1494 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1495 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1496 |
+
add_gate(tensors, f"{prefix}.mant_add.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1497 |
+
|
| 1498 |
+
for bit in range(24):
|
| 1499 |
+
add_gate(tensors, f"{prefix}.mant_sub.not_b.bit{bit}", [-1.0], [0.0])
|
| 1500 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1501 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1502 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1503 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1504 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1505 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1506 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1507 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1508 |
+
add_gate(tensors, f"{prefix}.mant_sub.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1509 |
+
|
| 1510 |
+
for bit in range(24):
|
| 1511 |
+
add_gate(tensors, f"{prefix}.mant_select.bit{bit}.not_sel", [-1.0], [0.0])
|
| 1512 |
+
add_gate(tensors, f"{prefix}.mant_select.bit{bit}.and_add", [1.0, 1.0], [-2.0])
|
| 1513 |
+
add_gate(tensors, f"{prefix}.mant_select.bit{bit}.and_sub", [1.0, 1.0], [-2.0])
|
| 1514 |
+
add_gate(tensors, f"{prefix}.mant_select.bit{bit}.or", [1.0, 1.0], [-1.0])
|
| 1515 |
+
|
| 1516 |
+
|
| 1517 |
+
def add_float32_mul(tensors: Dict[str, torch.Tensor]) -> None:
|
| 1518 |
+
"""Add float32 multiplication circuit.
|
| 1519 |
+
|
| 1520 |
+
Algorithm:
|
| 1521 |
+
1. Unpack both operands
|
| 1522 |
+
2. XOR signs for result sign
|
| 1523 |
+
3. Add exponents (subtract bias)
|
| 1524 |
+
4. Multiply mantissas (24x24 -> 48 bits)
|
| 1525 |
+
5. Normalize result
|
| 1526 |
+
6. Handle special cases
|
| 1527 |
+
"""
|
| 1528 |
+
prefix = "float32.mul"
|
| 1529 |
+
|
| 1530 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.or", [1.0, 1.0], [-1.0])
|
| 1531 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1532 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer2", [1.0, 1.0], [-2.0])
|
| 1533 |
+
|
| 1534 |
+
for bit in range(9):
|
| 1535 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1536 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1537 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1538 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1539 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1540 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1541 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1542 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1543 |
+
add_gate(tensors, f"{prefix}.exp_add.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1544 |
+
|
| 1545 |
+
for bit in range(8):
|
| 1546 |
+
add_gate(tensors, f"{prefix}.bias_sub.not_bias.bit{bit}", [-1.0], [0.0])
|
| 1547 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1548 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1549 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1550 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1551 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1552 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1553 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1554 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1555 |
+
add_gate(tensors, f"{prefix}.bias_sub.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1556 |
+
|
| 1557 |
+
for i in range(24):
|
| 1558 |
+
for j in range(24):
|
| 1559 |
+
add_gate(tensors, f"{prefix}.mant_mul.pp.a{i}b{j}", [1.0, 1.0], [-2.0])
|
| 1560 |
+
|
| 1561 |
+
for stage in range(23):
|
| 1562 |
+
for bit in range(48):
|
| 1563 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1564 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1565 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1566 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1567 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1568 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1569 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1570 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1571 |
+
add_gate(tensors, f"{prefix}.mant_mul.acc.s{stage}.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1572 |
+
|
| 1573 |
+
|
| 1574 |
+
def add_float32_div(tensors: Dict[str, torch.Tensor]) -> None:
|
| 1575 |
+
"""Add float32 division circuit.
|
| 1576 |
+
|
| 1577 |
+
Algorithm:
|
| 1578 |
+
1. Unpack both operands
|
| 1579 |
+
2. XOR signs for result sign
|
| 1580 |
+
3. Subtract exponents (add bias)
|
| 1581 |
+
4. Divide mantissas (restoring division)
|
| 1582 |
+
5. Normalize result
|
| 1583 |
+
6. Handle special cases (div by zero -> inf)
|
| 1584 |
+
"""
|
| 1585 |
+
prefix = "float32.div"
|
| 1586 |
+
|
| 1587 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.or", [1.0, 1.0], [-1.0])
|
| 1588 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1589 |
+
add_gate(tensors, f"{prefix}.sign_xor.layer2", [1.0, 1.0], [-2.0])
|
| 1590 |
+
|
| 1591 |
+
for bit in range(8):
|
| 1592 |
+
add_gate(tensors, f"{prefix}.exp_sub.not_b.bit{bit}", [-1.0], [0.0])
|
| 1593 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1594 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1595 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1596 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1597 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1598 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1599 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1600 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1601 |
+
add_gate(tensors, f"{prefix}.exp_sub.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1602 |
+
|
| 1603 |
+
for bit in range(8):
|
| 1604 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1605 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1606 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1607 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1608 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1609 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1610 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1611 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1612 |
+
add_gate(tensors, f"{prefix}.bias_add.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1613 |
+
|
| 1614 |
+
for stage in range(24):
|
| 1615 |
+
pos_weights = [float(1 << (23 - i)) for i in range(24)]
|
| 1616 |
+
neg_weights = [-w for w in pos_weights]
|
| 1617 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.cmp", pos_weights + neg_weights, [0.0])
|
| 1618 |
+
|
| 1619 |
+
for bit in range(24):
|
| 1620 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.not_d.bit{bit}", [-1.0], [0.0])
|
| 1621 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha1.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1622 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha1.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1623 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha1.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1624 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha1.carry", [1.0, 1.0], [-2.0])
|
| 1625 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha2.sum.layer1.or", [1.0, 1.0], [-1.0])
|
| 1626 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha2.sum.layer1.nand", [-1.0, -1.0], [1.0])
|
| 1627 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha2.sum.layer2", [1.0, 1.0], [-2.0])
|
| 1628 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.ha2.carry", [1.0, 1.0], [-2.0])
|
| 1629 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.sub.fa{bit}.carry_or", [1.0, 1.0], [-1.0])
|
| 1630 |
+
|
| 1631 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.mux.bit{bit}.not_sel", [-1.0], [0.0])
|
| 1632 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.mux.bit{bit}.and_old", [1.0, 1.0], [-2.0])
|
| 1633 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.mux.bit{bit}.and_new", [1.0, 1.0], [-2.0])
|
| 1634 |
+
add_gate(tensors, f"{prefix}.mant_div.stage{stage}.mux.bit{bit}.or", [1.0, 1.0], [-1.0])
|
| 1635 |
+
|
| 1636 |
+
|
| 1637 |
def update_manifest(tensors: Dict[str, torch.Tensor], data_bits: int, addr_bits: int, mem_bytes: int) -> None:
|
| 1638 |
"""Update manifest metadata tensors.
|
| 1639 |
@@ -2243,6 +2879,7 @@ def cmd_alu(args) -> None:
        "arithmetic.greaterorequal8bit.", "arithmetic.lessorequal8bit.",
        "arithmetic.equality8bit.", "arithmetic.add3_8bit.", "arithmetic.expr_add_mul.", "arithmetic.expr_paren.",
        "combinational.barrelshifter.", "combinational.priorityencoder.",
+        "float16.", "float32.",
    ]

    if bits in [16, 32]:
@@ -2397,6 +3034,98 @@ def cmd_alu(args) -> None:
    except ValueError as e:
        print(f" {bits}-bit NEG already exists: {e}")

+    print(f"\nGenerating {bits}-bit barrel shifter...")
+    try:
+        add_barrel_shifter_nbits(tensors, bits)
+        import math
+        num_layers = max(1, math.ceil(math.log2(bits)))
+        print(f" Added {bits}-bit barrel shifter ({num_layers} layers x {bits} muxes)")
+    except ValueError as e:
+        print(f" {bits}-bit barrel shifter already exists: {e}")
+
+    print(f"\nGenerating {bits}-bit priority encoder...")
+    try:
+        add_priority_encoder_nbits(tensors, bits)
+        import math
+        out_bits = max(1, math.ceil(math.log2(bits)))
+        print(f" Added {bits}-bit priority encoder ({out_bits}-bit output)")
+    except ValueError as e:
+        print(f" {bits}-bit priority encoder already exists: {e}")
+
+    print(f"\n{'=' * 60}")
+    print(f" GENERATING FLOAT CIRCUITS")
+    print(f"{'=' * 60}")
+
+    print("\nGenerating float16 core circuits...")
+    try:
+        add_float16_core(tensors)
+        print(" Added float16 unpack/pack/classify/normalize")
+    except ValueError as e:
+        print(f" float16 core already exists: {e}")
+
+    print("\nGenerating float16 ADD circuit...")
+    try:
+        add_float16_add(tensors)
+        print(" Added float16 addition (exp align + mantissa add/sub)")
+    except ValueError as e:
+        print(f" float16 ADD already exists: {e}")
+
+    print("\nGenerating float16 MUL circuit...")
+    try:
+        add_float16_mul(tensors)
+        print(" Added float16 multiplication (11x11 mantissa mul)")
+    except ValueError as e:
+        print(f" float16 MUL already exists: {e}")
+
+    print("\nGenerating float16 DIV circuit...")
+    try:
+        add_float16_div(tensors)
+        print(" Added float16 division (11-stage restoring div)")
+    except ValueError as e:
+        print(f" float16 DIV already exists: {e}")
+
+    print("\nGenerating float16 CMP circuits...")
+    try:
+        add_float16_cmp(tensors)
+        print(" Added float16 comparisons (EQ, LT, LE, GT, GE)")
+    except ValueError as e:
+        print(f" float16 CMP already exists: {e}")
+
+    print("\nGenerating float32 core circuits...")
+    try:
+        add_float32_core(tensors)
+        print(" Added float32 unpack/pack/classify/normalize")
+    except ValueError as e:
+        print(f" float32 core already exists: {e}")
+
+    print("\nGenerating float32 ADD circuit...")
+    try:
+        add_float32_add(tensors)
+        print(" Added float32 addition (exp align + mantissa add/sub)")
+    except ValueError as e:
+        print(f" float32 ADD already exists: {e}")
+
+    print("\nGenerating float32 MUL circuit...")
+    try:
+        add_float32_mul(tensors)
+        print(" Added float32 multiplication (24x24 mantissa mul)")
+    except ValueError as e:
+        print(f" float32 MUL already exists: {e}")
+
+    print("\nGenerating float32 DIV circuit...")
+    try:
+        add_float32_div(tensors)
+        print(" Added float32 division (24-stage restoring div)")
+    except ValueError as e:
+        print(f" float32 DIV already exists: {e}")
+
+    print("\nGenerating float32 CMP circuits...")
+    try:
+        add_float32_cmp(tensors)
+        print(" Added float32 comparisons (EQ, LT, LE, GT, GE)")
+    except ValueError as e:
+        print(f" float32 CMP already exists: {e}")
+
    if args.apply:
        print(f"\nSaving: {args.model}")
        save_file(tensors, str(args.model))
eval.py
@@ -2917,6 +2917,152 @@ class BatchedFitnessEvaluator:

        return scores, total

    # =========================================================================
    # CONTROL FLOW
    # =========================================================================
@@ -3662,7 +3808,7 @@ class BatchedFitnessEvaluator:
            'manifest.instruction_width': 16.0,
            'manifest.register_width': 8.0,
            'manifest.registers': 4.0,
-            'manifest.version':
        }

        for name, exp_val in fixed_expected.items():
@@ -3762,6 +3908,399 @@ class BatchedFitnessEvaluator:

        return scores, total

    # =========================================================================
    # INTEGRATION TESTS (Multi-circuit chains)
    # =========================================================================
@@ -4091,6 +4630,18 @@ class BatchedFitnessEvaluator:
|
|
| 4091 |
total_tests += t
|
| 4092 |
self.category_scores[f'neg{bits}'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
|
| 4093 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4094 |
# 3-operand adder
|
| 4095 |
s, t = self._test_add3(population, debug)
|
| 4096 |
scores += s
|
|
@@ -4163,6 +4714,78 @@ class BatchedFitnessEvaluator:
|
|
| 4163 |
total_tests += t
|
| 4164 |
self.category_scores['memory'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
|
| 4165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4166 |
self.total_tests = total_tests
|
| 4167 |
|
| 4168 |
if debug:
|
|
|
|
@@ -2917,6 +2917,152 @@ class BatchedFitnessEvaluator:
 
         return scores, total
 
+    def _test_barrel_shifter_nbits(self, pop: Dict, bits: int, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test N-bit barrel shifter (shift by 0 to bits-1 positions)."""
+        import math
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+        num_layers = max(1, math.ceil(math.log2(bits)))
+        max_val = (1 << bits) - 1
+
+        if debug:
+            print(f"\n=== {bits}-BIT BARREL SHIFTER ===")
+
+        prefix = f'combinational.barrelshifter{bits}'
+        try:
+            if bits == 16:
+                test_vals = [0x8001, 0xFF00, 0x00FF, 0xAAAA, 0xFFFF, 0x1234]
+            elif bits == 32:
+                test_vals = [0x80000001, 0xFFFF0000, 0x0000FFFF, 0xAAAAAAAA, 0xFFFFFFFF, 0x12345678]
+            else:
+                test_vals = [0b10000001, 0b11110000, 0b00001111, 0b10101010, max_val]
+
+            num_shifts = min(bits, 8)
+            for val in test_vals:
+                for shift in range(num_shifts):
+                    expected_val = (val << shift) & max_val
+                    val_bits = [float((val >> (bits - 1 - i)) & 1) for i in range(bits)]
+                    shift_bits = [float((shift >> (num_layers - 1 - i)) & 1) for i in range(num_layers)]
+
+                    layer_in = val_bits[:]
+                    for layer in range(num_layers):
+                        shift_amount = 1 << (num_layers - 1 - layer)
+                        sel = shift_bits[layer]
+                        layer_out = []
+
+                        for bit in range(bits):
+                            bit_prefix = f'{prefix}.layer{layer}.bit{bit}'
+
+                            w_not = pop[f'{bit_prefix}.not_sel.weight'].view(pop_size)
+                            b_not = pop[f'{bit_prefix}.not_sel.bias'].view(pop_size)
+                            not_sel = heaviside(sel * w_not + b_not)
+
+                            shifted_src = bit + shift_amount
+                            if shifted_src < bits:
+                                shifted_val = layer_in[shifted_src]
+                            else:
+                                shifted_val = 0.0
+
+                            w_and_a = pop[f'{bit_prefix}.and_a.weight'].view(pop_size, 2)
+                            b_and_a = pop[f'{bit_prefix}.and_a.bias'].view(pop_size)
+                            inp_a = torch.tensor([layer_in[bit], not_sel[0].item()], device=self.device)
+                            and_a = heaviside((inp_a * w_and_a).sum(-1) + b_and_a)
+
+                            w_and_b = pop[f'{bit_prefix}.and_b.weight'].view(pop_size, 2)
+                            b_and_b = pop[f'{bit_prefix}.and_b.bias'].view(pop_size)
+                            inp_b = torch.tensor([shifted_val, sel], device=self.device)
+                            and_b = heaviside((inp_b * w_and_b).sum(-1) + b_and_b)
+
+                            w_or = pop[f'{bit_prefix}.or.weight'].view(pop_size, 2)
+                            b_or = pop[f'{bit_prefix}.or.bias'].view(pop_size)
+                            inp_or = torch.tensor([and_a[0].item(), and_b[0].item()], device=self.device)
+                            out = heaviside((inp_or * w_or).sum(-1) + b_or)
+                            layer_out.append(out[0].item())
+
+                        layer_in = layer_out
+
+                    result = sum(int(layer_in[i]) << (bits - 1 - i) for i in range(bits))
+                    if result == expected_val:
+                        scores += 1
+                    total += 1
+
+            self._record(prefix, int(scores[0].item()), total, [])
+            if debug:
+                r = self.results[-1]
+                print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+        except (KeyError, RuntimeError) as e:
+            if debug:
+                print(f" {prefix}: SKIP ({e})")
+
+        return scores, total
+
+    def _test_priority_encoder_nbits(self, pop: Dict, bits: int, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test N-bit priority encoder (find highest set bit)."""
+        import math
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+        out_bits = max(1, math.ceil(math.log2(bits)))
+
+        if debug:
+            print(f"\n=== {bits}-BIT PRIORITY ENCODER ===")
+
+        prefix = f'combinational.priorityencoder{bits}'
+        try:
+            test_cases = [(0, 0, 0)]
+            for i in range(bits):
+                test_cases.append((1 << i, 1, bits - 1 - i))
+            if bits == 16:
+                test_cases.extend([
+                    (0x8001, 1, 0), (0x5555, 1, 1), (0x00FF, 1, 8), (0xFFFF, 1, 0)
+                ])
+            elif bits == 32:
+                test_cases.extend([
+                    (0x80000001, 1, 0), (0x55555555, 1, 1), (0x0000FFFF, 1, 16), (0xFFFFFFFF, 1, 0)
+                ])
+
+            for val, expected_valid, expected_idx in test_cases:
+                val_bits = torch.tensor([float((val >> (bits - 1 - i)) & 1) for i in range(bits)],
+                                        device=self.device, dtype=torch.float32)
+
+                w_valid = pop[f'{prefix}.valid.weight'].view(pop_size, bits)
+                b_valid = pop[f'{prefix}.valid.bias'].view(pop_size)
+                out_valid = heaviside((val_bits * w_valid).sum(-1) + b_valid)
+
+                if int(out_valid[0].item()) == expected_valid:
+                    scores += 1
+                total += 1
+
+                if expected_valid == 1:
+                    for idx_bit in range(out_bits):
+                        try:
+                            w_idx = pop[f'{prefix}.out{idx_bit}.weight']
+                            num_weights = w_idx.numel() // pop_size
+                            w_idx = w_idx.view(pop_size, num_weights)
+                            b_idx = pop[f'{prefix}.out{idx_bit}.bias'].view(pop_size)
+                            relevant_bits = torch.tensor([val_bits[i].item() for i in range(bits)
+                                                          if (i >> idx_bit) & 1],
+                                                         device=self.device, dtype=torch.float32)
+                            if len(relevant_bits) > 0:
+                                out_idx = heaviside((relevant_bits[:w_idx.shape[1]] * w_idx).sum(-1) + b_idx)
+                                expected_bit = (expected_idx >> idx_bit) & 1
+                                if int(out_idx[0].item()) == expected_bit:
+                                    scores += 1
+                                total += 1
+                        except KeyError:
+                            pass
+
+            self._record(prefix, int(scores[0].item()), total, [])
+            if debug:
+                r = self.results[-1]
+                print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+        except (KeyError, RuntimeError) as e:
+            if debug:
+                print(f" {prefix}: SKIP ({e})")
+
+        return scores, total
+
     # =========================================================================
     # CONTROL FLOW
     # =========================================================================
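The barrel-shifter test above walks the same mux structure the builder emits: log2(bits) layers, where layer k conditionally shifts by 2^(num_layers-1-k) under control of one bit of the shift amount. As a plain-Python reference for what the layer cascade computes (an illustrative model under that assumption, not the evaluator code):

import math

def barrel_shift_left(val: int, shift: int, bits: int) -> int:
    """Logarithmic left shifter: one 2:1 mux layer per shift-amount bit (MSB first)."""
    num_layers = max(1, math.ceil(math.log2(bits)))
    mask = (1 << bits) - 1
    for layer in range(num_layers):
        amount = 1 << (num_layers - 1 - layer)
        sel = (shift >> (num_layers - 1 - layer)) & 1
        if sel:                                # mux selects the shifted copy
            val = (val << amount) & mask
    return val

assert barrel_shift_left(0x00FF, 3, 16) == 0x07F8
assert barrel_shift_left(0x8001, 1, 16) == 0x0002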
@@ -3662,7 +3808,7 @@ class BatchedFitnessEvaluator:
             'manifest.instruction_width': 16.0,
             'manifest.register_width': 8.0,
             'manifest.registers': 4.0,
-            'manifest.version':
+            'manifest.version': 4.0,
         }
 
         for name, exp_val in fixed_expected.items():
@@ -3762,6 +3908,399 @@ class BatchedFitnessEvaluator:
 
         return scores, total
 
+    # =========================================================================
+    # FLOAT16 TESTS
+    # =========================================================================
+
+    def _test_float16_core(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float16 core circuits (unpack, pack, classify)."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT16 CORE ===")
+
+        expected_gates = [
+            ('float16.unpack.bit0.weight', (1,)),
+            ('float16.classify.exp_zero.weight', (5,)),
+            ('float16.classify.exp_max.weight', (5,)),
+            ('float16.classify.frac_zero.weight', (10,)),
+            ('float16.classify.is_zero.and.weight', (2,)),
+            ('float16.classify.is_nan.and.weight', (2,)),
+            ('float16.normalize.stage0.bit0.not_sel.weight', (1,)),
+            ('float16.normalize.stage0.bit0.and_a.weight', (2,)),
+            ('float16.normalize.stage0.bit0.or.weight', (2,)),
+            ('float16.pack.bit0.weight', (1,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
+    def _test_float16_add(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float16 addition circuit."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT16 ADD ===")
+
+        expected_gates = [
+            ('float16.add.exp_cmp.a_gt_b.weight', (10,)),
+            ('float16.add.exp_cmp.a_lt_b.weight', (10,)),
+            ('float16.add.exp_diff.fa0.ha1.sum.layer1.or.weight', (2,)),
+            ('float16.add.align.stage0.bit0.not_sel.weight', (1,)),
+            ('float16.add.sign_xor.layer1.or.weight', (2,)),
+            ('float16.add.mant_add.fa0.ha1.sum.layer1.or.weight', (2,)),
+            ('float16.add.mant_sub.not_b.bit0.weight', (1,)),
+            ('float16.add.mant_select.bit0.not_sel.weight', (1,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
+    def _test_float16_mul(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float16 multiplication circuit."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT16 MUL ===")
+
+        expected_gates = [
+            ('float16.mul.sign_xor.layer1.or.weight', (2,)),
+            ('float16.mul.exp_add.fa0.ha1.sum.layer1.or.weight', (2,)),
+            ('float16.mul.bias_sub.not_bias.bit0.weight', (1,)),
+            ('float16.mul.mant_mul.pp.a0b0.weight', (2,)),
+            ('float16.mul.mant_mul.acc.s0.fa0.ha1.sum.layer1.or.weight', (2,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
+    def _test_float16_div(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float16 division circuit."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT16 DIV ===")
+
+        expected_gates = [
+            ('float16.div.sign_xor.layer1.or.weight', (2,)),
+            ('float16.div.exp_sub.not_b.bit0.weight', (1,)),
+            ('float16.div.bias_add.fa0.ha1.sum.layer1.or.weight', (2,)),
+            ('float16.div.mant_div.stage0.cmp.weight', (22,)),
+            ('float16.div.mant_div.stage0.sub.not_d.bit0.weight', (1,)),
+            ('float16.div.mant_div.stage0.mux.bit0.not_sel.weight', (1,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
+    def _test_float16_cmp(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float16 comparison circuits."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT16 CMP ===")
+
+        expected_gates = [
+            ('float16.cmp.a.exp_max.weight', (5,)),
+            ('float16.cmp.a.frac_nz.weight', (10,)),
+            ('float16.cmp.a.is_nan.weight', (2,)),
+            ('float16.cmp.either_nan.weight', (2,)),
+            ('float16.cmp.sign_xor.layer1.or.weight', (2,)),
+            ('float16.cmp.both_zero.weight', (2,)),
+            ('float16.cmp.mag_a_gt_b.weight', (30,)),
+            ('float16.cmp.eq.result.weight', (2,)),
+            ('float16.cmp.lt.result.weight', (3,)),
+            ('float16.cmp.gt.result.weight', (3,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
+    # =========================================================================
+    # FLOAT32 TESTS
+    # =========================================================================
+
+    def _test_float32_core(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float32 core circuits (unpack, pack, classify)."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT32 CORE ===")
+
+        expected_gates = [
+            ('float32.unpack.bit0.weight', (1,)),
+            ('float32.classify.exp_zero.weight', (8,)),
+            ('float32.classify.exp_max.weight', (8,)),
+            ('float32.classify.frac_zero.weight', (23,)),
+            ('float32.classify.is_zero.and.weight', (2,)),
+            ('float32.classify.is_nan.and.weight', (2,)),
+            ('float32.normalize.stage0.bit0.not_sel.weight', (1,)),
+            ('float32.pack.bit0.weight', (1,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
+    def _test_float32_add(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float32 addition circuit."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT32 ADD ===")
+
+        expected_gates = [
+            ('float32.add.exp_cmp.a_gt_b.weight', (16,)),
+            ('float32.add.exp_diff.fa0.ha1.sum.layer1.or.weight', (2,)),
+            ('float32.add.align.stage0.bit0.not_sel.weight', (1,)),
+            ('float32.add.sign_xor.layer1.or.weight', (2,)),
+            ('float32.add.mant_add.fa0.ha1.sum.layer1.or.weight', (2,)),
+            ('float32.add.mant_sub.not_b.bit0.weight', (1,)),
+            ('float32.add.mant_select.bit0.not_sel.weight', (1,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
+    def _test_float32_mul(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float32 multiplication circuit."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT32 MUL ===")
+
+        expected_gates = [
+            ('float32.mul.sign_xor.layer1.or.weight', (2,)),
+            ('float32.mul.exp_add.fa0.ha1.sum.layer1.or.weight', (2,)),
+            ('float32.mul.bias_sub.not_bias.bit0.weight', (1,)),
+            ('float32.mul.mant_mul.pp.a0b0.weight', (2,)),
+            ('float32.mul.mant_mul.acc.s0.fa0.ha1.sum.layer1.or.weight', (2,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
+    def _test_float32_div(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float32 division circuit."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT32 DIV ===")
+
+        expected_gates = [
+            ('float32.div.sign_xor.layer1.or.weight', (2,)),
+            ('float32.div.exp_sub.not_b.bit0.weight', (1,)),
+            ('float32.div.bias_add.fa0.ha1.sum.layer1.or.weight', (2,)),
+            ('float32.div.mant_div.stage0.cmp.weight', (48,)),
+            ('float32.div.mant_div.stage0.sub.not_d.bit0.weight', (1,)),
+            ('float32.div.mant_div.stage0.mux.bit0.not_sel.weight', (1,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
+    def _test_float32_cmp(self, pop: Dict, debug: bool) -> Tuple[torch.Tensor, int]:
+        """Test float32 comparison circuits."""
+        pop_size = next(iter(pop.values())).shape[0]
+        scores = torch.zeros(pop_size, device=self.device)
+        total = 0
+
+        if debug:
+            print("\n=== FLOAT32 CMP ===")
+
+        expected_gates = [
+            ('float32.cmp.a.exp_max.weight', (8,)),
+            ('float32.cmp.a.frac_nz.weight', (23,)),
+            ('float32.cmp.a.is_nan.weight', (2,)),
+            ('float32.cmp.either_nan.weight', (2,)),
+            ('float32.cmp.sign_xor.layer1.or.weight', (2,)),
+            ('float32.cmp.both_zero.weight', (2,)),
+            ('float32.cmp.mag_a_gt_b.weight', (62,)),
+            ('float32.cmp.eq.result.weight', (2,)),
+            ('float32.cmp.lt.result.weight', (3,)),
+            ('float32.cmp.gt.result.weight', (3,)),
+        ]
+
+        for name, expected_shape in expected_gates:
+            try:
+                tensor = pop[name]
+                actual_shape = tuple(tensor.shape[1:])
+                if actual_shape == expected_shape:
+                    scores += 1
+                    self._record(name, 1, 1, [])
+                else:
+                    self._record(name, 0, 1, [(expected_shape, actual_shape)])
+                total += 1
+                if debug:
+                    r = self.results[-1]
+                    print(f" {r.name}: {r.passed}/{r.total} {'PASS' if r.success else 'FAIL'}")
+            except KeyError:
+                if debug:
+                    print(f" {name}: SKIP (not found)")
+
+        return scores, total
+
     # =========================================================================
     # INTEGRATION TESTS (Multi-circuit chains)
     # =========================================================================
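The gate fan-ins these shape tests expect follow directly from the IEEE 754 field widths: 5-bit exponent and 10-bit fraction for half precision (bias 15), 8 and 23 for single precision (bias 127). A small decode example for a normal float16 value, assuming nothing beyond the standard encoding:

def decode_float16(h: int) -> float:
    """Decode a normal (non-zero, non-special) IEEE 754 binary16 value."""
    sign = (h >> 15) & 0x1          # 1 sign bit
    exp = (h >> 10) & 0x1F          # 5 exponent bits, biased by 15
    frac = h & 0x3FF                # 10 fraction bits
    value = (1 + frac / 1024) * 2.0 ** (exp - 15)
    return -value if sign else value

assert decode_float16(0x3E00) == 1.5    # 0 01111 1000000000
assert decode_float16(0xC000) == -2.0   # 1 10000 0000000000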
@@ -4091,6 +4630,18 @@ class BatchedFitnessEvaluator:
             total_tests += t
             self.category_scores[f'neg{bits}'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
 
+            if f'combinational.barrelshifter{bits}.layer0.bit0.not_sel.weight' in population:
+                s, t = self._test_barrel_shifter_nbits(population, bits, debug)
+                scores += s
+                total_tests += t
+                self.category_scores[f'barrelshifter{bits}'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+            if f'combinational.priorityencoder{bits}.valid.weight' in population:
+                s, t = self._test_priority_encoder_nbits(population, bits, debug)
+                scores += s
+                total_tests += t
+                self.category_scores[f'priorityencoder{bits}'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
         # 3-operand adder
         s, t = self._test_add3(population, debug)
         scores += s
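The priority-encoder test vectors wired in above follow an "index of the highest set bit, counted from the MSB" convention: input 1 << i maps to index bits - 1 - i, and an all-zero input clears the valid flag. A plain-Python restatement of that convention, illustrative only:

def priority_encode(val: int, bits: int):
    """Return (valid, index of the highest set bit counted from the MSB)."""
    if val == 0:
        return 0, 0
    msb = val.bit_length() - 1        # position counted from the LSB
    return 1, bits - 1 - msb

assert priority_encode(0x0000, 16) == (0, 0)
assert priority_encode(0x8001, 16) == (1, 0)
assert priority_encode(0x00FF, 16) == (1, 8)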
@@ -4163,6 +4714,78 @@ class BatchedFitnessEvaluator:
         total_tests += t
         self.category_scores['memory'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
 
+        # Float16 circuits (if present)
+        if 'float16.unpack.bit0.weight' in population:
+            if debug:
+                print(f"\n{'=' * 60}")
+                print(f" FLOAT16 CIRCUITS")
+                print(f"{'=' * 60}")
+
+            s, t = self._test_float16_core(population, debug)
+            scores += s
+            total_tests += t
+            self.category_scores['float16_core'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+            if 'float16.add.exp_cmp.a_gt_b.weight' in population:
+                s, t = self._test_float16_add(population, debug)
+                scores += s
+                total_tests += t
+                self.category_scores['float16_add'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+            if 'float16.mul.sign_xor.layer1.or.weight' in population:
+                s, t = self._test_float16_mul(population, debug)
+                scores += s
+                total_tests += t
+                self.category_scores['float16_mul'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+            if 'float16.div.sign_xor.layer1.or.weight' in population:
+                s, t = self._test_float16_div(population, debug)
+                scores += s
+                total_tests += t
+                self.category_scores['float16_div'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+            if 'float16.cmp.a.exp_max.weight' in population:
+                s, t = self._test_float16_cmp(population, debug)
+                scores += s
+                total_tests += t
+                self.category_scores['float16_cmp'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+        # Float32 circuits (if present)
+        if 'float32.unpack.bit0.weight' in population:
+            if debug:
+                print(f"\n{'=' * 60}")
+                print(f" FLOAT32 CIRCUITS")
+                print(f"{'=' * 60}")
+
+            s, t = self._test_float32_core(population, debug)
+            scores += s
+            total_tests += t
+            self.category_scores['float32_core'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+            if 'float32.add.exp_cmp.a_gt_b.weight' in population:
+                s, t = self._test_float32_add(population, debug)
+                scores += s
+                total_tests += t
+                self.category_scores['float32_add'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+            if 'float32.mul.sign_xor.layer1.or.weight' in population:
+                s, t = self._test_float32_mul(population, debug)
+                scores += s
+                total_tests += t
+                self.category_scores['float32_mul'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+            if 'float32.div.sign_xor.layer1.or.weight' in population:
+                s, t = self._test_float32_div(population, debug)
+                scores += s
+                total_tests += t
+                self.category_scores['float32_div'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
+            if 'float32.cmp.a.exp_max.weight' in population:
+                s, t = self._test_float32_cmp(population, debug)
+                scores += s
+                total_tests += t
+                self.category_scores['float32_cmp'] = (s[0].item() if pop_size == 1 else s.mean().item(), t)
+
         self.total_tests = total_tests
 
         if debug:
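As a sanity check on the MUL exponent path exercised by these dispatch blocks (exp_add followed by bias_sub), the biased-exponent arithmetic is e_out = e_a + e_b - bias, with bias 15 for float16 and 127 for float32. A worked example in plain Python, purely illustrative and not the circuit code:

BIAS16 = 15

def fp16_mul_fields(ea: int, ma: float, eb: int, mb: float):
    """Multiply two normal float16 values given as (biased exponent, 1.f mantissa)."""
    e = ea + eb - BIAS16          # exp_add then bias_sub
    m = ma * mb                   # the 11x11-bit mantissa product in the circuit
    if m >= 2.0:                  # one-bit normalization of the product
        m /= 2.0
        e += 1
    return e, m

# 2.0 (e=16, m=1.0) * 3.0 (e=16, m=1.5) -> e=17, m=1.5, i.e. 1.5 * 2**(17-15) == 6.0
assert fp16_mul_fields(16, 1.0, 16, 1.5) == (17, 1.5)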
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:6efa5b719d55fa8e071c4dacc90bfe5bff7337c6fab952460f4ccdadf237facb
+size 10083624
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:812d1833c915945eeb694bca530b075b3e08685bac8646f29e87d26a2d644b88
+size 8436636