phanerozoic committed on
Commit
ab4f6ec
·
verified ·
1 Parent(s): 062669a

Add NEG circuit (two's complement negate) - 76 tensors, 256/256 tests pass

Browse files
Files changed (7) hide show
  1. 16bitupgrade.md +904 -904
  2. llm/circuit_llm.py +606 -606
  3. llm/guide.md +615 -615
  4. llm/train_circuit_interface.py +306 -306
  5. neural_computer.safetensors +2 -2
  6. tensors.txt +0 -0
  7. todo.md +1 -1
16bitupgrade.md CHANGED
@@ -1,905 +1,905 @@
1
- 16-BIT TENSOR MANIFEST
2
-
3
- ---
4
- ARITHMETIC
5
-
6
- ripplecarry16bit
7
- arithmetic.ripplecarry16bit.fa0.carry_or.bias [1]
8
- arithmetic.ripplecarry16bit.fa0.carry_or.weight [2]
9
- arithmetic.ripplecarry16bit.fa0.ha1.carry.bias [1]
10
- arithmetic.ripplecarry16bit.fa0.ha1.carry.weight [2]
11
- arithmetic.ripplecarry16bit.fa0.ha1.sum.layer1.nand.bias [1]
12
- arithmetic.ripplecarry16bit.fa0.ha1.sum.layer1.nand.weight [2]
13
- arithmetic.ripplecarry16bit.fa0.ha1.sum.layer1.or.bias [1]
14
- arithmetic.ripplecarry16bit.fa0.ha1.sum.layer1.or.weight [2]
15
- arithmetic.ripplecarry16bit.fa0.ha1.sum.layer2.bias [1]
16
- arithmetic.ripplecarry16bit.fa0.ha1.sum.layer2.weight [2]
17
- arithmetic.ripplecarry16bit.fa0.ha2.carry.bias [1]
18
- arithmetic.ripplecarry16bit.fa0.ha2.carry.weight [2]
19
- arithmetic.ripplecarry16bit.fa0.ha2.sum.layer1.nand.bias [1]
20
- arithmetic.ripplecarry16bit.fa0.ha2.sum.layer1.nand.weight [2]
21
- arithmetic.ripplecarry16bit.fa0.ha2.sum.layer1.or.bias [1]
22
- arithmetic.ripplecarry16bit.fa0.ha2.sum.layer1.or.weight [2]
23
- arithmetic.ripplecarry16bit.fa0.ha2.sum.layer2.bias [1]
24
- arithmetic.ripplecarry16bit.fa0.ha2.sum.layer2.weight [2]
25
- arithmetic.ripplecarry16bit.fa1.carry_or.bias [1]
26
- arithmetic.ripplecarry16bit.fa1.carry_or.weight [2]
27
- arithmetic.ripplecarry16bit.fa1.ha1.carry.bias [1]
28
- arithmetic.ripplecarry16bit.fa1.ha1.carry.weight [2]
29
- arithmetic.ripplecarry16bit.fa1.ha1.sum.layer1.nand.bias [1]
30
- arithmetic.ripplecarry16bit.fa1.ha1.sum.layer1.nand.weight [2]
31
- arithmetic.ripplecarry16bit.fa1.ha1.sum.layer1.or.bias [1]
32
- arithmetic.ripplecarry16bit.fa1.ha1.sum.layer1.or.weight [2]
33
- arithmetic.ripplecarry16bit.fa1.ha1.sum.layer2.bias [1]
34
- arithmetic.ripplecarry16bit.fa1.ha1.sum.layer2.weight [2]
35
- arithmetic.ripplecarry16bit.fa1.ha2.carry.bias [1]
36
- arithmetic.ripplecarry16bit.fa1.ha2.carry.weight [2]
37
- arithmetic.ripplecarry16bit.fa1.ha2.sum.layer1.nand.bias [1]
38
- arithmetic.ripplecarry16bit.fa1.ha2.sum.layer1.nand.weight [2]
39
- arithmetic.ripplecarry16bit.fa1.ha2.sum.layer1.or.bias [1]
40
- arithmetic.ripplecarry16bit.fa1.ha2.sum.layer1.or.weight [2]
41
- arithmetic.ripplecarry16bit.fa1.ha2.sum.layer2.bias [1]
42
- arithmetic.ripplecarry16bit.fa1.ha2.sum.layer2.weight [2]
43
- arithmetic.ripplecarry16bit.fa2.carry_or.bias [1]
44
- arithmetic.ripplecarry16bit.fa2.carry_or.weight [2]
45
- arithmetic.ripplecarry16bit.fa2.ha1.carry.bias [1]
46
- arithmetic.ripplecarry16bit.fa2.ha1.carry.weight [2]
47
- arithmetic.ripplecarry16bit.fa2.ha1.sum.layer1.nand.bias [1]
48
- arithmetic.ripplecarry16bit.fa2.ha1.sum.layer1.nand.weight [2]
49
- arithmetic.ripplecarry16bit.fa2.ha1.sum.layer1.or.bias [1]
50
- arithmetic.ripplecarry16bit.fa2.ha1.sum.layer1.or.weight [2]
51
- arithmetic.ripplecarry16bit.fa2.ha1.sum.layer2.bias [1]
52
- arithmetic.ripplecarry16bit.fa2.ha1.sum.layer2.weight [2]
53
- arithmetic.ripplecarry16bit.fa2.ha2.carry.bias [1]
54
- arithmetic.ripplecarry16bit.fa2.ha2.carry.weight [2]
55
- arithmetic.ripplecarry16bit.fa2.ha2.sum.layer1.nand.bias [1]
56
- arithmetic.ripplecarry16bit.fa2.ha2.sum.layer1.nand.weight [2]
57
- arithmetic.ripplecarry16bit.fa2.ha2.sum.layer1.or.bias [1]
58
- arithmetic.ripplecarry16bit.fa2.ha2.sum.layer1.or.weight [2]
59
- arithmetic.ripplecarry16bit.fa2.ha2.sum.layer2.bias [1]
60
- arithmetic.ripplecarry16bit.fa2.ha2.sum.layer2.weight [2]
61
- arithmetic.ripplecarry16bit.fa3.carry_or.bias [1]
62
- arithmetic.ripplecarry16bit.fa3.carry_or.weight [2]
63
- arithmetic.ripplecarry16bit.fa3.ha1.carry.bias [1]
64
- arithmetic.ripplecarry16bit.fa3.ha1.carry.weight [2]
65
- arithmetic.ripplecarry16bit.fa3.ha1.sum.layer1.nand.bias [1]
66
- arithmetic.ripplecarry16bit.fa3.ha1.sum.layer1.nand.weight [2]
67
- arithmetic.ripplecarry16bit.fa3.ha1.sum.layer1.or.bias [1]
68
- arithmetic.ripplecarry16bit.fa3.ha1.sum.layer1.or.weight [2]
69
- arithmetic.ripplecarry16bit.fa3.ha1.sum.layer2.bias [1]
70
- arithmetic.ripplecarry16bit.fa3.ha1.sum.layer2.weight [2]
71
- arithmetic.ripplecarry16bit.fa3.ha2.carry.bias [1]
72
- arithmetic.ripplecarry16bit.fa3.ha2.carry.weight [2]
73
- arithmetic.ripplecarry16bit.fa3.ha2.sum.layer1.nand.bias [1]
74
- arithmetic.ripplecarry16bit.fa3.ha2.sum.layer1.nand.weight [2]
75
- arithmetic.ripplecarry16bit.fa3.ha2.sum.layer1.or.bias [1]
76
- arithmetic.ripplecarry16bit.fa3.ha2.sum.layer1.or.weight [2]
77
- arithmetic.ripplecarry16bit.fa3.ha2.sum.layer2.bias [1]
78
- arithmetic.ripplecarry16bit.fa3.ha2.sum.layer2.weight [2]
79
- arithmetic.ripplecarry16bit.fa4.carry_or.bias [1]
80
- arithmetic.ripplecarry16bit.fa4.carry_or.weight [2]
81
- arithmetic.ripplecarry16bit.fa4.ha1.carry.bias [1]
82
- arithmetic.ripplecarry16bit.fa4.ha1.carry.weight [2]
83
- arithmetic.ripplecarry16bit.fa4.ha1.sum.layer1.nand.bias [1]
84
- arithmetic.ripplecarry16bit.fa4.ha1.sum.layer1.nand.weight [2]
85
- arithmetic.ripplecarry16bit.fa4.ha1.sum.layer1.or.bias [1]
86
- arithmetic.ripplecarry16bit.fa4.ha1.sum.layer1.or.weight [2]
87
- arithmetic.ripplecarry16bit.fa4.ha1.sum.layer2.bias [1]
88
- arithmetic.ripplecarry16bit.fa4.ha1.sum.layer2.weight [2]
89
- arithmetic.ripplecarry16bit.fa4.ha2.carry.bias [1]
90
- arithmetic.ripplecarry16bit.fa4.ha2.carry.weight [2]
91
- arithmetic.ripplecarry16bit.fa4.ha2.sum.layer1.nand.bias [1]
92
- arithmetic.ripplecarry16bit.fa4.ha2.sum.layer1.nand.weight [2]
93
- arithmetic.ripplecarry16bit.fa4.ha2.sum.layer1.or.bias [1]
94
- arithmetic.ripplecarry16bit.fa4.ha2.sum.layer1.or.weight [2]
95
- arithmetic.ripplecarry16bit.fa4.ha2.sum.layer2.bias [1]
96
- arithmetic.ripplecarry16bit.fa4.ha2.sum.layer2.weight [2]
97
- arithmetic.ripplecarry16bit.fa5.carry_or.bias [1]
98
- arithmetic.ripplecarry16bit.fa5.carry_or.weight [2]
99
- arithmetic.ripplecarry16bit.fa5.ha1.carry.bias [1]
100
- arithmetic.ripplecarry16bit.fa5.ha1.carry.weight [2]
101
- arithmetic.ripplecarry16bit.fa5.ha1.sum.layer1.nand.bias [1]
102
- arithmetic.ripplecarry16bit.fa5.ha1.sum.layer1.nand.weight [2]
103
- arithmetic.ripplecarry16bit.fa5.ha1.sum.layer1.or.bias [1]
104
- arithmetic.ripplecarry16bit.fa5.ha1.sum.layer1.or.weight [2]
105
- arithmetic.ripplecarry16bit.fa5.ha1.sum.layer2.bias [1]
106
- arithmetic.ripplecarry16bit.fa5.ha1.sum.layer2.weight [2]
107
- arithmetic.ripplecarry16bit.fa5.ha2.carry.bias [1]
108
- arithmetic.ripplecarry16bit.fa5.ha2.carry.weight [2]
109
- arithmetic.ripplecarry16bit.fa5.ha2.sum.layer1.nand.bias [1]
110
- arithmetic.ripplecarry16bit.fa5.ha2.sum.layer1.nand.weight [2]
111
- arithmetic.ripplecarry16bit.fa5.ha2.sum.layer1.or.bias [1]
112
- arithmetic.ripplecarry16bit.fa5.ha2.sum.layer1.or.weight [2]
113
- arithmetic.ripplecarry16bit.fa5.ha2.sum.layer2.bias [1]
114
- arithmetic.ripplecarry16bit.fa5.ha2.sum.layer2.weight [2]
115
- arithmetic.ripplecarry16bit.fa6.carry_or.bias [1]
116
- arithmetic.ripplecarry16bit.fa6.carry_or.weight [2]
117
- arithmetic.ripplecarry16bit.fa6.ha1.carry.bias [1]
118
- arithmetic.ripplecarry16bit.fa6.ha1.carry.weight [2]
119
- arithmetic.ripplecarry16bit.fa6.ha1.sum.layer1.nand.bias [1]
120
- arithmetic.ripplecarry16bit.fa6.ha1.sum.layer1.nand.weight [2]
121
- arithmetic.ripplecarry16bit.fa6.ha1.sum.layer1.or.bias [1]
122
- arithmetic.ripplecarry16bit.fa6.ha1.sum.layer1.or.weight [2]
123
- arithmetic.ripplecarry16bit.fa6.ha1.sum.layer2.bias [1]
124
- arithmetic.ripplecarry16bit.fa6.ha1.sum.layer2.weight [2]
125
- arithmetic.ripplecarry16bit.fa6.ha2.carry.bias [1]
126
- arithmetic.ripplecarry16bit.fa6.ha2.carry.weight [2]
127
- arithmetic.ripplecarry16bit.fa6.ha2.sum.layer1.nand.bias [1]
128
- arithmetic.ripplecarry16bit.fa6.ha2.sum.layer1.nand.weight [2]
129
- arithmetic.ripplecarry16bit.fa6.ha2.sum.layer1.or.bias [1]
130
- arithmetic.ripplecarry16bit.fa6.ha2.sum.layer1.or.weight [2]
131
- arithmetic.ripplecarry16bit.fa6.ha2.sum.layer2.bias [1]
132
- arithmetic.ripplecarry16bit.fa6.ha2.sum.layer2.weight [2]
133
- arithmetic.ripplecarry16bit.fa7.carry_or.bias [1]
134
- arithmetic.ripplecarry16bit.fa7.carry_or.weight [2]
135
- arithmetic.ripplecarry16bit.fa7.ha1.carry.bias [1]
136
- arithmetic.ripplecarry16bit.fa7.ha1.carry.weight [2]
137
- arithmetic.ripplecarry16bit.fa7.ha1.sum.layer1.nand.bias [1]
138
- arithmetic.ripplecarry16bit.fa7.ha1.sum.layer1.nand.weight [2]
139
- arithmetic.ripplecarry16bit.fa7.ha1.sum.layer1.or.bias [1]
140
- arithmetic.ripplecarry16bit.fa7.ha1.sum.layer1.or.weight [2]
141
- arithmetic.ripplecarry16bit.fa7.ha1.sum.layer2.bias [1]
142
- arithmetic.ripplecarry16bit.fa7.ha1.sum.layer2.weight [2]
143
- arithmetic.ripplecarry16bit.fa7.ha2.carry.bias [1]
144
- arithmetic.ripplecarry16bit.fa7.ha2.carry.weight [2]
145
- arithmetic.ripplecarry16bit.fa7.ha2.sum.layer1.nand.bias [1]
146
- arithmetic.ripplecarry16bit.fa7.ha2.sum.layer1.nand.weight [2]
147
- arithmetic.ripplecarry16bit.fa7.ha2.sum.layer1.or.bias [1]
148
- arithmetic.ripplecarry16bit.fa7.ha2.sum.layer1.or.weight [2]
149
- arithmetic.ripplecarry16bit.fa7.ha2.sum.layer2.bias [1]
150
- arithmetic.ripplecarry16bit.fa7.ha2.sum.layer2.weight [2]
151
- arithmetic.ripplecarry16bit.fa8.carry_or.bias [1]
152
- arithmetic.ripplecarry16bit.fa8.carry_or.weight [2]
153
- arithmetic.ripplecarry16bit.fa8.ha1.carry.bias [1]
154
- arithmetic.ripplecarry16bit.fa8.ha1.carry.weight [2]
155
- arithmetic.ripplecarry16bit.fa8.ha1.sum.layer1.nand.bias [1]
156
- arithmetic.ripplecarry16bit.fa8.ha1.sum.layer1.nand.weight [2]
157
- arithmetic.ripplecarry16bit.fa8.ha1.sum.layer1.or.bias [1]
158
- arithmetic.ripplecarry16bit.fa8.ha1.sum.layer1.or.weight [2]
159
- arithmetic.ripplecarry16bit.fa8.ha1.sum.layer2.bias [1]
160
- arithmetic.ripplecarry16bit.fa8.ha1.sum.layer2.weight [2]
161
- arithmetic.ripplecarry16bit.fa8.ha2.carry.bias [1]
162
- arithmetic.ripplecarry16bit.fa8.ha2.carry.weight [2]
163
- arithmetic.ripplecarry16bit.fa8.ha2.sum.layer1.nand.bias [1]
164
- arithmetic.ripplecarry16bit.fa8.ha2.sum.layer1.nand.weight [2]
165
- arithmetic.ripplecarry16bit.fa8.ha2.sum.layer1.or.bias [1]
166
- arithmetic.ripplecarry16bit.fa8.ha2.sum.layer1.or.weight [2]
167
- arithmetic.ripplecarry16bit.fa8.ha2.sum.layer2.bias [1]
168
- arithmetic.ripplecarry16bit.fa8.ha2.sum.layer2.weight [2]
169
- arithmetic.ripplecarry16bit.fa9.carry_or.bias [1]
170
- arithmetic.ripplecarry16bit.fa9.carry_or.weight [2]
171
- arithmetic.ripplecarry16bit.fa9.ha1.carry.bias [1]
172
- arithmetic.ripplecarry16bit.fa9.ha1.carry.weight [2]
173
- arithmetic.ripplecarry16bit.fa9.ha1.sum.layer1.nand.bias [1]
174
- arithmetic.ripplecarry16bit.fa9.ha1.sum.layer1.nand.weight [2]
175
- arithmetic.ripplecarry16bit.fa9.ha1.sum.layer1.or.bias [1]
176
- arithmetic.ripplecarry16bit.fa9.ha1.sum.layer1.or.weight [2]
177
- arithmetic.ripplecarry16bit.fa9.ha1.sum.layer2.bias [1]
178
- arithmetic.ripplecarry16bit.fa9.ha1.sum.layer2.weight [2]
179
- arithmetic.ripplecarry16bit.fa9.ha2.carry.bias [1]
180
- arithmetic.ripplecarry16bit.fa9.ha2.carry.weight [2]
181
- arithmetic.ripplecarry16bit.fa9.ha2.sum.layer1.nand.bias [1]
182
- arithmetic.ripplecarry16bit.fa9.ha2.sum.layer1.nand.weight [2]
183
- arithmetic.ripplecarry16bit.fa9.ha2.sum.layer1.or.bias [1]
184
- arithmetic.ripplecarry16bit.fa9.ha2.sum.layer1.or.weight [2]
185
- arithmetic.ripplecarry16bit.fa9.ha2.sum.layer2.bias [1]
186
- arithmetic.ripplecarry16bit.fa9.ha2.sum.layer2.weight [2]
187
- arithmetic.ripplecarry16bit.fa10.carry_or.bias [1]
188
- arithmetic.ripplecarry16bit.fa10.carry_or.weight [2]
189
- arithmetic.ripplecarry16bit.fa10.ha1.carry.bias [1]
190
- arithmetic.ripplecarry16bit.fa10.ha1.carry.weight [2]
191
- arithmetic.ripplecarry16bit.fa10.ha1.sum.layer1.nand.bias [1]
192
- arithmetic.ripplecarry16bit.fa10.ha1.sum.layer1.nand.weight [2]
193
- arithmetic.ripplecarry16bit.fa10.ha1.sum.layer1.or.bias [1]
194
- arithmetic.ripplecarry16bit.fa10.ha1.sum.layer1.or.weight [2]
195
- arithmetic.ripplecarry16bit.fa10.ha1.sum.layer2.bias [1]
196
- arithmetic.ripplecarry16bit.fa10.ha1.sum.layer2.weight [2]
197
- arithmetic.ripplecarry16bit.fa10.ha2.carry.bias [1]
198
- arithmetic.ripplecarry16bit.fa10.ha2.carry.weight [2]
199
- arithmetic.ripplecarry16bit.fa10.ha2.sum.layer1.nand.bias [1]
200
- arithmetic.ripplecarry16bit.fa10.ha2.sum.layer1.nand.weight [2]
201
- arithmetic.ripplecarry16bit.fa10.ha2.sum.layer1.or.bias [1]
202
- arithmetic.ripplecarry16bit.fa10.ha2.sum.layer1.or.weight [2]
203
- arithmetic.ripplecarry16bit.fa10.ha2.sum.layer2.bias [1]
204
- arithmetic.ripplecarry16bit.fa10.ha2.sum.layer2.weight [2]
205
- arithmetic.ripplecarry16bit.fa11.carry_or.bias [1]
206
- arithmetic.ripplecarry16bit.fa11.carry_or.weight [2]
207
- arithmetic.ripplecarry16bit.fa11.ha1.carry.bias [1]
208
- arithmetic.ripplecarry16bit.fa11.ha1.carry.weight [2]
209
- arithmetic.ripplecarry16bit.fa11.ha1.sum.layer1.nand.bias [1]
210
- arithmetic.ripplecarry16bit.fa11.ha1.sum.layer1.nand.weight [2]
211
- arithmetic.ripplecarry16bit.fa11.ha1.sum.layer1.or.bias [1]
212
- arithmetic.ripplecarry16bit.fa11.ha1.sum.layer1.or.weight [2]
213
- arithmetic.ripplecarry16bit.fa11.ha1.sum.layer2.bias [1]
214
- arithmetic.ripplecarry16bit.fa11.ha1.sum.layer2.weight [2]
215
- arithmetic.ripplecarry16bit.fa11.ha2.carry.bias [1]
216
- arithmetic.ripplecarry16bit.fa11.ha2.carry.weight [2]
217
- arithmetic.ripplecarry16bit.fa11.ha2.sum.layer1.nand.bias [1]
218
- arithmetic.ripplecarry16bit.fa11.ha2.sum.layer1.nand.weight [2]
219
- arithmetic.ripplecarry16bit.fa11.ha2.sum.layer1.or.bias [1]
220
- arithmetic.ripplecarry16bit.fa11.ha2.sum.layer1.or.weight [2]
221
- arithmetic.ripplecarry16bit.fa11.ha2.sum.layer2.bias [1]
222
- arithmetic.ripplecarry16bit.fa11.ha2.sum.layer2.weight [2]
223
- arithmetic.ripplecarry16bit.fa12.carry_or.bias [1]
224
- arithmetic.ripplecarry16bit.fa12.carry_or.weight [2]
225
- arithmetic.ripplecarry16bit.fa12.ha1.carry.bias [1]
226
- arithmetic.ripplecarry16bit.fa12.ha1.carry.weight [2]
227
- arithmetic.ripplecarry16bit.fa12.ha1.sum.layer1.nand.bias [1]
228
- arithmetic.ripplecarry16bit.fa12.ha1.sum.layer1.nand.weight [2]
229
- arithmetic.ripplecarry16bit.fa12.ha1.sum.layer1.or.bias [1]
230
- arithmetic.ripplecarry16bit.fa12.ha1.sum.layer1.or.weight [2]
231
- arithmetic.ripplecarry16bit.fa12.ha1.sum.layer2.bias [1]
232
- arithmetic.ripplecarry16bit.fa12.ha1.sum.layer2.weight [2]
233
- arithmetic.ripplecarry16bit.fa12.ha2.carry.bias [1]
234
- arithmetic.ripplecarry16bit.fa12.ha2.carry.weight [2]
235
- arithmetic.ripplecarry16bit.fa12.ha2.sum.layer1.nand.bias [1]
236
- arithmetic.ripplecarry16bit.fa12.ha2.sum.layer1.nand.weight [2]
237
- arithmetic.ripplecarry16bit.fa12.ha2.sum.layer1.or.bias [1]
238
- arithmetic.ripplecarry16bit.fa12.ha2.sum.layer1.or.weight [2]
239
- arithmetic.ripplecarry16bit.fa12.ha2.sum.layer2.bias [1]
240
- arithmetic.ripplecarry16bit.fa12.ha2.sum.layer2.weight [2]
241
- arithmetic.ripplecarry16bit.fa13.carry_or.bias [1]
242
- arithmetic.ripplecarry16bit.fa13.carry_or.weight [2]
243
- arithmetic.ripplecarry16bit.fa13.ha1.carry.bias [1]
244
- arithmetic.ripplecarry16bit.fa13.ha1.carry.weight [2]
245
- arithmetic.ripplecarry16bit.fa13.ha1.sum.layer1.nand.bias [1]
246
- arithmetic.ripplecarry16bit.fa13.ha1.sum.layer1.nand.weight [2]
247
- arithmetic.ripplecarry16bit.fa13.ha1.sum.layer1.or.bias [1]
248
- arithmetic.ripplecarry16bit.fa13.ha1.sum.layer1.or.weight [2]
249
- arithmetic.ripplecarry16bit.fa13.ha1.sum.layer2.bias [1]
250
- arithmetic.ripplecarry16bit.fa13.ha1.sum.layer2.weight [2]
251
- arithmetic.ripplecarry16bit.fa13.ha2.carry.bias [1]
252
- arithmetic.ripplecarry16bit.fa13.ha2.carry.weight [2]
253
- arithmetic.ripplecarry16bit.fa13.ha2.sum.layer1.nand.bias [1]
254
- arithmetic.ripplecarry16bit.fa13.ha2.sum.layer1.nand.weight [2]
255
- arithmetic.ripplecarry16bit.fa13.ha2.sum.layer1.or.bias [1]
256
- arithmetic.ripplecarry16bit.fa13.ha2.sum.layer1.or.weight [2]
257
- arithmetic.ripplecarry16bit.fa13.ha2.sum.layer2.bias [1]
258
- arithmetic.ripplecarry16bit.fa13.ha2.sum.layer2.weight [2]
259
- arithmetic.ripplecarry16bit.fa14.carry_or.bias [1]
260
- arithmetic.ripplecarry16bit.fa14.carry_or.weight [2]
261
- arithmetic.ripplecarry16bit.fa14.ha1.carry.bias [1]
262
- arithmetic.ripplecarry16bit.fa14.ha1.carry.weight [2]
263
- arithmetic.ripplecarry16bit.fa14.ha1.sum.layer1.nand.bias [1]
264
- arithmetic.ripplecarry16bit.fa14.ha1.sum.layer1.nand.weight [2]
265
- arithmetic.ripplecarry16bit.fa14.ha1.sum.layer1.or.bias [1]
266
- arithmetic.ripplecarry16bit.fa14.ha1.sum.layer1.or.weight [2]
267
- arithmetic.ripplecarry16bit.fa14.ha1.sum.layer2.bias [1]
268
- arithmetic.ripplecarry16bit.fa14.ha1.sum.layer2.weight [2]
269
- arithmetic.ripplecarry16bit.fa14.ha2.carry.bias [1]
270
- arithmetic.ripplecarry16bit.fa14.ha2.carry.weight [2]
271
- arithmetic.ripplecarry16bit.fa14.ha2.sum.layer1.nand.bias [1]
272
- arithmetic.ripplecarry16bit.fa14.ha2.sum.layer1.nand.weight [2]
273
- arithmetic.ripplecarry16bit.fa14.ha2.sum.layer1.or.bias [1]
274
- arithmetic.ripplecarry16bit.fa14.ha2.sum.layer1.or.weight [2]
275
- arithmetic.ripplecarry16bit.fa14.ha2.sum.layer2.bias [1]
276
- arithmetic.ripplecarry16bit.fa14.ha2.sum.layer2.weight [2]
277
- arithmetic.ripplecarry16bit.fa15.carry_or.bias [1]
278
- arithmetic.ripplecarry16bit.fa15.carry_or.weight [2]
279
- arithmetic.ripplecarry16bit.fa15.ha1.carry.bias [1]
280
- arithmetic.ripplecarry16bit.fa15.ha1.carry.weight [2]
281
- arithmetic.ripplecarry16bit.fa15.ha1.sum.layer1.nand.bias [1]
282
- arithmetic.ripplecarry16bit.fa15.ha1.sum.layer1.nand.weight [2]
283
- arithmetic.ripplecarry16bit.fa15.ha1.sum.layer1.or.bias [1]
284
- arithmetic.ripplecarry16bit.fa15.ha1.sum.layer1.or.weight [2]
285
- arithmetic.ripplecarry16bit.fa15.ha1.sum.layer2.bias [1]
286
- arithmetic.ripplecarry16bit.fa15.ha1.sum.layer2.weight [2]
287
- arithmetic.ripplecarry16bit.fa15.ha2.carry.bias [1]
288
- arithmetic.ripplecarry16bit.fa15.ha2.carry.weight [2]
289
- arithmetic.ripplecarry16bit.fa15.ha2.sum.layer1.nand.bias [1]
290
- arithmetic.ripplecarry16bit.fa15.ha2.sum.layer1.nand.weight [2]
291
- arithmetic.ripplecarry16bit.fa15.ha2.sum.layer1.or.bias [1]
292
- arithmetic.ripplecarry16bit.fa15.ha2.sum.layer1.or.weight [2]
293
- arithmetic.ripplecarry16bit.fa15.ha2.sum.layer2.bias [1]
294
- arithmetic.ripplecarry16bit.fa15.ha2.sum.layer2.weight [2]
295
-
296
- 16-bit comparators
297
- arithmetic.greaterthan16bit.comparator [16]
298
- arithmetic.lessthan16bit.comparator [16]
299
-
300
- 16x16 multiplier (14 stages, bits 0-29 per stage where applicable)
301
-
302
- Stage 0: bits 0-16
303
- Stage 1: bits 0-17
304
- Stage 2: bits 0-18
305
- ...
306
- Stage 13: bits 0-29
307
-
308
- Each bit position has the same full adder structure. Total enumeration:
309
-
310
- arithmetic.multiplier16x16.stage0.bit0.carry_or.bias [1]
311
- arithmetic.multiplier16x16.stage0.bit0.carry_or.weight [2]
312
- arithmetic.multiplier16x16.stage0.bit0.ha1.carry.bias [1]
313
- arithmetic.multiplier16x16.stage0.bit0.ha1.carry.weight [2]
314
- arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer1.nand.bias [1]
315
- arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer1.nand.weight [2]
316
- arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer1.or.bias [1]
317
- arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer1.or.weight [2]
318
- arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer2.bias [1]
319
- arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer2.weight [2]
320
- arithmetic.multiplier16x16.stage0.bit0.ha2.carry.bias [1]
321
- arithmetic.multiplier16x16.stage0.bit0.ha2.carry.weight [2]
322
- arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer1.nand.bias [1]
323
- arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer1.nand.weight [2]
324
- arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer1.or.bias [1]
325
- arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer1.or.weight [2]
326
- arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer2.bias [1]
327
- arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer2.weight [2]
328
-
329
- Pattern repeats for:
330
- - stage0: bit0-bit16 (17 bits)
331
- - stage1: bit0-bit17 (18 bits)
332
- - stage2: bit0-bit18 (19 bits)
333
- - stage3: bit0-bit19 (20 bits)
334
- - stage4: bit0-bit20 (21 bits)
335
- - stage5: bit0-bit21 (22 bits)
336
- - stage6: bit0-bit22 (23 bits)
337
- - stage7: bit0-bit23 (24 bits)
338
- - stage8: bit0-bit24 (25 bits)
339
- - stage9: bit0-bit25 (26 bits)
340
- - stage10: bit0-bit26 (27 bits)
341
- - stage11: bit0-bit27 (28 bits)
342
- - stage12: bit0-bit28 (29 bits)
343
- - stage13: bit0-bit29 (30 bits)
344
-
345
- 18 tensors per bit × (17+18+19+20+21+22+23+24+25+26+27+28+29+30) = 18 × 329 = 5922 tensors for multiplier stages.
346
-
347
- Plus 256 AND gates for partial products (16×16):
348
- arithmetic.multiplier16x16.partial.r0c0.bias [1]
349
- arithmetic.multiplier16x16.partial.r0c0.weight [2]
350
- ...through...
351
- arithmetic.multiplier16x16.partial.r15c15.bias [1]
352
- arithmetic.multiplier16x16.partial.r15c15.weight [2]
353
- 256 × 2 = 512 tensors for partial products.
354
-
355
- ---
356
- COMBINATIONAL
357
-
358
- Barrel shifter 16-bit
359
- combinational.barrelshifter16bit.shift [20]
360
-
361
- Decoder 4-to-16
362
- combinational.decoder4to16.out0.bias [1]
363
- combinational.decoder4to16.out0.weight [4]
364
- combinational.decoder4to16.out1.bias [1]
365
- combinational.decoder4to16.out1.weight [4]
366
- combinational.decoder4to16.out2.bias [1]
367
- combinational.decoder4to16.out2.weight [4]
368
- combinational.decoder4to16.out3.bias [1]
369
- combinational.decoder4to16.out3.weight [4]
370
- combinational.decoder4to16.out4.bias [1]
371
- combinational.decoder4to16.out4.weight [4]
372
- combinational.decoder4to16.out5.bias [1]
373
- combinational.decoder4to16.out5.weight [4]
374
- combinational.decoder4to16.out6.bias [1]
375
- combinational.decoder4to16.out6.weight [4]
376
- combinational.decoder4to16.out7.bias [1]
377
- combinational.decoder4to16.out7.weight [4]
378
- combinational.decoder4to16.out8.bias [1]
379
- combinational.decoder4to16.out8.weight [4]
380
- combinational.decoder4to16.out9.bias [1]
381
- combinational.decoder4to16.out9.weight [4]
382
- combinational.decoder4to16.out10.bias [1]
383
- combinational.decoder4to16.out10.weight [4]
384
- combinational.decoder4to16.out11.bias [1]
385
- combinational.decoder4to16.out11.weight [4]
386
- combinational.decoder4to16.out12.bias [1]
387
- combinational.decoder4to16.out12.weight [4]
388
- combinational.decoder4to16.out13.bias [1]
389
- combinational.decoder4to16.out13.weight [4]
390
- combinational.decoder4to16.out14.bias [1]
391
- combinational.decoder4to16.out14.weight [4]
392
- combinational.decoder4to16.out15.bias [1]
393
- combinational.decoder4to16.out15.weight [4]
394
-
395
- Encoder 16-to-4
396
- combinational.encoder16to4.bit0.bias [1]
397
- combinational.encoder16to4.bit0.weight [16]
398
- combinational.encoder16to4.bit1.bias [1]
399
- combinational.encoder16to4.bit1.weight [16]
400
- combinational.encoder16to4.bit2.bias [1]
401
- combinational.encoder16to4.bit2.weight [16]
402
- combinational.encoder16to4.bit3.bias [1]
403
- combinational.encoder16to4.bit3.weight [16]
404
-
405
- Multiplexer 16-to-1
406
- combinational.multiplexer16to1.select [20]
407
-
408
- Demultiplexer 1-to-16
409
- combinational.demultiplexer1to16.decode [5]
410
-
411
- Priority encoder 16-bit
412
- combinational.priorityencoder16bit.priority [16]
413
-
414
- ---
415
- CONTROL
416
-
417
- Unconditional jump 16-bit
418
- control.jump.bit0.bias [1]
419
- control.jump.bit0.weight [1]
420
- control.jump.bit1.bias [1]
421
- control.jump.bit1.weight [1]
422
- control.jump.bit2.bias [1]
423
- control.jump.bit2.weight [1]
424
- control.jump.bit3.bias [1]
425
- control.jump.bit3.weight [1]
426
- control.jump.bit4.bias [1]
427
- control.jump.bit4.weight [1]
428
- control.jump.bit5.bias [1]
429
- control.jump.bit5.weight [1]
430
- control.jump.bit6.bias [1]
431
- control.jump.bit6.weight [1]
432
- control.jump.bit7.bias [1]
433
- control.jump.bit7.weight [1]
434
- control.jump.bit8.bias [1]
435
- control.jump.bit8.weight [1]
436
- control.jump.bit9.bias [1]
437
- control.jump.bit9.weight [1]
438
- control.jump.bit10.bias [1]
439
- control.jump.bit10.weight [1]
440
- control.jump.bit11.bias [1]
441
- control.jump.bit11.weight [1]
442
- control.jump.bit12.bias [1]
443
- control.jump.bit12.weight [1]
444
- control.jump.bit13.bias [1]
445
- control.jump.bit13.weight [1]
446
- control.jump.bit14.bias [1]
447
- control.jump.bit14.weight [1]
448
- control.jump.bit15.bias [1]
449
- control.jump.bit15.weight [1]
450
-
451
- Conditional jump 16-bit (template for JZ, JNZ, JC, JNC, JN, JP, JV, JNV, and generic conditionaljump)
452
-
453
- Each conditional jump type follows this pattern for bits 0-15:
454
- control.{jtype}.bit{N}.and_a.bias [1]
455
- control.{jtype}.bit{N}.and_a.weight [2]
456
- control.{jtype}.bit{N}.and_b.bias [1]
457
- control.{jtype}.bit{N}.and_b.weight [2]
458
- control.{jtype}.bit{N}.not_sel.bias [1]
459
- control.{jtype}.bit{N}.not_sel.weight [1]
460
- control.{jtype}.bit{N}.or.bias [1]
461
- control.{jtype}.bit{N}.or.weight [2]
462
-
463
- Where {jtype} ∈ {conditionaljump, jz, jnz, jc, jnc, jn, jp, jv, jnv} and N ∈ {0..15}
464
-
465
- Full expansion for control.jz (others follow same pattern):
466
- control.jz.bit0.and_a.bias [1]
467
- control.jz.bit0.and_a.weight [2]
468
- control.jz.bit0.and_b.bias [1]
469
- control.jz.bit0.and_b.weight [2]
470
- control.jz.bit0.not_sel.bias [1]
471
- control.jz.bit0.not_sel.weight [1]
472
- control.jz.bit0.or.bias [1]
473
- control.jz.bit0.or.weight [2]
474
- control.jz.bit1.and_a.bias [1]
475
- control.jz.bit1.and_a.weight [2]
476
- control.jz.bit1.and_b.bias [1]
477
- control.jz.bit1.and_b.weight [2]
478
- control.jz.bit1.not_sel.bias [1]
479
- control.jz.bit1.not_sel.weight [1]
480
- control.jz.bit1.or.bias [1]
481
- control.jz.bit1.or.weight [2]
482
- control.jz.bit2.and_a.bias [1]
483
- control.jz.bit2.and_a.weight [2]
484
- control.jz.bit2.and_b.bias [1]
485
- control.jz.bit2.and_b.weight [2]
486
- control.jz.bit2.not_sel.bias [1]
487
- control.jz.bit2.not_sel.weight [1]
488
- control.jz.bit2.or.bias [1]
489
- control.jz.bit2.or.weight [2]
490
- control.jz.bit3.and_a.bias [1]
491
- control.jz.bit3.and_a.weight [2]
492
- control.jz.bit3.and_b.bias [1]
493
- control.jz.bit3.and_b.weight [2]
494
- control.jz.bit3.not_sel.bias [1]
495
- control.jz.bit3.not_sel.weight [1]
496
- control.jz.bit3.or.bias [1]
497
- control.jz.bit3.or.weight [2]
498
- control.jz.bit4.and_a.bias [1]
499
- control.jz.bit4.and_a.weight [2]
500
- control.jz.bit4.and_b.bias [1]
501
- control.jz.bit4.and_b.weight [2]
502
- control.jz.bit4.not_sel.bias [1]
503
- control.jz.bit4.not_sel.weight [1]
504
- control.jz.bit4.or.bias [1]
505
- control.jz.bit4.or.weight [2]
506
- control.jz.bit5.and_a.bias [1]
507
- control.jz.bit5.and_a.weight [2]
508
- control.jz.bit5.and_b.bias [1]
509
- control.jz.bit5.and_b.weight [2]
510
- control.jz.bit5.not_sel.bias [1]
511
- control.jz.bit5.not_sel.weight [1]
512
- control.jz.bit5.or.bias [1]
513
- control.jz.bit5.or.weight [2]
514
- control.jz.bit6.and_a.bias [1]
515
- control.jz.bit6.and_a.weight [2]
516
- control.jz.bit6.and_b.bias [1]
517
- control.jz.bit6.and_b.weight [2]
518
- control.jz.bit6.not_sel.bias [1]
519
- control.jz.bit6.not_sel.weight [1]
520
- control.jz.bit6.or.bias [1]
521
- control.jz.bit6.or.weight [2]
522
- control.jz.bit7.and_a.bias [1]
523
- control.jz.bit7.and_a.weight [2]
524
- control.jz.bit7.and_b.bias [1]
525
- control.jz.bit7.and_b.weight [2]
526
- control.jz.bit7.not_sel.bias [1]
527
- control.jz.bit7.not_sel.weight [1]
528
- control.jz.bit7.or.bias [1]
529
- control.jz.bit7.or.weight [2]
530
- control.jz.bit8.and_a.bias [1]
531
- control.jz.bit8.and_a.weight [2]
532
- control.jz.bit8.and_b.bias [1]
533
- control.jz.bit8.and_b.weight [2]
534
- control.jz.bit8.not_sel.bias [1]
535
- control.jz.bit8.not_sel.weight [1]
536
- control.jz.bit8.or.bias [1]
537
- control.jz.bit8.or.weight [2]
538
- control.jz.bit9.and_a.bias [1]
539
- control.jz.bit9.and_a.weight [2]
540
- control.jz.bit9.and_b.bias [1]
541
- control.jz.bit9.and_b.weight [2]
542
- control.jz.bit9.not_sel.bias [1]
543
- control.jz.bit9.not_sel.weight [1]
544
- control.jz.bit9.or.bias [1]
545
- control.jz.bit9.or.weight [2]
546
- control.jz.bit10.and_a.bias [1]
547
- control.jz.bit10.and_a.weight [2]
548
- control.jz.bit10.and_b.bias [1]
549
- control.jz.bit10.and_b.weight [2]
550
- control.jz.bit10.not_sel.bias [1]
551
- control.jz.bit10.not_sel.weight [1]
552
- control.jz.bit10.or.bias [1]
553
- control.jz.bit10.or.weight [2]
554
- control.jz.bit11.and_a.bias [1]
555
- control.jz.bit11.and_a.weight [2]
556
- control.jz.bit11.and_b.bias [1]
557
- control.jz.bit11.and_b.weight [2]
558
- control.jz.bit11.not_sel.bias [1]
559
- control.jz.bit11.not_sel.weight [1]
560
- control.jz.bit11.or.bias [1]
561
- control.jz.bit11.or.weight [2]
562
- control.jz.bit12.and_a.bias [1]
563
- control.jz.bit12.and_a.weight [2]
564
- control.jz.bit12.and_b.bias [1]
565
- control.jz.bit12.and_b.weight [2]
566
- control.jz.bit12.not_sel.bias [1]
567
- control.jz.bit12.not_sel.weight [1]
568
- control.jz.bit12.or.bias [1]
569
- control.jz.bit12.or.weight [2]
570
- control.jz.bit13.and_a.bias [1]
571
- control.jz.bit13.and_a.weight [2]
572
- control.jz.bit13.and_b.bias [1]
573
- control.jz.bit13.and_b.weight [2]
574
- control.jz.bit13.not_sel.bias [1]
575
- control.jz.bit13.not_sel.weight [1]
576
- control.jz.bit13.or.bias [1]
577
- control.jz.bit13.or.weight [2]
578
- control.jz.bit14.and_a.bias [1]
579
- control.jz.bit14.and_a.weight [2]
580
- control.jz.bit14.and_b.bias [1]
581
- control.jz.bit14.and_b.weight [2]
582
- control.jz.bit14.not_sel.bias [1]
583
- control.jz.bit14.not_sel.weight [1]
584
- control.jz.bit14.or.bias [1]
585
- control.jz.bit14.or.weight [2]
586
- control.jz.bit15.and_a.bias [1]
587
- control.jz.bit15.and_a.weight [2]
588
- control.jz.bit15.and_b.bias [1]
589
- control.jz.bit15.and_b.weight [2]
590
- control.jz.bit15.not_sel.bias [1]
591
- control.jz.bit15.not_sel.weight [1]
592
- control.jz.bit15.or.bias [1]
593
- control.jz.bit15.or.weight [2]
594
-
595
- Repeat above for: jnz, jc, jnc, jn, jp, jv, jnv, conditionaljump (9 types × 16 bits × 8 tensors = 1152 tensors)
596
-
597
- Stack operations (unchanged)
598
- control.call.jump [1]
599
- control.call.push [1]
600
- control.pop.load [1]
601
- control.pop.sp_inc [1]
602
- control.push.sp_dec [1]
603
- control.push.store [1]
604
- control.ret.jump [1]
605
- control.ret.pop [1]
606
- control.sp_dec.uses [1]
607
- control.sp_inc.uses [1]
608
-
609
- ---
610
- ERROR DETECTION
611
-
612
- Checksum 16-bit
613
- error_detection.checksum16bit.sum.bias [1]
614
- error_detection.checksum16bit.sum.weight [16]
615
-
616
- Parity 16-bit
617
- error_detection.evenparitychecker16bit.bias [1]
618
- error_detection.evenparitychecker16bit.weight [16]
619
- error_detection.oddparitychecker16bit.not.bias [1]
620
- error_detection.oddparitychecker16bit.not.weight [1]
621
- error_detection.oddparitychecker16bit.parity.bias [1]
622
- error_detection.oddparitychecker16bit.parity.weight [16]
623
-
624
- Parity checker/generator 16-bit (4 XOR stages instead of 3)
625
- error_detection.paritychecker16bit.output.not.bias [1]
626
- error_detection.paritychecker16bit.output.not.weight [1]
627
- error_detection.paritychecker16bit.stage1.xor0.layer1.nand.bias [1]
628
- error_detection.paritychecker16bit.stage1.xor0.layer1.nand.weight [2]
629
- error_detection.paritychecker16bit.stage1.xor0.layer1.or.bias [1]
630
- error_detection.paritychecker16bit.stage1.xor0.layer1.or.weight [2]
631
- error_detection.paritychecker16bit.stage1.xor0.layer2.bias [1]
632
- error_detection.paritychecker16bit.stage1.xor0.layer2.weight [2]
633
- error_detection.paritychecker16bit.stage1.xor1.layer1.nand.bias [1]
634
- error_detection.paritychecker16bit.stage1.xor1.layer1.nand.weight [2]
635
- error_detection.paritychecker16bit.stage1.xor1.layer1.or.bias [1]
636
- error_detection.paritychecker16bit.stage1.xor1.layer1.or.weight [2]
637
- error_detection.paritychecker16bit.stage1.xor1.layer2.bias [1]
638
- error_detection.paritychecker16bit.stage1.xor1.layer2.weight [2]
639
- error_detection.paritychecker16bit.stage1.xor2.layer1.nand.bias [1]
640
- error_detection.paritychecker16bit.stage1.xor2.layer1.nand.weight [2]
641
- error_detection.paritychecker16bit.stage1.xor2.layer1.or.bias [1]
642
- error_detection.paritychecker16bit.stage1.xor2.layer1.or.weight [2]
643
- error_detection.paritychecker16bit.stage1.xor2.layer2.bias [1]
644
- error_detection.paritychecker16bit.stage1.xor2.layer2.weight [2]
645
- error_detection.paritychecker16bit.stage1.xor3.layer1.nand.bias [1]
646
- error_detection.paritychecker16bit.stage1.xor3.layer1.nand.weight [2]
647
- error_detection.paritychecker16bit.stage1.xor3.layer1.or.bias [1]
648
- error_detection.paritychecker16bit.stage1.xor3.layer1.or.weight [2]
649
- error_detection.paritychecker16bit.stage1.xor3.layer2.bias [1]
650
- error_detection.paritychecker16bit.stage1.xor3.layer2.weight [2]
651
- error_detection.paritychecker16bit.stage1.xor4.layer1.nand.bias [1]
652
- error_detection.paritychecker16bit.stage1.xor4.layer1.nand.weight [2]
653
- error_detection.paritychecker16bit.stage1.xor4.layer1.or.bias [1]
654
- error_detection.paritychecker16bit.stage1.xor4.layer1.or.weight [2]
655
- error_detection.paritychecker16bit.stage1.xor4.layer2.bias [1]
656
- error_detection.paritychecker16bit.stage1.xor4.layer2.weight [2]
657
- error_detection.paritychecker16bit.stage1.xor5.layer1.nand.bias [1]
658
- error_detection.paritychecker16bit.stage1.xor5.layer1.nand.weight [2]
659
- error_detection.paritychecker16bit.stage1.xor5.layer1.or.bias [1]
660
- error_detection.paritychecker16bit.stage1.xor5.layer1.or.weight [2]
661
- error_detection.paritychecker16bit.stage1.xor5.layer2.bias [1]
662
- error_detection.paritychecker16bit.stage1.xor5.layer2.weight [2]
663
- error_detection.paritychecker16bit.stage1.xor6.layer1.nand.bias [1]
664
- error_detection.paritychecker16bit.stage1.xor6.layer1.nand.weight [2]
665
- error_detection.paritychecker16bit.stage1.xor6.layer1.or.bias [1]
666
- error_detection.paritychecker16bit.stage1.xor6.layer1.or.weight [2]
667
- error_detection.paritychecker16bit.stage1.xor6.layer2.bias [1]
668
- error_detection.paritychecker16bit.stage1.xor6.layer2.weight [2]
669
- error_detection.paritychecker16bit.stage1.xor7.layer1.nand.bias [1]
670
- error_detection.paritychecker16bit.stage1.xor7.layer1.nand.weight [2]
671
- error_detection.paritychecker16bit.stage1.xor7.layer1.or.bias [1]
672
- error_detection.paritychecker16bit.stage1.xor7.layer1.or.weight [2]
673
- error_detection.paritychecker16bit.stage1.xor7.layer2.bias [1]
674
- error_detection.paritychecker16bit.stage1.xor7.layer2.weight [2]
675
- error_detection.paritychecker16bit.stage2.xor0.layer1.nand.bias [1]
676
- error_detection.paritychecker16bit.stage2.xor0.layer1.nand.weight [2]
677
- error_detection.paritychecker16bit.stage2.xor0.layer1.or.bias [1]
678
- error_detection.paritychecker16bit.stage2.xor0.layer1.or.weight [2]
679
- error_detection.paritychecker16bit.stage2.xor0.layer2.bias [1]
680
- error_detection.paritychecker16bit.stage2.xor0.layer2.weight [2]
681
- error_detection.paritychecker16bit.stage2.xor1.layer1.nand.bias [1]
682
- error_detection.paritychecker16bit.stage2.xor1.layer1.nand.weight [2]
683
- error_detection.paritychecker16bit.stage2.xor1.layer1.or.bias [1]
684
- error_detection.paritychecker16bit.stage2.xor1.layer1.or.weight [2]
685
- error_detection.paritychecker16bit.stage2.xor1.layer2.bias [1]
686
- error_detection.paritychecker16bit.stage2.xor1.layer2.weight [2]
687
- error_detection.paritychecker16bit.stage2.xor2.layer1.nand.bias [1]
688
- error_detection.paritychecker16bit.stage2.xor2.layer1.nand.weight [2]
689
- error_detection.paritychecker16bit.stage2.xor2.layer1.or.bias [1]
690
- error_detection.paritychecker16bit.stage2.xor2.layer1.or.weight [2]
691
- error_detection.paritychecker16bit.stage2.xor2.layer2.bias [1]
692
- error_detection.paritychecker16bit.stage2.xor2.layer2.weight [2]
693
- error_detection.paritychecker16bit.stage2.xor3.layer1.nand.bias [1]
694
- error_detection.paritychecker16bit.stage2.xor3.layer1.nand.weight [2]
695
- error_detection.paritychecker16bit.stage2.xor3.layer1.or.bias [1]
696
- error_detection.paritychecker16bit.stage2.xor3.layer1.or.weight [2]
697
- error_detection.paritychecker16bit.stage2.xor3.layer2.bias [1]
698
- error_detection.paritychecker16bit.stage2.xor3.layer2.weight [2]
699
- error_detection.paritychecker16bit.stage3.xor0.layer1.nand.bias [1]
700
- error_detection.paritychecker16bit.stage3.xor0.layer1.nand.weight [2]
701
- error_detection.paritychecker16bit.stage3.xor0.layer1.or.bias [1]
702
- error_detection.paritychecker16bit.stage3.xor0.layer1.or.weight [2]
703
- error_detection.paritychecker16bit.stage3.xor0.layer2.bias [1]
704
- error_detection.paritychecker16bit.stage3.xor0.layer2.weight [2]
705
- error_detection.paritychecker16bit.stage3.xor1.layer1.nand.bias [1]
706
- error_detection.paritychecker16bit.stage3.xor1.layer1.nand.weight [2]
707
- error_detection.paritychecker16bit.stage3.xor1.layer1.or.bias [1]
708
- error_detection.paritychecker16bit.stage3.xor1.layer1.or.weight [2]
709
- error_detection.paritychecker16bit.stage3.xor1.layer2.bias [1]
710
- error_detection.paritychecker16bit.stage3.xor1.layer2.weight [2]
711
- error_detection.paritychecker16bit.stage4.xor0.layer1.nand.bias [1]
712
- error_detection.paritychecker16bit.stage4.xor0.layer1.nand.weight [2]
713
- error_detection.paritychecker16bit.stage4.xor0.layer1.or.bias [1]
714
- error_detection.paritychecker16bit.stage4.xor0.layer1.or.weight [2]
715
- error_detection.paritychecker16bit.stage4.xor0.layer2.bias [1]
716
- error_detection.paritychecker16bit.stage4.xor0.layer2.weight [2]
717
-
718
- Identical structure for paritygenerator16bit.
719
-
720
- CRC-16
721
- error_detection.crc16.divisor [17]
722
-
723
- Hamming (15,11) - 11 data bits, 4 parity bits (p1-p4 below; note the listing also contains a fifth tensor p0 with no bias - presumably an overall-parity/SECDED extension bit, verify against the encoder)
724
- error_detection.hammingencode11bit.p0.weight [11]
725
- error_detection.hammingencode11bit.p1.bias [1]
726
- error_detection.hammingencode11bit.p1.weight [11]
727
- error_detection.hammingencode11bit.p2.bias [1]
728
- error_detection.hammingencode11bit.p2.weight [11]
729
- error_detection.hammingencode11bit.p3.bias [1]
730
- error_detection.hammingencode11bit.p3.weight [11]
731
- error_detection.hammingencode11bit.p4.bias [1]
732
- error_detection.hammingencode11bit.p4.weight [11]
733
- error_detection.hammingdecode15bit.s1.bias [1]
734
- error_detection.hammingdecode15bit.s1.weight [8]
735
- error_detection.hammingdecode15bit.s2.bias [1]
736
- error_detection.hammingdecode15bit.s2.weight [8]
737
- error_detection.hammingdecode15bit.s3.bias [1]
738
- error_detection.hammingdecode15bit.s3.weight [8]
739
- error_detection.hammingdecode15bit.s4.bias [1]
740
- error_detection.hammingdecode15bit.s4.weight [8]
741
- error_detection.hammingsyndrome15bit.s1.weight [8]
742
- error_detection.hammingsyndrome15bit.s2.weight [8]
743
- error_detection.hammingsyndrome15bit.s3.weight [8]
744
- error_detection.hammingsyndrome15bit.s4.weight [8]
745
-
746
- Longitudinal parity 16-bit
747
- error_detection.longitudinalparity16bit.col_parity [16]
748
- error_detection.longitudinalparity16bit.row_parity [16]
749
-
750
- ---
751
- MODULAR
752
-
753
- For 16-bit inputs, modular arithmetic requires detecting which of ceil(65536/N) ranges the input falls into. Structure per modulus:
754
-
755
- mod2 (simple - just check LSB)
756
- modular.mod2_16bit.bias [1]
757
- modular.mod2_16bit.weight [16]
758
-
759
- mod4 (check 2 LSBs)
760
- modular.mod4_16bit.bias [1]
761
- modular.mod4_16bit.weight [16]
762
-
763
- mod8 (check 3 LSBs)
764
- modular.mod8_16bit.bias [1]
765
- modular.mod8_16bit.weight [16]
766
-
767
- mod16 (check 4 LSBs)
768
- modular.mod16_16bit.bias [1]
769
- modular.mod16_16bit.weight [16]
770
-
771
- For non-power-of-2 moduli (3, 5, 6, 7, 9, 10, 11, 12), use an iterative subtraction circuit referencing the 16-bit subtractor and comparator, or expand the range-check approach:
772
-
773
- mod3, mod5, mod6, mod7, mod9, mod10, mod11, mod12 (range-check approach, pattern):
774
- modular.mod{N}_16bit.layer1.geq{K}.bias [1]
775
- modular.mod{N}_16bit.layer1.geq{K}.weight [16]
776
- modular.mod{N}_16bit.layer1.leq{K}.bias [1]
777
- modular.mod{N}_16bit.layer1.leq{K}.weight [16]
778
- modular.mod{N}_16bit.layer2.eq{K}.bias [1]
779
- modular.mod{N}_16bit.layer2.eq{K}.weight [2]
780
- modular.mod{N}_16bit.layer3.or.bias [1]
781
- modular.mod{N}_16bit.layer3.or.weight [R]
782
-
783
- Where R = number of ranges = ceil(65536/N). Note: for small N this is tens of thousands of ranges (e.g. R = 21846 for N = 3), so the range-check approach is impractical at 16 bits; the modular tensor estimate in the totals below assumes the iterative-subtraction approach.
784
-
785
- ---
786
- PATTERN RECOGNITION
787
-
788
- pattern_recognition.popcount16bit.bias [1]
789
- pattern_recognition.popcount16bit.weight [16]
790
- pattern_recognition.allones16bit.bias [1]
791
- pattern_recognition.allones16bit.weight [16]
792
- pattern_recognition.allzeros16bit.bias [1]
793
- pattern_recognition.allzeros16bit.weight [16]
794
- pattern_recognition.alternating16bit.pattern1.weight [16]
795
- pattern_recognition.alternating16bit.pattern2.weight [16]
796
- pattern_recognition.hammingdistance16bit.popcount.weight [16]
797
- pattern_recognition.hammingdistance16bit.xor.weight [32]
798
- pattern_recognition.leadingones16bit.weight [16]
799
- pattern_recognition.trailingones16bit.weight [16]
800
- pattern_recognition.runlength16bit.weight [16]
801
- pattern_recognition.onehotdetector16bit.and.bias [1]
802
- pattern_recognition.onehotdetector16bit.and.weight [2]
803
- pattern_recognition.onehotdetector16bit.atleast1.bias [1]
804
- pattern_recognition.onehotdetector16bit.atleast1.weight [16]
805
- pattern_recognition.onehotdetector16bit.atmost1.bias [1]
806
- pattern_recognition.onehotdetector16bit.atmost1.weight [16]
807
- pattern_recognition.symmetry16bit.and.bias [1]
808
- pattern_recognition.symmetry16bit.and.weight [8]
809
- pattern_recognition.symmetry16bit.xnor0.weight [2]
810
- pattern_recognition.symmetry16bit.xnor1.weight [2]
811
- pattern_recognition.symmetry16bit.xnor2.weight [2]
812
- pattern_recognition.symmetry16bit.xnor3.weight [2]
813
- pattern_recognition.symmetry16bit.xnor4.weight [2]
814
- pattern_recognition.symmetry16bit.xnor5.weight [2]
815
- pattern_recognition.symmetry16bit.xnor6.weight [2]
816
- pattern_recognition.symmetry16bit.xnor7.weight [2]
817
-
818
- ---
819
- THRESHOLD
820
-
821
- threshold.alloutof16.bias [1]
822
- threshold.alloutof16.weight [16]
823
- threshold.oneoutof16.bias [1]
824
- threshold.oneoutof16.weight [16]
825
- threshold.twooutof16.bias [1]
826
- threshold.twooutof16.weight [16]
827
- threshold.threeoutof16.bias [1]
828
- threshold.threeoutof16.weight [16]
829
- threshold.fouroutof16.bias [1]
830
- threshold.fouroutof16.weight [16]
831
- threshold.fiveoutof16.bias [1]
832
- threshold.fiveoutof16.weight [16]
833
- threshold.sixoutof16.bias [1]
834
- threshold.sixoutof16.weight [16]
835
- threshold.sevenoutof16.bias [1]
836
- threshold.sevenoutof16.weight [16]
837
- threshold.eightoutof16.bias [1]
838
- threshold.eightoutof16.weight [16]
839
- threshold.nineoutof16.bias [1]
840
- threshold.nineoutof16.weight [16]
841
- threshold.tenoutof16.bias [1]
842
- threshold.tenoutof16.weight [16]
843
- threshold.elevenoutof16.bias [1]
844
- threshold.elevenoutof16.weight [16]
845
- threshold.twelveoutof16.bias [1]
846
- threshold.twelveoutof16.weight [16]
847
- threshold.thirteenoutof16.bias [1]
848
- threshold.thirteenoutof16.weight [16]
849
- threshold.fourteenoutof16.bias [1]
850
- threshold.fourteenoutof16.weight [16]
851
- threshold.fifteenoutof16.bias [1]
852
- threshold.fifteenoutof16.weight [16]
853
- threshold.sixteenoutof16.bias [1]
854
- threshold.sixteenoutof16.weight [16]
855
- threshold.majority16.bias [1]
856
- threshold.majority16.weight [16]
857
- threshold.minority16.bias [1]
858
- threshold.minority16.weight [16]
859
- threshold.atleastk_8_16bit.bias [1]
860
- threshold.atleastk_8_16bit.weight [16]
861
- threshold.atmostk_8_16bit.bias [1]
862
- threshold.atmostk_8_16bit.weight [16]
863
- threshold.exactlyk_8_16bit.and.bias [1]
864
- threshold.exactlyk_8_16bit.and.weight [2]
865
- threshold.exactlyk_8_16bit.atleast.bias [1]
866
- threshold.exactlyk_8_16bit.atleast.weight [16]
867
- threshold.exactlyk_8_16bit.atmost.bias [1]
868
- threshold.exactlyk_8_16bit.atmost.weight [16]
869
-
870
- ---
871
- MANIFEST
872
-
873
- manifest.alu_operations [1]
874
- manifest.flags [1]
875
- manifest.instruction_width [1]
876
- manifest.memory_bytes [1]
877
- manifest.pc_width [1]
878
- manifest.register_width [1]
879
- manifest.registers [1]
880
- manifest.turing_complete [1]
881
- manifest.version [1]
882
-
883
- Values change:
884
- - register_width: 8 → 16
885
- - pc_width: 8 → 16
886
- - memory_bytes: 256 → 65536
887
-
888
- ---
889
- TOTAL NEW TENSOR COUNT
890
-
891
- | Category | Count |
892
- |-------------------------------|-------------------|
893
- | ripplecarry16bit | 288 |
894
- | 16-bit comparators | 2 |
895
- | multiplier16x16 | ~5900 |
896
- | combinational | 45 |
897
- | control (jump + conditionals) | 1184 |
898
- | error_detection | ~200 |
899
- | modular | ~600 |
900
- | pattern_recognition | 29 |
901
- | threshold | 48 |
902
- | manifest | 9 |
903
- | TOTAL | ~8300 new tensors |
904
-
905
  Combined with existing 8-bit tensors retained for backwards compatibility or removed: final 16-bit model ~9000-17000 tensors depending on whether 8-bit components are kept.
 
1
+ 16-BIT TENSOR MANIFEST
2
+
3
+ ---
4
+ ARITHMETIC
5
+
6
+ ripplecarry16bit
7
+ arithmetic.ripplecarry16bit.fa0.carry_or.bias [1]
8
+ arithmetic.ripplecarry16bit.fa0.carry_or.weight [2]
9
+ arithmetic.ripplecarry16bit.fa0.ha1.carry.bias [1]
10
+ arithmetic.ripplecarry16bit.fa0.ha1.carry.weight [2]
11
+ arithmetic.ripplecarry16bit.fa0.ha1.sum.layer1.nand.bias [1]
12
+ arithmetic.ripplecarry16bit.fa0.ha1.sum.layer1.nand.weight [2]
13
+ arithmetic.ripplecarry16bit.fa0.ha1.sum.layer1.or.bias [1]
14
+ arithmetic.ripplecarry16bit.fa0.ha1.sum.layer1.or.weight [2]
15
+ arithmetic.ripplecarry16bit.fa0.ha1.sum.layer2.bias [1]
16
+ arithmetic.ripplecarry16bit.fa0.ha1.sum.layer2.weight [2]
17
+ arithmetic.ripplecarry16bit.fa0.ha2.carry.bias [1]
18
+ arithmetic.ripplecarry16bit.fa0.ha2.carry.weight [2]
19
+ arithmetic.ripplecarry16bit.fa0.ha2.sum.layer1.nand.bias [1]
20
+ arithmetic.ripplecarry16bit.fa0.ha2.sum.layer1.nand.weight [2]
21
+ arithmetic.ripplecarry16bit.fa0.ha2.sum.layer1.or.bias [1]
22
+ arithmetic.ripplecarry16bit.fa0.ha2.sum.layer1.or.weight [2]
23
+ arithmetic.ripplecarry16bit.fa0.ha2.sum.layer2.bias [1]
24
+ arithmetic.ripplecarry16bit.fa0.ha2.sum.layer2.weight [2]
25
+ arithmetic.ripplecarry16bit.fa1.carry_or.bias [1]
26
+ arithmetic.ripplecarry16bit.fa1.carry_or.weight [2]
27
+ arithmetic.ripplecarry16bit.fa1.ha1.carry.bias [1]
28
+ arithmetic.ripplecarry16bit.fa1.ha1.carry.weight [2]
29
+ arithmetic.ripplecarry16bit.fa1.ha1.sum.layer1.nand.bias [1]
30
+ arithmetic.ripplecarry16bit.fa1.ha1.sum.layer1.nand.weight [2]
31
+ arithmetic.ripplecarry16bit.fa1.ha1.sum.layer1.or.bias [1]
32
+ arithmetic.ripplecarry16bit.fa1.ha1.sum.layer1.or.weight [2]
33
+ arithmetic.ripplecarry16bit.fa1.ha1.sum.layer2.bias [1]
34
+ arithmetic.ripplecarry16bit.fa1.ha1.sum.layer2.weight [2]
35
+ arithmetic.ripplecarry16bit.fa1.ha2.carry.bias [1]
36
+ arithmetic.ripplecarry16bit.fa1.ha2.carry.weight [2]
37
+ arithmetic.ripplecarry16bit.fa1.ha2.sum.layer1.nand.bias [1]
38
+ arithmetic.ripplecarry16bit.fa1.ha2.sum.layer1.nand.weight [2]
39
+ arithmetic.ripplecarry16bit.fa1.ha2.sum.layer1.or.bias [1]
40
+ arithmetic.ripplecarry16bit.fa1.ha2.sum.layer1.or.weight [2]
41
+ arithmetic.ripplecarry16bit.fa1.ha2.sum.layer2.bias [1]
42
+ arithmetic.ripplecarry16bit.fa1.ha2.sum.layer2.weight [2]
43
+ arithmetic.ripplecarry16bit.fa2.carry_or.bias [1]
44
+ arithmetic.ripplecarry16bit.fa2.carry_or.weight [2]
45
+ arithmetic.ripplecarry16bit.fa2.ha1.carry.bias [1]
46
+ arithmetic.ripplecarry16bit.fa2.ha1.carry.weight [2]
47
+ arithmetic.ripplecarry16bit.fa2.ha1.sum.layer1.nand.bias [1]
48
+ arithmetic.ripplecarry16bit.fa2.ha1.sum.layer1.nand.weight [2]
49
+ arithmetic.ripplecarry16bit.fa2.ha1.sum.layer1.or.bias [1]
50
+ arithmetic.ripplecarry16bit.fa2.ha1.sum.layer1.or.weight [2]
51
+ arithmetic.ripplecarry16bit.fa2.ha1.sum.layer2.bias [1]
52
+ arithmetic.ripplecarry16bit.fa2.ha1.sum.layer2.weight [2]
53
+ arithmetic.ripplecarry16bit.fa2.ha2.carry.bias [1]
54
+ arithmetic.ripplecarry16bit.fa2.ha2.carry.weight [2]
55
+ arithmetic.ripplecarry16bit.fa2.ha2.sum.layer1.nand.bias [1]
56
+ arithmetic.ripplecarry16bit.fa2.ha2.sum.layer1.nand.weight [2]
57
+ arithmetic.ripplecarry16bit.fa2.ha2.sum.layer1.or.bias [1]
58
+ arithmetic.ripplecarry16bit.fa2.ha2.sum.layer1.or.weight [2]
59
+ arithmetic.ripplecarry16bit.fa2.ha2.sum.layer2.bias [1]
60
+ arithmetic.ripplecarry16bit.fa2.ha2.sum.layer2.weight [2]
61
+ arithmetic.ripplecarry16bit.fa3.carry_or.bias [1]
62
+ arithmetic.ripplecarry16bit.fa3.carry_or.weight [2]
63
+ arithmetic.ripplecarry16bit.fa3.ha1.carry.bias [1]
64
+ arithmetic.ripplecarry16bit.fa3.ha1.carry.weight [2]
65
+ arithmetic.ripplecarry16bit.fa3.ha1.sum.layer1.nand.bias [1]
66
+ arithmetic.ripplecarry16bit.fa3.ha1.sum.layer1.nand.weight [2]
67
+ arithmetic.ripplecarry16bit.fa3.ha1.sum.layer1.or.bias [1]
68
+ arithmetic.ripplecarry16bit.fa3.ha1.sum.layer1.or.weight [2]
69
+ arithmetic.ripplecarry16bit.fa3.ha1.sum.layer2.bias [1]
70
+ arithmetic.ripplecarry16bit.fa3.ha1.sum.layer2.weight [2]
71
+ arithmetic.ripplecarry16bit.fa3.ha2.carry.bias [1]
72
+ arithmetic.ripplecarry16bit.fa3.ha2.carry.weight [2]
73
+ arithmetic.ripplecarry16bit.fa3.ha2.sum.layer1.nand.bias [1]
74
+ arithmetic.ripplecarry16bit.fa3.ha2.sum.layer1.nand.weight [2]
75
+ arithmetic.ripplecarry16bit.fa3.ha2.sum.layer1.or.bias [1]
76
+ arithmetic.ripplecarry16bit.fa3.ha2.sum.layer1.or.weight [2]
77
+ arithmetic.ripplecarry16bit.fa3.ha2.sum.layer2.bias [1]
78
+ arithmetic.ripplecarry16bit.fa3.ha2.sum.layer2.weight [2]
79
+ arithmetic.ripplecarry16bit.fa4.carry_or.bias [1]
80
+ arithmetic.ripplecarry16bit.fa4.carry_or.weight [2]
81
+ arithmetic.ripplecarry16bit.fa4.ha1.carry.bias [1]
82
+ arithmetic.ripplecarry16bit.fa4.ha1.carry.weight [2]
83
+ arithmetic.ripplecarry16bit.fa4.ha1.sum.layer1.nand.bias [1]
84
+ arithmetic.ripplecarry16bit.fa4.ha1.sum.layer1.nand.weight [2]
85
+ arithmetic.ripplecarry16bit.fa4.ha1.sum.layer1.or.bias [1]
86
+ arithmetic.ripplecarry16bit.fa4.ha1.sum.layer1.or.weight [2]
87
+ arithmetic.ripplecarry16bit.fa4.ha1.sum.layer2.bias [1]
88
+ arithmetic.ripplecarry16bit.fa4.ha1.sum.layer2.weight [2]
89
+ arithmetic.ripplecarry16bit.fa4.ha2.carry.bias [1]
90
+ arithmetic.ripplecarry16bit.fa4.ha2.carry.weight [2]
91
+ arithmetic.ripplecarry16bit.fa4.ha2.sum.layer1.nand.bias [1]
92
+ arithmetic.ripplecarry16bit.fa4.ha2.sum.layer1.nand.weight [2]
93
+ arithmetic.ripplecarry16bit.fa4.ha2.sum.layer1.or.bias [1]
94
+ arithmetic.ripplecarry16bit.fa4.ha2.sum.layer1.or.weight [2]
95
+ arithmetic.ripplecarry16bit.fa4.ha2.sum.layer2.bias [1]
96
+ arithmetic.ripplecarry16bit.fa4.ha2.sum.layer2.weight [2]
97
+ arithmetic.ripplecarry16bit.fa5.carry_or.bias [1]
98
+ arithmetic.ripplecarry16bit.fa5.carry_or.weight [2]
99
+ arithmetic.ripplecarry16bit.fa5.ha1.carry.bias [1]
100
+ arithmetic.ripplecarry16bit.fa5.ha1.carry.weight [2]
101
+ arithmetic.ripplecarry16bit.fa5.ha1.sum.layer1.nand.bias [1]
102
+ arithmetic.ripplecarry16bit.fa5.ha1.sum.layer1.nand.weight [2]
103
+ arithmetic.ripplecarry16bit.fa5.ha1.sum.layer1.or.bias [1]
104
+ arithmetic.ripplecarry16bit.fa5.ha1.sum.layer1.or.weight [2]
105
+ arithmetic.ripplecarry16bit.fa5.ha1.sum.layer2.bias [1]
106
+ arithmetic.ripplecarry16bit.fa5.ha1.sum.layer2.weight [2]
107
+ arithmetic.ripplecarry16bit.fa5.ha2.carry.bias [1]
108
+ arithmetic.ripplecarry16bit.fa5.ha2.carry.weight [2]
109
+ arithmetic.ripplecarry16bit.fa5.ha2.sum.layer1.nand.bias [1]
110
+ arithmetic.ripplecarry16bit.fa5.ha2.sum.layer1.nand.weight [2]
111
+ arithmetic.ripplecarry16bit.fa5.ha2.sum.layer1.or.bias [1]
112
+ arithmetic.ripplecarry16bit.fa5.ha2.sum.layer1.or.weight [2]
113
+ arithmetic.ripplecarry16bit.fa5.ha2.sum.layer2.bias [1]
114
+ arithmetic.ripplecarry16bit.fa5.ha2.sum.layer2.weight [2]
115
+ arithmetic.ripplecarry16bit.fa6.carry_or.bias [1]
116
+ arithmetic.ripplecarry16bit.fa6.carry_or.weight [2]
117
+ arithmetic.ripplecarry16bit.fa6.ha1.carry.bias [1]
118
+ arithmetic.ripplecarry16bit.fa6.ha1.carry.weight [2]
119
+ arithmetic.ripplecarry16bit.fa6.ha1.sum.layer1.nand.bias [1]
120
+ arithmetic.ripplecarry16bit.fa6.ha1.sum.layer1.nand.weight [2]
121
+ arithmetic.ripplecarry16bit.fa6.ha1.sum.layer1.or.bias [1]
122
+ arithmetic.ripplecarry16bit.fa6.ha1.sum.layer1.or.weight [2]
123
+ arithmetic.ripplecarry16bit.fa6.ha1.sum.layer2.bias [1]
124
+ arithmetic.ripplecarry16bit.fa6.ha1.sum.layer2.weight [2]
125
+ arithmetic.ripplecarry16bit.fa6.ha2.carry.bias [1]
126
+ arithmetic.ripplecarry16bit.fa6.ha2.carry.weight [2]
127
+ arithmetic.ripplecarry16bit.fa6.ha2.sum.layer1.nand.bias [1]
128
+ arithmetic.ripplecarry16bit.fa6.ha2.sum.layer1.nand.weight [2]
129
+ arithmetic.ripplecarry16bit.fa6.ha2.sum.layer1.or.bias [1]
130
+ arithmetic.ripplecarry16bit.fa6.ha2.sum.layer1.or.weight [2]
131
+ arithmetic.ripplecarry16bit.fa6.ha2.sum.layer2.bias [1]
132
+ arithmetic.ripplecarry16bit.fa6.ha2.sum.layer2.weight [2]
133
+ arithmetic.ripplecarry16bit.fa7.carry_or.bias [1]
134
+ arithmetic.ripplecarry16bit.fa7.carry_or.weight [2]
135
+ arithmetic.ripplecarry16bit.fa7.ha1.carry.bias [1]
136
+ arithmetic.ripplecarry16bit.fa7.ha1.carry.weight [2]
137
+ arithmetic.ripplecarry16bit.fa7.ha1.sum.layer1.nand.bias [1]
138
+ arithmetic.ripplecarry16bit.fa7.ha1.sum.layer1.nand.weight [2]
139
+ arithmetic.ripplecarry16bit.fa7.ha1.sum.layer1.or.bias [1]
140
+ arithmetic.ripplecarry16bit.fa7.ha1.sum.layer1.or.weight [2]
141
+ arithmetic.ripplecarry16bit.fa7.ha1.sum.layer2.bias [1]
142
+ arithmetic.ripplecarry16bit.fa7.ha1.sum.layer2.weight [2]
143
+ arithmetic.ripplecarry16bit.fa7.ha2.carry.bias [1]
144
+ arithmetic.ripplecarry16bit.fa7.ha2.carry.weight [2]
145
+ arithmetic.ripplecarry16bit.fa7.ha2.sum.layer1.nand.bias [1]
146
+ arithmetic.ripplecarry16bit.fa7.ha2.sum.layer1.nand.weight [2]
147
+ arithmetic.ripplecarry16bit.fa7.ha2.sum.layer1.or.bias [1]
148
+ arithmetic.ripplecarry16bit.fa7.ha2.sum.layer1.or.weight [2]
149
+ arithmetic.ripplecarry16bit.fa7.ha2.sum.layer2.bias [1]
150
+ arithmetic.ripplecarry16bit.fa7.ha2.sum.layer2.weight [2]
151
+ arithmetic.ripplecarry16bit.fa8.carry_or.bias [1]
152
+ arithmetic.ripplecarry16bit.fa8.carry_or.weight [2]
153
+ arithmetic.ripplecarry16bit.fa8.ha1.carry.bias [1]
154
+ arithmetic.ripplecarry16bit.fa8.ha1.carry.weight [2]
155
+ arithmetic.ripplecarry16bit.fa8.ha1.sum.layer1.nand.bias [1]
156
+ arithmetic.ripplecarry16bit.fa8.ha1.sum.layer1.nand.weight [2]
157
+ arithmetic.ripplecarry16bit.fa8.ha1.sum.layer1.or.bias [1]
158
+ arithmetic.ripplecarry16bit.fa8.ha1.sum.layer1.or.weight [2]
159
+ arithmetic.ripplecarry16bit.fa8.ha1.sum.layer2.bias [1]
160
+ arithmetic.ripplecarry16bit.fa8.ha1.sum.layer2.weight [2]
161
+ arithmetic.ripplecarry16bit.fa8.ha2.carry.bias [1]
162
+ arithmetic.ripplecarry16bit.fa8.ha2.carry.weight [2]
163
+ arithmetic.ripplecarry16bit.fa8.ha2.sum.layer1.nand.bias [1]
164
+ arithmetic.ripplecarry16bit.fa8.ha2.sum.layer1.nand.weight [2]
165
+ arithmetic.ripplecarry16bit.fa8.ha2.sum.layer1.or.bias [1]
166
+ arithmetic.ripplecarry16bit.fa8.ha2.sum.layer1.or.weight [2]
167
+ arithmetic.ripplecarry16bit.fa8.ha2.sum.layer2.bias [1]
168
+ arithmetic.ripplecarry16bit.fa8.ha2.sum.layer2.weight [2]
169
+ arithmetic.ripplecarry16bit.fa9.carry_or.bias [1]
170
+ arithmetic.ripplecarry16bit.fa9.carry_or.weight [2]
171
+ arithmetic.ripplecarry16bit.fa9.ha1.carry.bias [1]
172
+ arithmetic.ripplecarry16bit.fa9.ha1.carry.weight [2]
173
+ arithmetic.ripplecarry16bit.fa9.ha1.sum.layer1.nand.bias [1]
174
+ arithmetic.ripplecarry16bit.fa9.ha1.sum.layer1.nand.weight [2]
175
+ arithmetic.ripplecarry16bit.fa9.ha1.sum.layer1.or.bias [1]
176
+ arithmetic.ripplecarry16bit.fa9.ha1.sum.layer1.or.weight [2]
177
+ arithmetic.ripplecarry16bit.fa9.ha1.sum.layer2.bias [1]
178
+ arithmetic.ripplecarry16bit.fa9.ha1.sum.layer2.weight [2]
179
+ arithmetic.ripplecarry16bit.fa9.ha2.carry.bias [1]
180
+ arithmetic.ripplecarry16bit.fa9.ha2.carry.weight [2]
181
+ arithmetic.ripplecarry16bit.fa9.ha2.sum.layer1.nand.bias [1]
182
+ arithmetic.ripplecarry16bit.fa9.ha2.sum.layer1.nand.weight [2]
183
+ arithmetic.ripplecarry16bit.fa9.ha2.sum.layer1.or.bias [1]
184
+ arithmetic.ripplecarry16bit.fa9.ha2.sum.layer1.or.weight [2]
185
+ arithmetic.ripplecarry16bit.fa9.ha2.sum.layer2.bias [1]
186
+ arithmetic.ripplecarry16bit.fa9.ha2.sum.layer2.weight [2]
187
+ arithmetic.ripplecarry16bit.fa10.carry_or.bias [1]
188
+ arithmetic.ripplecarry16bit.fa10.carry_or.weight [2]
189
+ arithmetic.ripplecarry16bit.fa10.ha1.carry.bias [1]
190
+ arithmetic.ripplecarry16bit.fa10.ha1.carry.weight [2]
191
+ arithmetic.ripplecarry16bit.fa10.ha1.sum.layer1.nand.bias [1]
192
+ arithmetic.ripplecarry16bit.fa10.ha1.sum.layer1.nand.weight [2]
193
+ arithmetic.ripplecarry16bit.fa10.ha1.sum.layer1.or.bias [1]
194
+ arithmetic.ripplecarry16bit.fa10.ha1.sum.layer1.or.weight [2]
195
+ arithmetic.ripplecarry16bit.fa10.ha1.sum.layer2.bias [1]
196
+ arithmetic.ripplecarry16bit.fa10.ha1.sum.layer2.weight [2]
197
+ arithmetic.ripplecarry16bit.fa10.ha2.carry.bias [1]
198
+ arithmetic.ripplecarry16bit.fa10.ha2.carry.weight [2]
199
+ arithmetic.ripplecarry16bit.fa10.ha2.sum.layer1.nand.bias [1]
200
+ arithmetic.ripplecarry16bit.fa10.ha2.sum.layer1.nand.weight [2]
201
+ arithmetic.ripplecarry16bit.fa10.ha2.sum.layer1.or.bias [1]
202
+ arithmetic.ripplecarry16bit.fa10.ha2.sum.layer1.or.weight [2]
203
+ arithmetic.ripplecarry16bit.fa10.ha2.sum.layer2.bias [1]
204
+ arithmetic.ripplecarry16bit.fa10.ha2.sum.layer2.weight [2]
205
+ arithmetic.ripplecarry16bit.fa11.carry_or.bias [1]
206
+ arithmetic.ripplecarry16bit.fa11.carry_or.weight [2]
207
+ arithmetic.ripplecarry16bit.fa11.ha1.carry.bias [1]
208
+ arithmetic.ripplecarry16bit.fa11.ha1.carry.weight [2]
209
+ arithmetic.ripplecarry16bit.fa11.ha1.sum.layer1.nand.bias [1]
210
+ arithmetic.ripplecarry16bit.fa11.ha1.sum.layer1.nand.weight [2]
211
+ arithmetic.ripplecarry16bit.fa11.ha1.sum.layer1.or.bias [1]
212
+ arithmetic.ripplecarry16bit.fa11.ha1.sum.layer1.or.weight [2]
213
+ arithmetic.ripplecarry16bit.fa11.ha1.sum.layer2.bias [1]
214
+ arithmetic.ripplecarry16bit.fa11.ha1.sum.layer2.weight [2]
215
+ arithmetic.ripplecarry16bit.fa11.ha2.carry.bias [1]
216
+ arithmetic.ripplecarry16bit.fa11.ha2.carry.weight [2]
217
+ arithmetic.ripplecarry16bit.fa11.ha2.sum.layer1.nand.bias [1]
218
+ arithmetic.ripplecarry16bit.fa11.ha2.sum.layer1.nand.weight [2]
219
+ arithmetic.ripplecarry16bit.fa11.ha2.sum.layer1.or.bias [1]
220
+ arithmetic.ripplecarry16bit.fa11.ha2.sum.layer1.or.weight [2]
221
+ arithmetic.ripplecarry16bit.fa11.ha2.sum.layer2.bias [1]
222
+ arithmetic.ripplecarry16bit.fa11.ha2.sum.layer2.weight [2]
223
+ arithmetic.ripplecarry16bit.fa12.carry_or.bias [1]
224
+ arithmetic.ripplecarry16bit.fa12.carry_or.weight [2]
225
+ arithmetic.ripplecarry16bit.fa12.ha1.carry.bias [1]
226
+ arithmetic.ripplecarry16bit.fa12.ha1.carry.weight [2]
227
+ arithmetic.ripplecarry16bit.fa12.ha1.sum.layer1.nand.bias [1]
228
+ arithmetic.ripplecarry16bit.fa12.ha1.sum.layer1.nand.weight [2]
229
+ arithmetic.ripplecarry16bit.fa12.ha1.sum.layer1.or.bias [1]
230
+ arithmetic.ripplecarry16bit.fa12.ha1.sum.layer1.or.weight [2]
231
+ arithmetic.ripplecarry16bit.fa12.ha1.sum.layer2.bias [1]
232
+ arithmetic.ripplecarry16bit.fa12.ha1.sum.layer2.weight [2]
233
+ arithmetic.ripplecarry16bit.fa12.ha2.carry.bias [1]
234
+ arithmetic.ripplecarry16bit.fa12.ha2.carry.weight [2]
235
+ arithmetic.ripplecarry16bit.fa12.ha2.sum.layer1.nand.bias [1]
236
+ arithmetic.ripplecarry16bit.fa12.ha2.sum.layer1.nand.weight [2]
237
+ arithmetic.ripplecarry16bit.fa12.ha2.sum.layer1.or.bias [1]
238
+ arithmetic.ripplecarry16bit.fa12.ha2.sum.layer1.or.weight [2]
239
+ arithmetic.ripplecarry16bit.fa12.ha2.sum.layer2.bias [1]
240
+ arithmetic.ripplecarry16bit.fa12.ha2.sum.layer2.weight [2]
241
+ arithmetic.ripplecarry16bit.fa13.carry_or.bias [1]
242
+ arithmetic.ripplecarry16bit.fa13.carry_or.weight [2]
243
+ arithmetic.ripplecarry16bit.fa13.ha1.carry.bias [1]
244
+ arithmetic.ripplecarry16bit.fa13.ha1.carry.weight [2]
245
+ arithmetic.ripplecarry16bit.fa13.ha1.sum.layer1.nand.bias [1]
246
+ arithmetic.ripplecarry16bit.fa13.ha1.sum.layer1.nand.weight [2]
247
+ arithmetic.ripplecarry16bit.fa13.ha1.sum.layer1.or.bias [1]
248
+ arithmetic.ripplecarry16bit.fa13.ha1.sum.layer1.or.weight [2]
249
+ arithmetic.ripplecarry16bit.fa13.ha1.sum.layer2.bias [1]
250
+ arithmetic.ripplecarry16bit.fa13.ha1.sum.layer2.weight [2]
251
+ arithmetic.ripplecarry16bit.fa13.ha2.carry.bias [1]
252
+ arithmetic.ripplecarry16bit.fa13.ha2.carry.weight [2]
253
+ arithmetic.ripplecarry16bit.fa13.ha2.sum.layer1.nand.bias [1]
254
+ arithmetic.ripplecarry16bit.fa13.ha2.sum.layer1.nand.weight [2]
255
+ arithmetic.ripplecarry16bit.fa13.ha2.sum.layer1.or.bias [1]
256
+ arithmetic.ripplecarry16bit.fa13.ha2.sum.layer1.or.weight [2]
257
+ arithmetic.ripplecarry16bit.fa13.ha2.sum.layer2.bias [1]
258
+ arithmetic.ripplecarry16bit.fa13.ha2.sum.layer2.weight [2]
259
+ arithmetic.ripplecarry16bit.fa14.carry_or.bias [1]
260
+ arithmetic.ripplecarry16bit.fa14.carry_or.weight [2]
261
+ arithmetic.ripplecarry16bit.fa14.ha1.carry.bias [1]
262
+ arithmetic.ripplecarry16bit.fa14.ha1.carry.weight [2]
263
+ arithmetic.ripplecarry16bit.fa14.ha1.sum.layer1.nand.bias [1]
264
+ arithmetic.ripplecarry16bit.fa14.ha1.sum.layer1.nand.weight [2]
265
+ arithmetic.ripplecarry16bit.fa14.ha1.sum.layer1.or.bias [1]
266
+ arithmetic.ripplecarry16bit.fa14.ha1.sum.layer1.or.weight [2]
267
+ arithmetic.ripplecarry16bit.fa14.ha1.sum.layer2.bias [1]
268
+ arithmetic.ripplecarry16bit.fa14.ha1.sum.layer2.weight [2]
269
+ arithmetic.ripplecarry16bit.fa14.ha2.carry.bias [1]
270
+ arithmetic.ripplecarry16bit.fa14.ha2.carry.weight [2]
271
+ arithmetic.ripplecarry16bit.fa14.ha2.sum.layer1.nand.bias [1]
272
+ arithmetic.ripplecarry16bit.fa14.ha2.sum.layer1.nand.weight [2]
273
+ arithmetic.ripplecarry16bit.fa14.ha2.sum.layer1.or.bias [1]
274
+ arithmetic.ripplecarry16bit.fa14.ha2.sum.layer1.or.weight [2]
275
+ arithmetic.ripplecarry16bit.fa14.ha2.sum.layer2.bias [1]
276
+ arithmetic.ripplecarry16bit.fa14.ha2.sum.layer2.weight [2]
277
+ arithmetic.ripplecarry16bit.fa15.carry_or.bias [1]
278
+ arithmetic.ripplecarry16bit.fa15.carry_or.weight [2]
279
+ arithmetic.ripplecarry16bit.fa15.ha1.carry.bias [1]
280
+ arithmetic.ripplecarry16bit.fa15.ha1.carry.weight [2]
281
+ arithmetic.ripplecarry16bit.fa15.ha1.sum.layer1.nand.bias [1]
282
+ arithmetic.ripplecarry16bit.fa15.ha1.sum.layer1.nand.weight [2]
283
+ arithmetic.ripplecarry16bit.fa15.ha1.sum.layer1.or.bias [1]
284
+ arithmetic.ripplecarry16bit.fa15.ha1.sum.layer1.or.weight [2]
285
+ arithmetic.ripplecarry16bit.fa15.ha1.sum.layer2.bias [1]
286
+ arithmetic.ripplecarry16bit.fa15.ha1.sum.layer2.weight [2]
287
+ arithmetic.ripplecarry16bit.fa15.ha2.carry.bias [1]
288
+ arithmetic.ripplecarry16bit.fa15.ha2.carry.weight [2]
289
+ arithmetic.ripplecarry16bit.fa15.ha2.sum.layer1.nand.bias [1]
290
+ arithmetic.ripplecarry16bit.fa15.ha2.sum.layer1.nand.weight [2]
291
+ arithmetic.ripplecarry16bit.fa15.ha2.sum.layer1.or.bias [1]
292
+ arithmetic.ripplecarry16bit.fa15.ha2.sum.layer1.or.weight [2]
293
+ arithmetic.ripplecarry16bit.fa15.ha2.sum.layer2.bias [1]
294
+ arithmetic.ripplecarry16bit.fa15.ha2.sum.layer2.weight [2]
295
+
296
+ 16-bit comparators
297
+ arithmetic.greaterthan16bit.comparator [16]
298
+ arithmetic.lessthan16bit.comparator [16]
299
+
300
+ 16x16 multiplier (14 stages, from bits 0-16 up to bits 0-29 per stage)
301
+
302
+ Stage 0: bits 0-16
303
+ Stage 1: bits 0-17
304
+ Stage 2: bits 0-18
305
+ ...
306
+ Stage 13: bits 0-29
307
+
308
+ Each bit position has the same full adder structure. Total enumeration:
309
+
310
+ arithmetic.multiplier16x16.stage0.bit0.carry_or.bias [1]
311
+ arithmetic.multiplier16x16.stage0.bit0.carry_or.weight [2]
312
+ arithmetic.multiplier16x16.stage0.bit0.ha1.carry.bias [1]
313
+ arithmetic.multiplier16x16.stage0.bit0.ha1.carry.weight [2]
314
+ arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer1.nand.bias [1]
315
+ arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer1.nand.weight [2]
316
+ arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer1.or.bias [1]
317
+ arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer1.or.weight [2]
318
+ arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer2.bias [1]
319
+ arithmetic.multiplier16x16.stage0.bit0.ha1.sum.layer2.weight [2]
320
+ arithmetic.multiplier16x16.stage0.bit0.ha2.carry.bias [1]
321
+ arithmetic.multiplier16x16.stage0.bit0.ha2.carry.weight [2]
322
+ arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer1.nand.bias [1]
323
+ arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer1.nand.weight [2]
324
+ arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer1.or.bias [1]
325
+ arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer1.or.weight [2]
326
+ arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer2.bias [1]
327
+ arithmetic.multiplier16x16.stage0.bit0.ha2.sum.layer2.weight [2]
328
+
329
+ Pattern repeats for:
330
+ - stage0: bit0-bit16 (17 bits)
331
+ - stage1: bit0-bit17 (18 bits)
332
+ - stage2: bit0-bit18 (19 bits)
333
+ - stage3: bit0-bit19 (20 bits)
334
+ - stage4: bit0-bit20 (21 bits)
335
+ - stage5: bit0-bit21 (22 bits)
336
+ - stage6: bit0-bit22 (23 bits)
337
+ - stage7: bit0-bit23 (24 bits)
338
+ - stage8: bit0-bit24 (25 bits)
339
+ - stage9: bit0-bit25 (26 bits)
340
+ - stage10: bit0-bit26 (27 bits)
341
+ - stage11: bit0-bit27 (28 bits)
342
+ - stage12: bit0-bit28 (29 bits)
343
+ - stage13: bit0-bit29 (30 bits)
344
+
345
+ 18 tensors per bit × (17+18+19+20+21+22+23+24+25+26+27+28+29+30) = 18 × 329 = 5922 tensors for multiplier stages.
346
+
347
+ Plus 256 AND gates for partial products (16×16):
348
+ arithmetic.multiplier16x16.partial.r0c0.bias [1]
349
+ arithmetic.multiplier16x16.partial.r0c0.weight [2]
350
+ ...through...
351
+ arithmetic.multiplier16x16.partial.r15c15.bias [1]
352
+ arithmetic.multiplier16x16.partial.r15c15.weight [2]
353
+ 256 × 2 = 512 tensors for partial products.
354
+
355
+ ---
356
+ COMBINATIONAL
357
+
358
+ Barrel shifter 16-bit
359
+ combinational.barrelshifter16bit.shift [20]
360
+
361
+ Decoder 4-to-16
362
+ combinational.decoder4to16.out0.bias [1]
363
+ combinational.decoder4to16.out0.weight [4]
364
+ combinational.decoder4to16.out1.bias [1]
365
+ combinational.decoder4to16.out1.weight [4]
366
+ combinational.decoder4to16.out2.bias [1]
367
+ combinational.decoder4to16.out2.weight [4]
368
+ combinational.decoder4to16.out3.bias [1]
369
+ combinational.decoder4to16.out3.weight [4]
370
+ combinational.decoder4to16.out4.bias [1]
371
+ combinational.decoder4to16.out4.weight [4]
372
+ combinational.decoder4to16.out5.bias [1]
373
+ combinational.decoder4to16.out5.weight [4]
374
+ combinational.decoder4to16.out6.bias [1]
375
+ combinational.decoder4to16.out6.weight [4]
376
+ combinational.decoder4to16.out7.bias [1]
377
+ combinational.decoder4to16.out7.weight [4]
378
+ combinational.decoder4to16.out8.bias [1]
379
+ combinational.decoder4to16.out8.weight [4]
380
+ combinational.decoder4to16.out9.bias [1]
381
+ combinational.decoder4to16.out9.weight [4]
382
+ combinational.decoder4to16.out10.bias [1]
383
+ combinational.decoder4to16.out10.weight [4]
384
+ combinational.decoder4to16.out11.bias [1]
385
+ combinational.decoder4to16.out11.weight [4]
386
+ combinational.decoder4to16.out12.bias [1]
387
+ combinational.decoder4to16.out12.weight [4]
388
+ combinational.decoder4to16.out13.bias [1]
389
+ combinational.decoder4to16.out13.weight [4]
390
+ combinational.decoder4to16.out14.bias [1]
391
+ combinational.decoder4to16.out14.weight [4]
392
+ combinational.decoder4to16.out15.bias [1]
393
+ combinational.decoder4to16.out15.weight [4]
394
+
395
+ Encoder 16-to-4
396
+ combinational.encoder16to4.bit0.bias [1]
397
+ combinational.encoder16to4.bit0.weight [16]
398
+ combinational.encoder16to4.bit1.bias [1]
399
+ combinational.encoder16to4.bit1.weight [16]
400
+ combinational.encoder16to4.bit2.bias [1]
401
+ combinational.encoder16to4.bit2.weight [16]
402
+ combinational.encoder16to4.bit3.bias [1]
403
+ combinational.encoder16to4.bit3.weight [16]
404
+
405
+ Multiplexer 16-to-1
406
+ combinational.multiplexer16to1.select [20]
407
+
408
+ Demultiplexer 1-to-16
409
+ combinational.demultiplexer1to16.decode [5]
410
+
411
+ Priority encoder 16-bit
412
+ combinational.priorityencoder16bit.priority [16]
413
+
414
+ ---
415
+ CONTROL
416
+
417
+ Unconditional jump 16-bit
418
+ control.jump.bit0.bias [1]
419
+ control.jump.bit0.weight [1]
420
+ control.jump.bit1.bias [1]
421
+ control.jump.bit1.weight [1]
422
+ control.jump.bit2.bias [1]
423
+ control.jump.bit2.weight [1]
424
+ control.jump.bit3.bias [1]
425
+ control.jump.bit3.weight [1]
426
+ control.jump.bit4.bias [1]
427
+ control.jump.bit4.weight [1]
428
+ control.jump.bit5.bias [1]
429
+ control.jump.bit5.weight [1]
430
+ control.jump.bit6.bias [1]
431
+ control.jump.bit6.weight [1]
432
+ control.jump.bit7.bias [1]
433
+ control.jump.bit7.weight [1]
434
+ control.jump.bit8.bias [1]
435
+ control.jump.bit8.weight [1]
436
+ control.jump.bit9.bias [1]
437
+ control.jump.bit9.weight [1]
438
+ control.jump.bit10.bias [1]
439
+ control.jump.bit10.weight [1]
440
+ control.jump.bit11.bias [1]
441
+ control.jump.bit11.weight [1]
442
+ control.jump.bit12.bias [1]
443
+ control.jump.bit12.weight [1]
444
+ control.jump.bit13.bias [1]
445
+ control.jump.bit13.weight [1]
446
+ control.jump.bit14.bias [1]
447
+ control.jump.bit14.weight [1]
448
+ control.jump.bit15.bias [1]
449
+ control.jump.bit15.weight [1]
450
+
451
+ Conditional jump 16-bit (template for JZ, JNZ, JC, JNC, JN, JP, JV, JNV, and generic conditionaljump)
452
+
453
+ Each conditional jump type follows this pattern for bits 0-15:
454
+ control.{jtype}.bit{N}.and_a.bias [1]
455
+ control.{jtype}.bit{N}.and_a.weight [2]
456
+ control.{jtype}.bit{N}.and_b.bias [1]
457
+ control.{jtype}.bit{N}.and_b.weight [2]
458
+ control.{jtype}.bit{N}.not_sel.bias [1]
459
+ control.{jtype}.bit{N}.not_sel.weight [1]
460
+ control.{jtype}.bit{N}.or.bias [1]
461
+ control.{jtype}.bit{N}.or.weight [2]
462
+
463
+ Where {jtype} ∈ {conditionaljump, jz, jnz, jc, jnc, jn, jp, jv, jnv} and N ∈ {0..15}
464
+
465
+ Full expansion for control.jz (others follow same pattern):
466
+ control.jz.bit0.and_a.bias [1]
467
+ control.jz.bit0.and_a.weight [2]
468
+ control.jz.bit0.and_b.bias [1]
469
+ control.jz.bit0.and_b.weight [2]
470
+ control.jz.bit0.not_sel.bias [1]
471
+ control.jz.bit0.not_sel.weight [1]
472
+ control.jz.bit0.or.bias [1]
473
+ control.jz.bit0.or.weight [2]
474
+ control.jz.bit1.and_a.bias [1]
475
+ control.jz.bit1.and_a.weight [2]
476
+ control.jz.bit1.and_b.bias [1]
477
+ control.jz.bit1.and_b.weight [2]
478
+ control.jz.bit1.not_sel.bias [1]
479
+ control.jz.bit1.not_sel.weight [1]
480
+ control.jz.bit1.or.bias [1]
481
+ control.jz.bit1.or.weight [2]
482
+ control.jz.bit2.and_a.bias [1]
483
+ control.jz.bit2.and_a.weight [2]
484
+ control.jz.bit2.and_b.bias [1]
485
+ control.jz.bit2.and_b.weight [2]
486
+ control.jz.bit2.not_sel.bias [1]
487
+ control.jz.bit2.not_sel.weight [1]
488
+ control.jz.bit2.or.bias [1]
489
+ control.jz.bit2.or.weight [2]
490
+ control.jz.bit3.and_a.bias [1]
491
+ control.jz.bit3.and_a.weight [2]
492
+ control.jz.bit3.and_b.bias [1]
493
+ control.jz.bit3.and_b.weight [2]
494
+ control.jz.bit3.not_sel.bias [1]
495
+ control.jz.bit3.not_sel.weight [1]
496
+ control.jz.bit3.or.bias [1]
497
+ control.jz.bit3.or.weight [2]
498
+ control.jz.bit4.and_a.bias [1]
499
+ control.jz.bit4.and_a.weight [2]
500
+ control.jz.bit4.and_b.bias [1]
501
+ control.jz.bit4.and_b.weight [2]
502
+ control.jz.bit4.not_sel.bias [1]
503
+ control.jz.bit4.not_sel.weight [1]
504
+ control.jz.bit4.or.bias [1]
505
+ control.jz.bit4.or.weight [2]
506
+ control.jz.bit5.and_a.bias [1]
507
+ control.jz.bit5.and_a.weight [2]
508
+ control.jz.bit5.and_b.bias [1]
509
+ control.jz.bit5.and_b.weight [2]
510
+ control.jz.bit5.not_sel.bias [1]
511
+ control.jz.bit5.not_sel.weight [1]
512
+ control.jz.bit5.or.bias [1]
513
+ control.jz.bit5.or.weight [2]
514
+ control.jz.bit6.and_a.bias [1]
515
+ control.jz.bit6.and_a.weight [2]
516
+ control.jz.bit6.and_b.bias [1]
517
+ control.jz.bit6.and_b.weight [2]
518
+ control.jz.bit6.not_sel.bias [1]
519
+ control.jz.bit6.not_sel.weight [1]
520
+ control.jz.bit6.or.bias [1]
521
+ control.jz.bit6.or.weight [2]
522
+ control.jz.bit7.and_a.bias [1]
523
+ control.jz.bit7.and_a.weight [2]
524
+ control.jz.bit7.and_b.bias [1]
525
+ control.jz.bit7.and_b.weight [2]
526
+ control.jz.bit7.not_sel.bias [1]
527
+ control.jz.bit7.not_sel.weight [1]
528
+ control.jz.bit7.or.bias [1]
529
+ control.jz.bit7.or.weight [2]
530
+ control.jz.bit8.and_a.bias [1]
531
+ control.jz.bit8.and_a.weight [2]
532
+ control.jz.bit8.and_b.bias [1]
533
+ control.jz.bit8.and_b.weight [2]
534
+ control.jz.bit8.not_sel.bias [1]
535
+ control.jz.bit8.not_sel.weight [1]
536
+ control.jz.bit8.or.bias [1]
537
+ control.jz.bit8.or.weight [2]
538
+ control.jz.bit9.and_a.bias [1]
539
+ control.jz.bit9.and_a.weight [2]
540
+ control.jz.bit9.and_b.bias [1]
541
+ control.jz.bit9.and_b.weight [2]
542
+ control.jz.bit9.not_sel.bias [1]
543
+ control.jz.bit9.not_sel.weight [1]
544
+ control.jz.bit9.or.bias [1]
545
+ control.jz.bit9.or.weight [2]
546
+ control.jz.bit10.and_a.bias [1]
547
+ control.jz.bit10.and_a.weight [2]
548
+ control.jz.bit10.and_b.bias [1]
549
+ control.jz.bit10.and_b.weight [2]
550
+ control.jz.bit10.not_sel.bias [1]
551
+ control.jz.bit10.not_sel.weight [1]
552
+ control.jz.bit10.or.bias [1]
553
+ control.jz.bit10.or.weight [2]
554
+ control.jz.bit11.and_a.bias [1]
555
+ control.jz.bit11.and_a.weight [2]
556
+ control.jz.bit11.and_b.bias [1]
557
+ control.jz.bit11.and_b.weight [2]
558
+ control.jz.bit11.not_sel.bias [1]
559
+ control.jz.bit11.not_sel.weight [1]
560
+ control.jz.bit11.or.bias [1]
561
+ control.jz.bit11.or.weight [2]
562
+ control.jz.bit12.and_a.bias [1]
563
+ control.jz.bit12.and_a.weight [2]
564
+ control.jz.bit12.and_b.bias [1]
565
+ control.jz.bit12.and_b.weight [2]
566
+ control.jz.bit12.not_sel.bias [1]
567
+ control.jz.bit12.not_sel.weight [1]
568
+ control.jz.bit12.or.bias [1]
569
+ control.jz.bit12.or.weight [2]
570
+ control.jz.bit13.and_a.bias [1]
571
+ control.jz.bit13.and_a.weight [2]
572
+ control.jz.bit13.and_b.bias [1]
573
+ control.jz.bit13.and_b.weight [2]
574
+ control.jz.bit13.not_sel.bias [1]
575
+ control.jz.bit13.not_sel.weight [1]
576
+ control.jz.bit13.or.bias [1]
577
+ control.jz.bit13.or.weight [2]
578
+ control.jz.bit14.and_a.bias [1]
579
+ control.jz.bit14.and_a.weight [2]
580
+ control.jz.bit14.and_b.bias [1]
581
+ control.jz.bit14.and_b.weight [2]
582
+ control.jz.bit14.not_sel.bias [1]
583
+ control.jz.bit14.not_sel.weight [1]
584
+ control.jz.bit14.or.bias [1]
585
+ control.jz.bit14.or.weight [2]
586
+ control.jz.bit15.and_a.bias [1]
587
+ control.jz.bit15.and_a.weight [2]
588
+ control.jz.bit15.and_b.bias [1]
589
+ control.jz.bit15.and_b.weight [2]
590
+ control.jz.bit15.not_sel.bias [1]
591
+ control.jz.bit15.not_sel.weight [1]
592
+ control.jz.bit15.or.bias [1]
593
+ control.jz.bit15.or.weight [2]
594
+
595
+ Repeat above for: jnz, jc, jnc, jn, jp, jv, jnv, conditionaljump (9 types × 16 bits × 8 tensors = 1152 tensors)
596
+
597
+ Stack operations (unchanged)
598
+ control.call.jump [1]
599
+ control.call.push [1]
600
+ control.pop.load [1]
601
+ control.pop.sp_inc [1]
602
+ control.push.sp_dec [1]
603
+ control.push.store [1]
604
+ control.ret.jump [1]
605
+ control.ret.pop [1]
606
+ control.sp_dec.uses [1]
607
+ control.sp_inc.uses [1]
608
+
609
+ ---
610
+ ERROR DETECTION
611
+
612
+ Checksum 16-bit
613
+ error_detection.checksum16bit.sum.bias [1]
614
+ error_detection.checksum16bit.sum.weight [16]
615
+
616
+ Parity 16-bit
617
+ error_detection.evenparitychecker16bit.bias [1]
618
+ error_detection.evenparitychecker16bit.weight [16]
619
+ error_detection.oddparitychecker16bit.not.bias [1]
620
+ error_detection.oddparitychecker16bit.not.weight [1]
621
+ error_detection.oddparitychecker16bit.parity.bias [1]
622
+ error_detection.oddparitychecker16bit.parity.weight [16]
623
+
624
+ Parity checker/generator 16-bit (4 XOR stages instead of 3)
625
+ error_detection.paritychecker16bit.output.not.bias [1]
626
+ error_detection.paritychecker16bit.output.not.weight [1]
627
+ error_detection.paritychecker16bit.stage1.xor0.layer1.nand.bias [1]
628
+ error_detection.paritychecker16bit.stage1.xor0.layer1.nand.weight [2]
629
+ error_detection.paritychecker16bit.stage1.xor0.layer1.or.bias [1]
630
+ error_detection.paritychecker16bit.stage1.xor0.layer1.or.weight [2]
631
+ error_detection.paritychecker16bit.stage1.xor0.layer2.bias [1]
632
+ error_detection.paritychecker16bit.stage1.xor0.layer2.weight [2]
633
+ error_detection.paritychecker16bit.stage1.xor1.layer1.nand.bias [1]
634
+ error_detection.paritychecker16bit.stage1.xor1.layer1.nand.weight [2]
635
+ error_detection.paritychecker16bit.stage1.xor1.layer1.or.bias [1]
636
+ error_detection.paritychecker16bit.stage1.xor1.layer1.or.weight [2]
637
+ error_detection.paritychecker16bit.stage1.xor1.layer2.bias [1]
638
+ error_detection.paritychecker16bit.stage1.xor1.layer2.weight [2]
639
+ error_detection.paritychecker16bit.stage1.xor2.layer1.nand.bias [1]
640
+ error_detection.paritychecker16bit.stage1.xor2.layer1.nand.weight [2]
641
+ error_detection.paritychecker16bit.stage1.xor2.layer1.or.bias [1]
642
+ error_detection.paritychecker16bit.stage1.xor2.layer1.or.weight [2]
643
+ error_detection.paritychecker16bit.stage1.xor2.layer2.bias [1]
644
+ error_detection.paritychecker16bit.stage1.xor2.layer2.weight [2]
645
+ error_detection.paritychecker16bit.stage1.xor3.layer1.nand.bias [1]
646
+ error_detection.paritychecker16bit.stage1.xor3.layer1.nand.weight [2]
647
+ error_detection.paritychecker16bit.stage1.xor3.layer1.or.bias [1]
648
+ error_detection.paritychecker16bit.stage1.xor3.layer1.or.weight [2]
649
+ error_detection.paritychecker16bit.stage1.xor3.layer2.bias [1]
650
+ error_detection.paritychecker16bit.stage1.xor3.layer2.weight [2]
651
+ error_detection.paritychecker16bit.stage1.xor4.layer1.nand.bias [1]
652
+ error_detection.paritychecker16bit.stage1.xor4.layer1.nand.weight [2]
653
+ error_detection.paritychecker16bit.stage1.xor4.layer1.or.bias [1]
654
+ error_detection.paritychecker16bit.stage1.xor4.layer1.or.weight [2]
655
+ error_detection.paritychecker16bit.stage1.xor4.layer2.bias [1]
656
+ error_detection.paritychecker16bit.stage1.xor4.layer2.weight [2]
657
+ error_detection.paritychecker16bit.stage1.xor5.layer1.nand.bias [1]
658
+ error_detection.paritychecker16bit.stage1.xor5.layer1.nand.weight [2]
659
+ error_detection.paritychecker16bit.stage1.xor5.layer1.or.bias [1]
660
+ error_detection.paritychecker16bit.stage1.xor5.layer1.or.weight [2]
661
+ error_detection.paritychecker16bit.stage1.xor5.layer2.bias [1]
662
+ error_detection.paritychecker16bit.stage1.xor5.layer2.weight [2]
663
+ error_detection.paritychecker16bit.stage1.xor6.layer1.nand.bias [1]
664
+ error_detection.paritychecker16bit.stage1.xor6.layer1.nand.weight [2]
665
+ error_detection.paritychecker16bit.stage1.xor6.layer1.or.bias [1]
666
+ error_detection.paritychecker16bit.stage1.xor6.layer1.or.weight [2]
667
+ error_detection.paritychecker16bit.stage1.xor6.layer2.bias [1]
668
+ error_detection.paritychecker16bit.stage1.xor6.layer2.weight [2]
669
+ error_detection.paritychecker16bit.stage1.xor7.layer1.nand.bias [1]
670
+ error_detection.paritychecker16bit.stage1.xor7.layer1.nand.weight [2]
671
+ error_detection.paritychecker16bit.stage1.xor7.layer1.or.bias [1]
672
+ error_detection.paritychecker16bit.stage1.xor7.layer1.or.weight [2]
673
+ error_detection.paritychecker16bit.stage1.xor7.layer2.bias [1]
674
+ error_detection.paritychecker16bit.stage1.xor7.layer2.weight [2]
675
+ error_detection.paritychecker16bit.stage2.xor0.layer1.nand.bias [1]
676
+ error_detection.paritychecker16bit.stage2.xor0.layer1.nand.weight [2]
677
+ error_detection.paritychecker16bit.stage2.xor0.layer1.or.bias [1]
678
+ error_detection.paritychecker16bit.stage2.xor0.layer1.or.weight [2]
679
+ error_detection.paritychecker16bit.stage2.xor0.layer2.bias [1]
680
+ error_detection.paritychecker16bit.stage2.xor0.layer2.weight [2]
681
+ error_detection.paritychecker16bit.stage2.xor1.layer1.nand.bias [1]
682
+ error_detection.paritychecker16bit.stage2.xor1.layer1.nand.weight [2]
683
+ error_detection.paritychecker16bit.stage2.xor1.layer1.or.bias [1]
684
+ error_detection.paritychecker16bit.stage2.xor1.layer1.or.weight [2]
685
+ error_detection.paritychecker16bit.stage2.xor1.layer2.bias [1]
686
+ error_detection.paritychecker16bit.stage2.xor1.layer2.weight [2]
687
+ error_detection.paritychecker16bit.stage2.xor2.layer1.nand.bias [1]
688
+ error_detection.paritychecker16bit.stage2.xor2.layer1.nand.weight [2]
689
+ error_detection.paritychecker16bit.stage2.xor2.layer1.or.bias [1]
690
+ error_detection.paritychecker16bit.stage2.xor2.layer1.or.weight [2]
691
+ error_detection.paritychecker16bit.stage2.xor2.layer2.bias [1]
692
+ error_detection.paritychecker16bit.stage2.xor2.layer2.weight [2]
693
+ error_detection.paritychecker16bit.stage2.xor3.layer1.nand.bias [1]
694
+ error_detection.paritychecker16bit.stage2.xor3.layer1.nand.weight [2]
695
+ error_detection.paritychecker16bit.stage2.xor3.layer1.or.bias [1]
696
+ error_detection.paritychecker16bit.stage2.xor3.layer1.or.weight [2]
697
+ error_detection.paritychecker16bit.stage2.xor3.layer2.bias [1]
698
+ error_detection.paritychecker16bit.stage2.xor3.layer2.weight [2]
699
+ error_detection.paritychecker16bit.stage3.xor0.layer1.nand.bias [1]
700
+ error_detection.paritychecker16bit.stage3.xor0.layer1.nand.weight [2]
701
+ error_detection.paritychecker16bit.stage3.xor0.layer1.or.bias [1]
702
+ error_detection.paritychecker16bit.stage3.xor0.layer1.or.weight [2]
703
+ error_detection.paritychecker16bit.stage3.xor0.layer2.bias [1]
704
+ error_detection.paritychecker16bit.stage3.xor0.layer2.weight [2]
705
+ error_detection.paritychecker16bit.stage3.xor1.layer1.nand.bias [1]
706
+ error_detection.paritychecker16bit.stage3.xor1.layer1.nand.weight [2]
707
+ error_detection.paritychecker16bit.stage3.xor1.layer1.or.bias [1]
708
+ error_detection.paritychecker16bit.stage3.xor1.layer1.or.weight [2]
709
+ error_detection.paritychecker16bit.stage3.xor1.layer2.bias [1]
710
+ error_detection.paritychecker16bit.stage3.xor1.layer2.weight [2]
711
+ error_detection.paritychecker16bit.stage4.xor0.layer1.nand.bias [1]
712
+ error_detection.paritychecker16bit.stage4.xor0.layer1.nand.weight [2]
713
+ error_detection.paritychecker16bit.stage4.xor0.layer1.or.bias [1]
714
+ error_detection.paritychecker16bit.stage4.xor0.layer1.or.weight [2]
715
+ error_detection.paritychecker16bit.stage4.xor0.layer2.bias [1]
716
+ error_detection.paritychecker16bit.stage4.xor0.layer2.weight [2]
717
+
718
+ Identical structure for paritygenerator16bit.
719
+
720
+ CRC-16
721
+ error_detection.crc16.divisor [17]
722
+
723
+ Hamming (15,11) - 11 data bits, 4 parity bits
724
+ error_detection.hammingencode11bit.p0.weight [11]
725
+ error_detection.hammingencode11bit.p1.bias [1]
726
+ error_detection.hammingencode11bit.p1.weight [11]
727
+ error_detection.hammingencode11bit.p2.bias [1]
728
+ error_detection.hammingencode11bit.p2.weight [11]
729
+ error_detection.hammingencode11bit.p3.bias [1]
730
+ error_detection.hammingencode11bit.p3.weight [11]
731
+ error_detection.hammingencode11bit.p4.bias [1]
732
+ error_detection.hammingencode11bit.p4.weight [11]
733
+ error_detection.hammingdecode15bit.s1.bias [1]
734
+ error_detection.hammingdecode15bit.s1.weight [8]
735
+ error_detection.hammingdecode15bit.s2.bias [1]
736
+ error_detection.hammingdecode15bit.s2.weight [8]
737
+ error_detection.hammingdecode15bit.s3.bias [1]
738
+ error_detection.hammingdecode15bit.s3.weight [8]
739
+ error_detection.hammingdecode15bit.s4.bias [1]
740
+ error_detection.hammingdecode15bit.s4.weight [8]
741
+ error_detection.hammingsyndrome15bit.s1.weight [8]
742
+ error_detection.hammingsyndrome15bit.s2.weight [8]
743
+ error_detection.hammingsyndrome15bit.s3.weight [8]
744
+ error_detection.hammingsyndrome15bit.s4.weight [8]
745
+
746
+ Longitudinal parity 16-bit
747
+ error_detection.longitudinalparity16bit.col_parity [16]
748
+ error_detection.longitudinalparity16bit.row_parity [16]
749
+
750
+ ---
751
+ MODULAR
752
+
753
+ For 16-bit inputs, modular arithmetic requires detecting which of ceil(65536/N) ranges the input falls into. Structure per modulus:
754
+
755
+ mod2 (simple - just check LSB)
756
+ modular.mod2_16bit.bias [1]
757
+ modular.mod2_16bit.weight [16]
758
+
759
+ mod4 (check 2 LSBs)
760
+ modular.mod4_16bit.bias [1]
761
+ modular.mod4_16bit.weight [16]
762
+
763
+ mod8 (check 3 LSBs)
764
+ modular.mod8_16bit.bias [1]
765
+ modular.mod8_16bit.weight [16]
766
+
767
+ mod16 (check 4 LSBs)
768
+ modular.mod16_16bit.bias [1]
769
+ modular.mod16_16bit.weight [16]
770
+
771
+ For non-power-of-2 moduli (3, 5, 6, 7, 9, 10, 11, 12), use iterative subtraction circuit referencing the 16-bit subtractor and comparator, or expand the range-check approach:
772
+
773
+ mod3, mod5, mod6, mod7, mod9, mod10, mod11, mod12 (range-check approach, pattern):
774
+ modular.mod{N}_16bit.layer1.geq{K}.bias [1]
775
+ modular.mod{N}_16bit.layer1.geq{K}.weight [16]
776
+ modular.mod{N}_16bit.layer1.leq{K}.bias [1]
777
+ modular.mod{N}_16bit.layer1.leq{K}.weight [16]
778
+ modular.mod{N}_16bit.layer2.eq{K}.bias [1]
779
+ modular.mod{N}_16bit.layer2.eq{K}.weight [2]
780
+ modular.mod{N}_16bit.layer3.or.bias [1]
781
+ modular.mod{N}_16bit.layer3.or.weight [R]
782
+
783
+ Where R = number of ranges = ceil(65536/N).
784
+
785
+ ---
786
+ PATTERN RECOGNITION
787
+
788
+ pattern_recognition.popcount16bit.bias [1]
789
+ pattern_recognition.popcount16bit.weight [16]
790
+ pattern_recognition.allones16bit.bias [1]
791
+ pattern_recognition.allones16bit.weight [16]
792
+ pattern_recognition.allzeros16bit.bias [1]
793
+ pattern_recognition.allzeros16bit.weight [16]
794
+ pattern_recognition.alternating16bit.pattern1.weight [16]
795
+ pattern_recognition.alternating16bit.pattern2.weight [16]
796
+ pattern_recognition.hammingdistance16bit.popcount.weight [16]
797
+ pattern_recognition.hammingdistance16bit.xor.weight [32]
798
+ pattern_recognition.leadingones16bit.weight [16]
799
+ pattern_recognition.trailingones16bit.weight [16]
800
+ pattern_recognition.runlength16bit.weight [16]
801
+ pattern_recognition.onehotdetector16bit.and.bias [1]
802
+ pattern_recognition.onehotdetector16bit.and.weight [2]
803
+ pattern_recognition.onehotdetector16bit.atleast1.bias [1]
804
+ pattern_recognition.onehotdetector16bit.atleast1.weight [16]
805
+ pattern_recognition.onehotdetector16bit.atmost1.bias [1]
806
+ pattern_recognition.onehotdetector16bit.atmost1.weight [16]
807
+ pattern_recognition.symmetry16bit.and.bias [1]
808
+ pattern_recognition.symmetry16bit.and.weight [8]
809
+ pattern_recognition.symmetry16bit.xnor0.weight [2]
810
+ pattern_recognition.symmetry16bit.xnor1.weight [2]
811
+ pattern_recognition.symmetry16bit.xnor2.weight [2]
812
+ pattern_recognition.symmetry16bit.xnor3.weight [2]
813
+ pattern_recognition.symmetry16bit.xnor4.weight [2]
814
+ pattern_recognition.symmetry16bit.xnor5.weight [2]
815
+ pattern_recognition.symmetry16bit.xnor6.weight [2]
816
+ pattern_recognition.symmetry16bit.xnor7.weight [2]
817
+
818
+ ---
819
+ THRESHOLD
820
+
821
+ threshold.alloutof16.bias [1]
822
+ threshold.alloutof16.weight [16]
823
+ threshold.oneoutof16.bias [1]
824
+ threshold.oneoutof16.weight [16]
825
+ threshold.twooutof16.bias [1]
826
+ threshold.twooutof16.weight [16]
827
+ threshold.threeoutof16.bias [1]
828
+ threshold.threeoutof16.weight [16]
829
+ threshold.fouroutof16.bias [1]
830
+ threshold.fouroutof16.weight [16]
831
+ threshold.fiveoutof16.bias [1]
832
+ threshold.fiveoutof16.weight [16]
833
+ threshold.sixoutof16.bias [1]
834
+ threshold.sixoutof16.weight [16]
835
+ threshold.sevenoutof16.bias [1]
836
+ threshold.sevenoutof16.weight [16]
837
+ threshold.eightoutof16.bias [1]
838
+ threshold.eightoutof16.weight [16]
839
+ threshold.nineoutof16.bias [1]
840
+ threshold.nineoutof16.weight [16]
841
+ threshold.tenoutof16.bias [1]
842
+ threshold.tenoutof16.weight [16]
843
+ threshold.elevenoutof16.bias [1]
844
+ threshold.elevenoutof16.weight [16]
845
+ threshold.twelveoutof16.bias [1]
846
+ threshold.twelveoutof16.weight [16]
847
+ threshold.thirteenoutof16.bias [1]
848
+ threshold.thirteenoutof16.weight [16]
849
+ threshold.fourteenoutof16.bias [1]
850
+ threshold.fourteenoutof16.weight [16]
851
+ threshold.fifteenoutof16.bias [1]
852
+ threshold.fifteenoutof16.weight [16]
853
+ threshold.sixteenoutof16.bias [1]
854
+ threshold.sixteenoutof16.weight [16]
855
+ threshold.majority16.bias [1]
856
+ threshold.majority16.weight [16]
857
+ threshold.minority16.bias [1]
858
+ threshold.minority16.weight [16]
859
+ threshold.atleastk_8_16bit.bias [1]
860
+ threshold.atleastk_8_16bit.weight [16]
861
+ threshold.atmostk_8_16bit.bias [1]
862
+ threshold.atmostk_8_16bit.weight [16]
863
+ threshold.exactlyk_8_16bit.and.bias [1]
864
+ threshold.exactlyk_8_16bit.and.weight [2]
865
+ threshold.exactlyk_8_16bit.atleast.bias [1]
866
+ threshold.exactlyk_8_16bit.atleast.weight [16]
867
+ threshold.exactlyk_8_16bit.atmost.bias [1]
868
+ threshold.exactlyk_8_16bit.atmost.weight [16]
869
+
870
+ ---
871
+ MANIFEST
872
+
873
+ manifest.alu_operations [1]
874
+ manifest.flags [1]
875
+ manifest.instruction_width [1]
876
+ manifest.memory_bytes [1]
877
+ manifest.pc_width [1]
878
+ manifest.register_width [1]
879
+ manifest.registers [1]
880
+ manifest.turing_complete [1]
881
+ manifest.version [1]
882
+
883
+ Values change:
884
+ - register_width: 8 → 16
885
+ - pc_width: 8 → 16
886
+ - memory_bytes: 256 → 65536
887
+
888
+ ---
889
+ TOTAL NEW TENSOR COUNT
890
+
891
+ | Category | Count |
892
+ |-------------------------------|-------------------|
893
+ | ripplecarry16bit | 288 |
894
+ | 16-bit comparators | 2 |
895
+ | multiplier16x16 | ~6500 |
896
+ | combinational | 45 |
897
+ | control (jump + conditionals) | 1184 |
898
+ | error_detection | ~200 |
899
+ | modular | ~600 |
900
+ | pattern_recognition | 45 |
901
+ | threshold | 60 |
902
+ | manifest | 9 |
903
+ | TOTAL | ~8900 new tensors |
904
+
905
  Combined with existing 8-bit tensors retained for backwards compatibility or removed: final 16-bit model ~9000-17000 tensors depending on whether 8-bit components are kept.
llm/circuit_llm.py CHANGED
@@ -1,606 +1,606 @@
1
- """
2
- Circuit-Augmented LLM: Embedding threshold logic circuits into SmolLM2
3
- ======================================================================
4
-
5
- Replaces/augments MLP layers with frozen threshold circuits for exact arithmetic.
6
- """
7
-
8
- import torch
9
- import torch.nn as nn
10
- import torch.nn.functional as F
11
- from typing import Dict, Optional, Tuple
12
- from safetensors.torch import load_file
13
- from transformers import AutoModelForCausalLM, AutoTokenizer
14
- import warnings
15
- warnings.filterwarnings('ignore')
16
-
17
-
18
- # =============================================================================
19
- # HEAVISIDE WITH STRAIGHT-THROUGH ESTIMATOR
20
- # =============================================================================
21
-
22
- class HeavisideSTE(torch.autograd.Function):
23
- """Heaviside step function with straight-through estimator for backprop."""
24
-
25
- @staticmethod
26
- def forward(ctx, x):
27
- return (x >= 0).float()
28
-
29
- @staticmethod
30
- def backward(ctx, grad_output):
31
- # STE: pass gradient through unchanged
32
- return grad_output
33
-
34
-
35
- def heaviside(x: torch.Tensor) -> torch.Tensor:
36
- """Heaviside step: 1 if x >= 0, else 0. Uses STE for training."""
37
- return HeavisideSTE.apply(x)
38
-
39
-
40
- # =============================================================================
41
- # CIRCUIT EXECUTOR - Runs the frozen threshold circuits
42
- # =============================================================================
43
-
44
- class CircuitExecutor(nn.Module):
45
- """
46
- Executes threshold logic circuits from the safetensors file.
47
- All circuit weights are frozen - only interface layers train.
48
- """
49
-
50
- def __init__(self, circuit_path: str, device: str = 'cpu'):
51
- super().__init__()
52
- self.device = device
53
-
54
- # Load all circuit tensors
55
- raw_circuits = load_file(circuit_path)
56
-
57
- # Store as frozen parameters (use underscores for valid param names)
58
- self.circuits = {}
59
- for k, v in raw_circuits.items():
60
- safe_name = k.replace('.', '__')
61
- self.register_buffer(safe_name, v.float().to(device))
62
- self.circuits[k] = safe_name
63
-
64
- def _get(self, name: str) -> torch.Tensor:
65
- """Get circuit tensor by original dotted name."""
66
- return getattr(self, self.circuits[name])
67
-
68
- # -------------------------------------------------------------------------
69
- # Boolean Gates
70
- # -------------------------------------------------------------------------
71
-
72
- def eval_and(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
73
- """AND gate: output 1 iff both inputs are 1."""
74
- inp = torch.stack([a, b], dim=-1)
75
- w = self._get('boolean.and.weight')
76
- bias = self._get('boolean.and.bias')
77
- return heaviside(inp @ w + bias)
78
-
79
- def eval_or(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
80
- """OR gate: output 1 if either input is 1."""
81
- inp = torch.stack([a, b], dim=-1)
82
- w = self._get('boolean.or.weight')
83
- bias = self._get('boolean.or.bias')
84
- return heaviside(inp @ w + bias)
85
-
86
- def eval_xor(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
87
- """XOR gate: two-layer network (not linearly separable)."""
88
- inp = torch.stack([a, b], dim=-1)
89
-
90
- # Layer 1: OR and NAND neurons
91
- w1_n1 = self._get('boolean.xor.layer1.neuron1.weight')
92
- b1_n1 = self._get('boolean.xor.layer1.neuron1.bias')
93
- w1_n2 = self._get('boolean.xor.layer1.neuron2.weight')
94
- b1_n2 = self._get('boolean.xor.layer1.neuron2.bias')
95
-
96
- h1 = heaviside(inp @ w1_n1 + b1_n1)
97
- h2 = heaviside(inp @ w1_n2 + b1_n2)
98
- hidden = torch.stack([h1, h2], dim=-1)
99
-
100
- # Layer 2: AND of hidden
101
- w2 = self._get('boolean.xor.layer2.weight')
102
- b2 = self._get('boolean.xor.layer2.bias')
103
-
104
- return heaviside(hidden @ w2 + b2)
105
-
106
- # -------------------------------------------------------------------------
107
- # Arithmetic: Full Adder
108
- # -------------------------------------------------------------------------
109
-
110
- def eval_full_adder(self, a: torch.Tensor, b: torch.Tensor,
111
- cin: torch.Tensor, prefix: str) -> Tuple[torch.Tensor, torch.Tensor]:
112
- """
113
- Full adder: sum = a XOR b XOR cin, cout = (a AND b) OR (cin AND (a XOR b))
114
- Returns (sum_bit, carry_out)
115
- """
116
- inp_ab = torch.stack([a, b], dim=-1)
117
-
118
- # HA1: a XOR b
119
- w1_or = self._get(f'{prefix}.ha1.sum.layer1.or.weight')
120
- b1_or = self._get(f'{prefix}.ha1.sum.layer1.or.bias')
121
- w1_nand = self._get(f'{prefix}.ha1.sum.layer1.nand.weight')
122
- b1_nand = self._get(f'{prefix}.ha1.sum.layer1.nand.bias')
123
- w2 = self._get(f'{prefix}.ha1.sum.layer2.weight')
124
- b2 = self._get(f'{prefix}.ha1.sum.layer2.bias')
125
-
126
- h_or = heaviside(inp_ab @ w1_or + b1_or)
127
- h_nand = heaviside(inp_ab @ w1_nand + b1_nand)
128
- hidden = torch.stack([h_or, h_nand], dim=-1)
129
- ha1_sum = heaviside(hidden @ w2 + b2)
130
-
131
- # HA1 carry
132
- w_c1 = self._get(f'{prefix}.ha1.carry.weight')
133
- b_c1 = self._get(f'{prefix}.ha1.carry.bias')
134
- ha1_carry = heaviside(inp_ab @ w_c1 + b_c1)
135
-
136
- # HA2: ha1_sum XOR cin
137
- inp_ha2 = torch.stack([ha1_sum, cin], dim=-1)
138
- w1_or = self._get(f'{prefix}.ha2.sum.layer1.or.weight')
139
- b1_or = self._get(f'{prefix}.ha2.sum.layer1.or.bias')
140
- w1_nand = self._get(f'{prefix}.ha2.sum.layer1.nand.weight')
141
- b1_nand = self._get(f'{prefix}.ha2.sum.layer1.nand.bias')
142
- w2 = self._get(f'{prefix}.ha2.sum.layer2.weight')
143
- b2 = self._get(f'{prefix}.ha2.sum.layer2.bias')
144
-
145
- h_or = heaviside(inp_ha2 @ w1_or + b1_or)
146
- h_nand = heaviside(inp_ha2 @ w1_nand + b1_nand)
147
- hidden = torch.stack([h_or, h_nand], dim=-1)
148
- ha2_sum = heaviside(hidden @ w2 + b2)
149
-
150
- # HA2 carry
151
- w_c2 = self._get(f'{prefix}.ha2.carry.weight')
152
- b_c2 = self._get(f'{prefix}.ha2.carry.bias')
153
- ha2_carry = heaviside(inp_ha2 @ w_c2 + b_c2)
154
-
155
- # Carry out = ha1_carry OR ha2_carry
156
- inp_cout = torch.stack([ha1_carry, ha2_carry], dim=-1)
157
- w_or = self._get(f'{prefix}.carry_or.weight')
158
- b_or = self._get(f'{prefix}.carry_or.bias')
159
- cout = heaviside(inp_cout @ w_or + b_or)
160
-
161
- return ha2_sum, cout
162
-
163
- # -------------------------------------------------------------------------
164
- # Arithmetic: 8-bit Ripple Carry Adder
165
- # -------------------------------------------------------------------------
166
-
167
- def add_8bit(self, a_bits: torch.Tensor, b_bits: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
168
- """
169
- 8-bit ripple carry addition.
170
- a_bits, b_bits: [..., 8] tensors of bits (LSB first)
171
- Returns: (result_bits [..., 8], carry_out [...])
172
- """
173
- batch_shape = a_bits.shape[:-1]
174
- carry = torch.zeros(batch_shape, device=a_bits.device)
175
- result_bits = []
176
-
177
- for i in range(8):
178
- a_i = a_bits[..., i]
179
- b_i = b_bits[..., i]
180
- sum_bit, carry = self.eval_full_adder(
181
- a_i, b_i, carry,
182
- f'arithmetic.ripplecarry8bit.fa{i}'
183
- )
184
- result_bits.append(sum_bit)
185
-
186
- return torch.stack(result_bits, dim=-1), carry
187
-
188
- # -------------------------------------------------------------------------
189
- # Arithmetic: 8-bit Comparators
190
- # -------------------------------------------------------------------------
191
-
192
- def greater_than_8bit(self, a_bits: torch.Tensor, b_bits: torch.Tensor) -> torch.Tensor:
193
- """Returns 1 if a > b, else 0. Bits are MSB first."""
194
- diff = a_bits - b_bits # [..., 8]
195
- w = self._get('arithmetic.greaterthan8bit.comparator')
196
- score = (diff * w).sum(dim=-1)
197
- return (score > 0).float()
198
-
199
- def less_than_8bit(self, a_bits: torch.Tensor, b_bits: torch.Tensor) -> torch.Tensor:
200
- """Returns 1 if a < b, else 0. Bits are MSB first."""
201
- diff = b_bits - a_bits # [..., 8]
202
- w = self._get('arithmetic.lessthan8bit.comparator')
203
- score = (diff * w).sum(dim=-1)
204
- return (score > 0).float()
205
-
206
- def equal_8bit(self, a_bits: torch.Tensor, b_bits: torch.Tensor) -> torch.Tensor:
207
- """Returns 1 if a == b, else 0."""
208
- gt = self.greater_than_8bit(a_bits, b_bits)
209
- lt = self.less_than_8bit(a_bits, b_bits)
210
- return (1 - gt) * (1 - lt)
211
-
212
-
213
- # =============================================================================
214
- # BIT EXTRACTION / INJECTION INTERFACES
215
- # =============================================================================
216
-
217
- class BitExtractor(nn.Module):
218
- """
219
- Learns to extract 8-bit operands from token embeddings.
220
- Maps embedding -> 16 bits (two 8-bit operands).
221
- """
222
-
223
- def __init__(self, d_model: int):
224
- super().__init__()
225
- self.d_model = d_model
226
-
227
- # Project to logits, then binarize
228
- self.proj = nn.Linear(d_model, 16)
229
-
230
- # Learnable temperature for sigmoid approximation during training
231
- self.temperature = nn.Parameter(torch.tensor(1.0))
232
-
233
- def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
234
- """
235
- x: [..., d_model]
236
- Returns: a_bits [..., 8], b_bits [..., 8] (LSB first for arithmetic)
237
- """
238
- logits = self.proj(x) # [..., 16]
239
-
240
- # Binarize with STE
241
- bits = heaviside(logits)
242
-
243
- # Split into two operands
244
- a_bits = bits[..., :8]
245
- b_bits = bits[..., 8:]
246
-
247
- return a_bits, b_bits
248
-
249
-
250
- class BitInjector(nn.Module):
251
- """
252
- Learns to inject circuit results back into embedding space.
253
- Maps 16 bits (result + flags) -> embedding delta.
254
- """
255
-
256
- def __init__(self, d_model: int):
257
- super().__init__()
258
- self.d_model = d_model
259
-
260
- # Project bits to embedding
261
- self.proj = nn.Linear(16, d_model)
262
-
263
- # Learnable scale
264
- self.scale = nn.Parameter(torch.tensor(0.1))
265
-
266
- def forward(self, result_bits: torch.Tensor, flags: torch.Tensor) -> torch.Tensor:
267
- """
268
- result_bits: [..., 8]
269
- flags: [..., 8] (carry, overflow, zero, negative, etc.)
270
- Returns: [..., d_model]
271
- """
272
- combined = torch.cat([result_bits, flags], dim=-1) # [..., 16]
273
- return self.proj(combined) * self.scale
274
-
275
-
276
- # =============================================================================
277
- # CIRCUIT-AUGMENTED MLP BLOCK
278
- # =============================================================================
279
-
280
- class CircuitAugmentedMLP(nn.Module):
281
- """
282
- MLP block augmented with frozen threshold circuits.
283
-
284
- The original MLP path runs in parallel with the circuit path.
285
- A learned router decides how much to use each.
286
- """
287
-
288
- def __init__(
289
- self,
290
- d_model: int,
291
- intermediate_size: int,
292
- circuit_path: str,
293
- device: str = 'cpu'
294
- ):
295
- super().__init__()
296
- self.d_model = d_model
297
-
298
- # Original MLP components (will be loaded from pretrained)
299
- self.gate_proj = nn.Linear(d_model, intermediate_size, bias=False)
300
- self.up_proj = nn.Linear(d_model, intermediate_size, bias=False)
301
- self.down_proj = nn.Linear(intermediate_size, d_model, bias=False)
302
- self.act_fn = nn.SiLU()
303
-
304
- # Circuit components
305
- self.circuits = CircuitExecutor(circuit_path, device)
306
- self.bit_extractor = BitExtractor(d_model)
307
- self.bit_injector = BitInjector(d_model)
308
-
309
- # Router: decides circuit vs MLP contribution
310
- self.router = nn.Sequential(
311
- nn.Linear(d_model, 64),
312
- nn.ReLU(),
313
- nn.Linear(64, 2),
314
- nn.Softmax(dim=-1)
315
- )
316
-
317
- # Operation selector (which arithmetic op to perform)
318
- self.op_selector = nn.Sequential(
319
- nn.Linear(d_model, 32),
320
- nn.ReLU(),
321
- nn.Linear(32, 4), # add, sub, compare, passthrough
322
- nn.Softmax(dim=-1)
323
- )
324
-
325
- def _compute_flags(self, result_bits: torch.Tensor, carry: torch.Tensor) -> torch.Tensor:
326
- """Compute status flags from result."""
327
- batch_shape = result_bits.shape[:-1]
328
-
329
- # Zero flag: all bits are 0
330
- zero = (result_bits.sum(dim=-1) == 0).float()
331
-
332
- # Negative flag: MSB is 1 (two's complement)
333
- negative = result_bits[..., 7]
334
-
335
- # Carry flag
336
- carry_flag = carry
337
-
338
- # Pad to 8 flags
339
- flags = torch.zeros(*batch_shape, 8, device=result_bits.device)
340
- flags[..., 0] = zero
341
- flags[..., 1] = negative
342
- flags[..., 2] = carry_flag
343
-
344
- return flags
345
-
346
- def _circuit_forward(self, x: torch.Tensor) -> torch.Tensor:
347
- """Run input through threshold circuits."""
348
- # Extract operands
349
- a_bits, b_bits = self.bit_extractor(x)
350
-
351
- # Get operation weights
352
- op_weights = self.op_selector(x) # [..., 4]
353
-
354
- # Compute addition
355
- add_result, add_carry = self.circuits.add_8bit(a_bits, b_bits)
356
- add_flags = self._compute_flags(add_result, add_carry)
357
-
358
- # Compute subtraction (a + (~b) + 1, simplified: just use add for now)
359
- # For MVP, we'll focus on addition
360
-
361
- # Inject result back
362
- circuit_delta = self.bit_injector(add_result, add_flags)
363
-
364
- return circuit_delta
365
-
366
- def forward(self, x: torch.Tensor) -> torch.Tensor:
367
- """
368
- x: [batch, seq_len, d_model]
369
- Returns: [batch, seq_len, d_model]
370
- """
371
- # Original MLP path
372
- mlp_out = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
373
-
374
- # Circuit path
375
- circuit_out = self._circuit_forward(x)
376
-
377
- # Route between paths
378
- route_weights = self.router(x) # [..., 2]
379
- mlp_weight = route_weights[..., 0:1]
380
- circuit_weight = route_weights[..., 1:2]
381
-
382
- # Combine: MLP output + weighted circuit contribution
383
- output = mlp_out + circuit_weight * circuit_out
384
-
385
- return output
386
-
387
-
388
- # =============================================================================
389
- # MODEL SURGERY: Insert circuits into SmolLM2
390
- # =============================================================================
391
-
392
- def augment_smollm2_with_circuits(
393
- model: AutoModelForCausalLM,
394
- circuit_path: str,
395
- layer_indices: list = None,
396
- device: str = 'cpu'
397
- ) -> AutoModelForCausalLM:
398
- """
399
- Surgically insert circuit blocks into SmolLM2's MLP layers.
400
-
401
- Args:
402
- model: Pretrained SmolLM2 model
403
- circuit_path: Path to neural_computer.safetensors
404
- layer_indices: Which layers to augment (default: middle layers)
405
- device: Device for circuit tensors
406
-
407
- Returns:
408
- Modified model with circuit-augmented MLPs
409
- """
410
- config = model.config
411
- num_layers = config.num_hidden_layers
412
-
413
- # Default: augment middle third of layers
414
- if layer_indices is None:
415
- start = num_layers // 3
416
- end = 2 * num_layers // 3
417
- layer_indices = list(range(start, end))
418
-
419
- print(f"Augmenting layers {layer_indices} with threshold circuits...")
420
-
421
- for idx in layer_indices:
422
- layer = model.model.layers[idx]
423
- old_mlp = layer.mlp
424
-
425
- # Create augmented MLP
426
- new_mlp = CircuitAugmentedMLP(
427
- d_model=config.hidden_size,
428
- intermediate_size=config.intermediate_size,
429
- circuit_path=circuit_path,
430
- device=device
431
- )
432
-
433
- # Copy pretrained weights
434
- new_mlp.gate_proj.weight.data = old_mlp.gate_proj.weight.data.clone()
435
- new_mlp.up_proj.weight.data = old_mlp.up_proj.weight.data.clone()
436
- new_mlp.down_proj.weight.data = old_mlp.down_proj.weight.data.clone()
437
-
438
- # Replace
439
- layer.mlp = new_mlp
440
-
441
- # Freeze circuit weights, keep interfaces trainable
442
- for name, param in model.named_parameters():
443
- if 'circuits' in name:
444
- param.requires_grad = False
445
-
446
- print(f"Done. Circuit weights frozen, interfaces trainable.")
447
-
448
- return model
449
-
450
-
451
- # =============================================================================
452
- # TRAINING UTILITIES
453
- # =============================================================================
454
-
455
- def generate_arithmetic_batch(batch_size: int, max_val: int = 255) -> Tuple[list, list]:
456
- """Generate batch of arithmetic problems and solutions."""
457
- prompts = []
458
- targets = []
459
-
460
- for _ in range(batch_size):
461
- a = torch.randint(0, max_val + 1, (1,)).item()
462
- b = torch.randint(0, max_val + 1, (1,)).item()
463
- result = (a + b) % 256
464
-
465
- prompts.append(f"{a} + {b} =")
466
- targets.append(f" {result}")
467
-
468
- return prompts, targets
469
-
470
-
471
- def evaluate_arithmetic(
472
- model: AutoModelForCausalLM,
473
- tokenizer: AutoTokenizer,
474
- n_problems: int = 100,
475
- device: str = 'cpu'
476
- ) -> dict:
477
- """Evaluate model on random arithmetic problems."""
478
- correct = 0
479
- total = 0
480
- errors = []
481
-
482
- model.eval()
483
-
484
- for _ in range(n_problems):
485
- a = torch.randint(0, 256, (1,)).item()
486
- b = torch.randint(0, 256, (1,)).item()
487
- expected = (a + b) % 256
488
-
489
- prompt = f"{a} + {b} ="
490
- inputs = tokenizer(prompt, return_tensors='pt').to(device)
491
-
492
- with torch.no_grad():
493
- outputs = model.generate(
494
- **inputs,
495
- max_new_tokens=10,
496
- do_sample=False,
497
- pad_token_id=tokenizer.eos_token_id
498
- )
499
-
500
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
501
-
502
- # Extract number from response
503
- try:
504
- # Find the part after "="
505
- answer_part = response.split('=')[-1].strip()
506
- # Extract first number
507
- predicted = int(''.join(c for c in answer_part.split()[0] if c.isdigit()))
508
-
509
- if predicted == expected:
510
- correct += 1
511
- else:
512
- errors.append((a, b, expected, predicted))
513
- except:
514
- errors.append((a, b, expected, "parse_error"))
515
-
516
- total += 1
517
-
518
- return {
519
- 'accuracy': correct / total,
520
- 'correct': correct,
521
- 'total': total,
522
- 'errors': errors[:10] # First 10 errors
523
- }
524
-
525
-
526
- # =============================================================================
527
- # MAIN: Demo
528
- # =============================================================================
529
-
530
- if __name__ == "__main__":
531
- import argparse
532
-
533
- parser = argparse.ArgumentParser(description='Circuit-Augmented LLM Demo')
534
- parser.add_argument('--circuit-path', type=str,
535
- default='./neural_computer.safetensors',
536
- help='Path to circuit weights')
537
- parser.add_argument('--device', type=str, default='cpu',
538
- help='Device (cpu or cuda)')
539
- parser.add_argument('--eval-only', action='store_true',
540
- help='Only evaluate, do not augment')
541
- args = parser.parse_args()
542
-
543
- print("=" * 70)
544
- print(" CIRCUIT-AUGMENTED LLM")
545
- print("=" * 70)
546
-
547
- # Load tokenizer and model
548
- print("\n[1] Loading SmolLM2-360M...")
549
- model_id = "HuggingFaceTB/SmolLM2-360M"
550
- tokenizer = AutoTokenizer.from_pretrained(model_id)
551
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
552
-
553
- print(f" Parameters: {sum(p.numel() for p in model.parameters()):,}")
554
-
555
- # Baseline evaluation
556
- print("\n[2] Baseline arithmetic evaluation...")
557
- baseline = evaluate_arithmetic(model, tokenizer, n_problems=50, device=args.device)
558
- print(f" Accuracy: {baseline['accuracy']*100:.1f}% ({baseline['correct']}/{baseline['total']})")
559
- if baseline['errors']:
560
- print(f" Sample errors:")
561
- for a, b, exp, got in baseline['errors'][:5]:
562
- print(f" {a} + {b} = {exp}, model said {got}")
563
-
564
- if args.eval_only:
565
- print("\nDone (eval only mode).")
566
- exit(0)
567
-
568
- # Augment with circuits
569
- print(f"\n[3] Augmenting with threshold circuits...")
570
- print(f" Circuit path: {args.circuit_path}")
571
- model = augment_smollm2_with_circuits(
572
- model,
573
- args.circuit_path,
574
- device=args.device
575
- )
576
-
577
- new_params = sum(p.numel() for p in model.parameters())
578
- trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
579
- print(f" Total parameters: {new_params:,}")
580
- print(f" Trainable parameters: {trainable:,}")
581
-
582
- # Test circuit execution directly
583
- print("\n[4] Testing circuit execution...")
584
- circuit_exec = CircuitExecutor(args.circuit_path, args.device)
585
-
586
- test_cases = [(127, 128), (255, 1), (0, 0), (100, 55)]
587
- for a, b in test_cases:
588
- # Convert to bits (LSB first)
589
- a_bits = torch.tensor([(a >> i) & 1 for i in range(8)], dtype=torch.float32)
590
- b_bits = torch.tensor([(b >> i) & 1 for i in range(8)], dtype=torch.float32)
591
-
592
- result_bits, carry = circuit_exec.add_8bit(
593
- a_bits.unsqueeze(0),
594
- b_bits.unsqueeze(0)
595
- )
596
-
597
- # Convert result bits back to int
598
- result = sum(int(result_bits[0, i].item()) * (2**i) for i in range(8))
599
- expected = (a + b) % 256
600
-
601
- status = "OK" if result == expected else "FAIL"
602
- print(f" {a} + {b} = {result} (expected {expected}) [{status}]")
603
-
604
- print("\n[5] Model ready for fine-tuning.")
605
- print(" Next: Train interface layers on arithmetic examples.")
606
- print("=" * 70)
 
1
+ """
2
+ Circuit-Augmented LLM: Embedding threshold logic circuits into SmolLM2
3
+ ======================================================================
4
+
5
+ Replaces/augments MLP layers with frozen threshold circuits for exact arithmetic.
6
+ """
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+ from typing import Dict, Optional, Tuple
12
+ from safetensors.torch import load_file
13
+ from transformers import AutoModelForCausalLM, AutoTokenizer
14
+ import warnings
15
+ warnings.filterwarnings('ignore')
16
+
17
+
18
+ # =============================================================================
19
+ # HEAVISIDE WITH STRAIGHT-THROUGH ESTIMATOR
20
+ # =============================================================================
21
+
22
+ class HeavisideSTE(torch.autograd.Function):
23
+ """Heaviside step function with straight-through estimator for backprop."""
24
+
25
+ @staticmethod
26
+ def forward(ctx, x):
27
+ return (x >= 0).float()
28
+
29
+ @staticmethod
30
+ def backward(ctx, grad_output):
31
+ # STE: pass gradient through unchanged
32
+ return grad_output
33
+
34
+
35
+ def heaviside(x: torch.Tensor) -> torch.Tensor:
36
+ """Heaviside step: 1 if x >= 0, else 0. Uses STE for training."""
37
+ return HeavisideSTE.apply(x)
38
+
39
+
40
+ # =============================================================================
41
+ # CIRCUIT EXECUTOR - Runs the frozen threshold circuits
42
+ # =============================================================================
43
+
44
+ class CircuitExecutor(nn.Module):
45
+ """
46
+ Executes threshold logic circuits from the safetensors file.
47
+ All circuit weights are frozen - only interface layers train.
48
+ """
49
+
50
+ def __init__(self, circuit_path: str, device: str = 'cpu'):
51
+ super().__init__()
52
+ self.device = device
53
+
54
+ # Load all circuit tensors
55
+ raw_circuits = load_file(circuit_path)
56
+
57
+ # Store as frozen parameters (use underscores for valid param names)
58
+ self.circuits = {}
59
+ for k, v in raw_circuits.items():
60
+ safe_name = k.replace('.', '__')
61
+ self.register_buffer(safe_name, v.float().to(device))
62
+ self.circuits[k] = safe_name
63
+
64
+ def _get(self, name: str) -> torch.Tensor:
65
+ """Get circuit tensor by original dotted name."""
66
+ return getattr(self, self.circuits[name])
67
+
68
+ # -------------------------------------------------------------------------
69
+ # Boolean Gates
70
+ # -------------------------------------------------------------------------
71
+
72
+ def eval_and(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
73
+ """AND gate: output 1 iff both inputs are 1."""
74
+ inp = torch.stack([a, b], dim=-1)
75
+ w = self._get('boolean.and.weight')
76
+ bias = self._get('boolean.and.bias')
77
+ return heaviside(inp @ w + bias)
78
+
79
+ def eval_or(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
80
+ """OR gate: output 1 if either input is 1."""
81
+ inp = torch.stack([a, b], dim=-1)
82
+ w = self._get('boolean.or.weight')
83
+ bias = self._get('boolean.or.bias')
84
+ return heaviside(inp @ w + bias)
85
+
86
+ def eval_xor(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
87
+ """XOR gate: two-layer network (not linearly separable)."""
88
+ inp = torch.stack([a, b], dim=-1)
89
+
90
+ # Layer 1: OR and NAND neurons
91
+ w1_n1 = self._get('boolean.xor.layer1.neuron1.weight')
92
+ b1_n1 = self._get('boolean.xor.layer1.neuron1.bias')
93
+ w1_n2 = self._get('boolean.xor.layer1.neuron2.weight')
94
+ b1_n2 = self._get('boolean.xor.layer1.neuron2.bias')
95
+
96
+ h1 = heaviside(inp @ w1_n1 + b1_n1)
97
+ h2 = heaviside(inp @ w1_n2 + b1_n2)
98
+ hidden = torch.stack([h1, h2], dim=-1)
99
+
100
+ # Layer 2: AND of hidden
101
+ w2 = self._get('boolean.xor.layer2.weight')
102
+ b2 = self._get('boolean.xor.layer2.bias')
103
+
104
+ return heaviside(hidden @ w2 + b2)
105
+
106
+ # -------------------------------------------------------------------------
107
+ # Arithmetic: Full Adder
108
+ # -------------------------------------------------------------------------
109
+
110
+ def eval_full_adder(self, a: torch.Tensor, b: torch.Tensor,
111
+ cin: torch.Tensor, prefix: str) -> Tuple[torch.Tensor, torch.Tensor]:
112
+ """
113
+ Full adder: sum = a XOR b XOR cin, cout = (a AND b) OR (cin AND (a XOR b))
114
+ Returns (sum_bit, carry_out)
115
+ """
116
+ inp_ab = torch.stack([a, b], dim=-1)
117
+
118
+ # HA1: a XOR b
119
+ w1_or = self._get(f'{prefix}.ha1.sum.layer1.or.weight')
120
+ b1_or = self._get(f'{prefix}.ha1.sum.layer1.or.bias')
121
+ w1_nand = self._get(f'{prefix}.ha1.sum.layer1.nand.weight')
122
+ b1_nand = self._get(f'{prefix}.ha1.sum.layer1.nand.bias')
123
+ w2 = self._get(f'{prefix}.ha1.sum.layer2.weight')
124
+ b2 = self._get(f'{prefix}.ha1.sum.layer2.bias')
125
+
126
+ h_or = heaviside(inp_ab @ w1_or + b1_or)
127
+ h_nand = heaviside(inp_ab @ w1_nand + b1_nand)
128
+ hidden = torch.stack([h_or, h_nand], dim=-1)
129
+ ha1_sum = heaviside(hidden @ w2 + b2)
130
+
131
+ # HA1 carry
132
+ w_c1 = self._get(f'{prefix}.ha1.carry.weight')
133
+ b_c1 = self._get(f'{prefix}.ha1.carry.bias')
134
+ ha1_carry = heaviside(inp_ab @ w_c1 + b_c1)
135
+
136
+ # HA2: ha1_sum XOR cin
137
+ inp_ha2 = torch.stack([ha1_sum, cin], dim=-1)
138
+ w1_or = self._get(f'{prefix}.ha2.sum.layer1.or.weight')
139
+ b1_or = self._get(f'{prefix}.ha2.sum.layer1.or.bias')
140
+ w1_nand = self._get(f'{prefix}.ha2.sum.layer1.nand.weight')
141
+ b1_nand = self._get(f'{prefix}.ha2.sum.layer1.nand.bias')
142
+ w2 = self._get(f'{prefix}.ha2.sum.layer2.weight')
143
+ b2 = self._get(f'{prefix}.ha2.sum.layer2.bias')
144
+
145
+ h_or = heaviside(inp_ha2 @ w1_or + b1_or)
146
+ h_nand = heaviside(inp_ha2 @ w1_nand + b1_nand)
147
+ hidden = torch.stack([h_or, h_nand], dim=-1)
148
+ ha2_sum = heaviside(hidden @ w2 + b2)
149
+
150
+ # HA2 carry
151
+ w_c2 = self._get(f'{prefix}.ha2.carry.weight')
152
+ b_c2 = self._get(f'{prefix}.ha2.carry.bias')
153
+ ha2_carry = heaviside(inp_ha2 @ w_c2 + b_c2)
154
+
155
+ # Carry out = ha1_carry OR ha2_carry
156
+ inp_cout = torch.stack([ha1_carry, ha2_carry], dim=-1)
157
+ w_or = self._get(f'{prefix}.carry_or.weight')
158
+ b_or = self._get(f'{prefix}.carry_or.bias')
159
+ cout = heaviside(inp_cout @ w_or + b_or)
160
+
161
+ return ha2_sum, cout
162
+
163
+ # -------------------------------------------------------------------------
164
+ # Arithmetic: 8-bit Ripple Carry Adder
165
+ # -------------------------------------------------------------------------
166
+
167
+ def add_8bit(self, a_bits: torch.Tensor, b_bits: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
168
+ """
169
+ 8-bit ripple carry addition.
170
+ a_bits, b_bits: [..., 8] tensors of bits (LSB first)
171
+ Returns: (result_bits [..., 8], carry_out [...])
172
+ """
173
+ batch_shape = a_bits.shape[:-1]
174
+ carry = torch.zeros(batch_shape, device=a_bits.device)
175
+ result_bits = []
176
+
177
+ for i in range(8):
178
+ a_i = a_bits[..., i]
179
+ b_i = b_bits[..., i]
180
+ sum_bit, carry = self.eval_full_adder(
181
+ a_i, b_i, carry,
182
+ f'arithmetic.ripplecarry8bit.fa{i}'
183
+ )
184
+ result_bits.append(sum_bit)
185
+
186
+ return torch.stack(result_bits, dim=-1), carry
187
+
188
+ # -------------------------------------------------------------------------
189
+ # Arithmetic: 8-bit Comparators
190
+ # -------------------------------------------------------------------------
191
+
192
+ def greater_than_8bit(self, a_bits: torch.Tensor, b_bits: torch.Tensor) -> torch.Tensor:
193
+ """Returns 1 if a > b, else 0. Bits are MSB first."""
194
+ diff = a_bits - b_bits # [..., 8]
195
+ w = self._get('arithmetic.greaterthan8bit.comparator')
196
+ score = (diff * w).sum(dim=-1)
197
+ return (score > 0).float()
198
+
199
+ def less_than_8bit(self, a_bits: torch.Tensor, b_bits: torch.Tensor) -> torch.Tensor:
200
+ """Returns 1 if a < b, else 0. Bits are MSB first."""
201
+ diff = b_bits - a_bits # [..., 8]
202
+ w = self._get('arithmetic.lessthan8bit.comparator')
203
+ score = (diff * w).sum(dim=-1)
204
+ return (score > 0).float()
205
+
206
+ def equal_8bit(self, a_bits: torch.Tensor, b_bits: torch.Tensor) -> torch.Tensor:
207
+ """Returns 1 if a == b, else 0."""
208
+ gt = self.greater_than_8bit(a_bits, b_bits)
209
+ lt = self.less_than_8bit(a_bits, b_bits)
210
+ return (1 - gt) * (1 - lt)
211
+
212
+
213
+ # =============================================================================
214
+ # BIT EXTRACTION / INJECTION INTERFACES
215
+ # =============================================================================
216
+
217
+ class BitExtractor(nn.Module):
218
+ """
219
+ Learns to extract 8-bit operands from token embeddings.
220
+ Maps embedding -> 16 bits (two 8-bit operands).
221
+ """
222
+
223
+ def __init__(self, d_model: int):
224
+ super().__init__()
225
+ self.d_model = d_model
226
+
227
+ # Project to logits, then binarize
228
+ self.proj = nn.Linear(d_model, 16)
229
+
230
+ # Learnable temperature for sigmoid approximation during training
231
+ self.temperature = nn.Parameter(torch.tensor(1.0))
232
+
233
+ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
234
+ """
235
+ x: [..., d_model]
236
+ Returns: a_bits [..., 8], b_bits [..., 8] (LSB first for arithmetic)
237
+ """
238
+ logits = self.proj(x) # [..., 16]
239
+
240
+ # Binarize with STE
241
+ bits = heaviside(logits)
242
+
243
+ # Split into two operands
244
+ a_bits = bits[..., :8]
245
+ b_bits = bits[..., 8:]
246
+
247
+ return a_bits, b_bits
248
+
249
+
250
+ class BitInjector(nn.Module):
251
+ """
252
+ Learns to inject circuit results back into embedding space.
253
+ Maps 16 bits (result + flags) -> embedding delta.
254
+ """
255
+
256
+ def __init__(self, d_model: int):
257
+ super().__init__()
258
+ self.d_model = d_model
259
+
260
+ # Project bits to embedding
261
+ self.proj = nn.Linear(16, d_model)
262
+
263
+ # Learnable scale
264
+ self.scale = nn.Parameter(torch.tensor(0.1))
265
+
266
+ def forward(self, result_bits: torch.Tensor, flags: torch.Tensor) -> torch.Tensor:
267
+ """
268
+ result_bits: [..., 8]
269
+ flags: [..., 8] (carry, overflow, zero, negative, etc.)
270
+ Returns: [..., d_model]
271
+ """
272
+ combined = torch.cat([result_bits, flags], dim=-1) # [..., 16]
273
+ return self.proj(combined) * self.scale
274
+
275
+
276
+ # =============================================================================
277
+ # CIRCUIT-AUGMENTED MLP BLOCK
278
+ # =============================================================================
279
+
280
class CircuitAugmentedMLP(nn.Module):
    """
    MLP block augmented with frozen threshold circuits.

    The original MLP path runs in parallel with the circuit path.
    A learned router decides how much to use each.

    The `gate_proj`/`up_proj`/`down_proj` attributes mirror the pretrained
    Llama-style MLP so its weights can be copied in directly.
    """

    def __init__(
        self,
        d_model: int,
        intermediate_size: int,
        circuit_path: str,
        device: str = 'cpu'
    ):
        """
        Args:
            d_model: Hidden size of the transformer residual stream.
            intermediate_size: MLP expansion width (gate/up projection size).
            circuit_path: Path to the frozen circuit safetensors file.
            device: Device the circuit tensors are loaded onto.
        """
        super().__init__()
        self.d_model = d_model

        # Original MLP components (will be loaded from pretrained)
        self.gate_proj = nn.Linear(d_model, intermediate_size, bias=False)
        self.up_proj = nn.Linear(d_model, intermediate_size, bias=False)
        self.down_proj = nn.Linear(intermediate_size, d_model, bias=False)
        self.act_fn = nn.SiLU()

        # Circuit components
        # NOTE(review): each instance loads its own CircuitExecutor from disk;
        # with many augmented layers a shared executor would avoid re-reading
        # the same safetensors file — confirm before changing.
        self.circuits = CircuitExecutor(circuit_path, device)
        self.bit_extractor = BitExtractor(d_model)
        self.bit_injector = BitInjector(d_model)

        # Router: decides circuit vs MLP contribution (2-way softmax)
        self.router = nn.Sequential(
            nn.Linear(d_model, 64),
            nn.ReLU(),
            nn.Linear(64, 2),
            nn.Softmax(dim=-1)
        )

        # Operation selector (which arithmetic op to perform)
        self.op_selector = nn.Sequential(
            nn.Linear(d_model, 32),
            nn.ReLU(),
            nn.Linear(32, 4),  # add, sub, compare, passthrough
            nn.Softmax(dim=-1)
        )

    def _compute_flags(self, result_bits: torch.Tensor, carry: torch.Tensor) -> torch.Tensor:
        """Compute status flags from result.

        Args:
            result_bits: [..., 8] result bits (LSB first).
            carry: carry-out of the adder, broadcastable to the batch shape.

        Returns:
            [..., 8] flag vector laid out as [zero, negative, carry, 0, 0, 0, 0, 0].
        """
        batch_shape = result_bits.shape[:-1]

        # Zero flag: all bits are 0
        zero = (result_bits.sum(dim=-1) == 0).float()

        # Negative flag: MSB is 1 (two's complement); index 7 is the MSB
        # because bits are stored LSB first
        negative = result_bits[..., 7]

        # Carry flag
        carry_flag = carry

        # Pad to 8 flags; unused slots stay zero for future flags
        flags = torch.zeros(*batch_shape, 8, device=result_bits.device)
        flags[..., 0] = zero
        flags[..., 1] = negative
        flags[..., 2] = carry_flag

        return flags

    def _circuit_forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run input through threshold circuits.

        Returns an embedding-space delta [..., d_model] encoding the 8-bit
        addition result plus its flags, produced by the bit injector.
        """
        # Extract operands
        a_bits, b_bits = self.bit_extractor(x)

        # Get operation weights
        # NOTE(review): op_weights is computed but never used below — the MVP
        # hard-wires addition; wire this up (or drop it) when more ops land.
        op_weights = self.op_selector(x)  # [..., 4]

        # Compute addition
        add_result, add_carry = self.circuits.add_8bit(a_bits, b_bits)
        add_flags = self._compute_flags(add_result, add_carry)

        # Compute subtraction (a + (~b) + 1, simplified: just use add for now)
        # For MVP, we'll focus on addition

        # Inject result back
        circuit_delta = self.bit_injector(add_result, add_flags)

        return circuit_delta

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        x: [batch, seq_len, d_model]
        Returns: [batch, seq_len, d_model]
        """
        # Original MLP path (SwiGLU as in Llama-style blocks)
        mlp_out = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))

        # Circuit path
        circuit_out = self._circuit_forward(x)

        # Route between paths
        route_weights = self.router(x)  # [..., 2]
        mlp_weight = route_weights[..., 0:1]
        circuit_weight = route_weights[..., 1:2]

        # Combine: MLP output + weighted circuit contribution
        # NOTE(review): mlp_weight is never applied — the MLP path always
        # contributes at full strength; confirm this asymmetry is intentional.
        output = mlp_out + circuit_weight * circuit_out

        return output
386
+
387
+
388
+ # =============================================================================
389
+ # MODEL SURGERY: Insert circuits into SmolLM2
390
+ # =============================================================================
391
+
392
def augment_smollm2_with_circuits(
    model: AutoModelForCausalLM,
    circuit_path: str,
    layer_indices: list = None,  # NOTE(review): annotation should be Optional[list]
    device: str = 'cpu'
) -> AutoModelForCausalLM:
    """
    Surgically insert circuit blocks into SmolLM2's MLP layers.

    Each selected layer's MLP is replaced in place by a CircuitAugmentedMLP
    carrying the pretrained gate/up/down weights, then all parameters whose
    name contains 'circuits' are frozen.

    Args:
        model: Pretrained SmolLM2 model
        circuit_path: Path to neural_computer.safetensors
        layer_indices: Which layers to augment (default: middle layers)
        device: Device for circuit tensors

    Returns:
        Modified model with circuit-augmented MLPs (mutated in place and
        also returned for convenience)
    """
    config = model.config
    num_layers = config.num_hidden_layers

    # Default: augment middle third of layers
    if layer_indices is None:
        start = num_layers // 3
        end = 2 * num_layers // 3
        layer_indices = list(range(start, end))

    print(f"Augmenting layers {layer_indices} with threshold circuits...")

    for idx in layer_indices:
        layer = model.model.layers[idx]
        old_mlp = layer.mlp

        # Create augmented MLP
        new_mlp = CircuitAugmentedMLP(
            d_model=config.hidden_size,
            intermediate_size=config.intermediate_size,
            circuit_path=circuit_path,
            device=device
        )

        # Copy pretrained weights so the MLP path behaves identically
        new_mlp.gate_proj.weight.data = old_mlp.gate_proj.weight.data.clone()
        new_mlp.up_proj.weight.data = old_mlp.up_proj.weight.data.clone()
        new_mlp.down_proj.weight.data = old_mlp.down_proj.weight.data.clone()

        # Replace
        layer.mlp = new_mlp

    # Freeze circuit weights, keep interfaces trainable
    # (matches by substring: any parameter name containing 'circuits')
    for name, param in model.named_parameters():
        if 'circuits' in name:
            param.requires_grad = False

    print(f"Done. Circuit weights frozen, interfaces trainable.")

    return model
449
+
450
+
451
+ # =============================================================================
452
+ # TRAINING UTILITIES
453
+ # =============================================================================
454
+
455
def generate_arithmetic_batch(batch_size: int, max_val: int = 255) -> Tuple[list, list]:
    """Produce parallel lists of 8-bit addition prompts and answer strings.

    Each prompt looks like "<a> + <b> =" with operands drawn uniformly from
    [0, max_val]; the matching target is " <(a+b) mod 256>".
    """
    prompts, targets = [], []
    for _ in range(batch_size):
        lhs = torch.randint(0, max_val + 1, (1,)).item()
        rhs = torch.randint(0, max_val + 1, (1,)).item()
        prompts.append(f"{lhs} + {rhs} =")
        targets.append(f" {(lhs + rhs) % 256}")  # 8-bit wraparound
    return prompts, targets
469
+
470
+
471
def evaluate_arithmetic(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    n_problems: int = 100,
    device: str = 'cpu'
) -> dict:
    """Evaluate model on random 8-bit addition problems.

    Greedy-decodes "<a> + <b> =" prompts and parses the first integer the
    model emits after the final '='.

    Args:
        model: Causal LM to evaluate.
        tokenizer: Tokenizer matching the model.
        n_problems: Number of random problems to sample.
        device: Device the tokenized inputs are moved to.

    Returns:
        dict with 'accuracy' (0.0 when n_problems == 0), 'correct', 'total',
        and 'errors': up to 10 (a, b, expected, predicted-or-"parse_error")
        tuples.
    """
    correct = 0
    total = 0
    errors = []

    model.eval()

    for _ in range(n_problems):
        a = torch.randint(0, 256, (1,)).item()
        b = torch.randint(0, 256, (1,)).item()
        expected = (a + b) % 256

        prompt = f"{a} + {b} ="
        inputs = tokenizer(prompt, return_tensors='pt').to(device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=10,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract the first integer appearing after the final "=".
        # Narrow except: int() raises ValueError on non-numeric text,
        # split()[0] raises IndexError when nothing follows the '='.
        # (A bare except here previously swallowed KeyboardInterrupt too.)
        try:
            answer_part = response.split('=')[-1].strip()
            predicted = int(''.join(c for c in answer_part.split()[0] if c.isdigit()))

            if predicted == expected:
                correct += 1
            else:
                errors.append((a, b, expected, predicted))
        except (ValueError, IndexError):
            errors.append((a, b, expected, "parse_error"))

        total += 1

    return {
        # Guard against ZeroDivisionError when called with n_problems=0
        'accuracy': correct / total if total else 0.0,
        'correct': correct,
        'total': total,
        'errors': errors[:10]  # First 10 errors
    }
524
+
525
+
526
+ # =============================================================================
527
+ # MAIN: Demo
528
+ # =============================================================================
529
+
530
if __name__ == "__main__":
    # Demo driver: load SmolLM2, measure baseline arithmetic accuracy,
    # splice in the frozen threshold circuits, then sanity-check the
    # 8-bit adder directly.
    import argparse

    parser = argparse.ArgumentParser(description='Circuit-Augmented LLM Demo')
    parser.add_argument('--circuit-path', type=str,
                        default='./neural_computer.safetensors',
                        help='Path to circuit weights')
    parser.add_argument('--device', type=str, default='cpu',
                        help='Device (cpu or cuda)')
    parser.add_argument('--eval-only', action='store_true',
                        help='Only evaluate, do not augment')
    args = parser.parse_args()

    print("=" * 70)
    print(" CIRCUIT-AUGMENTED LLM")
    print("=" * 70)

    # Load tokenizer and model
    print("\n[1] Loading SmolLM2-360M...")
    model_id = "HuggingFaceTB/SmolLM2-360M"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)

    print(f" Parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Baseline evaluation (before any augmentation)
    print("\n[2] Baseline arithmetic evaluation...")
    baseline = evaluate_arithmetic(model, tokenizer, n_problems=50, device=args.device)
    print(f" Accuracy: {baseline['accuracy']*100:.1f}% ({baseline['correct']}/{baseline['total']})")
    if baseline['errors']:
        print(f" Sample errors:")
        for a, b, exp, got in baseline['errors'][:5]:
            print(f" {a} + {b} = {exp}, model said {got}")

    if args.eval_only:
        print("\nDone (eval only mode).")
        exit(0)

    # Augment with circuits (middle layers by default)
    print(f"\n[3] Augmenting with threshold circuits...")
    print(f" Circuit path: {args.circuit_path}")
    model = augment_smollm2_with_circuits(
        model,
        args.circuit_path,
        device=args.device
    )

    new_params = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f" Total parameters: {new_params:,}")
    print(f" Trainable parameters: {trainable:,}")

    # Test circuit execution directly (bypassing the LM entirely)
    print("\n[4] Testing circuit execution...")
    circuit_exec = CircuitExecutor(args.circuit_path, args.device)

    # Cases chosen to exercise full carry chains (127+128, 255+1) and zero
    test_cases = [(127, 128), (255, 1), (0, 0), (100, 55)]
    for a, b in test_cases:
        # Convert to bits (LSB first)
        a_bits = torch.tensor([(a >> i) & 1 for i in range(8)], dtype=torch.float32)
        b_bits = torch.tensor([(b >> i) & 1 for i in range(8)], dtype=torch.float32)

        result_bits, carry = circuit_exec.add_8bit(
            a_bits.unsqueeze(0),
            b_bits.unsqueeze(0)
        )

        # Convert result bits back to int
        result = sum(int(result_bits[0, i].item()) * (2**i) for i in range(8))
        expected = (a + b) % 256

        status = "OK" if result == expected else "FAIL"
        print(f" {a} + {b} = {result} (expected {expected}) [{status}]")

    print("\n[5] Model ready for fine-tuning.")
    print(" Next: Train interface layers on arithmetic examples.")
    print("=" * 70)
llm/guide.md CHANGED
@@ -1,615 +1,615 @@
1
- # Embedding Threshold Logic Circuits into Transformer MLPs
2
-
3
- ## Technical Implementation Guide
4
-
5
- ---
6
-
7
- ## 1. Core Thesis
8
-
9
- Standard LLMs fail at arithmetic because they're interpolators—they approximate functions over training distributions rather than compute exact results. A 360M parameter model trained on internet text has seen "127 + 128 = 255" zero or few times, so it guesses "140" based on pattern matching.
10
-
11
- We solve this by embedding **frozen, proven-correct arithmetic circuits** directly into the transformer's MLP layers. The circuits use threshold logic (weighted sums + step activation), which is structurally compatible with neural network layers. We train only the **interface layers** that learn to:
12
-
13
- 1. Extract operands from token embeddings
14
- 2. Route computation through the circuits
15
- 3. Inject results back into the residual stream
16
-
17
- The model learns **call dispatch**, not arithmetic. The arithmetic is already solved.
18
-
19
- ---
20
-
21
- ## 2. Threshold Logic Fundamentals
22
-
23
- ### 2.1 Single Threshold Gate
24
-
25
- A threshold gate computes:
26
-
27
- ```
28
- output = 1 if (Σ wᵢxᵢ + b) ≥ 0
29
- 0 otherwise
30
- ```
31
-
32
- This is a neuron with Heaviside step activation. With integer weights `w` and bias `b`, it computes a Boolean function of binary inputs.
33
-
34
- **Example: AND gate**
35
- ```
36
- w = [1, 1], b = -2
37
- AND(0,0) = H(0 + 0 - 2) = H(-2) = 0
38
- AND(0,1) = H(0 + 1 - 2) = H(-1) = 0
39
- AND(1,0) = H(1 + 0 - 2) = H(-1) = 0
40
- AND(1,1) = H(1 + 1 - 2) = H(0) = 1
41
- ```
42
-
43
- **Example: OR gate**
44
- ```
45
- w = [1, 1], b = -1
46
- OR(0,0) = H(0 + 0 - 1) = H(-1) = 0
47
- OR(0,1) = H(0 + 1 - 1) = H(0) = 1
48
- OR(1,0) = H(1 + 0 - 1) = H(0) = 1
49
- OR(1,1) = H(1 + 1 - 1) = H(1) = 1
50
- ```
51
-
52
- ### 2.2 Multi-Layer Circuits
53
-
54
- XOR is not linearly separable—it requires two layers:
55
-
56
- ```
57
- Layer 1:
58
- neuron1 (OR): w=[1,1], b=-1 → fires if a OR b
59
- neuron2 (NAND): w=[-1,-1], b=1 → fires if NOT(a AND b)
60
-
61
- Layer 2:
62
- neuron3 (AND): w=[1,1], b=-2 → fires if both layer1 outputs are 1
63
-
64
- XOR(a,b) = AND(OR(a,b), NAND(a,b))
65
- ```
66
-
67
- ### 2.3 Full Adder
68
-
69
- A full adder computes `sum` and `carry_out` from inputs `a`, `b`, `carry_in`:
70
-
71
- ```
72
- sum = a XOR b XOR cin
73
- cout = (a AND b) OR (cin AND (a XOR b))
74
- ```
75
-
76
- Implementation uses two half-adders chained:
77
-
78
- ```
79
- HA1: (a, b) → (sum1 = a XOR b, carry1 = a AND b)
80
- HA2: (sum1, cin) → (sum2 = sum1 XOR cin, carry2 = sum1 AND cin)
81
- cout = carry1 OR carry2
82
- final_sum = sum2
83
- ```
84
-
85
- Each XOR is 2 layers, each AND/OR is 1 layer. Total depth: ~4 layers per full adder.
86
-
87
- ### 2.4 8-bit Ripple Carry Adder
88
-
89
- Chain 8 full adders, propagating carry:
90
-
91
- ```
92
- FA0: (a[0], b[0], 0) → (sum[0], c0)
93
- FA1: (a[1], b[1], c0) → (sum[1], c1)
94
- FA2: (a[2], b[2], c1) → (sum[2], c2)
95
- ...
96
- FA7: (a[7], b[7], c6) → (sum[7], c7)
97
- ```
98
-
99
- Total circuit depth: ~32 threshold layers (8 FAs × 4 layers each).
100
-
101
- ---
102
-
103
- ## 3. Circuit Inventory
104
-
105
- The `neural_computer.safetensors` contains 3,122 tensors / 5,648 parameters implementing:
106
-
107
- | Category | Circuits | Tensors |
108
- |----------|----------|---------|
109
- | Boolean | AND, OR, NOT, NAND, NOR, XOR, XNOR, IMPLIES, BIIMPLIES | ~30 |
110
- | Arithmetic | Half adder, Full adder, Ripple carry 2/4/8-bit, 8×8 multiplier | ~800 |
111
- | Comparators | GT, LT, GEQ, LEQ, EQ (8-bit) | ~50 |
112
- | ALU | 16-operation ALU, opcode decoder, flag computation | ~400 |
113
- | Control | JMP, JZ, JNZ, JC, JNC, JN, JP, CALL, RET, PUSH, POP | ~200 |
114
- | Modular | Divisibility by 2-12 | ~600 |
115
- | Error Detection | Parity, CRC, Hamming, checksum | ~200 |
116
- | Pattern | Popcount, leading zeros, symmetry | ~150 |
117
- | Threshold | k-of-n gates, majority, minority | ~100 |
118
-
119
- All weights are integers. All activations are Heaviside. Verified with 6,590 exhaustive tests.
120
-
121
- ---
122
-
123
- ## 4. Transformer Integration Architecture
124
-
125
- ### 4.1 Target: SmolLM2-360M
126
-
127
- ```
128
- Architecture: LlamaForCausalLM
129
- Hidden dim: 960
130
- Layers: 32
131
- Heads: 15
132
- MLP expansion: 4x (intermediate = 3840)
133
- Vocab: 49152
134
- Parameters: 361,821,120
135
- ```
136
-
137
- Standard MLP block:
138
- ```python
139
- def forward(x): # x: [batch, seq, 960]
140
- gate = self.gate_proj(x) # [batch, seq, 3840]
141
- up = self.up_proj(x) # [batch, seq, 3840]
142
- hidden = silu(gate) * up # SwiGLU activation
143
- return self.down_proj(hidden) # [batch, seq, 960]
144
- ```
145
-
146
- ### 4.2 Augmented MLP Block
147
-
148
- ```python
149
- def forward(x): # x: [batch, seq, 960]
150
- # Original MLP path (unchanged)
151
- mlp_out = self.down_proj(silu(self.gate_proj(x)) * self.up_proj(x))
152
-
153
- # Circuit path (new)
154
- a_bits, b_bits = self.bit_extractor(x) # [batch, seq, 8] each
155
- result_bits, carry = self.circuits.add_8bit(a_bits, b_bits)
156
- flags = self.compute_flags(result_bits, carry)
157
- circuit_delta = self.bit_injector(result_bits, flags)
158
-
159
- # Routing
160
- route_weights = self.router(x) # [batch, seq, 2] softmax
161
-
162
- # Combine
163
- return mlp_out + route_weights[..., 1:2] * circuit_delta
164
- ```
165
-
166
- ### 4.3 Layer Selection
167
-
168
- We augment the **middle third** of layers (10-20 of 32):
169
-
170
- - Early layers (0-9): Token/position encoding, not arithmetic-relevant
171
- - Middle layers (10-20): Abstract reasoning, computation
172
- - Late layers (21-31): Output formatting, vocabulary projection
173
-
174
- Rationale: Arithmetic computation happens in middle layers where the model processes relationships between tokens. Early layers haven't built sufficient representations; late layers are committed to output tokens.
175
-
176
- ---
177
-
178
- ## 5. Interface Layers (Trainable)
179
-
180
- ### 5.1 BitExtractor
181
-
182
- Maps token embedding → two 8-bit operands.
183
-
184
- ```python
185
- class BitExtractor(nn.Module):
186
- def __init__(self, d_model=960):
187
- self.proj = nn.Linear(d_model, 16) # 960 → 16
188
-
189
- def forward(self, x):
190
- logits = self.proj(x) # [batch, seq, 16]
191
- bits = heaviside(logits) # binarize with STE
192
- a_bits = bits[..., :8] # first operand
193
- b_bits = bits[..., 8:] # second operand
194
- return a_bits, b_bits # both [batch, seq, 8], LSB first
195
- ```
196
-
197
- **What it learns**: Which embedding dimensions encode numeric magnitude. For token "127", it must learn that certain activation patterns correspond to bits `[1,1,1,1,1,1,1,0]`.
198
-
199
- **Parameters**: 960 × 16 + 16 = 15,376
200
-
201
- ### 5.2 BitInjector
202
-
203
- Maps circuit outputs → embedding delta.
204
-
205
- ```python
206
- class BitInjector(nn.Module):
207
- def __init__(self, d_model=960):
208
- self.proj = nn.Linear(16, d_model) # 16 → 960
209
- self.scale = nn.Parameter(torch.tensor(0.1))
210
-
211
- def forward(self, result_bits, flags):
212
- combined = torch.cat([result_bits, flags], dim=-1) # [batch, seq, 16]
213
- return self.proj(combined) * self.scale # [batch, seq, 960]
214
- ```
215
-
216
- **What it learns**: How to inject the result bits back into embedding space such that subsequent layers (and the final vocabulary projection) produce the correct output tokens.
217
-
218
- **Parameters**: 16 × 960 + 960 + 1 = 16,321
219
-
220
- ### 5.3 Router
221
-
222
- Decides when to use circuit path.
223
-
224
- ```python
225
- class Router(nn.Module):
226
- def __init__(self, d_model=960):
227
- self.net = nn.Sequential(
228
- nn.Linear(d_model, 64),
229
- nn.ReLU(),
230
- nn.Linear(64, 2),
231
- nn.Softmax(dim=-1)
232
- )
233
-
234
- def forward(self, x):
235
- return self.net(x) # [batch, seq, 2]: [mlp_weight, circuit_weight]
236
- ```
237
-
238
- **What it learns**: "This position contains arithmetic" → route through circuits. "This is prose" → use normal MLP.
239
-
240
- **Parameters**: 960 × 64 + 64 + 64 × 2 + 2 = 61,698
241
-
242
- ### 5.4 Total Trainable Parameters
243
-
244
- Per augmented layer:
245
- ```
246
- BitExtractor: 15,376
247
- BitInjector: 16,321
248
- Router: 61,698
249
- OpSelector: ~31,000
250
- ───────────────────────
251
- Total: ~124,395 per layer
252
- ```
253
-
254
- For 11 augmented layers: **~1.37M trainable parameters**
255
-
256
- This is 0.38% of the model. The other 99.62% (including all circuit weights) is frozen.
257
-
258
- ---
259
-
260
- ## 6. Gradient Flow Through Heaviside
261
-
262
- ### 6.1 The Problem
263
-
264
- Heaviside has zero gradient almost everywhere:
265
-
266
- ```
267
- H(x) = 1 if x ≥ 0 else 0
268
- dH/dx = 0 for x ≠ 0, undefined at x = 0
269
- ```
270
-
271
- Standard backprop would give zero gradients to BitExtractor.
272
-
273
- ### 6.2 Straight-Through Estimator (STE)
274
-
275
- We use STE: forward pass uses true Heaviside, backward pass pretends it's identity.
276
-
277
- ```python
278
- class HeavisideSTE(torch.autograd.Function):
279
- @staticmethod
280
- def forward(ctx, x):
281
- return (x >= 0).float() # true step function
282
-
283
- @staticmethod
284
- def backward(ctx, grad_output):
285
- return grad_output # pass gradient through unchanged
286
- ```
287
-
288
- **Intuition**: "If making the input larger would have helped the output, increase the input." The gradient tells us the direction even though the function is flat.
289
-
290
- ### 6.3 Alternative: Sigmoid Annealing
291
-
292
- During training, use sigmoid with increasing temperature:
293
-
294
- ```python
295
- def soft_heaviside(x, temperature):
296
- return torch.sigmoid(x * temperature)
297
-
298
- # temperature: 1 → 10 → 100 over training
299
- # At high temperature, sigmoid ≈ step function
300
- ```
301
-
302
- This provides smoother gradients early in training, then sharpens to true binary at inference.
303
-
304
- ---
305
-
306
- ## 7. Training Strategy
307
-
308
- ### 7.1 Data Generation
309
-
310
- Generate arithmetic problems exhaustively:
311
-
312
- ```python
313
- def generate_batch(batch_size):
314
- a = torch.randint(0, 256, (batch_size,))
315
- b = torch.randint(0, 256, (batch_size,))
316
- result = (a + b) % 256
317
-
318
- prompts = [f"{a[i]} + {b[i]} =" for i in range(batch_size)]
319
- targets = [f" {result[i]}" for i in range(batch_size)]
320
-
321
- return prompts, targets
322
- ```
323
-
324
- For 8-bit addition, there are 256 × 256 = 65,536 unique problems. We can cover the entire space.
325
-
326
- ### 7.2 Loss Function
327
-
328
- Standard cross-entropy on next-token prediction:
329
-
330
- ```python
331
- outputs = model(input_ids, attention_mask=mask, labels=labels)
332
- loss = outputs.loss # CE loss, only on target tokens
333
- ```
334
-
335
- Labels are masked for prompt tokens (`-100`), so loss only backprops through the answer.
336
-
337
- ### 7.3 Optimizer Configuration
338
-
339
- ```python
340
- # Only train interface layers
341
- interface_params = [p for n, p in model.named_parameters()
342
- if any(x in n for x in ['bit_extractor', 'bit_injector', 'router'])]
343
-
344
- optimizer = AdamW(interface_params, lr=1e-4, weight_decay=0.01)
345
- scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
346
- ```
347
-
348
- ### 7.4 Curriculum Learning
349
-
350
- Start simple, increase difficulty:
351
-
352
- ```
353
- Phase 1 (epochs 1-2): Single-digit addition (0-9 + 0-9)
354
- Phase 2 (epochs 3-4): Two-digit addition (0-99 + 0-99)
355
- Phase 3 (epochs 5-7): Full 8-bit addition (0-255 + 0-255)
356
- Phase 4 (epochs 8-10): Adversarial cases (carry chains: 127+128, 255+1)
357
- ```
358
-
359
- This helps the interface layers learn the basic extraction pattern before tackling hard cases.
360
-
361
- ### 7.5 Training Hyperparameters
362
-
363
- ```
364
- Model: SmolLM2-360M
365
- Augmented: Layers 10-20 (11 layers)
366
- Trainable: 1.37M parameters
367
- Frozen: 362M parameters (including 5.6K circuit params)
368
-
369
- Batch size: 32
370
- Learning rate: 1e-4
371
- Epochs: 10
372
- Samples: 10,000 per epoch
373
- Warmup: 500 steps
374
- Device: RTX 6000 Ada (48GB)
375
-
376
- Expected time: ~30 minutes total
377
- ```
378
-
379
- ---
380
-
381
- ## 8. Forward Pass Walkthrough
382
-
383
- Input: `"127 + 128 ="`
384
-
385
- ### 8.1 Tokenization
386
-
387
- ```
388
- Tokens: ["127", " +", " 128", " ="]
389
- IDs: [12700, 489, 13824, 284] # hypothetical
390
- ```
391
-
392
- ### 8.2 Embedding
393
-
394
- ```
395
- embeddings = embed(input_ids) # [1, 4, 960]
396
- ```
397
-
398
- ### 8.3 Layers 0-9 (Unchanged)
399
-
400
- Standard attention + MLP, building representations.
401
-
402
- ### 8.4 Layer 10 (Augmented)
403
-
404
- ```python
405
- # After attention
406
- x = layer_norm(attn_output + residual) # [1, 4, 960]
407
-
408
- # MLP path
409
- mlp_out = down_proj(silu(gate_proj(x)) * up_proj(x))
410
-
411
- # Circuit path
412
- a_bits, b_bits = bit_extractor(x)
413
- # Position 0 ("127"): a_bits ≈ [1,1,1,1,1,1,1,0] if well-trained
414
- # Position 2 ("128"): b_bits ≈ [0,0,0,0,0,0,0,1]
415
- # (In practice, extraction happens per-position; aggregation is learned)
416
-
417
- result_bits, carry = circuits.add_8bit(a_bits, b_bits)
418
- # result_bits = [1,1,1,1,1,1,1,1] = 255
419
-
420
- flags = compute_flags(result_bits, carry)
421
- # zero=0, negative=1, carry=1
422
-
423
- circuit_delta = bit_injector(result_bits, flags) # [1, 4, 960]
424
-
425
- # Routing
426
- route = router(x) # [1, 4, 2]
427
- # Position 3 ("="): route ≈ [0.1, 0.9] → use circuits
428
- # Position 1 ("+"): route ≈ [0.8, 0.2] → mostly MLP
429
-
430
- # Combine
431
- output = mlp_out + route[..., 1:2] * circuit_delta
432
- ```
433
-
434
- ### 8.5 Layers 11-31
435
-
436
- Continue processing, eventually projecting to vocabulary.
437
-
438
- ### 8.6 Output
439
-
440
- ```
441
- logits = lm_head(final_hidden) # [1, 4, 49152]
442
- next_token = argmax(logits[0, 3, :]) # token after "="
443
- # Should decode to "255" (possibly as " 255" or "255")
444
- ```
445
-
446
- ---
447
-
448
- ## 9. Inference Characteristics
449
-
450
- ### 9.1 Exactness
451
-
452
- At inference, Heaviside is true step function—no approximation. If BitExtractor correctly maps "127" → bits and "128" → bits, the circuit **will** output 255. The only failure mode is incorrect extraction.
453
-
454
- ### 9.2 Latency
455
-
456
- Circuit computation adds ~5-10% overhead:
457
- - BitExtractor: 1 linear layer (960→16)
458
- - Circuits: ~32 threshold layers, but sparse and tiny
459
- - BitInjector: 1 linear layer (16→960)
460
- - Router: 2 linear layers
461
-
462
- The circuits have only 5,648 parameters total—negligible versus the 361M in the base model.
463
-
464
- ### 9.3 Generalization
465
-
466
- Once the interface learns the mapping, it generalizes to **all** 65,536 8-bit additions. There's no memorization—the circuits compute.
467
-
468
- ---
469
-
470
- ## 10. Evaluation Metrics
471
-
472
- ### 10.1 Arithmetic Accuracy
473
-
474
- ```python
475
- def eval_accuracy(model, n_problems=1000):
476
- correct = 0
477
- for _ in range(n_problems):
478
- a, b = random 8-bit values
479
- expected = (a + b) % 256
480
- predicted = model.generate(f"{a} + {b} =")
481
- if parse_int(predicted) == expected:
482
- correct += 1
483
- return correct / n_problems
484
- ```
485
-
486
- **Baseline SmolLM2**: ~5-10% (guessing based on patterns)
487
- **Target**: >95% (circuit-accurate)
488
-
489
- ### 10.2 Edge Case Performance
490
-
491
- Specifically test:
492
- - Carry propagation: 127+128, 255+1, 128+128
493
- - Zeros: 0+0, 0+255
494
- - Identity: x+0 for various x
495
- - Commutativity: verify a+b == b+a
496
-
497
- ### 10.3 Non-Arithmetic Preservation
498
-
499
- Verify general capability isn't degraded:
500
- - Perplexity on held-out text
501
- - Common benchmarks (HellaSwag, etc.)
502
-
503
- The augmentation should be **additive**—circuits help arithmetic, MLP handles everything else via routing.
504
-
505
- ---
506
-
507
- ## 11. Extension Roadmap
508
-
509
- ### 11.1 Additional Operations
510
-
511
- The circuit inventory includes:
512
- - Subtraction (via two's complement)
513
- - Multiplication (8×8 → 16-bit)
514
- - Division (iterative subtraction)
515
- - Bitwise ops (AND, OR, XOR, shifts)
516
- - Comparisons (GT, LT, EQ)
517
-
518
- Each needs its own extraction/injection interface, or a unified interface with operation selection.
519
-
520
- ### 11.2 Multi-Operand Expressions
521
-
522
- For "15 + 27 + 33 =", need:
523
- - Operand count detection
524
- - Sequential circuit invocation
525
- - Accumulator pattern
526
-
527
- ### 11.3 Larger Bit Widths
528
-
529
- 16-bit and 32-bit arithmetic require:
530
- - Larger circuits (or chained 8-bit)
531
- - Wider BitExtractor (32 or 64 output dims)
532
- - More training data
533
-
534
- ### 11.4 Symbolic Integration
535
-
536
- Ultimate goal: the model recognizes when it needs to compute, invokes circuits, and integrates results into coherent natural language output.
537
-
538
- ```
539
- User: "If I have 127 apples and buy 128 more, how many do I have?"
540
- Model: [extracts 127, 128] [routes to circuit] [gets 255]
541
- "You would have 255 apples."
542
- ```
543
-
544
- ---
545
-
546
- ## 12. File Structure
547
-
548
- ```
549
- 8bit-threshold-computer/
550
- ├── neural_computer.safetensors # Frozen circuits (3,122 tensors)
551
- ├── circuit_llm.py # Integration architecture
552
- ├── train_circuit_interface.py # Training loop
553
- ├── iron_eval.py # Circuit verification (6,590 tests)
554
- ├── skeptic_test.py # Algebraic identity tests (127 tests)
555
- ├── prune_weights.py # Weight optimization
556
- ├── tensors.txt # Tensor manifest
557
- ├── guide.md # This document
558
- └── README.md # Project overview
559
- ```
560
-
561
- ---
562
-
563
- ## 13. Key Equations
564
-
565
- ### Heaviside Step
566
- ```
567
- H(x) = 1 if x ≥ 0 else 0
568
- ```
569
-
570
- ### Threshold Gate
571
- ```
572
- f(x₁,...,xₙ) = H(Σᵢ wᵢxᵢ + b)
573
- ```
574
-
575
- ### Full Adder
576
- ```
577
- sum = a ⊕ b ⊕ cᵢₙ
578
- cₒᵤₜ = (a ∧ b) ∨ (cᵢₙ ∧ (a ⊕ b))
579
- ```
580
-
581
- ### STE Gradient
582
- ```
583
- Forward: y = H(x)
584
- Backward: ∂L/∂x = ∂L/∂y
585
- ```
586
-
587
- ### Router Combination
588
- ```
589
- output = mlp_out + softmax(router(x))[1] × circuit_delta
590
- ```
591
-
592
- ---
593
-
594
- ## 14. References
595
-
596
- 1. McCulloch & Pitts (1943). "A Logical Calculus of Ideas Immanent in Nervous Activity"
597
- 2. Muroga (1971). "Threshold Logic and Its Applications"
598
- 3. Siegelmann & Sontag (1995). "On the Computational Power of Neural Nets"
599
- 4. Bengio et al. (2013). "Estimating or Propagating Gradients Through Stochastic Neurons"
600
- 5. Ma et al. (2024). "The Era of 1-bit LLMs" (BitNet b1.58)
601
- 6. HuggingFace (2024). "SmolLM2: Small Language Models"
602
-
603
- ---
604
-
605
- ## 15. Summary
606
-
607
- We embed a proven-correct 8-bit threshold logic computer into SmolLM2's MLP layers. The circuits are frozen; we train only the interface layers that learn call dispatch. This gives the LLM exact arithmetic capability without training it to "do math"—the math is already done.
608
-
609
- The approach is:
610
- - **Sound**: Circuits verified with 6,590 tests
611
- - **Efficient**: 1.37M trainable params, 5.6K circuit params
612
- - **Exact**: Heaviside at inference means no approximation error
613
- - **Composable**: Add more circuits (multiply, compare, etc.) with same pattern
614
-
615
- The model learns when to call the calculator, not how to calculate.
 
1
+ # Embedding Threshold Logic Circuits into Transformer MLPs
2
+
3
+ ## Technical Implementation Guide
4
+
5
+ ---
6
+
7
+ ## 1. Core Thesis
8
+
9
+ Standard LLMs fail at arithmetic because they're interpolators—they approximate functions over training distributions rather than compute exact results. A 360M parameter model trained on internet text has seen "127 + 128 = 255" zero or few times, so it guesses "140" based on pattern matching.
10
+
11
+ We solve this by embedding **frozen, proven-correct arithmetic circuits** directly into the transformer's MLP layers. The circuits use threshold logic (weighted sums + step activation), which is structurally compatible with neural network layers. We train only the **interface layers** that learn to:
12
+
13
+ 1. Extract operands from token embeddings
14
+ 2. Route computation through the circuits
15
+ 3. Inject results back into the residual stream
16
+
17
+ The model learns **call dispatch**, not arithmetic. The arithmetic is already solved.
18
+
19
+ ---
20
+
21
+ ## 2. Threshold Logic Fundamentals
22
+
23
+ ### 2.1 Single Threshold Gate
24
+
25
+ A threshold gate computes:
26
+
27
+ ```
28
+ output = 1 if (Σ wᵢxᵢ + b) ≥ 0
29
+ 0 otherwise
30
+ ```
31
+
32
+ This is a neuron with Heaviside step activation. With integer weights `w` and bias `b`, it computes a Boolean function of binary inputs.
33
+
34
+ **Example: AND gate**
35
+ ```
36
+ w = [1, 1], b = -2
37
+ AND(0,0) = H(0 + 0 - 2) = H(-2) = 0
38
+ AND(0,1) = H(0 + 1 - 2) = H(-1) = 0
39
+ AND(1,0) = H(1 + 0 - 2) = H(-1) = 0
40
+ AND(1,1) = H(1 + 1 - 2) = H(0) = 1
41
+ ```
42
+
43
+ **Example: OR gate**
44
+ ```
45
+ w = [1, 1], b = -1
46
+ OR(0,0) = H(0 + 0 - 1) = H(-1) = 0
47
+ OR(0,1) = H(0 + 1 - 1) = H(0) = 1
48
+ OR(1,0) = H(1 + 0 - 1) = H(0) = 1
49
+ OR(1,1) = H(1 + 1 - 1) = H(1) = 1
50
+ ```
51
+
52
+ ### 2.2 Multi-Layer Circuits
53
+
54
+ XOR is not linearly separable—it requires two layers:
55
+
56
+ ```
57
+ Layer 1:
58
+ neuron1 (OR): w=[1,1], b=-1 → fires if a OR b
59
+ neuron2 (NAND): w=[-1,-1], b=1 → fires if NOT(a AND b)
60
+
61
+ Layer 2:
62
+ neuron3 (AND): w=[1,1], b=-2 → fires if both layer1 outputs are 1
63
+
64
+ XOR(a,b) = AND(OR(a,b), NAND(a,b))
65
+ ```
66
+
67
+ ### 2.3 Full Adder
68
+
69
+ A full adder computes `sum` and `carry_out` from inputs `a`, `b`, `carry_in`:
70
+
71
+ ```
72
+ sum = a XOR b XOR cin
73
+ cout = (a AND b) OR (cin AND (a XOR b))
74
+ ```
75
+
76
+ Implementation uses two half-adders chained:
77
+
78
+ ```
79
+ HA1: (a, b) → (sum1 = a XOR b, carry1 = a AND b)
80
+ HA2: (sum1, cin) → (sum2 = sum1 XOR cin, carry2 = sum1 AND cin)
81
+ cout = carry1 OR carry2
82
+ final_sum = sum2
83
+ ```
84
+
85
+ Each XOR is 2 layers, each AND/OR is 1 layer. Total depth: ~4 layers per full adder.
86
+
87
+ ### 2.4 8-bit Ripple Carry Adder
88
+
89
+ Chain 8 full adders, propagating carry:
90
+
91
+ ```
92
+ FA0: (a[0], b[0], 0) → (sum[0], c0)
93
+ FA1: (a[1], b[1], c0) → (sum[1], c1)
94
+ FA2: (a[2], b[2], c1) → (sum[2], c2)
95
+ ...
96
+ FA7: (a[7], b[7], c6) → (sum[7], c7)
97
+ ```
98
+
99
+ Total circuit depth: ~32 threshold layers (8 FAs × 4 layers each).
100
+
101
+ ---
102
+
103
+ ## 3. Circuit Inventory
104
+
105
+ The `neural_computer.safetensors` contains 3,122 tensors / 5,648 parameters implementing:
106
+
107
+ | Category | Circuits | Tensors |
108
+ |----------|----------|---------|
109
+ | Boolean | AND, OR, NOT, NAND, NOR, XOR, XNOR, IMPLIES, BIIMPLIES | ~30 |
110
+ | Arithmetic | Half adder, Full adder, Ripple carry 2/4/8-bit, 8×8 multiplier | ~800 |
111
+ | Comparators | GT, LT, GEQ, LEQ, EQ (8-bit) | ~50 |
112
+ | ALU | 16-operation ALU, opcode decoder, flag computation | ~400 |
113
+ | Control | JMP, JZ, JNZ, JC, JNC, JN, JP, CALL, RET, PUSH, POP | ~200 |
114
+ | Modular | Divisibility by 2-12 | ~600 |
115
+ | Error Detection | Parity, CRC, Hamming, checksum | ~200 |
116
+ | Pattern | Popcount, leading zeros, symmetry | ~150 |
117
+ | Threshold | k-of-n gates, majority, minority | ~100 |
118
+
119
+ All weights are integers. All activations are Heaviside. Verified with 6,590 exhaustive tests.
120
+
121
+ ---
122
+
123
+ ## 4. Transformer Integration Architecture
124
+
125
+ ### 4.1 Target: SmolLM2-360M
126
+
127
+ ```
128
+ Architecture: LlamaForCausalLM
129
+ Hidden dim: 960
130
+ Layers: 32
131
+ Heads: 15
132
+ MLP expansion: 4x (intermediate = 3840)
133
+ Vocab: 49152
134
+ Parameters: 361,821,120
135
+ ```
136
+
137
+ Standard MLP block:
138
+ ```python
139
+ def forward(x): # x: [batch, seq, 960]
140
+ gate = self.gate_proj(x) # [batch, seq, 3840]
141
+ up = self.up_proj(x) # [batch, seq, 3840]
142
+ hidden = silu(gate) * up # SwiGLU activation
143
+ return self.down_proj(hidden) # [batch, seq, 960]
144
+ ```
145
+
146
+ ### 4.2 Augmented MLP Block
147
+
148
+ ```python
149
+ def forward(x): # x: [batch, seq, 960]
150
+ # Original MLP path (unchanged)
151
+ mlp_out = self.down_proj(silu(self.gate_proj(x)) * self.up_proj(x))
152
+
153
+ # Circuit path (new)
154
+ a_bits, b_bits = self.bit_extractor(x) # [batch, seq, 8] each
155
+ result_bits, carry = self.circuits.add_8bit(a_bits, b_bits)
156
+ flags = self.compute_flags(result_bits, carry)
157
+ circuit_delta = self.bit_injector(result_bits, flags)
158
+
159
+ # Routing
160
+ route_weights = self.router(x) # [batch, seq, 2] softmax
161
+
162
+ # Combine
163
+ return mlp_out + route_weights[..., 1:2] * circuit_delta
164
+ ```
165
+
166
+ ### 4.3 Layer Selection
167
+
168
+ We augment the **middle third** of layers (10-20 of 32):
169
+
170
+ - Early layers (0-9): Token/position encoding, not arithmetic-relevant
171
+ - Middle layers (10-20): Abstract reasoning, computation
172
+ - Late layers (21-31): Output formatting, vocabulary projection
173
+
174
+ Rationale: Arithmetic computation happens in middle layers where the model processes relationships between tokens. Early layers haven't built sufficient representations; late layers are committed to output tokens.
175
+
176
+ ---
177
+
178
+ ## 5. Interface Layers (Trainable)
179
+
180
+ ### 5.1 BitExtractor
181
+
182
+ Maps token embedding → two 8-bit operands.
183
+
184
+ ```python
185
+ class BitExtractor(nn.Module):
186
+ def __init__(self, d_model=960):
187
+ self.proj = nn.Linear(d_model, 16) # 960 → 16
188
+
189
+ def forward(self, x):
190
+ logits = self.proj(x) # [batch, seq, 16]
191
+ bits = heaviside(logits) # binarize with STE
192
+ a_bits = bits[..., :8] # first operand
193
+ b_bits = bits[..., 8:] # second operand
194
+ return a_bits, b_bits # both [batch, seq, 8], LSB first
195
+ ```
196
+
197
+ **What it learns**: Which embedding dimensions encode numeric magnitude. For token "127", it must learn that certain activation patterns correspond to bits `[1,1,1,1,1,1,1,0]`.
198
+
199
+ **Parameters**: 960 × 16 + 16 = 15,376
200
+
201
+ ### 5.2 BitInjector
202
+
203
+ Maps circuit outputs → embedding delta.
204
+
205
+ ```python
206
+ class BitInjector(nn.Module):
207
+ def __init__(self, d_model=960):
208
+ self.proj = nn.Linear(16, d_model) # 16 → 960
209
+ self.scale = nn.Parameter(torch.tensor(0.1))
210
+
211
+ def forward(self, result_bits, flags):
212
+ combined = torch.cat([result_bits, flags], dim=-1) # [batch, seq, 16]
213
+ return self.proj(combined) * self.scale # [batch, seq, 960]
214
+ ```
215
+
216
+ **What it learns**: How to inject the result bits back into embedding space such that subsequent layers (and the final vocabulary projection) produce the correct output tokens.
217
+
218
+ **Parameters**: 16 × 960 + 960 + 1 = 16,321
219
+
220
+ ### 5.3 Router
221
+
222
+ Decides when to use circuit path.
223
+
224
+ ```python
225
+ class Router(nn.Module):
226
+ def __init__(self, d_model=960):
227
+ self.net = nn.Sequential(
228
+ nn.Linear(d_model, 64),
229
+ nn.ReLU(),
230
+ nn.Linear(64, 2),
231
+ nn.Softmax(dim=-1)
232
+ )
233
+
234
+ def forward(self, x):
235
+ return self.net(x) # [batch, seq, 2]: [mlp_weight, circuit_weight]
236
+ ```
237
+
238
+ **What it learns**: "This position contains arithmetic" → route through circuits. "This is prose" → use normal MLP.
239
+
240
+ **Parameters**: 960 × 64 + 64 + 64 × 2 + 2 = 61,698
241
+
242
+ ### 5.4 Total Trainable Parameters
243
+
244
+ Per augmented layer:
245
+ ```
246
+ BitExtractor: 15,376
247
+ BitInjector: 16,321
248
+ Router: 61,698
249
+ OpSelector: ~31,000
250
+ ───────────────────────
251
+ Total: ~124,395 per layer
252
+ ```
253
+
254
+ For 11 augmented layers: **~1.37M trainable parameters**
255
+
256
+ This is 0.38% of the model. The other 99.62% (including all circuit weights) is frozen.
257
+
258
+ ---
259
+
260
+ ## 6. Gradient Flow Through Heaviside
261
+
262
+ ### 6.1 The Problem
263
+
264
+ Heaviside has zero gradient almost everywhere:
265
+
266
+ ```
267
+ H(x) = 1 if x ≥ 0 else 0
268
+ dH/dx = 0 for x ≠ 0, undefined at x = 0
269
+ ```
270
+
271
+ Standard backprop would give zero gradients to BitExtractor.
272
+
273
+ ### 6.2 Straight-Through Estimator (STE)
274
+
275
+ We use STE: forward pass uses true Heaviside, backward pass pretends it's identity.
276
+
277
+ ```python
278
+ class HeavisideSTE(torch.autograd.Function):
279
+ @staticmethod
280
+ def forward(ctx, x):
281
+ return (x >= 0).float() # true step function
282
+
283
+ @staticmethod
284
+ def backward(ctx, grad_output):
285
+ return grad_output # pass gradient through unchanged
286
+ ```
287
+
288
+ **Intuition**: "If making the input larger would have helped the output, increase the input." The gradient tells us the direction even though the function is flat.
289
+
290
+ ### 6.3 Alternative: Sigmoid Annealing
291
+
292
+ During training, use sigmoid with increasing temperature:
293
+
294
+ ```python
295
+ def soft_heaviside(x, temperature):
296
+ return torch.sigmoid(x * temperature)
297
+
298
+ # temperature: 1 → 10 → 100 over training
299
+ # At high temperature, sigmoid ≈ step function
300
+ ```
301
+
302
+ This provides smoother gradients early in training, then sharpens to true binary at inference.
303
+
304
+ ---
305
+
306
+ ## 7. Training Strategy
307
+
308
+ ### 7.1 Data Generation
309
+
310
+ Generate arithmetic problems exhaustively:
311
+
312
+ ```python
313
+ def generate_batch(batch_size):
314
+ a = torch.randint(0, 256, (batch_size,))
315
+ b = torch.randint(0, 256, (batch_size,))
316
+ result = (a + b) % 256
317
+
318
+ prompts = [f"{a[i]} + {b[i]} =" for i in range(batch_size)]
319
+ targets = [f" {result[i]}" for i in range(batch_size)]
320
+
321
+ return prompts, targets
322
+ ```
323
+
324
+ For 8-bit addition, there are 256 × 256 = 65,536 unique problems. We can cover the entire space.
325
+
326
+ ### 7.2 Loss Function
327
+
328
+ Standard cross-entropy on next-token prediction:
329
+
330
+ ```python
331
+ outputs = model(input_ids, attention_mask=mask, labels=labels)
332
+ loss = outputs.loss # CE loss, only on target tokens
333
+ ```
334
+
335
+ Labels are masked for prompt tokens (`-100`), so loss only backprops through the answer.
336
+
337
+ ### 7.3 Optimizer Configuration
338
+
339
+ ```python
340
+ # Only train interface layers
341
+ interface_params = [p for n, p in model.named_parameters()
342
+ if any(x in n for x in ['bit_extractor', 'bit_injector', 'router'])]
343
+
344
+ optimizer = AdamW(interface_params, lr=1e-4, weight_decay=0.01)
345
+ scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
346
+ ```
347
+
348
+ ### 7.4 Curriculum Learning
349
+
350
+ Start simple, increase difficulty:
351
+
352
+ ```
353
+ Phase 1 (epochs 1-2): Single-digit addition (0-9 + 0-9)
354
+ Phase 2 (epochs 3-4): Two-digit addition (0-99 + 0-99)
355
+ Phase 3 (epochs 5-7): Full 8-bit addition (0-255 + 0-255)
356
+ Phase 4 (epochs 8-10): Adversarial cases (carry chains: 127+128, 255+1)
357
+ ```
358
+
359
+ This helps the interface layers learn the basic extraction pattern before tackling hard cases.
360
+
361
+ ### 7.5 Training Hyperparameters
362
+
363
+ ```
364
+ Model: SmolLM2-360M
365
+ Augmented: Layers 10-20 (11 layers)
366
+ Trainable: 1.37M parameters
367
+ Frozen: 362M parameters (including 5.6K circuit params)
368
+
369
+ Batch size: 32
370
+ Learning rate: 1e-4
371
+ Epochs: 10
372
+ Samples: 10,000 per epoch
373
+ Warmup: 500 steps
374
+ Device: RTX 6000 Ada (48GB)
375
+
376
+ Expected time: ~30 minutes total
377
+ ```
378
+
379
+ ---
380
+
381
+ ## 8. Forward Pass Walkthrough
382
+
383
+ Input: `"127 + 128 ="`
384
+
385
+ ### 8.1 Tokenization
386
+
387
+ ```
388
+ Tokens: ["127", " +", " 128", " ="]
389
+ IDs: [12700, 489, 13824, 284] # hypothetical
390
+ ```
391
+
392
+ ### 8.2 Embedding
393
+
394
+ ```
395
+ embeddings = embed(input_ids) # [1, 4, 960]
396
+ ```
397
+
398
+ ### 8.3 Layers 0-9 (Unchanged)
399
+
400
+ Standard attention + MLP, building representations.
401
+
402
+ ### 8.4 Layer 10 (Augmented)
403
+
404
+ ```python
405
+ # After attention
406
+ x = layer_norm(attn_output + residual) # [1, 4, 960]
407
+
408
+ # MLP path
409
+ mlp_out = down_proj(silu(gate_proj(x)) * up_proj(x))
410
+
411
+ # Circuit path
412
+ a_bits, b_bits = bit_extractor(x)
413
+ # Position 0 ("127"): a_bits ≈ [1,1,1,1,1,1,1,0] if well-trained
414
+ # Position 2 ("128"): b_bits ≈ [0,0,0,0,0,0,0,1]
415
+ # (In practice, extraction happens per-position; aggregation is learned)
416
+
417
+ result_bits, carry = circuits.add_8bit(a_bits, b_bits)
418
+ # result_bits = [1,1,1,1,1,1,1,1] = 255
419
+
420
+ flags = compute_flags(result_bits, carry)
421
+ # zero=0, negative=1, carry=1
422
+
423
+ circuit_delta = bit_injector(result_bits, flags) # [1, 4, 960]
424
+
425
+ # Routing
426
+ route = router(x) # [1, 4, 2]
427
+ # Position 3 ("="): route ≈ [0.1, 0.9] → use circuits
428
+ # Position 1 ("+"): route ≈ [0.8, 0.2] → mostly MLP
429
+
430
+ # Combine
431
+ output = mlp_out + route[..., 1:2] * circuit_delta
432
+ ```
433
+
434
+ ### 8.5 Layers 11-31
435
+
436
+ Continue processing, eventually projecting to vocabulary.
437
+
438
+ ### 8.6 Output
439
+
440
+ ```
441
+ logits = lm_head(final_hidden) # [1, 4, 49152]
442
+ next_token = argmax(logits[0, 3, :]) # token after "="
443
+ # Should decode to "255" (possibly as " 255" or "255")
444
+ ```
445
+
446
+ ---
447
+
448
+ ## 9. Inference Characteristics
449
+
450
+ ### 9.1 Exactness
451
+
452
+ At inference, Heaviside is true step function—no approximation. If BitExtractor correctly maps "127" → bits and "128" → bits, the circuit **will** output 255. The only failure mode is incorrect extraction.
453
+
454
+ ### 9.2 Latency
455
+
456
+ Circuit computation adds ~5-10% overhead:
457
+ - BitExtractor: 1 linear layer (960→16)
458
+ - Circuits: ~32 threshold layers, but sparse and tiny
459
+ - BitInjector: 1 linear layer (16→960)
460
+ - Router: 2 linear layers
461
+
462
+ The circuits have only 5,648 parameters total—negligible versus the 361M in the base model.
463
+
464
+ ### 9.3 Generalization
465
+
466
+ Once the interface learns the mapping, it generalizes to **all** 65,536 8-bit additions. There's no memorization—the circuits compute.
467
+
468
+ ---
469
+
470
+ ## 10. Evaluation Metrics
471
+
472
+ ### 10.1 Arithmetic Accuracy
473
+
474
+ ```python
475
+ def eval_accuracy(model, n_problems=1000):
476
+ correct = 0
477
+ for _ in range(n_problems):
478
+ a, b = random 8-bit values
479
+ expected = (a + b) % 256
480
+ predicted = model.generate(f"{a} + {b} =")
481
+ if parse_int(predicted) == expected:
482
+ correct += 1
483
+ return correct / n_problems
484
+ ```
485
+
486
+ **Baseline SmolLM2**: ~5-10% (guessing based on patterns)
487
+ **Target**: >95% (circuit-accurate)
488
+
489
+ ### 10.2 Edge Case Performance
490
+
491
+ Specifically test:
492
+ - Carry propagation: 127+128, 255+1, 128+128
493
+ - Zeros: 0+0, 0+255
494
+ - Identity: x+0 for various x
495
+ - Commutativity: verify a+b == b+a
496
+
497
+ ### 10.3 Non-Arithmetic Preservation
498
+
499
+ Verify general capability isn't degraded:
500
+ - Perplexity on held-out text
501
+ - Common benchmarks (HellaSwag, etc.)
502
+
503
+ The augmentation should be **additive**—circuits help arithmetic, MLP handles everything else via routing.
504
+
505
+ ---
506
+
507
+ ## 11. Extension Roadmap
508
+
509
+ ### 11.1 Additional Operations
510
+
511
+ The circuit inventory includes:
512
+ - Subtraction (via two's complement)
513
+ - Multiplication (8×8 → 16-bit)
514
+ - Division (iterative subtraction)
515
+ - Bitwise ops (AND, OR, XOR, shifts)
516
+ - Comparisons (GT, LT, EQ)
517
+
518
+ Each needs its own extraction/injection interface, or a unified interface with operation selection.
519
+
520
+ ### 11.2 Multi-Operand Expressions
521
+
522
+ For "15 + 27 + 33 =", need:
523
+ - Operand count detection
524
+ - Sequential circuit invocation
525
+ - Accumulator pattern
526
+
527
+ ### 11.3 Larger Bit Widths
528
+
529
+ 16-bit and 32-bit arithmetic require:
530
+ - Larger circuits (or chained 8-bit)
531
+ - Wider BitExtractor (32 or 64 output dims)
532
+ - More training data
533
+
534
+ ### 11.4 Symbolic Integration
535
+
536
+ Ultimate goal: the model recognizes when it needs to compute, invokes circuits, and integrates results into coherent natural language output.
537
+
538
+ ```
539
+ User: "If I have 127 apples and buy 128 more, how many do I have?"
540
+ Model: [extracts 127, 128] [routes to circuit] [gets 255]
541
+ "You would have 255 apples."
542
+ ```
543
+
544
+ ---
545
+
546
+ ## 12. File Structure
547
+
548
+ ```
549
+ 8bit-threshold-computer/
550
+ ├── neural_computer.safetensors # Frozen circuits (3,122 tensors)
551
+ ├── circuit_llm.py # Integration architecture
552
+ ├── train_circuit_interface.py # Training loop
553
+ ├── iron_eval.py # Circuit verification (6,590 tests)
554
+ ├── skeptic_test.py # Algebraic identity tests (127 tests)
555
+ ├── prune_weights.py # Weight optimization
556
+ ├── tensors.txt # Tensor manifest
557
+ ├── guide.md # This document
558
+ └── README.md # Project overview
559
+ ```
560
+
561
+ ---
562
+
563
+ ## 13. Key Equations
564
+
565
+ ### Heaviside Step
566
+ ```
567
+ H(x) = 1 if x ≥ 0 else 0
568
+ ```
569
+
570
+ ### Threshold Gate
571
+ ```
572
+ f(x₁,...,xₙ) = H(Σᵢ wᵢxᵢ + b)
573
+ ```
574
+
575
+ ### Full Adder
576
+ ```
577
+ sum = a ⊕ b ⊕ cᵢₙ
578
+ cₒᵤₜ = (a ∧ b) ∨ (cᵢₙ ∧ (a ⊕ b))
579
+ ```
580
+
581
+ ### STE Gradient
582
+ ```
583
+ Forward: y = H(x)
584
+ Backward: ∂L/∂x = ∂L/∂y
585
+ ```
586
+
587
+ ### Router Combination
588
+ ```
589
+ output = mlp_out + softmax(router(x))[1] × circuit_delta
590
+ ```
591
+
592
+ ---
593
+
594
+ ## 14. References
595
+
596
+ 1. McCulloch & Pitts (1943). "A Logical Calculus of Ideas Immanent in Nervous Activity"
597
+ 2. Muroga (1971). "Threshold Logic and Its Applications"
598
+ 3. Siegelmann & Sontag (1995). "On the Computational Power of Neural Nets"
599
+ 4. Bengio et al. (2013). "Estimating or Propagating Gradients Through Stochastic Neurons"
600
+ 5. Ma et al. (2024). "The Era of 1-bit LLMs" (BitNet b1.58)
601
+ 6. HuggingFace (2024). "SmolLM2: Small Language Models"
602
+
603
+ ---
604
+
605
+ ## 15. Summary
606
+
607
+ We embed a proven-correct 8-bit threshold logic computer into SmolLM2's MLP layers. The circuits are frozen; we train only the interface layers that learn call dispatch. This gives the LLM exact arithmetic capability without training it to "do math"—the math is already done.
608
+
609
+ The approach is:
610
+ - **Sound**: Circuits verified with 6,590 tests
611
+ - **Efficient**: 1.37M trainable params, 5.6K circuit params
612
+ - **Exact**: Heaviside at inference means no approximation error
613
+ - **Composable**: Add more circuits (multiply, compare, etc.) with same pattern
614
+
615
+ The model learns when to call the calculator, not how to calculate.
llm/train_circuit_interface.py CHANGED
@@ -1,306 +1,306 @@
1
- """
2
- Train the circuit interface layers on arithmetic examples.
3
- ============================================================
4
-
5
- The threshold circuits are frozen - we only train:
6
- - BitExtractor: embedding -> operand bits
7
- - BitInjector: result bits -> embedding
8
- - Router: when to use circuits vs MLP
9
- """
10
-
11
- import torch
12
- import torch.nn as nn
13
- from torch.utils.data import Dataset, DataLoader
14
- from transformers import AutoModelForCausalLM, AutoTokenizer
15
- from tqdm import tqdm
16
- import argparse
17
- import warnings
18
- warnings.filterwarnings('ignore')
19
-
20
- from circuit_llm import (
21
- augment_smollm2_with_circuits,
22
- evaluate_arithmetic,
23
- CircuitExecutor
24
- )
25
-
26
-
27
- # =============================================================================
28
- # ARITHMETIC DATASET
29
- # =============================================================================
30
-
31
- class ArithmeticDataset(Dataset):
32
- """Dataset of 8-bit addition problems."""
33
-
34
- def __init__(self, tokenizer, n_samples: int = 10000, max_val: int = 255):
35
- self.tokenizer = tokenizer
36
- self.n_samples = n_samples
37
- self.max_val = max_val
38
-
39
- # Pre-generate all examples
40
- self.examples = []
41
- for _ in range(n_samples):
42
- a = torch.randint(0, max_val + 1, (1,)).item()
43
- b = torch.randint(0, max_val + 1, (1,)).item()
44
- result = (a + b) % 256
45
-
46
- prompt = f"{a} + {b} ="
47
- target = f" {result}"
48
-
49
- self.examples.append((prompt, target, a, b, result))
50
-
51
- def __len__(self):
52
- return len(self.examples)
53
-
54
- def __getitem__(self, idx):
55
- prompt, target, a, b, result = self.examples[idx]
56
-
57
- # Tokenize
58
- prompt_ids = self.tokenizer.encode(prompt, add_special_tokens=False)
59
- target_ids = self.tokenizer.encode(target, add_special_tokens=False)
60
-
61
- input_ids = prompt_ids + target_ids
62
- labels = [-100] * len(prompt_ids) + target_ids # Only predict target
63
-
64
- return {
65
- 'input_ids': torch.tensor(input_ids),
66
- 'labels': torch.tensor(labels),
67
- 'a': a,
68
- 'b': b,
69
- 'result': result
70
- }
71
-
72
-
73
- def collate_fn(batch):
74
- """Collate with padding."""
75
- max_len = max(len(item['input_ids']) for item in batch)
76
-
77
- input_ids = []
78
- labels = []
79
- attention_mask = []
80
-
81
- for item in batch:
82
- pad_len = max_len - len(item['input_ids'])
83
-
84
- input_ids.append(
85
- torch.cat([item['input_ids'], torch.zeros(pad_len, dtype=torch.long)])
86
- )
87
- labels.append(
88
- torch.cat([item['labels'], torch.full((pad_len,), -100, dtype=torch.long)])
89
- )
90
- attention_mask.append(
91
- torch.cat([torch.ones(len(item['input_ids'])), torch.zeros(pad_len)])
92
- )
93
-
94
- return {
95
- 'input_ids': torch.stack(input_ids),
96
- 'labels': torch.stack(labels),
97
- 'attention_mask': torch.stack(attention_mask),
98
- }
99
-
100
-
101
- # =============================================================================
102
- # TRAINING LOOP
103
- # =============================================================================
104
-
105
- def train_interface(
106
- model: AutoModelForCausalLM,
107
- tokenizer: AutoTokenizer,
108
- n_epochs: int = 3,
109
- batch_size: int = 16,
110
- lr: float = 1e-4,
111
- n_train_samples: int = 10000,
112
- device: str = 'cpu',
113
- eval_every: int = 500
114
- ):
115
- """
116
- Train the circuit interface layers.
117
-
118
- Only trains:
119
- - bit_extractor (embedding -> bits)
120
- - bit_injector (bits -> embedding)
121
- - router (circuit vs MLP weighting)
122
- - op_selector (which operation)
123
- """
124
- print("\n" + "=" * 70)
125
- print(" TRAINING CIRCUIT INTERFACE")
126
- print("=" * 70)
127
-
128
- # Freeze everything except interface layers
129
- interface_params = []
130
- frozen_count = 0
131
- trainable_count = 0
132
-
133
- for name, param in model.named_parameters():
134
- if any(x in name for x in ['bit_extractor', 'bit_injector', 'router', 'op_selector']):
135
- param.requires_grad = True
136
- interface_params.append(param)
137
- trainable_count += param.numel()
138
- else:
139
- param.requires_grad = False
140
- frozen_count += param.numel()
141
-
142
- print(f"\n Frozen parameters: {frozen_count:,}")
143
- print(f" Trainable parameters: {trainable_count:,}")
144
- print(f" Training {len(interface_params)} parameter groups")
145
-
146
- # Create dataset
147
- print(f"\n Creating dataset ({n_train_samples} examples)...")
148
- dataset = ArithmeticDataset(tokenizer, n_samples=n_train_samples)
149
- dataloader = DataLoader(
150
- dataset,
151
- batch_size=batch_size,
152
- shuffle=True,
153
- collate_fn=collate_fn
154
- )
155
-
156
- # Optimizer
157
- optimizer = torch.optim.AdamW(interface_params, lr=lr)
158
-
159
- # Training
160
- model.to(device)
161
- model.train()
162
-
163
- global_step = 0
164
- total_loss = 0
165
-
166
- for epoch in range(n_epochs):
167
- print(f"\n Epoch {epoch + 1}/{n_epochs}")
168
- print(" " + "-" * 60)
169
-
170
- epoch_loss = 0
171
- epoch_steps = 0
172
-
173
- pbar = tqdm(dataloader, desc=f" Training", leave=False)
174
-
175
- for batch in pbar:
176
- input_ids = batch['input_ids'].to(device)
177
- labels = batch['labels'].to(device)
178
- attention_mask = batch['attention_mask'].to(device)
179
-
180
- # Forward
181
- outputs = model(
182
- input_ids=input_ids,
183
- attention_mask=attention_mask,
184
- labels=labels
185
- )
186
-
187
- loss = outputs.loss
188
-
189
- # Backward
190
- optimizer.zero_grad()
191
- loss.backward()
192
- optimizer.step()
193
-
194
- # Logging
195
- epoch_loss += loss.item()
196
- epoch_steps += 1
197
- global_step += 1
198
- total_loss += loss.item()
199
-
200
- pbar.set_postfix({'loss': f'{loss.item():.4f}'})
201
-
202
- # Periodic evaluation
203
- if global_step % eval_every == 0:
204
- model.eval()
205
- eval_results = evaluate_arithmetic(model, tokenizer, n_problems=50, device=device)
206
- print(f"\n Step {global_step}: Loss={total_loss/eval_every:.4f}, "
207
- f"Accuracy={eval_results['accuracy']*100:.1f}%")
208
- total_loss = 0
209
- model.train()
210
-
211
- avg_loss = epoch_loss / epoch_steps
212
- print(f"\n Epoch {epoch + 1} complete. Avg loss: {avg_loss:.4f}")
213
-
214
- # End of epoch evaluation
215
- model.eval()
216
- eval_results = evaluate_arithmetic(model, tokenizer, n_problems=100, device=device)
217
- print(f" Evaluation: {eval_results['accuracy']*100:.1f}% "
218
- f"({eval_results['correct']}/{eval_results['total']})")
219
-
220
- if eval_results['errors']:
221
- print(f" Sample errors:")
222
- for a, b, exp, got in eval_results['errors'][:3]:
223
- print(f" {a} + {b} = {exp}, model said {got}")
224
-
225
- model.train()
226
-
227
- print("\n" + "=" * 70)
228
- print(" TRAINING COMPLETE")
229
- print("=" * 70)
230
-
231
- return model
232
-
233
-
234
- # =============================================================================
235
- # MAIN
236
- # =============================================================================
237
-
238
- if __name__ == "__main__":
239
- parser = argparse.ArgumentParser(description='Train Circuit Interface')
240
- parser.add_argument('--circuit-path', type=str,
241
- default='./neural_computer.safetensors',
242
- help='Path to circuit weights')
243
- parser.add_argument('--device', type=str, default='cpu',
244
- help='Device (cpu or cuda)')
245
- parser.add_argument('--epochs', type=int, default=3,
246
- help='Number of epochs')
247
- parser.add_argument('--batch-size', type=int, default=8,
248
- help='Batch size')
249
- parser.add_argument('--lr', type=float, default=1e-4,
250
- help='Learning rate')
251
- parser.add_argument('--n-samples', type=int, default=5000,
252
- help='Number of training samples')
253
- args = parser.parse_args()
254
-
255
- print("=" * 70)
256
- print(" CIRCUIT-AUGMENTED LLM TRAINING")
257
- print("=" * 70)
258
-
259
- # Load model
260
- print("\n[1] Loading SmolLM2-360M...")
261
- model_id = "HuggingFaceTB/SmolLM2-360M"
262
- tokenizer = AutoTokenizer.from_pretrained(model_id)
263
- tokenizer.pad_token = tokenizer.eos_token
264
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
265
-
266
- # Baseline
267
- print("\n[2] Baseline evaluation...")
268
- baseline = evaluate_arithmetic(model, tokenizer, n_problems=50, device=args.device)
269
- print(f" Baseline accuracy: {baseline['accuracy']*100:.1f}%")
270
-
271
- # Augment
272
- print("\n[3] Augmenting with circuits...")
273
- model = augment_smollm2_with_circuits(
274
- model,
275
- args.circuit_path,
276
- device=args.device
277
- )
278
-
279
- # Train
280
- print("\n[4] Training interface layers...")
281
- model = train_interface(
282
- model,
283
- tokenizer,
284
- n_epochs=args.epochs,
285
- batch_size=args.batch_size,
286
- lr=args.lr,
287
- n_train_samples=args.n_samples,
288
- device=args.device
289
- )
290
-
291
- # Final evaluation
292
- print("\n[5] Final evaluation...")
293
- final = evaluate_arithmetic(model, tokenizer, n_problems=100, device=args.device)
294
- print(f" Final accuracy: {final['accuracy']*100:.1f}%")
295
- print(f" Improvement: {baseline['accuracy']*100:.1f}% -> {final['accuracy']*100:.1f}%")
296
-
297
- # Save
298
- save_path = './circuit_augmented_smollm2.pt'
299
- print(f"\n[6] Saving to {save_path}...")
300
- torch.save({
301
- 'model_state_dict': model.state_dict(),
302
- 'baseline_accuracy': baseline['accuracy'],
303
- 'final_accuracy': final['accuracy']
304
- }, save_path)
305
-
306
- print("\nDone!")
 
1
+ """
2
+ Train the circuit interface layers on arithmetic examples.
3
+ ============================================================
4
+
5
+ The threshold circuits are frozen - we only train:
6
+ - BitExtractor: embedding -> operand bits
7
+ - BitInjector: result bits -> embedding
8
+ - Router: when to use circuits vs MLP
9
+ """
10
+
11
+ import torch
12
+ import torch.nn as nn
13
+ from torch.utils.data import Dataset, DataLoader
14
+ from transformers import AutoModelForCausalLM, AutoTokenizer
15
+ from tqdm import tqdm
16
+ import argparse
17
+ import warnings
18
+ warnings.filterwarnings('ignore')
19
+
20
+ from circuit_llm import (
21
+ augment_smollm2_with_circuits,
22
+ evaluate_arithmetic,
23
+ CircuitExecutor
24
+ )
25
+
26
+
27
+ # =============================================================================
28
+ # ARITHMETIC DATASET
29
+ # =============================================================================
30
+
31
+ class ArithmeticDataset(Dataset):
32
+ """Dataset of 8-bit addition problems."""
33
+
34
+ def __init__(self, tokenizer, n_samples: int = 10000, max_val: int = 255):
35
+ self.tokenizer = tokenizer
36
+ self.n_samples = n_samples
37
+ self.max_val = max_val
38
+
39
+ # Pre-generate all examples
40
+ self.examples = []
41
+ for _ in range(n_samples):
42
+ a = torch.randint(0, max_val + 1, (1,)).item()
43
+ b = torch.randint(0, max_val + 1, (1,)).item()
44
+ result = (a + b) % 256
45
+
46
+ prompt = f"{a} + {b} ="
47
+ target = f" {result}"
48
+
49
+ self.examples.append((prompt, target, a, b, result))
50
+
51
+ def __len__(self):
52
+ return len(self.examples)
53
+
54
+ def __getitem__(self, idx):
55
+ prompt, target, a, b, result = self.examples[idx]
56
+
57
+ # Tokenize
58
+ prompt_ids = self.tokenizer.encode(prompt, add_special_tokens=False)
59
+ target_ids = self.tokenizer.encode(target, add_special_tokens=False)
60
+
61
+ input_ids = prompt_ids + target_ids
62
+ labels = [-100] * len(prompt_ids) + target_ids # Only predict target
63
+
64
+ return {
65
+ 'input_ids': torch.tensor(input_ids),
66
+ 'labels': torch.tensor(labels),
67
+ 'a': a,
68
+ 'b': b,
69
+ 'result': result
70
+ }
71
+
72
+
73
+ def collate_fn(batch):
74
+ """Collate with padding."""
75
+ max_len = max(len(item['input_ids']) for item in batch)
76
+
77
+ input_ids = []
78
+ labels = []
79
+ attention_mask = []
80
+
81
+ for item in batch:
82
+ pad_len = max_len - len(item['input_ids'])
83
+
84
+ input_ids.append(
85
+ torch.cat([item['input_ids'], torch.zeros(pad_len, dtype=torch.long)])
86
+ )
87
+ labels.append(
88
+ torch.cat([item['labels'], torch.full((pad_len,), -100, dtype=torch.long)])
89
+ )
90
+ attention_mask.append(
91
+ torch.cat([torch.ones(len(item['input_ids'])), torch.zeros(pad_len)])
92
+ )
93
+
94
+ return {
95
+ 'input_ids': torch.stack(input_ids),
96
+ 'labels': torch.stack(labels),
97
+ 'attention_mask': torch.stack(attention_mask),
98
+ }
99
+
100
+
101
+ # =============================================================================
102
+ # TRAINING LOOP
103
+ # =============================================================================
104
+
105
def train_interface(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    n_epochs: int = 3,
    batch_size: int = 16,
    lr: float = 1e-4,
    n_train_samples: int = 10000,
    device: str = 'cpu',
    eval_every: int = 500
):
    """
    Train the circuit interface layers.

    Every parameter of the model is frozen EXCEPT those whose names contain
    one of the interface markers:
      - bit_extractor (embedding -> bits)
      - bit_injector (bits -> embedding)
      - router (circuit vs MLP weighting)
      - op_selector (which operation)

    Args:
        model: circuit-augmented causal LM (output of
            ``augment_smollm2_with_circuits``).
        tokenizer: tokenizer matching ``model``; used to build the dataset
            and to decode during periodic evaluation.
        n_epochs: number of passes over the synthetic arithmetic dataset.
        batch_size: examples per optimizer step.
        lr: AdamW learning rate for the interface parameters.
        n_train_samples: size of the generated ``ArithmeticDataset``.
        device: 'cpu' or 'cuda'.
        eval_every: run a 50-problem evaluation every this many global steps.

    Returns:
        The same ``model`` instance with trained interface layers
        (left in train() mode, matching previous behavior).
    """
    print("\n" + "=" * 70)
    print(" TRAINING CIRCUIT INTERFACE")
    print("=" * 70)

    # Freeze everything except interface layers; collect the trainable ones
    # so the optimizer only ever sees interface parameters.
    interface_params = []
    frozen_count = 0
    trainable_count = 0

    for name, param in model.named_parameters():
        if any(x in name for x in ['bit_extractor', 'bit_injector', 'router', 'op_selector']):
            param.requires_grad = True
            interface_params.append(param)
            trainable_count += param.numel()
        else:
            param.requires_grad = False
            frozen_count += param.numel()

    print(f"\n Frozen parameters: {frozen_count:,}")
    print(f" Trainable parameters: {trainable_count:,}")
    print(f" Training {len(interface_params)} parameter groups")

    # Create dataset of synthetic arithmetic problems.
    print(f"\n Creating dataset ({n_train_samples} examples)...")
    dataset = ArithmeticDataset(tokenizer, n_samples=n_train_samples)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn
    )

    # Optimizer over the interface parameters only.
    optimizer = torch.optim.AdamW(interface_params, lr=lr)

    # Training
    model.to(device)
    model.train()

    global_step = 0
    total_loss = 0  # running loss accumulated since the last periodic eval

    for epoch in range(n_epochs):
        print(f"\n Epoch {epoch + 1}/{n_epochs}")
        print(" " + "-" * 60)

        epoch_loss = 0
        epoch_steps = 0

        # FIX: was desc=f" Training" — an f-string with no placeholders.
        pbar = tqdm(dataloader, desc=" Training", leave=False)

        for batch in pbar:
            input_ids = batch['input_ids'].to(device)
            labels = batch['labels'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Forward: HF causal-LM loss is computed internally from labels
            # (positions labeled -100 are ignored by the loss).
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            loss = outputs.loss

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            epoch_loss += loss.item()
            epoch_steps += 1
            global_step += 1
            total_loss += loss.item()

            pbar.set_postfix({'loss': f'{loss.item():.4f}'})

            # Periodic evaluation on a small problem set; total_loss resets
            # here so the printed loss is the mean over the last eval_every
            # steps.
            if global_step % eval_every == 0:
                model.eval()
                eval_results = evaluate_arithmetic(model, tokenizer, n_problems=50, device=device)
                print(f"\n Step {global_step}: Loss={total_loss/eval_every:.4f}, "
                      f"Accuracy={eval_results['accuracy']*100:.1f}%")
                total_loss = 0
                model.train()

        # FIX: guard against an empty dataloader (e.g. n_train_samples == 0),
        # which previously raised ZeroDivisionError.
        avg_loss = epoch_loss / max(epoch_steps, 1)
        print(f"\n Epoch {epoch + 1} complete. Avg loss: {avg_loss:.4f}")

        # End of epoch evaluation on a larger problem set.
        model.eval()
        eval_results = evaluate_arithmetic(model, tokenizer, n_problems=100, device=device)
        print(f" Evaluation: {eval_results['accuracy']*100:.1f}% "
              f"({eval_results['correct']}/{eval_results['total']})")

        if eval_results['errors']:
            # FIX: was print(f" Sample errors:") — f-string, no placeholders.
            print(" Sample errors:")
            for a, b, exp, got in eval_results['errors'][:3]:
                print(f" {a} + {b} = {exp}, model said {got}")

        model.train()

    print("\n" + "=" * 70)
    print(" TRAINING COMPLETE")
    print("=" * 70)

    return model
232
+
233
+
234
+ # =============================================================================
235
+ # MAIN
236
+ # =============================================================================
237
+
238
if __name__ == "__main__":
    # Script flow: load SmolLM2 -> measure baseline arithmetic accuracy ->
    # attach frozen circuits -> train only the interface layers -> re-evaluate
    # -> save a checkpoint with before/after metrics.
    parser = argparse.ArgumentParser(description='Train Circuit Interface')
    parser.add_argument('--circuit-path', type=str,
                        default='./neural_computer.safetensors',
                        help='Path to circuit weights')
    parser.add_argument('--device', type=str, default='cpu',
                        help='Device (cpu or cuda)')
    parser.add_argument('--epochs', type=int, default=3,
                        help='Number of epochs')
    parser.add_argument('--batch-size', type=int, default=8,
                        help='Batch size')
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='Learning rate')
    parser.add_argument('--n-samples', type=int, default=5000,
                        help='Number of training samples')
    args = parser.parse_args()

    print("=" * 70)
    print(" CIRCUIT-AUGMENTED LLM TRAINING")
    print("=" * 70)

    # [1] Load the base model in float32.
    print("\n[1] Loading SmolLM2-360M...")
    model_id = "HuggingFaceTB/SmolLM2-360M"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Reuse EOS as the padding token (no dedicated pad token is configured).
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)

    # [2] Baseline arithmetic accuracy before augmentation (50 problems).
    print("\n[2] Baseline evaluation...")
    baseline = evaluate_arithmetic(model, tokenizer, n_problems=50, device=args.device)
    print(f" Baseline accuracy: {baseline['accuracy']*100:.1f}%")

    # [3] Attach the pre-built circuits loaded from the safetensors file.
    print("\n[3] Augmenting with circuits...")
    model = augment_smollm2_with_circuits(
        model,
        args.circuit_path,
        device=args.device
    )

    # [4] Train only the interface layers; the base LM and the circuit
    #     weights are frozen inside train_interface.
    print("\n[4] Training interface layers...")
    model = train_interface(
        model,
        tokenizer,
        n_epochs=args.epochs,
        batch_size=args.batch_size,
        lr=args.lr,
        n_train_samples=args.n_samples,
        device=args.device
    )

    # [5] Final accuracy on a fresh 100-problem set, compared to baseline.
    print("\n[5] Final evaluation...")
    final = evaluate_arithmetic(model, tokenizer, n_problems=100, device=args.device)
    print(f" Final accuracy: {final['accuracy']*100:.1f}%")
    print(f" Improvement: {baseline['accuracy']*100:.1f}% -> {final['accuracy']*100:.1f}%")

    # [6] Persist the full state dict plus the before/after accuracies so the
    #     checkpoint is self-describing.
    save_path = './circuit_augmented_smollm2.pt'
    print(f"\n[6] Saving to {save_path}...")
    torch.save({
        'model_state_dict': model.state_dict(),
        'baseline_accuracy': baseline['accuracy'],
        'final_accuracy': final['accuracy']
    }, save_path)

    print("\nDone!")
neural_computer.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51d4e2725c0d24bce807a5b7dc58319e9eed0f95c17fc39e662272ed0cbe8f1f
3
- size 351104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec37339654639ab0a16a32fe5324f6bc1ed02d457d0936383ea9993c19edc92a
3
+ size 358696
tensors.txt CHANGED
The diff for this file is too large to render. See raw diff
 
todo.md CHANGED
@@ -6,7 +6,7 @@
6
  |-------------------------|-----------------------------|--------------------------------|
7
  | SUB | Subtraction circuit | Missing - need NOT(B)+1+A path |
8
  | DIV | Division circuit | Missing |
9
- | NEG | Two's complement negate | Missing |
10
  | Program Counter | PC register + increment | Missing |
11
  | PC Load | Load PC from jump target | Missing |
12
  | Register File MUX | Select 1-of-4 GPRs | Missing |
 
6
  |-------------------------|-----------------------------|--------------------------------|
7
  | SUB | Subtraction circuit | Missing - need NOT(B)+1+A path |
8
  | DIV | Division circuit | Missing |
9
+ | NEG | Two's complement negate | DONE - 76 tensors, 256/256 tests pass |
10
  | Program Counter | PC register + increment | Missing |
11
  | PC Load | Load PC from jump target | Missing |
12
  | Register File MUX | Select 1-of-4 GPRs | Missing |