phanerozoic committed on
Commit
6f0857b
·
verified ·
1 Parent(s): 0a7c400

Delete test_perturbation.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. test_perturbation.py +0 -480
test_perturbation.py DELETED
@@ -1,480 +0,0 @@
1
- """
2
- TEST #4: Adversarial Weight Perturbation
3
- =========================================
4
- Flip one weight in one gate. Prove exactly which tests fail and why.
5
- Show failure is localized and predictable, not catastrophic.
6
-
7
- A skeptic would demand: "Prove your system fails gracefully. Show me that
8
- perturbing one weight breaks only what it should break."
9
- """
10
-
11
- import torch
12
- from safetensors.torch import load_file
13
- import copy
14
-
15
# Circuit weights are read once at import time; the experiments below work on
# clones of this mapping and leave it untouched.
_MODEL_PATH = 'neural_computer.safetensors'
original_model = load_file(_MODEL_PATH)
17
-
18
def heaviside(x):
    """Step activation: 1.0 wherever ``x >= 0``, else 0.0 (as a float tensor)."""
    fired = x >= 0
    return fired.float()
20
-
21
def eval_gate(model, prefix, a, b):
    """Run a 2-input, single-layer threshold gate and return its output as 0 or 1.

    The gate parameters are read from ``model`` under the keys
    ``{prefix}.weight`` and ``{prefix}.bias``.
    """
    inputs = torch.tensor([float(a), float(b)])
    weight = model[f'{prefix}.weight']
    offset = model[f'{prefix}.bias']
    pre_activation = inputs @ weight + offset
    return int(heaviside(pre_activation).item())
27
-
28
def eval_xor(model, a, b):
    """Run the two-layer XOR circuit stored under ``boolean.xor`` and return 0 or 1.

    Two hidden threshold neurons are evaluated on ``(a, b)``; their outputs
    feed a single output threshold neuron.
    """
    x = torch.tensor([float(a), float(b)])

    # Fetch every parameter up front, in the same order as the stored layout.
    first_w = model['boolean.xor.layer1.neuron1.weight']
    first_b = model['boolean.xor.layer1.neuron1.bias']
    second_w = model['boolean.xor.layer1.neuron2.weight']
    second_b = model['boolean.xor.layer1.neuron2.bias']
    out_w = model['boolean.xor.layer2.weight']
    out_b = model['boolean.xor.layer2.bias']

    first_hidden = heaviside(x @ first_w + first_b)
    second_hidden = heaviside(x @ second_w + second_b)
    hidden_vec = torch.tensor([first_hidden.item(), second_hidden.item()])

    output = heaviside(hidden_vec @ out_w + out_b)
    return int(output.item())
41
-
42
def eval_full_adder(model, a, b, cin, prefix):
    """Evaluate one full adder and return ``(sum_bit, carry_out)`` as ints.

    The adder is built from two half adders (``ha1``, ``ha2``) whose carries
    are merged by the ``carry_or`` neuron; all parameters are looked up in
    ``model`` under ``prefix``.
    """
    def fire(vec, gate):
        """Output (Python float) of the threshold neuron stored at ``gate``."""
        weight = model[f'{gate}.weight']
        offset = model[f'{gate}.bias']
        return heaviside(vec @ weight + offset).item()

    def xor_bit(vec, gate):
        """Two-layer XOR: OR and NAND hidden neurons feeding ``layer2``."""
        or_out = fire(vec, f'{gate}.layer1.or')
        nand_out = fire(vec, f'{gate}.layer1.nand')
        return fire(torch.tensor([or_out, nand_out]), f'{gate}.layer2')

    # First half adder works on the two operand bits.
    operands = torch.tensor([a, b], dtype=torch.float32)
    first_sum = xor_bit(operands, f'{prefix}.ha1.sum')
    first_carry = fire(operands, f'{prefix}.ha1.carry')

    # Second half adder folds in the incoming carry.
    with_cin = torch.tensor([first_sum, cin], dtype=torch.float32)
    second_sum = xor_bit(with_cin, f'{prefix}.ha2.sum')
    second_carry = fire(with_cin, f'{prefix}.ha2.carry')

    # Carry out is produced by the carry_or neuron over both half-adder carries.
    carries = torch.tensor([first_carry, second_carry], dtype=torch.float32)
    carry_out = fire(carries, f'{prefix}.carry_or')

    return int(second_sum), int(carry_out)
71
-
72
def add_8bit(model, a, b):
    """Add two 8-bit values with the ripple-carry circuit.

    Returns ``(result, carry_out)`` where ``result`` is assembled from the
    eight sum bits (least significant first) and ``carry_out`` is the final
    carry as an int.
    """
    carry = 0.0
    total = 0
    for bit in range(8):
        bit_a = float((a >> bit) & 1)
        bit_b = float((b >> bit) & 1)
        stage = f'arithmetic.ripplecarry8bit.fa{bit}'
        sum_bit, carry = eval_full_adder(model, bit_a, bit_b, carry, stage)
        # sum_bit is 0 or 1, so shifting places it at its binary weight.
        total += sum_bit << bit
    return total, int(carry)
84
-
85
def test_boolean_gates(model):
    """Check AND, OR, NAND, NOR and XOR against their truth tables.

    Returns ``(passed, failed, failures)`` where each failure is a tuple
    ``(gate_name, a, b, expected, got)``.
    """
    input_pairs = ((0, 0), (0, 1), (1, 0), (1, 1))

    # (label, evaluator, expected outputs in input_pairs order)
    gate_specs = (
        ('AND', lambda a, b: eval_gate(model, 'boolean.and', a, b), (0, 0, 0, 1)),
        ('OR', lambda a, b: eval_gate(model, 'boolean.or', a, b), (0, 1, 1, 1)),
        ('NAND', lambda a, b: eval_gate(model, 'boolean.nand', a, b), (1, 1, 1, 0)),
        ('NOR', lambda a, b: eval_gate(model, 'boolean.nor', a, b), (1, 0, 0, 0)),
        ('XOR', lambda a, b: eval_xor(model, a, b), (0, 1, 1, 0)),
    )

    failures = []
    for label, evaluate, expected_outputs in gate_specs:
        for (a, b), want in zip(input_pairs, expected_outputs):
            got = evaluate(a, b)
            if got != want:
                failures.append((label, a, b, want, got))

    total = len(gate_specs) * len(input_pairs)  # 5 gates * 4 cases = 20
    return total - len(failures), len(failures), failures
127
-
128
def test_addition_sample(model, n=100):
    """Check ``add_8bit`` on an evenly spaced grid of operand pairs.

    ``n`` is the approximate number of pairs wanted. Each operand steps
    through ``range(0, 256, 256 // int(sqrt(n)))``; with the default
    ``n=100`` that is a stride of 25, i.e. the operands 0, 25, ..., 250
    (11 values per axis, 121 pairs), so the actual count can exceed ``n``.

    Returns ``(passed, failed, failures)`` where ``passed + failed`` is the
    number of pairs actually evaluated and each failure is
    ``(a, b, expected, result)``.
    """
    per_axis = max(1, int(n ** 0.5))
    # Clamp to 1 so a very large n cannot yield a zero stride for range().
    stride = max(1, 256 // per_axis)
    operands = range(0, 256, stride)

    failures = []
    total = 0
    for a in operands:
        for b in operands:
            total += 1
            result, _ = add_8bit(model, a, b)
            expected = (a + b) % 256
            if result != expected:
                failures.append((a, b, expected, result))

    # Count what was really run instead of assuming exactly 100 cases.
    return total - len(failures), len(failures), failures
139
-
140
def perturb_weight(model, tensor_name, index, delta):
    """Return a cloned model in which one tensor element is shifted by ``delta``.

    ``index`` addresses the element in the flattened view of
    ``model[tensor_name]``. The input ``model`` is not modified.

    Returns ``(perturbed_model, old_value, new_value)``.
    """
    clone = {}
    for key, tensor in model.items():
        clone[key] = tensor.clone()

    flat = clone[tensor_name].flatten()
    before = flat[index].item()
    flat[index] = before + delta
    # Restore the original shape after editing through the flat view.
    clone[tensor_name] = flat.view(model[tensor_name].shape)

    return clone, before, before + delta
150
-
151
- # =============================================================================
152
- # PERTURBATION EXPERIMENTS
153
- # =============================================================================
154
-
155
def experiment_perturb_and_gate():
    """
    Perturb the AND gate's first weight by -1 (reported as 1 -> 0).

    Expected, per the analysis printed below: with w=[0,1] and b=-2 the
    gate can never fire, so it becomes constant 0 and AND(1,1) fails.
    (The stated weights/bias are those the messages assume; the actual
    values are printed from the loaded model.)

    Returns ``(failure_count, failures)`` for the AND truth table, each
    failure being ``(a, b, expected, got)``.
    """
    print("\n[EXPERIMENT 1] Perturb AND gate: w[0] = 1 -> 0")
    print("-" * 60)

    perturbed, _, _ = perturb_weight(original_model, 'boolean.and.weight', 0, -1)

    print(f" Original: w={original_model['boolean.and.weight'].tolist()}, b={original_model['boolean.and.bias'].item()}")
    print(f" Perturbed: w={perturbed['boolean.and.weight'].tolist()}, b={perturbed['boolean.and.bias'].item()}")
    print()

    # Test AND gate directly
    print(" AND gate truth table after perturbation:")
    print(" Input Expected Got")
    failures = []
    expected_and = {(0, 0): 0, (0, 1): 0, (1, 0): 0, (1, 1): 1}
    for (a, b), exp in expected_and.items():
        got = eval_gate(perturbed, 'boolean.and', a, b)
        status = "OK" if got == exp else "FAIL"
        print(f" ({a},{b}) {exp} {got} [{status}]")
        if got != exp:
            failures.append((a, b, exp, got))

    print()
    print(" Analysis: With w=[0,1], b=-2, gate fires when 0*a + 1*b >= 2")
    print(" This is NEVER true (max sum = 1), so output is always 0")
    print(" AND(1,1) now incorrectly returns 0")
    print()

    # Check cascade effect on adders.
    # NOTE(review): add_8bit only reads 'arithmetic.ripplecarry8bit.*' keys,
    # so a change to 'boolean.and.weight' is not expected to reach it.
    print(" Cascade effect on arithmetic (AND is used in carry logic):")
    add_passed, add_fails, add_details = test_addition_sample(perturbed)
    # Report the real sample size rather than assuming exactly 100 cases.
    print(f" Addition failures: {add_fails}/{add_passed + add_fails} sampled")

    if add_fails > 0:
        print(f" Sample failures: {add_details[:3]}")

    return len(failures), failures
196
-
197
def experiment_perturb_or_gate():
    """
    Replace the OR gate's bias with -2 (reported as -1 -> -2).

    Expected, per the printed analysis: the gate now needs both inputs,
    i.e. it behaves like AND.

    Returns ``(failure_count, failures)`` for the OR truth table.
    """
    print("\n[EXPERIMENT 2] Perturb OR gate: bias = -1 -> -2")
    print("-" * 60)

    perturbed = {}
    for key, tensor in original_model.items():
        perturbed[key] = tensor.clone()
    perturbed['boolean.or.bias'] = torch.tensor([-2.0])

    print(f" Original: w={original_model['boolean.or.weight'].tolist()}, b={original_model['boolean.or.bias'].item()}")
    print(f" Perturbed: w={perturbed['boolean.or.weight'].tolist()}, b={perturbed['boolean.or.bias'].item()}")
    print()

    print(" OR gate truth table after perturbation:")
    print(" Input Expected Got")
    truth_table = (((0, 0), 0), ((0, 1), 1), ((1, 0), 1), ((1, 1), 1))
    mismatches = []
    for (a, b), want in truth_table:
        got = eval_gate(perturbed, 'boolean.or', a, b)
        verdict = "FAIL" if got != want else "OK"
        print(f" ({a},{b}) {want} {got} [{verdict}]")
        if got != want:
            mismatches.append((a, b, want, got))

    print()
    print(" Analysis: With w=[1,1], b=-2, gate fires when a + b >= 2")
    print(" This is AND, not OR. OR(0,1) and OR(1,0) now return 0")
    print()

    return len(mismatches), mismatches
229
-
230
def experiment_perturb_xor_hidden():
    """
    Replace the bias of XOR's first hidden neuron with -2.

    Expected, per the printed analysis: the hidden OR behaves like AND and
    the whole XOR circuit collapses to constant 0.

    Returns ``(failure_count, failures)`` for the XOR truth table.
    """
    print("\n[EXPERIMENT 3] Perturb XOR's hidden OR neuron: bias -1 -> -2")
    print("-" * 60)

    perturbed = {}
    for key, tensor in original_model.items():
        perturbed[key] = tensor.clone()
    perturbed['boolean.xor.layer1.neuron1.bias'] = torch.tensor([-2.0])

    print(f" Original XOR hidden1 (OR): w={original_model['boolean.xor.layer1.neuron1.weight'].tolist()}, b={original_model['boolean.xor.layer1.neuron1.bias'].item()}")
    print(" Perturbed: bias = -2 (now behaves as AND)")
    print()

    print(" XOR truth table after perturbation:")
    print(" Input Expected Got")
    truth_table = (((0, 0), 0), ((0, 1), 1), ((1, 0), 1), ((1, 1), 0))
    mismatches = []
    for (a, b), want in truth_table:
        got = eval_xor(perturbed, a, b)
        verdict = "FAIL" if got != want else "OK"
        print(f" ({a},{b}) {want} {got} [{verdict}]")
        if got != want:
            mismatches.append((a, b, want, got))

    print()
    print(" Analysis: XOR = AND(OR(a,b), NAND(a,b))")
    print(" With OR->AND: XOR = AND(AND(a,b), NAND(a,b))")
    print(" AND(a,b)=1 only when a=b=1, but NAND(1,1)=0")
    print(" So AND(AND, NAND) = 0 for all inputs -> constant 0")
    print()

    return len(mismatches), mismatches
264
-
265
def experiment_perturb_fa0_carry():
    """
    Perturb the first full adder's carry_or gate by setting its bias to -2.

    Expected: carry propagation breaks at bit 0, because the gate that
    merges the two half-adder carries now needs both of them to fire.

    Returns ``(failure_count, failures)`` over a fixed list of
    carry-critical additions; each failure is ``(a, b, expected, result)``.
    """
    # The header previously said "bias 0 -> -2", contradicting the message
    # below (and the OR convention used elsewhere in this script: b=-1).
    print("\n[EXPERIMENT 4] Perturb FA0 carry_or: bias -1 -> -2 (OR -> AND)")
    print("-" * 60)

    perturbed = {k: v.clone() for k, v in original_model.items()}
    # Change carry_or from OR (b=-1) to AND (b=-2)
    perturbed['arithmetic.ripplecarry8bit.fa0.carry_or.bias'] = torch.tensor([-2.0])

    print(" Perturbation: FA0.carry_or bias changed from -1 to -2")
    print(" Effect: OR gate becomes AND gate in carry chain")
    print()

    # Test specific carry-critical cases
    test_cases = [
        (1, 1, 2),        # 1+1=2, needs carry from bit 0
        (3, 1, 4),        # 11+01=100, needs carry
        (127, 1, 128),    # Carry through multiple bits
        (255, 1, 0),      # Full carry chain
        (128, 128, 0),    # High bit carry
    ]

    print(" Critical carry test cases:")
    failures = []
    for a, b, expected in test_cases:
        result, _ = add_8bit(perturbed, a, b)
        status = "OK" if result == expected else "FAIL"
        print(f" {a:3d} + {b:3d} = {result:3d} (expected {expected:3d}) [{status}]")
        if result != expected:
            failures.append((a, b, expected, result))

    print()
    print(" Analysis: FA0.carry_or computes c_out = ha1_carry OR ha2_carry")
    print(" With OR->AND, carry only propagates when BOTH internal carries fire")
    print(" This breaks 1+1 (ha1_carry=1, ha2_carry=0 -> AND gives 0)")
    print()

    return len(failures), failures
306
-
307
def experiment_sign_flip():
    """
    Shift the AND gate's first weight by -2 (reported as a sign flip 1 -> -1).

    Expected, per the printed analysis: the gate becomes constant 0.

    Returns ``(failure_count, failures)`` for the AND truth table.
    """
    print("\n[EXPERIMENT 5] Sign flip: AND w[0] = 1 -> -1")
    print("-" * 60)

    perturbed, old, new = perturb_weight(original_model, 'boolean.and.weight', 0, -2)

    print(f" Original: w={original_model['boolean.and.weight'].tolist()}, b={original_model['boolean.and.bias'].item()}")
    print(f" Perturbed: w={perturbed['boolean.and.weight'].tolist()}, b={perturbed['boolean.and.bias'].item()}")
    print()

    print(" AND gate truth table after sign flip:")
    print(" Input Expected Got Analysis")
    truth_table = (((0, 0), 0), ((0, 1), 0), ((1, 0), 0), ((1, 1), 1))
    mismatches = []
    for (a, b), want in truth_table:
        got = eval_gate(perturbed, 'boolean.and', a, b)
        # Displayed sum uses the assumed post-flip parameters w=[-1,1], b=-2.
        weighted_sum = b - a - 2
        verdict = "FAIL" if got != want else "OK"
        print(f" ({a},{b}) {want} {got} sum = -1*{a} + 1*{b} - 2 = {weighted_sum} [{verdict}]")
        if got != want:
            mismatches.append((a, b, want, got))

    print()
    print(" Analysis: With w=[-1,1], b=-2, fires when -a + b >= 2")
    print(" Max value is -0 + 1 - 2 = -1, never >= 0")
    print(" Gate becomes constant 0")
    print()

    return len(mismatches), mismatches
340
-
341
def experiment_localization():
    """
    Overwrite only the AND gate's weights and check that no other gate breaks.

    Every Boolean gate (AND, OR, NAND, NOR, XOR) is evaluated on all four
    input pairs against its reference Boolean function.

    Returns True when exactly one gate is reported as broken.
    """
    print("\n[EXPERIMENT 6] Failure Localization Test")
    print("-" * 60)

    # Perturb AND gate
    perturbed = {}
    for key, tensor in original_model.items():
        perturbed[key] = tensor.clone()
    perturbed['boolean.and.weight'] = torch.tensor([0.0, 1.0])

    print(" Perturbation: AND gate w=[1,1] -> [0,1]")
    print()

    # (label, circuit evaluator, reference Boolean function)
    checks = (
        ('AND', lambda a, b: eval_gate(perturbed, 'boolean.and', a, b), lambda a, b: a & b),
        ('OR', lambda a, b: eval_gate(perturbed, 'boolean.or', a, b), lambda a, b: a | b),
        ('NAND', lambda a, b: eval_gate(perturbed, 'boolean.nand', a, b), lambda a, b: 1 - (a & b)),
        ('NOR', lambda a, b: eval_gate(perturbed, 'boolean.nor', a, b), lambda a, b: 1 - (a | b)),
        ('XOR', lambda a, b: eval_xor(perturbed, a, b), lambda a, b: a ^ b),
    )

    gates_status = {}
    for label, evaluate, reference in checks:
        mismatched = [
            (a, b)
            for a in (0, 1)
            for b in (0, 1)
            if evaluate(a, b) != reference(a, b)
        ]
        gates_status[label] = 'BROKEN' if mismatched else 'OK'

    print(" Gate status after AND perturbation:")
    for gate, status in gates_status.items():
        indicator = " " if status != 'BROKEN' else "X"
        print(f" [{indicator}] {gate:6s} {status}")

    print()
    broken_count = list(gates_status.values()).count('BROKEN')
    only_target_broke = broken_count == 1
    print(f" Result: {broken_count}/5 gates affected")
    print(f" Localization: {'PASSED' if only_target_broke else 'FAILED'} - only perturbed gate broke")

    return only_target_broke
419
-
420
- # =============================================================================
421
- # MAIN
422
- # =============================================================================
423
-
424
if __name__ == "__main__":
    # Script entry point: verify the unperturbed model, run every
    # perturbation experiment, then print a one-line verdict per experiment.
    print("=" * 70)
    print(" TEST #4: ADVERSARIAL WEIGHT PERTURBATION")
    print(" Single-weight changes, localized and predictable failures")
    print("=" * 70)

    # First verify original model works
    print("\n[BASELINE] Verifying original model...")
    bool_passed, bool_failed, _ = test_boolean_gates(original_model)
    add_passed, add_failed, _ = test_addition_sample(original_model)
    print(f" Boolean gates: {bool_passed}/{bool_passed + bool_failed} passed")
    print(f" Addition sample: {add_passed}/{add_passed + add_failed} passed")

    if bool_failed > 0 or add_failed > 0:
        print(" ERROR: Original model has failures!")
        # The bare exit() builtin is injected by the `site` module and is
        # missing under `python -S` or in frozen builds; SystemExit always works.
        raise SystemExit(1)
    print(" Original model verified OK")

    # Run experiments. Each of these returns (failure_count, failures) and
    # counts as a PASS when the perturbation broke at least one case.
    failure_experiments = (
        (experiment_perturb_and_gate, "AND w[0]: 1->0", "Breaks AND(1,1)"),
        (experiment_perturb_or_gate, "OR bias: -1->-2", "OR becomes AND"),
        (experiment_perturb_xor_hidden, "XOR hidden OR->AND", "XOR becomes const 0"),
        (experiment_perturb_fa0_carry, "FA0 carry_or OR->AND", "Carry chain breaks"),
        (experiment_sign_flip, "AND w[0] sign flip", "AND becomes const 0"),
    )

    results = []
    for run_experiment, label, note in failure_experiments:
        n, _ = run_experiment()
        results.append((label, n > 0, note))

    # The localization experiment returns a plain boolean instead.
    localized = experiment_localization()
    results.append(("Failure localization", localized, "Only target gate breaks"))

    print("\n" + "=" * 70)
    print(" SUMMARY")
    print("=" * 70)

    for name, passed, desc in results:
        status = "PASS" if passed else "FAIL"
        print(f" {name:25s} [{status}] - {desc}")
    all_passed = all(passed for _, passed, _ in results)

    print()
    if all_passed:
        print(" STATUS: ALL PERTURBATIONS CAUSED PREDICTABLE, LOCALIZED FAILURES")
    else:
        print(" STATUS: SOME PERTURBATIONS DID NOT BEHAVE AS EXPECTED")

    print("=" * 70)