TokenBender committed on
Commit
f75ca2f
·
verified ·
1 Parent(s): 358a86b

Add issue 14 cross-model replication artifacts

Browse files
circuit-shotting/artifacts/issue14/cross_model_replication/qwen25_1p5b_2digit_cross_model_issue14.artifacts.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79eb27f50ff5d4cfabbf22608cf845735226a055926f4f5d51aee5caf2c544e6
3
+ size 284455854
circuit-shotting/artifacts/issue14/cross_model_replication/qwen25_1p5b_2digit_cross_model_issue14.artifacts.tar.gz.sha256 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89b81d60cb375a20916aae55901bfe5ae91aaa8ac21a81c5b4af113ac0baa68a
3
+ size 130
circuit-shotting/artifacts/issue14/cross_model_replication/summary.json ADDED
@@ -0,0 +1,742 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_dir": "results/qwen25_1p5b_2digit_cross_model_issue14",
3
+ "model": "Qwen/Qwen2.5-1.5B",
4
+ "model_key": "qwen25_1p5b",
5
+ "base_accuracy": {
6
+ "n_tested": 8100,
7
+ "n_correct": 6670,
8
+ "accuracy": 0.8234567901234567
9
+ },
10
+ "variants": {
11
+ "rank32_kl": {
12
+ "variant": "rank32_kl",
13
+ "train_final_accuracy": 0.027777777777777776,
14
+ "merge_selection": {
15
+ "rule": "max_accuracy_no_scale_met_threshold",
16
+ "row": {
17
+ "scale": 0.3,
18
+ "kl": {
19
+ "kl_sum": 14387.0,
20
+ "kl_positions": 18616,
21
+ "kl_mean": 0.7728298238074774
22
+ },
23
+ "exact": {
24
+ "n": 8100,
25
+ "correct": 7762,
26
+ "accuracy": 0.9582716049382716,
27
+ "wrong_sample": [
28
+ {
29
+ "prompt": "10 + 70 =",
30
+ "answer": "80",
31
+ "pred": "800"
32
+ },
33
+ {
34
+ "prompt": "10 + 71 =",
35
+ "answer": "81",
36
+ "pred": "811"
37
+ },
38
+ {
39
+ "prompt": "10 + 72 =",
40
+ "answer": "82",
41
+ "pred": "822"
42
+ },
43
+ {
44
+ "prompt": "10 + 76 =",
45
+ "answer": "86",
46
+ "pred": "866"
47
+ },
48
+ {
49
+ "prompt": "10 + 78 =",
50
+ "answer": "88",
51
+ "pred": "888"
52
+ },
53
+ {
54
+ "prompt": "10 + 79 =",
55
+ "answer": "89",
56
+ "pred": "899"
57
+ },
58
+ {
59
+ "prompt": "10 + 81 =",
60
+ "answer": "91",
61
+ "pred": "911"
62
+ },
63
+ {
64
+ "prompt": "10 + 88 =",
65
+ "answer": "98",
66
+ "pred": "988"
67
+ },
68
+ {
69
+ "prompt": "10 + 98 =",
70
+ "answer": "108",
71
+ "pred": "1088"
72
+ },
73
+ {
74
+ "prompt": "10 + 99 =",
75
+ "answer": "109",
76
+ "pred": "1099"
77
+ },
78
+ {
79
+ "prompt": "11 + 10 =",
80
+ "answer": "21",
81
+ "pred": "11"
82
+ },
83
+ {
84
+ "prompt": "12 + 48 =",
85
+ "answer": "60",
86
+ "pred": "50"
87
+ },
88
+ {
89
+ "prompt": "12 + 68 =",
90
+ "answer": "80",
91
+ "pred": "70"
92
+ },
93
+ {
94
+ "prompt": "12 + 69 =",
95
+ "answer": "81",
96
+ "pred": "811"
97
+ },
98
+ {
99
+ "prompt": "12 + 71 =",
100
+ "answer": "83",
101
+ "pred": "833"
102
+ },
103
+ {
104
+ "prompt": "12 + 74 =",
105
+ "answer": "86",
106
+ "pred": "866"
107
+ },
108
+ {
109
+ "prompt": "12 + 75 =",
110
+ "answer": "87",
111
+ "pred": "877"
112
+ },
113
+ {
114
+ "prompt": "12 + 76 =",
115
+ "answer": "88",
116
+ "pred": "888"
117
+ },
118
+ {
119
+ "prompt": "12 + 77 =",
120
+ "answer": "89",
121
+ "pred": "899"
122
+ },
123
+ {
124
+ "prompt": "12 + 79 =",
125
+ "answer": "91",
126
+ "pred": "911"
127
+ },
128
+ {
129
+ "prompt": "12 + 96 =",
130
+ "answer": "108",
131
+ "pred": "1088"
132
+ },
133
+ {
134
+ "prompt": "12 + 99 =",
135
+ "answer": "111",
136
+ "pred": "1111"
137
+ },
138
+ {
139
+ "prompt": "13 + 74 =",
140
+ "answer": "87",
141
+ "pred": "877"
142
+ },
143
+ {
144
+ "prompt": "13 + 75 =",
145
+ "answer": "88",
146
+ "pred": "888"
147
+ },
148
+ {
149
+ "prompt": "13 + 76 =",
150
+ "answer": "89",
151
+ "pred": "899"
152
+ },
153
+ {
154
+ "prompt": "13 + 78 =",
155
+ "answer": "91",
156
+ "pred": "911"
157
+ },
158
+ {
159
+ "prompt": "13 + 96 =",
160
+ "answer": "109",
161
+ "pred": "1099"
162
+ },
163
+ {
164
+ "prompt": "13 + 98 =",
165
+ "answer": "111",
166
+ "pred": "1111"
167
+ },
168
+ {
169
+ "prompt": "14 + 66 =",
170
+ "answer": "80",
171
+ "pred": "800"
172
+ },
173
+ {
174
+ "prompt": "14 + 67 =",
175
+ "answer": "81",
176
+ "pred": "8111"
177
+ },
178
+ {
179
+ "prompt": "14 + 77 =",
180
+ "answer": "91",
181
+ "pred": "911"
182
+ },
183
+ {
184
+ "prompt": "14 + 96 =",
185
+ "answer": "110",
186
+ "pred": "1100"
187
+ },
188
+ {
189
+ "prompt": "14 + 97 =",
190
+ "answer": "111",
191
+ "pred": "111111"
192
+ },
193
+ {
194
+ "prompt": "15 + 66 =",
195
+ "answer": "81",
196
+ "pred": "811"
197
+ },
198
+ {
199
+ "prompt": "15 + 96 =",
200
+ "answer": "111",
201
+ "pred": "1111"
202
+ },
203
+ {
204
+ "prompt": "16 + 10 =",
205
+ "answer": "26",
206
+ "pred": "36"
207
+ },
208
+ {
209
+ "prompt": "16 + 12 =",
210
+ "answer": "28",
211
+ "pred": "38"
212
+ },
213
+ {
214
+ "prompt": "16 + 65 =",
215
+ "answer": "81",
216
+ "pred": "811"
217
+ },
218
+ {
219
+ "prompt": "16 + 71 =",
220
+ "answer": "87",
221
+ "pred": "877"
222
+ },
223
+ {
224
+ "prompt": "16 + 75 =",
225
+ "answer": "91",
226
+ "pred": "911"
227
+ },
228
+ {
229
+ "prompt": "16 + 95 =",
230
+ "answer": "111",
231
+ "pred": "111111"
232
+ },
233
+ {
234
+ "prompt": "16 + 98 =",
235
+ "answer": "114",
236
+ "pred": "1144"
237
+ },
238
+ {
239
+ "prompt": "17 + 10 =",
240
+ "answer": "27",
241
+ "pred": "37"
242
+ },
243
+ {
244
+ "prompt": "17 + 12 =",
245
+ "answer": "29",
246
+ "pred": "39"
247
+ },
248
+ {
249
+ "prompt": "17 + 72 =",
250
+ "answer": "89",
251
+ "pred": "899"
252
+ },
253
+ {
254
+ "prompt": "17 + 74 =",
255
+ "answer": "91",
256
+ "pred": "911"
257
+ },
258
+ {
259
+ "prompt": "17 + 82 =",
260
+ "answer": "99",
261
+ "pred": "109"
262
+ },
263
+ {
264
+ "prompt": "17 + 94 =",
265
+ "answer": "111",
266
+ "pred": "111111"
267
+ },
268
+ {
269
+ "prompt": "18 + 71 =",
270
+ "answer": "89",
271
+ "pred": "899"
272
+ },
273
+ {
274
+ "prompt": "18 + 96 =",
275
+ "answer": "114",
276
+ "pred": "1144"
277
+ }
278
+ ]
279
+ }
280
+ }
281
+ },
282
+ "merged_exact_accuracy": 0.9582716049382716,
283
+ "merged_kl_mean": 0.7728298238074774,
284
+ "correct_dataset": {
285
+ "n_tested": 8100,
286
+ "n_correct": 7726,
287
+ "accuracy": 0.9538271604938272
288
+ },
289
+ "rate_distortion": {
290
+ "topk_100": {
291
+ "accuracy": 0.05333333333333334,
292
+ "mlp_kept": 1561,
293
+ "mlp_channel_fraction": 0.006222098214285714,
294
+ "selected_weight_fraction": 0.004646697674418604
295
+ },
296
+ "rel_0.05": {
297
+ "accuracy": 0.06133333333333333,
298
+ "mlp_kept": 1439,
299
+ "mlp_channel_fraction": 0.005735809948979592,
300
+ "selected_weight_fraction": 0.00428353488372093
301
+ },
302
+ "topk_500": {
303
+ "accuracy": 0.082,
304
+ "mlp_kept": 19009,
305
+ "mlp_channel_fraction": 0.07576929209183673,
306
+ "selected_weight_fraction": 0.05658493023255814
307
+ },
308
+ "rel_0.01": {
309
+ "accuracy": 0.06266666666666666,
310
+ "mlp_kept": 20155,
311
+ "mlp_channel_fraction": 0.08033721301020408,
312
+ "selected_weight_fraction": 0.05999627906976744
313
+ },
314
+ "topk_2000": {
315
+ "accuracy": 0.20733333333333334,
316
+ "mlp_kept": 54674,
317
+ "mlp_channel_fraction": 0.21792889030612245,
318
+ "selected_weight_fraction": 0.16275051162790696
319
+ },
320
+ "rel_0.001": {
321
+ "accuracy": 0.9826666666666667,
322
+ "mlp_kept": 173462,
323
+ "mlp_channel_fraction": 0.6914142219387756,
324
+ "selected_weight_fraction": 0.516352
325
+ }
326
+ },
327
+ "target90_search": {
328
+ "best": {
329
+ "label": "baseline_rel_0.001_compress_9_accepted",
330
+ "accuracy": 0.9053333333333333,
331
+ "correct": 1358,
332
+ "n": 1500,
333
+ "mlp_kept": 114026,
334
+ "mlp_keep_fraction": 0.45450414540816325,
335
+ "heads_kept": 336,
336
+ "heads_keep_fraction": 1.0,
337
+ "by_source_position": {
338
+ "hundreds": {
339
+ "n": 500,
340
+ "correct": 458,
341
+ "accuracy": 0.916
342
+ },
343
+ "ones": {
344
+ "n": 500,
345
+ "correct": 454,
346
+ "accuracy": 0.908
347
+ },
348
+ "tens": {
349
+ "n": 500,
350
+ "correct": 446,
351
+ "accuracy": 0.892
352
+ }
353
+ }
354
+ },
355
+ "best_mask_npz": "results/qwen25_1p5b_2digit_cross_model_issue14/rank32_kl/target90_search.best_mask.full.npz"
356
+ },
357
+ "fresh_masks": [
358
+ {
359
+ "mask": "topk_500",
360
+ "mlp_kept": 19009,
361
+ "mlp_channel_fraction": 0.07576929209183673,
362
+ "selected_weight_fraction": 0.05658493023255814,
363
+ "teacher_forced_accuracy": 0.07533333333333334,
364
+ "generation_accuracy": 0.07866666666666666
365
+ },
366
+ {
367
+ "mask": "target90",
368
+ "mlp_kept": 114026,
369
+ "mlp_channel_fraction": 0.45450414540816325,
370
+ "selected_weight_fraction": 0.33942623255813953,
371
+ "teacher_forced_accuracy": 0.8926666666666667,
372
+ "generation_accuracy": 0.9006666666666666
373
+ }
374
+ ]
375
+ },
376
+ "rank32_nokl": {
377
+ "variant": "rank32_nokl",
378
+ "train_final_accuracy": 0.0,
379
+ "merge_selection": {
380
+ "rule": "max_accuracy_no_scale_met_threshold",
381
+ "row": {
382
+ "scale": 0.1,
383
+ "kl": {
384
+ "kl_sum": 13255.0,
385
+ "kl_positions": 18616,
386
+ "kl_mean": 0.7120219166308551
387
+ },
388
+ "exact": {
389
+ "n": 8100,
390
+ "correct": 7853,
391
+ "accuracy": 0.9695061728395061,
392
+ "wrong_sample": [
393
+ {
394
+ "prompt": "10 + 24 =",
395
+ "answer": "34",
396
+ "pred": "124"
397
+ },
398
+ {
399
+ "prompt": "10 + 25 =",
400
+ "answer": "35",
401
+ "pred": "155"
402
+ },
403
+ {
404
+ "prompt": "10 + 26 =",
405
+ "answer": "36",
406
+ "pred": "126"
407
+ },
408
+ {
409
+ "prompt": "10 + 28 =",
410
+ "answer": "38",
411
+ "pred": "128"
412
+ },
413
+ {
414
+ "prompt": "10 + 29 =",
415
+ "answer": "39",
416
+ "pred": "129"
417
+ },
418
+ {
419
+ "prompt": "10 + 30 =",
420
+ "answer": "40",
421
+ "pred": "130"
422
+ },
423
+ {
424
+ "prompt": "10 + 32 =",
425
+ "answer": "42",
426
+ "pred": "132"
427
+ },
428
+ {
429
+ "prompt": "10 + 33 =",
430
+ "answer": "43",
431
+ "pred": "133"
432
+ },
433
+ {
434
+ "prompt": "10 + 34 =",
435
+ "answer": "44",
436
+ "pred": "134"
437
+ },
438
+ {
439
+ "prompt": "10 + 35 =",
440
+ "answer": "45",
441
+ "pred": "135"
442
+ },
443
+ {
444
+ "prompt": "10 + 36 =",
445
+ "answer": "46",
446
+ "pred": "136"
447
+ },
448
+ {
449
+ "prompt": "10 + 37 =",
450
+ "answer": "47",
451
+ "pred": "137"
452
+ },
453
+ {
454
+ "prompt": "10 + 38 =",
455
+ "answer": "48",
456
+ "pred": "138"
457
+ },
458
+ {
459
+ "prompt": "10 + 39 =",
460
+ "answer": "49",
461
+ "pred": "139"
462
+ },
463
+ {
464
+ "prompt": "10 + 40 =",
465
+ "answer": "50",
466
+ "pred": "140"
467
+ },
468
+ {
469
+ "prompt": "10 + 41 =",
470
+ "answer": "51",
471
+ "pred": "141"
472
+ },
473
+ {
474
+ "prompt": "10 + 42 =",
475
+ "answer": "52",
476
+ "pred": "142"
477
+ },
478
+ {
479
+ "prompt": "10 + 43 =",
480
+ "answer": "53",
481
+ "pred": "143"
482
+ },
483
+ {
484
+ "prompt": "10 + 44 =",
485
+ "answer": "54",
486
+ "pred": "144"
487
+ },
488
+ {
489
+ "prompt": "10 + 45 =",
490
+ "answer": "55",
491
+ "pred": "145"
492
+ },
493
+ {
494
+ "prompt": "10 + 46 =",
495
+ "answer": "56",
496
+ "pred": "146"
497
+ },
498
+ {
499
+ "prompt": "10 + 47 =",
500
+ "answer": "57",
501
+ "pred": "147"
502
+ },
503
+ {
504
+ "prompt": "10 + 48 =",
505
+ "answer": "58",
506
+ "pred": "148"
507
+ },
508
+ {
509
+ "prompt": "10 + 49 =",
510
+ "answer": "59",
511
+ "pred": "149"
512
+ },
513
+ {
514
+ "prompt": "10 + 50 =",
515
+ "answer": "60",
516
+ "pred": "150"
517
+ },
518
+ {
519
+ "prompt": "10 + 51 =",
520
+ "answer": "61",
521
+ "pred": "151"
522
+ },
523
+ {
524
+ "prompt": "10 + 52 =",
525
+ "answer": "62",
526
+ "pred": "152"
527
+ },
528
+ {
529
+ "prompt": "10 + 53 =",
530
+ "answer": "63",
531
+ "pred": "153"
532
+ },
533
+ {
534
+ "prompt": "10 + 54 =",
535
+ "answer": "64",
536
+ "pred": "154"
537
+ },
538
+ {
539
+ "prompt": "10 + 55 =",
540
+ "answer": "65",
541
+ "pred": "155"
542
+ },
543
+ {
544
+ "prompt": "10 + 56 =",
545
+ "answer": "66",
546
+ "pred": "156"
547
+ },
548
+ {
549
+ "prompt": "10 + 57 =",
550
+ "answer": "67",
551
+ "pred": "157"
552
+ },
553
+ {
554
+ "prompt": "10 + 58 =",
555
+ "answer": "68",
556
+ "pred": "158"
557
+ },
558
+ {
559
+ "prompt": "10 + 59 =",
560
+ "answer": "69",
561
+ "pred": "159"
562
+ },
563
+ {
564
+ "prompt": "10 + 60 =",
565
+ "answer": "70",
566
+ "pred": "160"
567
+ },
568
+ {
569
+ "prompt": "10 + 61 =",
570
+ "answer": "71",
571
+ "pred": "161"
572
+ },
573
+ {
574
+ "prompt": "10 + 62 =",
575
+ "answer": "72",
576
+ "pred": "162"
577
+ },
578
+ {
579
+ "prompt": "10 + 63 =",
580
+ "answer": "73",
581
+ "pred": "163"
582
+ },
583
+ {
584
+ "prompt": "10 + 64 =",
585
+ "answer": "74",
586
+ "pred": "164"
587
+ },
588
+ {
589
+ "prompt": "10 + 65 =",
590
+ "answer": "75",
591
+ "pred": "165"
592
+ },
593
+ {
594
+ "prompt": "10 + 66 =",
595
+ "answer": "76",
596
+ "pred": "166"
597
+ },
598
+ {
599
+ "prompt": "10 + 67 =",
600
+ "answer": "77",
601
+ "pred": "167"
602
+ },
603
+ {
604
+ "prompt": "10 + 68 =",
605
+ "answer": "78",
606
+ "pred": "168"
607
+ },
608
+ {
609
+ "prompt": "10 + 69 =",
610
+ "answer": "79",
611
+ "pred": "169"
612
+ },
613
+ {
614
+ "prompt": "10 + 70 =",
615
+ "answer": "80",
616
+ "pred": "170"
617
+ },
618
+ {
619
+ "prompt": "10 + 71 =",
620
+ "answer": "81",
621
+ "pred": "171"
622
+ },
623
+ {
624
+ "prompt": "10 + 72 =",
625
+ "answer": "82",
626
+ "pred": "172"
627
+ },
628
+ {
629
+ "prompt": "10 + 73 =",
630
+ "answer": "83",
631
+ "pred": "173"
632
+ },
633
+ {
634
+ "prompt": "10 + 74 =",
635
+ "answer": "84",
636
+ "pred": "174"
637
+ },
638
+ {
639
+ "prompt": "10 + 75 =",
640
+ "answer": "85",
641
+ "pred": "175"
642
+ }
643
+ ]
644
+ }
645
+ }
646
+ },
647
+ "merged_exact_accuracy": 0.9695061728395061,
648
+ "merged_kl_mean": 0.7120219166308551,
649
+ "correct_dataset": {
650
+ "n_tested": 8100,
651
+ "n_correct": 7769,
652
+ "accuracy": 0.9591358024691358
653
+ },
654
+ "rate_distortion": {
655
+ "topk_100": {
656
+ "accuracy": 0.036,
657
+ "mlp_kept": 1802,
658
+ "mlp_channel_fraction": 0.007182716836734694,
659
+ "selected_weight_fraction": 0.005364093023255814
660
+ },
661
+ "rel_0.05": {
662
+ "accuracy": 0.03333333333333333,
663
+ "mlp_kept": 1771,
664
+ "mlp_channel_fraction": 0.007059151785714286,
665
+ "selected_weight_fraction": 0.005271813953488372
666
+ },
667
+ "topk_500": {
668
+ "accuracy": 0.020666666666666667,
669
+ "mlp_kept": 17605,
670
+ "mlp_channel_fraction": 0.07017299107142858,
671
+ "selected_weight_fraction": 0.05240558139534884
672
+ },
673
+ "rel_0.01": {
674
+ "accuracy": 0.018666666666666668,
675
+ "mlp_kept": 22425,
676
+ "mlp_channel_fraction": 0.08938536352040816,
677
+ "selected_weight_fraction": 0.06675348837209302
678
+ },
679
+ "topk_2000": {
680
+ "accuracy": 0.048,
681
+ "mlp_kept": 55862,
682
+ "mlp_channel_fraction": 0.2226642219387755,
683
+ "selected_weight_fraction": 0.16628688372093023
684
+ },
685
+ "rel_0.001": {
686
+ "accuracy": 0.9893333333333333,
687
+ "mlp_kept": 206263,
688
+ "mlp_channel_fraction": 0.8221580038265306,
689
+ "selected_weight_fraction": 0.6139921860465116
690
+ }
691
+ },
692
+ "target90_search": {
693
+ "best": {
694
+ "label": "baseline_rel_0.001_compress_6_accepted",
695
+ "accuracy": 0.9026666666666666,
696
+ "correct": 1354,
697
+ "n": 1500,
698
+ "mlp_kept": 157696,
699
+ "mlp_keep_fraction": 0.6285714285714286,
700
+ "heads_kept": 336,
701
+ "heads_keep_fraction": 1.0,
702
+ "by_source_position": {
703
+ "hundreds": {
704
+ "n": 500,
705
+ "correct": 452,
706
+ "accuracy": 0.904
707
+ },
708
+ "ones": {
709
+ "n": 500,
710
+ "correct": 482,
711
+ "accuracy": 0.964
712
+ },
713
+ "tens": {
714
+ "n": 500,
715
+ "correct": 420,
716
+ "accuracy": 0.84
717
+ }
718
+ }
719
+ },
720
+ "best_mask_npz": "results/qwen25_1p5b_2digit_cross_model_issue14/rank32_nokl/target90_search.best_mask.full.npz"
721
+ },
722
+ "fresh_masks": [
723
+ {
724
+ "mask": "topk_500",
725
+ "mlp_kept": 17605,
726
+ "mlp_channel_fraction": 0.07017299107142858,
727
+ "selected_weight_fraction": 0.05240558139534884,
728
+ "teacher_forced_accuracy": 0.017333333333333333,
729
+ "generation_accuracy": 0.019333333333333334
730
+ },
731
+ {
732
+ "mask": "target90",
733
+ "mlp_kept": 157696,
734
+ "mlp_channel_fraction": 0.6285714285714286,
735
+ "selected_weight_fraction": 0.4694206511627907,
736
+ "teacher_forced_accuracy": 0.888,
737
+ "generation_accuracy": 0.8966666666666666
738
+ }
739
+ ]
740
+ }
741
+ }
742
+ }
circuit-shotting/artifacts/issue14/cross_model_replication/summary.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Issue 14 Cross-Model Replication Summary
2
+
3
+ Model: `Qwen/Qwen2.5-1.5B`
4
+
5
+ Base 2-digit exact: `82.35%` (`6,670/8,100`).
6
+
7
+ ## Fresh Recovery
8
+
9
+ | variant | mask | MLP channels kept | MLP-channel % | selected-weight % | teacher-forced | generation |
10
+ |---|---|---:|---:|---:|---:|---:|
11
+ | `rank32_kl` | `topk_500` | 19,009 | 7.58% | 5.66% | 7.53% | 7.87% |
12
+ | `rank32_kl` | `target90` | 114,026 | 45.45% | 33.94% | 89.27% | 90.07% |
13
+ | `rank32_nokl` | `topk_500` | 17,605 | 7.02% | 5.24% | 1.73% | 1.93% |
14
+ | `rank32_nokl` | `target90` | 157,696 | 62.86% | 46.94% | 88.80% | 89.67% |
15
+
16
+ ## Merge Selection
17
+
18
+ | variant | merged exact | selected KL mean | rule |
19
+ |---|---:|---:|---|
20
+ | `rank32_kl` | 95.83% | 0.7728298238074774 | `max_accuracy_no_scale_met_threshold` |
21
+ | `rank32_nokl` | 96.95% | 0.7120219166308551 | `max_accuracy_no_scale_met_threshold` |