amirali1985 commited on
Commit
a94a95e
·
verified ·
1 Parent(s): 0757c15

Delete folder add_sub_sorl_v1_abs10_K1_10K with huggingface_hub

Browse files
add_sub_sorl_v1_abs10_K1_10K/config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "architectures": [
3
- "SorlModelWrapper"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "bos_token_id": null,
8
- "dtype": "float32",
9
- "eos_token_id": null,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 510,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 2040,
15
- "layer_types": [
16
- "full_attention",
17
- "full_attention"
18
- ],
19
- "max_position_embeddings": 128,
20
- "max_window_layers": 28,
21
- "model_type": "qwen3",
22
- "num_attention_heads": 3,
23
- "num_hidden_layers": 2,
24
- "num_key_value_heads": 3,
25
- "pad_token_id": null,
26
- "rms_norm_eps": 1e-06,
27
- "rope_parameters": {
28
- "rope_theta": 10000.0,
29
- "rope_type": "default"
30
- },
31
- "sliding_window": null,
32
- "tie_word_embeddings": false,
33
- "transformers_version": "5.5.0",
34
- "use_cache": true,
35
- "use_sliding_window": false,
36
- "vocab_size": 151654
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
add_sub_sorl_v1_abs10_K1_10K/generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "output_attentions": false,
4
- "output_hidden_states": false,
5
- "transformers_version": "5.5.0",
6
- "use_cache": true
7
- }
 
 
 
 
 
 
 
 
add_sub_sorl_v1_abs10_K1_10K/metrics.json DELETED
@@ -1,1297 +0,0 @@
1
- {
2
- "history": {
3
- "step": [
4
- 50,
5
- 100,
6
- 150,
7
- 207,
8
- 257,
9
- 307,
10
- 364,
11
- 414,
12
- 464,
13
- 521,
14
- 571,
15
- 621,
16
- 678,
17
- 728,
18
- 778,
19
- 835,
20
- 885,
21
- 935,
22
- 992,
23
- 1042,
24
- 1092,
25
- 1149,
26
- 1199,
27
- 1249,
28
- 1306,
29
- 1356,
30
- 1406,
31
- 1463,
32
- 1513,
33
- 1563
34
- ],
35
- "loss": [
36
- 8.773706436157227,
37
- 4.004265308380127,
38
- 3.2570762634277344,
39
- 3.041517734527588,
40
- 2.8982486724853516,
41
- 2.4261059761047363,
42
- -1.0320148468017578,
43
- -6.537303924560547,
44
- -7.572015285491943,
45
- -8.820948600769043,
46
- -9.133442878723145,
47
- -9.698243141174316,
48
- -10.157323837280273,
49
- -9.581876754760742,
50
- -10.299582481384277,
51
- -8.835819244384766,
52
- -7.875391006469727,
53
- -6.175548553466797,
54
- -4.922896385192871,
55
- -5.274993896484375,
56
- -3.509742021560669,
57
- -2.9279913902282715,
58
- -2.248248815536499,
59
- -1.4432270526885986,
60
- -0.9849211573600769,
61
- -0.7613394260406494,
62
- -0.9093202948570251,
63
- -0.20168478786945343,
64
- -0.5009633302688599,
65
- -0.4359404742717743
66
- ],
67
- "base_loss": [
68
- 6.400631904602051,
69
- 2.4112229347229004,
70
- 1.8464170694351196,
71
- 1.8753893375396729,
72
- 1.8903639316558838,
73
- 1.8746846914291382,
74
- 1.8820133209228516,
75
- 1.855467438697815,
76
- 1.7718533277511597,
77
- 1.7219583988189697,
78
- 1.7517011165618896,
79
- 1.6724598407745361,
80
- 1.647750973701477,
81
- 1.4754587411880493,
82
- 1.4356294870376587,
83
- 1.2360666990280151,
84
- 1.1430914402008057,
85
- 0.8601657748222351,
86
- 0.6953770518302917,
87
- 0.6893697381019592,
88
- 0.4810207784175873,
89
- 0.39060816168785095,
90
- 0.3150084316730499,
91
- 0.2136605978012085,
92
- 0.163777157664299,
93
- 0.12215958535671234,
94
- 0.13819025456905365,
95
- 0.08133143931627274,
96
- 0.08307908475399017,
97
- 0.07490883022546768
98
- ],
99
- "info_loss": [
100
- -0.21113967895507812,
101
- -0.04423642158508301,
102
- -0.04978370666503906,
103
- -0.0719677209854126,
104
- -0.087005615234375,
105
- -0.13271820545196533,
106
- -0.4793226718902588,
107
- -1.025976300239563,
108
- -1.1140811443328857,
109
- -1.2247107028961182,
110
- -1.256365180015564,
111
- -1.3006340265274048,
112
- -1.3388965129852295,
113
- -1.2538009881973267,
114
- -1.3090555667877197,
115
- -1.1411150693893433,
116
- -1.0133163928985596,
117
- -0.8001939654350281,
118
- -0.6398270130157471,
119
- -0.6569589376449585,
120
- -0.44588613510131836,
121
- -0.368633508682251,
122
- -0.2934108078479767,
123
- -0.1994001418352127,
124
- -0.14748595654964447,
125
- -0.11586211621761322,
126
- -0.12713156640529633,
127
- -0.04881247505545616,
128
- -0.07891618460416794,
129
- -0.06897100806236267
130
- ],
131
- "abs_loss": [
132
- 2.0780529975891113,
133
- 1.8440923690795898,
134
- 1.8333244323730469,
135
- 1.8426448106765747,
136
- 1.8403841257095337,
137
- 1.8378067016601562,
138
- 1.8460584878921509,
139
- 1.7702795267105103,
140
- 1.5657355785369873,
141
- 1.3797943592071533,
142
- 1.3185033798217773,
143
- 1.2104538679122925,
144
- 1.125502586364746,
145
- 0.9860450029373169,
146
- 0.8728498220443726,
147
- 0.7434267997741699,
148
- 0.7268884778022766,
149
- 0.5966061949729919,
150
- 0.5761818885803223,
151
- 0.4872754216194153,
152
- 0.42560893297195435,
153
- 0.3502724766731262,
154
- 0.29791387915611267,
155
- 0.267427533864975,
156
- 0.24475598335266113,
157
- 0.22809018194675446,
158
- 0.13244910538196564,
159
- 0.15626277029514313,
160
- 0.18973781168460846,
161
- 0.15587982535362244
162
- ],
163
- "zipf_loss": [
164
- 4.276666164398193,
165
- 1.8509973287582397,
166
- 1.7251636981964111,
167
- 1.7015411853790283,
168
- 1.6939024925231934,
169
- 1.6948225498199463,
170
- 1.6945927143096924,
171
- 1.6899633407592773,
172
- 1.6403698921203613,
173
- 1.5662205219268799,
174
- 1.546657681465149,
175
- 1.514591932296753,
176
- 1.4713406562805176,
177
- 1.3820693492889404,
178
- 1.2680578231811523,
179
- 1.26492178440094,
180
- 1.0419921875,
181
- 0.9065651893615723,
182
- 0.7223787903785706,
183
- 0.5564987659454346,
184
- 0.42553776502609253,
185
- 0.33270835876464844,
186
- 0.3410593867301941,
187
- 0.3103709816932678,
188
- 0.30168575048446655,
189
- 0.25231313705444336,
190
- 0.21056024730205536,
191
- 0.18948225677013397,
192
- 0.18614569306373596,
193
- 0.16327276825904846
194
- ],
195
- "denoise_loss": [],
196
- "ortho_loss": [
197
- 0.3211989104747772,
198
- 0.19110506772994995,
199
- 0.11888417601585388,
200
- 0.0979497954249382,
201
- 0.09260708838701248,
202
- 0.10610710084438324,
203
- 0.14702288806438446,
204
- 0.21001708507537842,
205
- 0.2301187366247177,
206
- 0.24748258292675018,
207
- 0.2629227936267853,
208
- 0.26881012320518494,
209
- 0.269039511680603,
210
- 0.26941126585006714,
211
- 0.26823461055755615,
212
- 0.26427075266838074,
213
- 0.2743089199066162,
214
- 0.2734338045120239,
215
- 0.27655836939811707,
216
- 0.2692953944206238,
217
- 0.27160370349884033,
218
- 0.27964547276496887,
219
- 0.2874664068222046,
220
- 0.2921176552772522,
221
- 0.2956086993217468,
222
- 0.29919639229774475,
223
- 0.3037196099758148,
224
- 0.3071216940879822,
225
- 0.311845988035202,
226
- 0.3125845789909363
227
- ],
228
- "lr": [
229
- 7.840000000000001e-05,
230
- 8e-05,
231
- 8e-05,
232
- 8e-05,
233
- 8e-05,
234
- 8e-05,
235
- 8e-05,
236
- 8e-05,
237
- 8e-05,
238
- 8e-05,
239
- 8e-05,
240
- 8e-05,
241
- 8e-05,
242
- 8e-05,
243
- 8e-05,
244
- 8e-05,
245
- 8e-05,
246
- 8e-05,
247
- 7.656578947368422e-05,
248
- 7.064473684210527e-05,
249
- 6.472368421052631e-05,
250
- 5.7973684210526325e-05,
251
- 5.2052631578947374e-05,
252
- 4.613157894736842e-05,
253
- 3.938157894736842e-05,
254
- 3.346052631578948e-05,
255
- 2.7539473684210523e-05,
256
- 2.078947368421053e-05,
257
- 1.4868421052631572e-05,
258
- 8.947368421052635e-06
259
- ],
260
- "emb_lr": [],
261
- "eval_step": [
262
- 150,
263
- 307,
264
- 464,
265
- 621,
266
- 778,
267
- 935,
268
- 1092,
269
- 1199,
270
- 1356,
271
- 1513
272
- ],
273
- "eval_accuracy": [
274
- 0.0,
275
- 0.0,
276
- 0.0,
277
- 0.0,
278
- 0.0,
279
- 0.0,
280
- 0.0,
281
- 0.0,
282
- 0.0,
283
- 0.0
284
- ]
285
- },
286
- "final_accuracy": 0.0,
287
- "sft_eval": {
288
- "config": {
289
- "ops": "add_sub",
290
- "K": null,
291
- "mode": "sft",
292
- "n_digits": 6,
293
- "n_per_split": 250
294
- },
295
- "splits": {
296
- "add_S0": {
297
- "full_accuracy": 0.944,
298
- "n_examples": 250,
299
- "per_subtask": {
300
- "SA": {
301
- "accuracy": 0.9908076165462902,
302
- "count": 1523
303
- },
304
- "SS": {
305
- "accuracy": 1.0,
306
- "count": 227
307
- }
308
- }
309
- },
310
- "add_S1": {
311
- "full_accuracy": 0.952,
312
- "n_examples": 250,
313
- "per_subtask": {
314
- "SA": {
315
- "accuracy": 0.9870848708487084,
316
- "count": 542
317
- },
318
- "SC": {
319
- "accuracy": 0.9952038369304557,
320
- "count": 417
321
- },
322
- "SS": {
323
- "accuracy": 1.0,
324
- "count": 70
325
- },
326
- "UC": {
327
- "accuracy": 0.9958391123439667,
328
- "count": 721
329
- }
330
- }
331
- },
332
- "add_S2": {
333
- "full_accuracy": 0.912,
334
- "n_examples": 250,
335
- "per_subtask": {
336
- "SA": {
337
- "accuracy": 0.970108695652174,
338
- "count": 368
339
- },
340
- "SC": {
341
- "accuracy": 0.9781931464174455,
342
- "count": 321
343
- },
344
- "SS": {
345
- "accuracy": 0.9736842105263158,
346
- "count": 228
347
- },
348
- "UC": {
349
- "accuracy": 0.9962335216572504,
350
- "count": 531
351
- },
352
- "US": {
353
- "accuracy": 1.0,
354
- "count": 302
355
- }
356
- }
357
- },
358
- "add_S3": {
359
- "full_accuracy": 0.756,
360
- "n_examples": 250,
361
- "per_subtask": {
362
- "SA": {
363
- "accuracy": 0.990228013029316,
364
- "count": 307
365
- },
366
- "SC": {
367
- "accuracy": 0.993127147766323,
368
- "count": 291
369
- },
370
- "SS": {
371
- "accuracy": 0.9823008849557522,
372
- "count": 113
373
- },
374
- "UC": {
375
- "accuracy": 0.8799171842650103,
376
- "count": 483
377
- },
378
- "US": {
379
- "accuracy": 1.0,
380
- "count": 556
381
- }
382
- }
383
- },
384
- "add_S4": {
385
- "full_accuracy": 0.544,
386
- "n_examples": 250,
387
- "per_subtask": {
388
- "SA": {
389
- "accuracy": 1.0,
390
- "count": 238
391
- },
392
- "SC": {
393
- "accuracy": 0.988929889298893,
394
- "count": 271
395
- },
396
- "SS": {
397
- "accuracy": 1.0,
398
- "count": 59
399
- },
400
- "UC": {
401
- "accuracy": 0.782716049382716,
402
- "count": 405
403
- },
404
- "US": {
405
- "accuracy": 0.9047619047619048,
406
- "count": 777
407
- }
408
- }
409
- },
410
- "add_S5": {
411
- "full_accuracy": 0.26,
412
- "n_examples": 250,
413
- "per_subtask": {
414
- "SA": {
415
- "accuracy": 0.988,
416
- "count": 250
417
- },
418
- "SC": {
419
- "accuracy": 1.0,
420
- "count": 250
421
- },
422
- "UC": {
423
- "accuracy": 0.452,
424
- "count": 250
425
- },
426
- "US": {
427
- "accuracy": 0.736,
428
- "count": 1000
429
- }
430
- }
431
- },
432
- "add_S6": {
433
- "full_accuracy": 0.656,
434
- "n_examples": 250,
435
- "per_subtask": {
436
- "SC": {
437
- "accuracy": 1.0,
438
- "count": 250
439
- },
440
- "UC": {
441
- "accuracy": 0.812,
442
- "count": 250
443
- },
444
- "US": {
445
- "accuracy": 0.8448,
446
- "count": 1250
447
- }
448
- }
449
- },
450
- "add_random": {
451
- "full_accuracy": 0.91,
452
- "n_examples": 200,
453
- "per_subtask": {
454
- "SA": {
455
- "accuracy": 0.9886621315192744,
456
- "count": 441
457
- },
458
- "SC": {
459
- "accuracy": 0.9936908517350158,
460
- "count": 317
461
- },
462
- "SS": {
463
- "accuracy": 0.9814814814814815,
464
- "count": 54
465
- },
466
- "UC": {
467
- "accuracy": 0.981203007518797,
468
- "count": 532
469
- },
470
- "US": {
471
- "accuracy": 0.9642857142857143,
472
- "count": 56
473
- }
474
- }
475
- },
476
- "add_C3": {
477
- "full_accuracy": 0.772,
478
- "n_examples": 250,
479
- "per_subtask": {
480
- "SA": {
481
- "accuracy": 0.9786666666666667,
482
- "count": 750
483
- },
484
- "SC": {
485
- "accuracy": 0.996,
486
- "count": 250
487
- },
488
- "UC": {
489
- "accuracy": 0.9096638655462185,
490
- "count": 476
491
- },
492
- "US": {
493
- "accuracy": 1.0,
494
- "count": 274
495
- }
496
- }
497
- },
498
- "add_C4": {
499
- "full_accuracy": 0.836,
500
- "n_examples": 250,
501
- "per_subtask": {
502
- "SA": {
503
- "accuracy": 0.986,
504
- "count": 500
505
- },
506
- "SC": {
507
- "accuracy": 0.996,
508
- "count": 250
509
- },
510
- "UC": {
511
- "accuracy": 0.9515885022692889,
512
- "count": 661
513
- },
514
- "US": {
515
- "accuracy": 0.9616519174041298,
516
- "count": 339
517
- }
518
- }
519
- },
520
- "add_C5": {
521
- "full_accuracy": 0.78,
522
- "n_examples": 250,
523
- "per_subtask": {
524
- "SA": {
525
- "accuracy": 0.996,
526
- "count": 250
527
- },
528
- "SC": {
529
- "accuracy": 1.0,
530
- "count": 250
531
- },
532
- "UC": {
533
- "accuracy": 0.9344262295081968,
534
- "count": 732
535
- },
536
- "US": {
537
- "accuracy": 0.9305019305019305,
538
- "count": 518
539
- }
540
- }
541
- },
542
- "add_C6": {
543
- "full_accuracy": 0.86,
544
- "n_examples": 250,
545
- "per_subtask": {
546
- "SC": {
547
- "accuracy": 1.0,
548
- "count": 250
549
- },
550
- "UC": {
551
- "accuracy": 0.9673423423423423,
552
- "count": 888
553
- },
554
- "US": {
555
- "accuracy": 0.9673202614379085,
556
- "count": 612
557
- }
558
- }
559
- },
560
- "sub_M0": {
561
- "full_accuracy": 0.988,
562
- "n_examples": 250,
563
- "per_subtask": {
564
- "MD": {
565
- "accuracy": 0.998671096345515,
566
- "count": 1505
567
- },
568
- "ME": {
569
- "accuracy": 0.9959183673469387,
570
- "count": 245
571
- }
572
- }
573
- },
574
- "sub_M1": {
575
- "full_accuracy": 0.932,
576
- "n_examples": 250,
577
- "per_subtask": {
578
- "MD": {
579
- "accuracy": 0.9929971988795518,
580
- "count": 714
581
- },
582
- "MB": {
583
- "accuracy": 0.9786096256684492,
584
- "count": 374
585
- },
586
- "ME": {
587
- "accuracy": 0.9866666666666667,
588
- "count": 75
589
- },
590
- "UB": {
591
- "accuracy": 0.9948892674616695,
592
- "count": 587
593
- }
594
- }
595
- },
596
- "sub_M2": {
597
- "full_accuracy": 0.896,
598
- "n_examples": 250,
599
- "per_subtask": {
600
- "MD": {
601
- "accuracy": 0.9909747292418772,
602
- "count": 554
603
- },
604
- "MB": {
605
- "accuracy": 0.9743589743589743,
606
- "count": 273
607
- },
608
- "ME": {
609
- "accuracy": 0.9908675799086758,
610
- "count": 219
611
- },
612
- "UB": {
613
- "accuracy": 0.9674418604651163,
614
- "count": 430
615
- },
616
- "UD": {
617
- "accuracy": 0.9963503649635036,
618
- "count": 274
619
- }
620
- }
621
- },
622
- "sub_M3": {
623
- "full_accuracy": 0.272,
624
- "n_examples": 250,
625
- "per_subtask": {
626
- "MD": {
627
- "accuracy": 0.9978165938864629,
628
- "count": 458
629
- },
630
- "MB": {
631
- "accuracy": 0.9923371647509579,
632
- "count": 261
633
- },
634
- "ME": {
635
- "accuracy": 1.0,
636
- "count": 124
637
- },
638
- "UB": {
639
- "accuracy": 0.5438144329896907,
640
- "count": 388
641
- },
642
- "UD": {
643
- "accuracy": 0.9730250481695568,
644
- "count": 519
645
- }
646
- }
647
- },
648
- "sub_M4": {
649
- "full_accuracy": 0.06,
650
- "n_examples": 250,
651
- "per_subtask": {
652
- "MD": {
653
- "accuracy": 1.0,
654
- "count": 500
655
- },
656
- "MB": {
657
- "accuracy": 0.98,
658
- "count": 250
659
- },
660
- "UB": {
661
- "accuracy": 0.192,
662
- "count": 250
663
- },
664
- "UD": {
665
- "accuracy": 0.6426666666666667,
666
- "count": 750
667
- }
668
- }
669
- },
670
- "sub_M5": {
671
- "full_accuracy": 0.016,
672
- "n_examples": 250,
673
- "per_subtask": {
674
- "MD": {
675
- "accuracy": 1.0,
676
- "count": 250
677
- },
678
- "MB": {
679
- "accuracy": 0.984,
680
- "count": 250
681
- },
682
- "UB": {
683
- "accuracy": 0.096,
684
- "count": 250
685
- },
686
- "UD": {
687
- "accuracy": 0.476,
688
- "count": 1000
689
- }
690
- }
691
- },
692
- "sub_random": {
693
- "full_accuracy": 0.935,
694
- "n_examples": 200,
695
- "per_subtask": {
696
- "MD": {
697
- "accuracy": 0.993103448275862,
698
- "count": 580
699
- },
700
- "MB": {
701
- "accuracy": 0.9887640449438202,
702
- "count": 267
703
- },
704
- "ME": {
705
- "accuracy": 1.0,
706
- "count": 63
707
- },
708
- "UB": {
709
- "accuracy": 0.984375,
710
- "count": 448
711
- },
712
- "UD": {
713
- "accuracy": 1.0,
714
- "count": 42
715
- }
716
- }
717
- },
718
- "sub_B3": {
719
- "full_accuracy": 0.724,
720
- "n_examples": 250,
721
- "per_subtask": {
722
- "MD": {
723
- "accuracy": 0.9986666666666667,
724
- "count": 750
725
- },
726
- "MB": {
727
- "accuracy": 0.996,
728
- "count": 250
729
- },
730
- "UB": {
731
- "accuracy": 0.8700787401574803,
732
- "count": 508
733
- },
734
- "UD": {
735
- "accuracy": 0.9628099173553719,
736
- "count": 242
737
- }
738
- }
739
- },
740
- "sub_B4": {
741
- "full_accuracy": 0.56,
742
- "n_examples": 250,
743
- "per_subtask": {
744
- "MD": {
745
- "accuracy": 0.988,
746
- "count": 500
747
- },
748
- "MB": {
749
- "accuracy": 0.976,
750
- "count": 250
751
- },
752
- "UB": {
753
- "accuracy": 0.8257328990228013,
754
- "count": 614
755
- },
756
- "UD": {
757
- "accuracy": 0.8860103626943006,
758
- "count": 386
759
- }
760
- }
761
- },
762
- "sub_B5": {
763
- "full_accuracy": 0.52,
764
- "n_examples": 250,
765
- "per_subtask": {
766
- "MD": {
767
- "accuracy": 1.0,
768
- "count": 250
769
- },
770
- "MB": {
771
- "accuracy": 0.992,
772
- "count": 250
773
- },
774
- "UB": {
775
- "accuracy": 0.8597168597168597,
776
- "count": 777
777
- },
778
- "UD": {
779
- "accuracy": 0.8752642706131079,
780
- "count": 473
781
- }
782
- }
783
- }
784
- },
785
- "summary": {
786
- "overall_accuracy": 0.6812962962962963,
787
- "total_examples": 5400,
788
- "n_splits": 22
789
- }
790
- },
791
- "sorl_eval": {
792
- "config": {
793
- "ops": "add_sub",
794
- "K": 1,
795
- "mode": "sorl",
796
- "n_digits": 6,
797
- "n_per_split": 250
798
- },
799
- "splits": {
800
- "add_S0": {
801
- "full_accuracy": 0.0,
802
- "n_examples": 250,
803
- "per_subtask": {
804
- "SA": {
805
- "accuracy": 0.2593565331582403,
806
- "count": 1523
807
- },
808
- "SS": {
809
- "accuracy": 0.23348017621145375,
810
- "count": 227
811
- }
812
- }
813
- },
814
- "add_S1": {
815
- "full_accuracy": 0.0,
816
- "n_examples": 250,
817
- "per_subtask": {
818
- "SA": {
819
- "accuracy": 0.31365313653136534,
820
- "count": 542
821
- },
822
- "SC": {
823
- "accuracy": 0.09112709832134293,
824
- "count": 417
825
- },
826
- "SS": {
827
- "accuracy": 0.12857142857142856,
828
- "count": 70
829
- },
830
- "UC": {
831
- "accuracy": 0.2565880721220527,
832
- "count": 721
833
- }
834
- }
835
- },
836
- "add_S2": {
837
- "full_accuracy": 0.0,
838
- "n_examples": 250,
839
- "per_subtask": {
840
- "SA": {
841
- "accuracy": 0.29891304347826086,
842
- "count": 368
843
- },
844
- "SC": {
845
- "accuracy": 0.13707165109034267,
846
- "count": 321
847
- },
848
- "SS": {
849
- "accuracy": 0.20614035087719298,
850
- "count": 228
851
- },
852
- "UC": {
853
- "accuracy": 0.3465160075329567,
854
- "count": 531
855
- },
856
- "US": {
857
- "accuracy": 0.0695364238410596,
858
- "count": 302
859
- }
860
- }
861
- },
862
- "add_S3": {
863
- "full_accuracy": 0.0,
864
- "n_examples": 250,
865
- "per_subtask": {
866
- "SA": {
867
- "accuracy": 0.2931596091205212,
868
- "count": 307
869
- },
870
- "SC": {
871
- "accuracy": 0.12027491408934708,
872
- "count": 291
873
- },
874
- "SS": {
875
- "accuracy": 0.3185840707964602,
876
- "count": 113
877
- },
878
- "UC": {
879
- "accuracy": 0.40993788819875776,
880
- "count": 483
881
- },
882
- "US": {
883
- "accuracy": 0.04316546762589928,
884
- "count": 556
885
- }
886
- }
887
- },
888
- "add_S4": {
889
- "full_accuracy": 0.0,
890
- "n_examples": 250,
891
- "per_subtask": {
892
- "SA": {
893
- "accuracy": 0.2857142857142857,
894
- "count": 238
895
- },
896
- "SC": {
897
- "accuracy": 0.08487084870848709,
898
- "count": 271
899
- },
900
- "SS": {
901
- "accuracy": 0.4576271186440678,
902
- "count": 59
903
- },
904
- "UC": {
905
- "accuracy": 0.5308641975308642,
906
- "count": 405
907
- },
908
- "US": {
909
- "accuracy": 0.04504504504504504,
910
- "count": 777
911
- }
912
- }
913
- },
914
- "add_S5": {
915
- "full_accuracy": 0.0,
916
- "n_examples": 250,
917
- "per_subtask": {
918
- "SA": {
919
- "accuracy": 0.588,
920
- "count": 250
921
- },
922
- "SC": {
923
- "accuracy": 0.084,
924
- "count": 250
925
- },
926
- "UC": {
927
- "accuracy": 0.488,
928
- "count": 250
929
- },
930
- "US": {
931
- "accuracy": 0.059,
932
- "count": 1000
933
- }
934
- }
935
- },
936
- "add_S6": {
937
- "full_accuracy": 0.0,
938
- "n_examples": 250,
939
- "per_subtask": {
940
- "SC": {
941
- "accuracy": 0.08,
942
- "count": 250
943
- },
944
- "UC": {
945
- "accuracy": 0.992,
946
- "count": 250
947
- },
948
- "US": {
949
- "accuracy": 0.1616,
950
- "count": 1250
951
- }
952
- }
953
- },
954
- "add_random": {
955
- "full_accuracy": 0.0,
956
- "n_examples": 200,
957
- "per_subtask": {
958
- "SA": {
959
- "accuracy": 0.2925170068027211,
960
- "count": 441
961
- },
962
- "SC": {
963
- "accuracy": 0.11356466876971609,
964
- "count": 317
965
- },
966
- "SS": {
967
- "accuracy": 0.2962962962962963,
968
- "count": 54
969
- },
970
- "UC": {
971
- "accuracy": 0.2725563909774436,
972
- "count": 532
973
- },
974
- "US": {
975
- "accuracy": 0.07142857142857142,
976
- "count": 56
977
- }
978
- }
979
- },
980
- "add_C3": {
981
- "full_accuracy": 0.0,
982
- "n_examples": 250,
983
- "per_subtask": {
984
- "SA": {
985
- "accuracy": 0.33866666666666667,
986
- "count": 750
987
- },
988
- "SC": {
989
- "accuracy": 0.044,
990
- "count": 250
991
- },
992
- "UC": {
993
- "accuracy": 0.17016806722689076,
994
- "count": 476
995
- },
996
- "US": {
997
- "accuracy": 0.0036496350364963502,
998
- "count": 274
999
- }
1000
- }
1001
- },
1002
- "add_C4": {
1003
- "full_accuracy": 0.0,
1004
- "n_examples": 250,
1005
- "per_subtask": {
1006
- "SA": {
1007
- "accuracy": 0.412,
1008
- "count": 500
1009
- },
1010
- "SC": {
1011
- "accuracy": 0.056,
1012
- "count": 250
1013
- },
1014
- "UC": {
1015
- "accuracy": 0.21331316187594554,
1016
- "count": 661
1017
- },
1018
- "US": {
1019
- "accuracy": 0.0,
1020
- "count": 339
1021
- }
1022
- }
1023
- },
1024
- "add_C5": {
1025
- "full_accuracy": 0.0,
1026
- "n_examples": 250,
1027
- "per_subtask": {
1028
- "SA": {
1029
- "accuracy": 0.62,
1030
- "count": 250
1031
- },
1032
- "SC": {
1033
- "accuracy": 0.092,
1034
- "count": 250
1035
- },
1036
- "UC": {
1037
- "accuracy": 0.22814207650273224,
1038
- "count": 732
1039
- },
1040
- "US": {
1041
- "accuracy": 0.0694980694980695,
1042
- "count": 518
1043
- }
1044
- }
1045
- },
1046
- "add_C6": {
1047
- "full_accuracy": 0.0,
1048
- "n_examples": 250,
1049
- "per_subtask": {
1050
- "SC": {
1051
- "accuracy": 0.1,
1052
- "count": 250
1053
- },
1054
- "UC": {
1055
- "accuracy": 0.3536036036036036,
1056
- "count": 888
1057
- },
1058
- "US": {
1059
- "accuracy": 0.1323529411764706,
1060
- "count": 612
1061
- }
1062
- }
1063
- },
1064
- "sub_M0": {
1065
- "full_accuracy": 0.0,
1066
- "n_examples": 250,
1067
- "per_subtask": {
1068
- "MD": {
1069
- "accuracy": 0.3282392026578073,
1070
- "count": 1505
1071
- },
1072
- "ME": {
1073
- "accuracy": 0.5918367346938775,
1074
- "count": 245
1075
- }
1076
- }
1077
- },
1078
- "sub_M1": {
1079
- "full_accuracy": 0.0,
1080
- "n_examples": 250,
1081
- "per_subtask": {
1082
- "MD": {
1083
- "accuracy": 0.4887955182072829,
1084
- "count": 714
1085
- },
1086
- "MB": {
1087
- "accuracy": 0.26737967914438504,
1088
- "count": 374
1089
- },
1090
- "ME": {
1091
- "accuracy": 0.52,
1092
- "count": 75
1093
- },
1094
- "UB": {
1095
- "accuracy": 0.1839863713798978,
1096
- "count": 587
1097
- }
1098
- }
1099
- },
1100
- "sub_M2": {
1101
- "full_accuracy": 0.0,
1102
- "n_examples": 250,
1103
- "per_subtask": {
1104
- "MD": {
1105
- "accuracy": 0.5379061371841155,
1106
- "count": 554
1107
- },
1108
- "MB": {
1109
- "accuracy": 0.18681318681318682,
1110
- "count": 273
1111
- },
1112
- "ME": {
1113
- "accuracy": 0.410958904109589,
1114
- "count": 219
1115
- },
1116
- "UB": {
1117
- "accuracy": 0.19534883720930232,
1118
- "count": 430
1119
- },
1120
- "UD": {
1121
- "accuracy": 0.4124087591240876,
1122
- "count": 274
1123
- }
1124
- }
1125
- },
1126
- "sub_M3": {
1127
- "full_accuracy": 0.0,
1128
- "n_examples": 250,
1129
- "per_subtask": {
1130
- "MD": {
1131
- "accuracy": 0.611353711790393,
1132
- "count": 458
1133
- },
1134
- "MB": {
1135
- "accuracy": 0.19157088122605365,
1136
- "count": 261
1137
- },
1138
- "ME": {
1139
- "accuracy": 0.41935483870967744,
1140
- "count": 124
1141
- },
1142
- "UB": {
1143
- "accuracy": 0.18814432989690721,
1144
- "count": 388
1145
- },
1146
- "UD": {
1147
- "accuracy": 0.23121387283236994,
1148
- "count": 519
1149
- }
1150
- }
1151
- },
1152
- "sub_M4": {
1153
- "full_accuracy": 0.0,
1154
- "n_examples": 250,
1155
- "per_subtask": {
1156
- "MD": {
1157
- "accuracy": 0.602,
1158
- "count": 500
1159
- },
1160
- "MB": {
1161
- "accuracy": 0.16,
1162
- "count": 250
1163
- },
1164
- "UB": {
1165
- "accuracy": 0.06,
1166
- "count": 250
1167
- },
1168
- "UD": {
1169
- "accuracy": 0.13466666666666666,
1170
- "count": 750
1171
- }
1172
- }
1173
- },
1174
- "sub_M5": {
1175
- "full_accuracy": 0.0,
1176
- "n_examples": 250,
1177
- "per_subtask": {
1178
- "MD": {
1179
- "accuracy": 1.0,
1180
- "count": 250
1181
- },
1182
- "MB": {
1183
- "accuracy": 0.072,
1184
- "count": 250
1185
- },
1186
- "UB": {
1187
- "accuracy": 0.084,
1188
- "count": 250
1189
- },
1190
- "UD": {
1191
- "accuracy": 0.413,
1192
- "count": 1000
1193
- }
1194
- }
1195
- },
1196
- "sub_random": {
1197
- "full_accuracy": 0.0,
1198
- "n_examples": 200,
1199
- "per_subtask": {
1200
- "MD": {
1201
- "accuracy": 0.47586206896551725,
1202
- "count": 580
1203
- },
1204
- "MB": {
1205
- "accuracy": 0.20599250936329588,
1206
- "count": 267
1207
- },
1208
- "ME": {
1209
- "accuracy": 0.5714285714285714,
1210
- "count": 63
1211
- },
1212
- "UB": {
1213
- "accuracy": 0.18080357142857142,
1214
- "count": 448
1215
- },
1216
- "UD": {
1217
- "accuracy": 0.30952380952380953,
1218
- "count": 42
1219
- }
1220
- }
1221
- },
1222
- "sub_B3": {
1223
- "full_accuracy": 0.0,
1224
- "n_examples": 250,
1225
- "per_subtask": {
1226
- "MD": {
1227
- "accuracy": 0.44,
1228
- "count": 750
1229
- },
1230
- "MB": {
1231
- "accuracy": 0.212,
1232
- "count": 250
1233
- },
1234
- "UB": {
1235
- "accuracy": 0.20078740157480315,
1236
- "count": 508
1237
- },
1238
- "UD": {
1239
- "accuracy": 0.16942148760330578,
1240
- "count": 242
1241
- }
1242
- }
1243
- },
1244
- "sub_B4": {
1245
- "full_accuracy": 0.0,
1246
- "n_examples": 250,
1247
- "per_subtask": {
1248
- "MD": {
1249
- "accuracy": 0.622,
1250
- "count": 500
1251
- },
1252
- "MB": {
1253
- "accuracy": 0.184,
1254
- "count": 250
1255
- },
1256
- "UB": {
1257
- "accuracy": 0.14006514657980457,
1258
- "count": 614
1259
- },
1260
- "UD": {
1261
- "accuracy": 0.19948186528497408,
1262
- "count": 386
1263
- }
1264
- }
1265
- },
1266
- "sub_B5": {
1267
- "full_accuracy": 0.0,
1268
- "n_examples": 250,
1269
- "per_subtask": {
1270
- "MD": {
1271
- "accuracy": 1.0,
1272
- "count": 250
1273
- },
1274
- "MB": {
1275
- "accuracy": 0.068,
1276
- "count": 250
1277
- },
1278
- "UB": {
1279
- "accuracy": 0.17631917631917632,
1280
- "count": 777
1281
- },
1282
- "UD": {
1283
- "accuracy": 0.3678646934460888,
1284
- "count": 473
1285
- }
1286
- }
1287
- }
1288
- },
1289
- "summary": {
1290
- "overall_accuracy": 0.0,
1291
- "total_examples": 5400,
1292
- "n_splits": 22
1293
- }
1294
- },
1295
- "sorl_overall_accuracy": 0.0,
1296
- "sft_overall_accuracy": 0.6812962962962963
1297
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
add_sub_sorl_v1_abs10_K1_10K/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdc469755c92ef1653fb3f6ae13eac568959c2ae6791d4bfece6a727903c7bc8
3
- size 650303660
 
 
 
 
add_sub_sorl_v1_abs10_K1_10K/train_config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "mode": "sorl",
3
- "ops": "add_sub",
4
- "n_digits": 6,
5
- "n_layer": 2,
6
- "n_head": 3,
7
- "n_embd": 510,
8
- "abs_vocab": 10,
9
- "K": 1,
10
- "alpha_info_gain": 10.0,
11
- "alpha_abs": 0.1,
12
- "alpha_soft_zipf": 1.0,
13
- "batch_size": 64,
14
- "num_epochs": 10,
15
- "dataset_size": 10000,
16
- "lr": 8e-05,
17
- "output_dir": "ckpt/sweep/as_sorl_abs10_K1_10K",
18
- "device": "cuda",
19
- "push_to_hub": true,
20
- "no_wandb": false,
21
- "n_params": 162499262,
22
- "run_name": "add_sub_sorl_v1_abs10_K1_10K",
23
- "git_commit": "800625019270114adcda289bbd550c4f1109a514",
24
- "timestamp": "2026-04-11T21:33:55.579231+00:00",
25
- "tokenizer": "Qwen/Qwen3-0.6B",
26
- "dataset_repo": "thoughtworks/arithmetic-sorl-data",
27
- "dataset_config": "add_sub_6digit",
28
- "model_repo": "thoughtworks/arithmetic-sorl",
29
- "trainer_version": "v1",
30
- "wandb_run_id": "ulvxvuq3",
31
- "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/ulvxvuq3",
32
- "final_accuracy": 0.0,
33
- "sft_accuracy": 0.6812962962962963,
34
- "eval_method": "ArithmeticEvaluator"
35
- }