amirali1985 commited on
Commit
f7a9e93
·
verified ·
1 Parent(s): f9256cb

Delete folder add_sub_sorl_v1_abs10_25K with huggingface_hub

Browse files
add_sub_sorl_v1_abs10_25K/config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "architectures": [
3
- "SorlModelWrapper"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "bos_token_id": null,
8
- "dtype": "float32",
9
- "eos_token_id": null,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 510,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 2040,
15
- "layer_types": [
16
- "full_attention",
17
- "full_attention"
18
- ],
19
- "max_position_embeddings": 128,
20
- "max_window_layers": 28,
21
- "model_type": "qwen3",
22
- "num_attention_heads": 3,
23
- "num_hidden_layers": 2,
24
- "num_key_value_heads": 3,
25
- "pad_token_id": null,
26
- "rms_norm_eps": 1e-06,
27
- "rope_parameters": {
28
- "rope_theta": 10000.0,
29
- "rope_type": "default"
30
- },
31
- "sliding_window": null,
32
- "tie_word_embeddings": false,
33
- "transformers_version": "5.5.0",
34
- "use_cache": true,
35
- "use_sliding_window": false,
36
- "vocab_size": 151654
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
add_sub_sorl_v1_abs10_25K/generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "output_attentions": false,
4
- "output_hidden_states": false,
5
- "transformers_version": "5.5.0",
6
- "use_cache": true
7
- }
 
 
 
 
 
 
 
 
add_sub_sorl_v1_abs10_25K/metrics.json DELETED
@@ -1,1617 +0,0 @@
1
- {
2
- "history": {
3
- "step": [
4
- 50,
5
- 100,
6
- 150,
7
- 200,
8
- 250,
9
- 300,
10
- 350,
11
- 441,
12
- 491,
13
- 541,
14
- 591,
15
- 641,
16
- 691,
17
- 741,
18
- 832,
19
- 882,
20
- 932,
21
- 982,
22
- 1032,
23
- 1082,
24
- 1132,
25
- 1223,
26
- 1273,
27
- 1323,
28
- 1373,
29
- 1423,
30
- 1473,
31
- 1523,
32
- 1614,
33
- 1664,
34
- 1714,
35
- 1764,
36
- 1814,
37
- 1864,
38
- 1914,
39
- 2005,
40
- 2055,
41
- 2105,
42
- 2155,
43
- 2205,
44
- 2255,
45
- 2305,
46
- 2396,
47
- 2446,
48
- 2496,
49
- 2546,
50
- 2596,
51
- 2646,
52
- 2696,
53
- 2787,
54
- 2837,
55
- 2887,
56
- 2937,
57
- 2987,
58
- 3037,
59
- 3087,
60
- 3178,
61
- 3228,
62
- 3278,
63
- 3328,
64
- 3378,
65
- 3428,
66
- 3478,
67
- 3569,
68
- 3619,
69
- 3669,
70
- 3719,
71
- 3769,
72
- 3819,
73
- 3869
74
- ],
75
- "loss": [
76
- 8.62352180480957,
77
- 3.690471649169922,
78
- 3.1680498123168945,
79
- 2.650057792663574,
80
- 2.4546055793762207,
81
- 2.2286038398742676,
82
- 2.2743122577667236,
83
- 1.4608670473098755,
84
- 1.8613238334655762,
85
- 2.046741485595703,
86
- 1.4064021110534668,
87
- 1.2203834056854248,
88
- 0.418628990650177,
89
- -0.6658121347427368,
90
- -6.851266860961914,
91
- -8.283329010009766,
92
- -9.706988334655762,
93
- -10.902172088623047,
94
- -11.940679550170898,
95
- -12.042911529541016,
96
- -13.00434398651123,
97
- -12.94082260131836,
98
- -13.265328407287598,
99
- -13.784520149230957,
100
- -13.905801773071289,
101
- -13.609413146972656,
102
- -13.715292930603027,
103
- -14.133524894714355,
104
- -14.123821258544922,
105
- -14.485600471496582,
106
- -13.75143814086914,
107
- -14.315217018127441,
108
- -14.236422538757324,
109
- -13.65060806274414,
110
- -13.904733657836914,
111
- -12.59363842010498,
112
- -10.049785614013672,
113
- -6.803697109222412,
114
- -5.973206043243408,
115
- -5.757765293121338,
116
- -5.669365406036377,
117
- -5.0058441162109375,
118
- -5.555595397949219,
119
- -5.24139404296875,
120
- -5.709163665771484,
121
- -5.130561351776123,
122
- -4.8264265060424805,
123
- -4.819240570068359,
124
- -4.386547565460205,
125
- -4.0266289710998535,
126
- -3.970654010772705,
127
- -3.916658639907837,
128
- -3.5560050010681152,
129
- -3.3773417472839355,
130
- -3.473850727081299,
131
- -3.083798408508301,
132
- -3.1898467540740967,
133
- -2.947633981704712,
134
- -2.676503896713257,
135
- -2.8399879932403564,
136
- -2.484224319458008,
137
- -2.9060072898864746,
138
- -2.8863208293914795,
139
- -2.229196310043335,
140
- -2.3845486640930176,
141
- -2.294647216796875,
142
- -2.293665647506714,
143
- -2.026386022567749,
144
- -2.0004825592041016,
145
- -2.2800354957580566
146
- ],
147
- "base_loss": [
148
- 6.053040504455566,
149
- 2.548206090927124,
150
- 1.8746731281280518,
151
- 1.9164220094680786,
152
- 1.9539568424224854,
153
- 1.8462482690811157,
154
- 1.808115005493164,
155
- 1.893051266670227,
156
- 1.8119051456451416,
157
- 1.7294063568115234,
158
- 1.7713595628738403,
159
- 1.796744704246521,
160
- 1.7207460403442383,
161
- 1.7722727060317993,
162
- 1.8661140203475952,
163
- 1.768760323524475,
164
- 1.732330083847046,
165
- 1.7520471811294556,
166
- 1.7128150463104248,
167
- 1.6991585493087769,
168
- 1.7630579471588135,
169
- 1.6526309251785278,
170
- 1.7100765705108643,
171
- 1.7131121158599854,
172
- 1.7331666946411133,
173
- 1.635122299194336,
174
- 1.6380380392074585,
175
- 1.6857143640518188,
176
- 1.6658002138137817,
177
- 1.696215271949768,
178
- 1.5709830522537231,
179
- 1.6710329055786133,
180
- 1.6389342546463013,
181
- 1.6025257110595703,
182
- 1.59456467628479,
183
- 1.466896653175354,
184
- 1.178689956665039,
185
- 0.8142209053039551,
186
- 0.7484551668167114,
187
- 0.7151921987533569,
188
- 0.6592262983322144,
189
- 0.6459842324256897,
190
- 0.6727160215377808,
191
- 0.6381062269210815,
192
- 0.6567009687423706,
193
- 0.5900193452835083,
194
- 0.5629978775978088,
195
- 0.5609976649284363,
196
- 0.5069475769996643,
197
- 0.49048009514808655,
198
- 0.4646568298339844,
199
- 0.4576375186443329,
200
- 0.4242294132709503,
201
- 0.4161135256290436,
202
- 0.4000745117664337,
203
- 0.3608091175556183,
204
- 0.375986248254776,
205
- 0.3414735198020935,
206
- 0.3162396252155304,
207
- 0.3193356692790985,
208
- 0.3032837212085724,
209
- 0.3419392704963684,
210
- 0.3334921896457672,
211
- 0.2505092918872833,
212
- 0.26888325810432434,
213
- 0.2601575553417206,
214
- 0.26341256499290466,
215
- 0.2315676510334015,
216
- 0.23400746285915375,
217
- 0.263536274433136
218
- ],
219
- "info_loss": [
220
- -0.1942458152770996,
221
- -0.09092450141906738,
222
- -0.061556458473205566,
223
- -0.11549854278564453,
224
- -0.13766586780548096,
225
- -0.149794340133667,
226
- -0.14053452014923096,
227
- -0.22515976428985596,
228
- -0.16753578186035156,
229
- -0.13045012950897217,
230
- -0.18136727809906006,
231
- -0.15206265449523926,
232
- -0.1525585651397705,
233
- -0.2565147876739502,
234
- -0.888375461101532,
235
- -1.0233092308044434,
236
- -1.1549559831619263,
237
- -1.2769179344177246,
238
- -1.3757619857788086,
239
- -1.3825740814208984,
240
- -1.485039234161377,
241
- -1.4669829607009888,
242
- -1.505444049835205,
243
- -1.5557503700256348,
244
- -1.5698777437210083,
245
- -1.5294302701950073,
246
- -1.5414831638336182,
247
- -1.5889732837677002,
248
- -1.583970308303833,
249
- -1.624945044517517,
250
- -1.5382747650146484,
251
- -1.604049801826477,
252
- -1.5935652256011963,
253
- -1.5305627584457397,
254
- -1.556932806968689,
255
- -1.4123308658599854,
256
- -1.128017544746399,
257
- -0.7671623229980469,
258
- -0.6771054863929749,
259
- -0.6523333191871643,
260
- -0.6362993121147156,
261
- -0.5681959390640259,
262
- -0.6264157891273499,
263
- -0.5906312465667725,
264
- -0.6391703486442566,
265
- -0.57457435131073,
266
- -0.5411813259124756,
267
- -0.5413545966148376,
268
- -0.49299928545951843,
269
- -0.4549095034599304,
270
- -0.44715359807014465,
271
- -0.44042420387268066,
272
- -0.3996829688549042,
273
- -0.38102421164512634,
274
- -0.38971224427223206,
275
- -0.34713271260261536,
276
- -0.3601379096508026,
277
- -0.3310556411743164,
278
- -0.30128398537635803,
279
- -0.3171699047088623,
280
- -0.27994272112846375,
281
- -0.3267364501953125,
282
- -0.32436403632164,
283
- -0.24935299158096313,
284
- -0.26726189255714417,
285
- -0.257479190826416,
286
- -0.259423166513443,
287
- -0.22976627945899963,
288
- -0.22536085546016693,
289
- -0.25629565119743347
290
- ],
291
- "abs_loss": [
292
- 2.104123115539551,
293
- 1.8524173498153687,
294
- 1.8307222127914429,
295
- 1.8496146202087402,
296
- 1.7867794036865234,
297
- 1.8567798137664795,
298
- 1.8096632957458496,
299
- 1.64436674118042,
300
- 1.5395714044570923,
301
- 1.423869013786316,
302
- 1.209989309310913,
303
- 0.868823766708374,
304
- 0.4312920570373535,
305
- 0.49663904309272766,
306
- 0.513419508934021,
307
- 0.4742699861526489,
308
- 0.4636421799659729,
309
- 0.4861566424369812,
310
- 0.40548932552337646,
311
- 0.32250314950942993,
312
- 0.33680111169815063,
313
- 0.2677531838417053,
314
- 0.28694623708724976,
315
- 0.23017606139183044,
316
- 0.2637716829776764,
317
- 0.2513338029384613,
318
- 0.3043477237224579,
319
- 0.3003198504447937,
320
- 0.2580410838127136,
321
- 0.28733474016189575,
322
- 0.34056228399276733,
323
- 0.31581076979637146,
324
- 0.2850354313850403,
325
- 0.27487286925315857,
326
- 0.22147104144096375,
327
- 0.2475605010986328,
328
- 0.29944801330566406,
329
- 0.3343636691570282,
330
- 0.24871471524238586,
331
- 0.23551389575004578,
332
- 0.16640277206897736,
333
- 0.09375327825546265,
334
- 0.09696707129478455,
335
- 0.10278100520372391,
336
- 0.05310269445180893,
337
- 0.04737993702292442,
338
- 0.08958013355731964,
339
- 0.04776681959629059,
340
- 0.06892611086368561,
341
- 0.04011531174182892,
342
- 0.040765777230262756,
343
- 0.040000010281801224,
344
- 0.04502242058515549,
345
- 0.031056642532348633,
346
- 0.0263521671295166,
347
- 0.028736591339111328,
348
- 0.05925783887505531,
349
- 0.015155295841395855,
350
- 0.0065899500623345375,
351
- 0.01911187544465065,
352
- 0.011467419564723969,
353
- 0.0392417348921299,
354
- 0.03916466236114502,
355
- 0.014848295599222183,
356
- 0.011679599061608315,
357
- 0.01484740898013115,
358
- 0.009665056131780148,
359
- 0.009428434073925018,
360
- 0.009266960434615612,
361
- 0.01994548924267292
362
- ],
363
- "zipf_loss": [
364
- 4.30252742767334,
365
- 1.8662687540054321,
366
- 1.725869059562683,
367
- 1.7036597728729248,
368
- 1.69862961769104,
369
- 1.6946208477020264,
370
- 1.690576195716858,
371
- 1.654976725578308,
372
- 1.57081937789917,
373
- 1.4794496297836304,
374
- 1.32771635055542,
375
- 0.8573828339576721,
376
- 0.18033941090106964,
377
- 0.07739913463592529,
378
- 0.11503171920776367,
379
- 0.13357549905776978,
380
- 0.06387755274772644,
381
- 0.06634452939033508,
382
- 0.06357603520154953,
383
- 0.0514199323952198,
384
- 0.049310360103845596,
385
- 0.04960052669048309,
386
- 0.050341784954071045,
387
- 0.03685367852449417,
388
- 0.03343234211206436,
389
- 0.024634700268507004,
390
- 0.031066298484802246,
391
- 0.04046095907688141,
392
- 0.02427714876830578,
393
- 0.03890116512775421,
394
- 0.026270316913723946,
395
- 0.022668270394206047,
396
- 0.03179142251610756,
397
- 0.0250064916908741,
398
- 0.0478830523788929,
399
- 0.03801628574728966,
400
- 0.021755080670118332,
401
- 0.02026906982064247,
402
- 0.0245220847427845,
403
- 0.026824040338397026,
404
- 0.017761364579200745,
405
- 0.02075556106865406,
406
- 0.02614957094192505,
407
- 0.016534019261598587,
408
- 0.02052861452102661,
409
- 0.02042551152408123,
410
- 0.013430889695882797,
411
- 0.028531325981020927,
412
- 0.029604997485876083,
413
- 0.027974452823400497,
414
- 0.032148949801921844,
415
- 0.02594580501317978,
416
- 0.01209304854273796,
417
- 0.013681085780262947,
418
- 0.0205619428306818,
419
- 0.02384582720696926,
420
- 0.029620153829455376,
421
- 0.019933151081204414,
422
- 0.01943730190396309,
423
- 0.010464150458574295,
424
- 0.010772374458611012,
425
- 0.015493839979171753,
426
- 0.019910695031285286,
427
- 0.012339326553046703,
428
- 0.018018856644630432,
429
- 0.01850241795182228,
430
- 0.03618703410029411,
431
- 0.038766246289014816,
432
- 0.018191780894994736,
433
- 0.01739019714295864
434
- ],
435
- "denoise_loss": [],
436
- "ortho_loss": [
437
- 0.21182507276535034,
438
- 0.08677027374505997,
439
- 0.0639583095908165,
440
- 0.049765028059482574,
441
- 0.04796711727976799,
442
- 0.04935513809323311,
443
- 0.06076208874583244,
444
- 0.0743061751127243,
445
- 0.0790928527712822,
446
- 0.08323631435632706,
447
- 0.08230259269475937,
448
- 0.08976364135742188,
449
- 0.09978095442056656,
450
- 0.10736638307571411,
451
- 0.10517165064811707,
452
- 0.10643617808818817,
453
- 0.10798347741365433,
454
- 0.10828098654747009,
455
- 0.11405002325773239,
456
- 0.12065489590167999,
457
- 0.12608422338962555,
458
- 0.1312580555677414,
459
- 0.13284286856651306,
460
- 0.13367591798305511,
461
- 0.135928213596344,
462
- 0.13187600672245026,
463
- 0.12774690985679626,
464
- 0.13383778929710388,
465
- 0.12453404814004898,
466
- 0.12623989582061768,
467
- 0.12295584380626678,
468
- 0.12713001668453217,
469
- 0.1253580003976822,
470
- 0.12130653113126755,
471
- 0.12213171273469925,
472
- 0.13524377346038818,
473
- 0.13765917718410492,
474
- 0.13814543187618256,
475
- 0.13984543085098267,
476
- 0.14626188576221466,
477
- 0.14723531901836395,
478
- 0.14748001098632812,
479
- 0.14777269959449768,
480
- 0.1560324877500534,
481
- 0.15685218572616577,
482
- 0.16409175097942352,
483
- 0.16843700408935547,
484
- 0.16872097551822662,
485
- 0.16776931285858154,
486
- 0.16298750042915344,
487
- 0.16492323577404022,
488
- 0.16564075648784637,
489
- 0.17178279161453247,
490
- 0.17316776514053345,
491
- 0.17469987273216248,
492
- 0.1731606125831604,
493
- 0.17828720808029175,
494
- 0.1789001077413559,
495
- 0.18193966150283813,
496
- 0.18010179698467255,
497
- 0.18138062953948975,
498
- 0.18183080852031708,
499
- 0.18499328196048737,
500
- 0.18659844994544983,
501
- 0.18622401356697083,
502
- 0.19045618176460266,
503
- 0.1923171579837799,
504
- 0.1990203708410263,
505
- 0.2016829252243042,
506
- 0.20334956049919128
507
- ],
508
- "lr": [
509
- 7.840000000000001e-05,
510
- 8e-05,
511
- 8e-05,
512
- 8e-05,
513
- 8e-05,
514
- 8e-05,
515
- 8e-05,
516
- 8e-05,
517
- 8e-05,
518
- 8e-05,
519
- 8e-05,
520
- 8e-05,
521
- 8e-05,
522
- 8e-05,
523
- 8e-05,
524
- 8e-05,
525
- 8e-05,
526
- 8e-05,
527
- 8e-05,
528
- 8e-05,
529
- 8e-05,
530
- 8e-05,
531
- 8e-05,
532
- 8e-05,
533
- 8e-05,
534
- 8e-05,
535
- 8e-05,
536
- 8e-05,
537
- 8e-05,
538
- 8e-05,
539
- 8e-05,
540
- 8e-05,
541
- 8e-05,
542
- 8e-05,
543
- 8e-05,
544
- 8e-05,
545
- 8e-05,
546
- 8e-05,
547
- 8e-05,
548
- 8e-05,
549
- 8e-05,
550
- 8e-05,
551
- 7.864766839378239e-05,
552
- 7.63160621761658e-05,
553
- 7.398445595854923e-05,
554
- 7.165284974093265e-05,
555
- 6.932124352331606e-05,
556
- 6.69896373056995e-05,
557
- 6.465803108808292e-05,
558
- 6.041450777202072e-05,
559
- 5.8082901554404154e-05,
560
- 5.5751295336787566e-05,
561
- 5.3419689119171e-05,
562
- 5.108808290155441e-05,
563
- 4.8756476683937825e-05,
564
- 4.642487046632125e-05,
565
- 4.218134715025906e-05,
566
- 3.98497409326425e-05,
567
- 3.7518134715025914e-05,
568
- 3.518652849740933e-05,
569
- 3.285492227979275e-05,
570
- 3.0523316062176166e-05,
571
- 2.8191709844559595e-05,
572
- 2.3948186528497416e-05,
573
- 2.1616580310880825e-05,
574
- 1.9284974093264255e-05,
575
- 1.6953367875647667e-05,
576
- 1.4621761658031097e-05,
577
- 1.2290155440414508e-05,
578
- 9.958549222797919e-06
579
- ],
580
- "emb_lr": [],
581
- "eval_step": [
582
- 350,
583
- 741,
584
- 1132,
585
- 1523,
586
- 1914,
587
- 2305,
588
- 2696,
589
- 3087,
590
- 3478,
591
- 3869
592
- ],
593
- "eval_accuracy": [
594
- 0.03,
595
- 0.03,
596
- 0.53,
597
- 0.7,
598
- 0.88,
599
- 0.92,
600
- 0.96,
601
- 0.92,
602
- 0.97,
603
- 0.96
604
- ]
605
- },
606
- "final_accuracy": 0.95,
607
- "sft_eval": {
608
- "config": {
609
- "ops": "add_sub",
610
- "K": null,
611
- "mode": "sft",
612
- "n_digits": 6,
613
- "n_per_split": 50
614
- },
615
- "splits": {
616
- "add_S0": {
617
- "full_accuracy": 0.76,
618
- "n_examples": 50,
619
- "per_subtask": {
620
- "SA": {
621
- "accuracy": 0.9661016949152542,
622
- "count": 295
623
- },
624
- "SS": {
625
- "accuracy": 0.9454545454545454,
626
- "count": 55
627
- }
628
- }
629
- },
630
- "add_S1": {
631
- "full_accuracy": 0.7,
632
- "n_examples": 50,
633
- "per_subtask": {
634
- "SA": {
635
- "accuracy": 0.9761904761904762,
636
- "count": 126
637
- },
638
- "SC": {
639
- "accuracy": 0.9746835443037974,
640
- "count": 79
641
- },
642
- "SS": {
643
- "accuracy": 0.9523809523809523,
644
- "count": 21
645
- },
646
- "UC": {
647
- "accuracy": 0.9274193548387096,
648
- "count": 124
649
- }
650
- }
651
- },
652
- "add_S2": {
653
- "full_accuracy": 0.4,
654
- "n_examples": 50,
655
- "per_subtask": {
656
- "SA": {
657
- "accuracy": 0.9466666666666667,
658
- "count": 75
659
- },
660
- "SC": {
661
- "accuracy": 0.8548387096774194,
662
- "count": 62
663
- },
664
- "SS": {
665
- "accuracy": 0.717948717948718,
666
- "count": 39
667
- },
668
- "UC": {
669
- "accuracy": 0.8378378378378378,
670
- "count": 111
671
- },
672
- "US": {
673
- "accuracy": 0.9365079365079365,
674
- "count": 63
675
- }
676
- }
677
- },
678
- "add_S3": {
679
- "full_accuracy": 0.28,
680
- "n_examples": 50,
681
- "per_subtask": {
682
- "SA": {
683
- "accuracy": 0.9666666666666667,
684
- "count": 60
685
- },
686
- "SC": {
687
- "accuracy": 0.8596491228070176,
688
- "count": 57
689
- },
690
- "SS": {
691
- "accuracy": 0.9473684210526315,
692
- "count": 19
693
- },
694
- "UC": {
695
- "accuracy": 0.7980769230769231,
696
- "count": 104
697
- },
698
- "US": {
699
- "accuracy": 0.8090909090909091,
700
- "count": 110
701
- }
702
- }
703
- },
704
- "add_S4": {
705
- "full_accuracy": 0.4,
706
- "n_examples": 50,
707
- "per_subtask": {
708
- "SA": {
709
- "accuracy": 1.0,
710
- "count": 48
711
- },
712
- "SC": {
713
- "accuracy": 0.9423076923076923,
714
- "count": 52
715
- },
716
- "SS": {
717
- "accuracy": 0.8571428571428571,
718
- "count": 7
719
- },
720
- "UC": {
721
- "accuracy": 0.7528089887640449,
722
- "count": 89
723
- },
724
- "US": {
725
- "accuracy": 0.7857142857142857,
726
- "count": 154
727
- }
728
- }
729
- },
730
- "add_S5": {
731
- "full_accuracy": 0.36,
732
- "n_examples": 50,
733
- "per_subtask": {
734
- "SA": {
735
- "accuracy": 1.0,
736
- "count": 50
737
- },
738
- "SC": {
739
- "accuracy": 1.0,
740
- "count": 50
741
- },
742
- "UC": {
743
- "accuracy": 0.52,
744
- "count": 50
745
- },
746
- "US": {
747
- "accuracy": 0.63,
748
- "count": 200
749
- }
750
- }
751
- },
752
- "add_S6": {
753
- "full_accuracy": 0.66,
754
- "n_examples": 50,
755
- "per_subtask": {
756
- "SC": {
757
- "accuracy": 1.0,
758
- "count": 50
759
- },
760
- "UC": {
761
- "accuracy": 0.76,
762
- "count": 50
763
- },
764
- "US": {
765
- "accuracy": 0.772,
766
- "count": 250
767
- }
768
- }
769
- },
770
- "add_random": {
771
- "full_accuracy": 0.74,
772
- "n_examples": 200,
773
- "per_subtask": {
774
- "SA": {
775
- "accuracy": 0.9675174013921114,
776
- "count": 431
777
- },
778
- "SC": {
779
- "accuracy": 0.9525316455696202,
780
- "count": 316
781
- },
782
- "SS": {
783
- "accuracy": 0.9487179487179487,
784
- "count": 39
785
- },
786
- "UC": {
787
- "accuracy": 0.9553571428571429,
788
- "count": 560
789
- },
790
- "US": {
791
- "accuracy": 0.9444444444444444,
792
- "count": 54
793
- }
794
- }
795
- },
796
- "add_C3": {
797
- "full_accuracy": 0.6,
798
- "n_examples": 50,
799
- "per_subtask": {
800
- "SA": {
801
- "accuracy": 0.9933333333333333,
802
- "count": 150
803
- },
804
- "SC": {
805
- "accuracy": 1.0,
806
- "count": 50
807
- },
808
- "UC": {
809
- "accuracy": 0.7692307692307693,
810
- "count": 104
811
- },
812
- "US": {
813
- "accuracy": 0.9565217391304348,
814
- "count": 46
815
- }
816
- }
817
- },
818
- "add_C4": {
819
- "full_accuracy": 0.52,
820
- "n_examples": 50,
821
- "per_subtask": {
822
- "SA": {
823
- "accuracy": 0.99,
824
- "count": 100
825
- },
826
- "SC": {
827
- "accuracy": 0.98,
828
- "count": 50
829
- },
830
- "UC": {
831
- "accuracy": 0.8373983739837398,
832
- "count": 123
833
- },
834
- "US": {
835
- "accuracy": 0.7662337662337663,
836
- "count": 77
837
- }
838
- }
839
- },
840
- "add_C5": {
841
- "full_accuracy": 0.4,
842
- "n_examples": 50,
843
- "per_subtask": {
844
- "SA": {
845
- "accuracy": 1.0,
846
- "count": 50
847
- },
848
- "SC": {
849
- "accuracy": 0.94,
850
- "count": 50
851
- },
852
- "UC": {
853
- "accuracy": 0.8181818181818182,
854
- "count": 154
855
- },
856
- "US": {
857
- "accuracy": 0.78125,
858
- "count": 96
859
- }
860
- }
861
- },
862
- "add_C6": {
863
- "full_accuracy": 0.5,
864
- "n_examples": 50,
865
- "per_subtask": {
866
- "SC": {
867
- "accuracy": 1.0,
868
- "count": 50
869
- },
870
- "UC": {
871
- "accuracy": 0.8846153846153846,
872
- "count": 182
873
- },
874
- "US": {
875
- "accuracy": 0.8050847457627118,
876
- "count": 118
877
- }
878
- }
879
- },
880
- "sub_M0": {
881
- "full_accuracy": 0.96,
882
- "n_examples": 50,
883
- "per_subtask": {
884
- "MD": {
885
- "accuracy": 0.9931972789115646,
886
- "count": 294
887
- },
888
- "ME": {
889
- "accuracy": 1.0,
890
- "count": 56
891
- }
892
- }
893
- },
894
- "sub_M1": {
895
- "full_accuracy": 0.86,
896
- "n_examples": 50,
897
- "per_subtask": {
898
- "MD": {
899
- "accuracy": 0.986013986013986,
900
- "count": 143
901
- },
902
- "MB": {
903
- "accuracy": 1.0,
904
- "count": 69
905
- },
906
- "ME": {
907
- "accuracy": 1.0,
908
- "count": 15
909
- },
910
- "UB": {
911
- "accuracy": 0.959349593495935,
912
- "count": 123
913
- }
914
- }
915
- },
916
- "sub_M2": {
917
- "full_accuracy": 0.3,
918
- "n_examples": 50,
919
- "per_subtask": {
920
- "MD": {
921
- "accuracy": 1.0,
922
- "count": 108
923
- },
924
- "MB": {
925
- "accuracy": 0.9807692307692307,
926
- "count": 52
927
- },
928
- "ME": {
929
- "accuracy": 1.0,
930
- "count": 52
931
- },
932
- "UB": {
933
- "accuracy": 0.5862068965517241,
934
- "count": 87
935
- },
936
- "UD": {
937
- "accuracy": 0.9803921568627451,
938
- "count": 51
939
- }
940
- }
941
- },
942
- "sub_M3": {
943
- "full_accuracy": 0.1,
944
- "n_examples": 50,
945
- "per_subtask": {
946
- "MD": {
947
- "accuracy": 1.0,
948
- "count": 94
949
- },
950
- "MB": {
951
- "accuracy": 0.9411764705882353,
952
- "count": 51
953
- },
954
- "ME": {
955
- "accuracy": 1.0,
956
- "count": 25
957
- },
958
- "UB": {
959
- "accuracy": 0.46153846153846156,
960
- "count": 78
961
- },
962
- "UD": {
963
- "accuracy": 0.5882352941176471,
964
- "count": 102
965
- }
966
- }
967
- },
968
- "sub_M4": {
969
- "full_accuracy": 0.04,
970
- "n_examples": 50,
971
- "per_subtask": {
972
- "MD": {
973
- "accuracy": 1.0,
974
- "count": 100
975
- },
976
- "MB": {
977
- "accuracy": 0.98,
978
- "count": 50
979
- },
980
- "UB": {
981
- "accuracy": 0.22,
982
- "count": 50
983
- },
984
- "UD": {
985
- "accuracy": 0.34,
986
- "count": 150
987
- }
988
- }
989
- },
990
- "sub_M5": {
991
- "full_accuracy": 0.0,
992
- "n_examples": 50,
993
- "per_subtask": {
994
- "MD": {
995
- "accuracy": 1.0,
996
- "count": 50
997
- },
998
- "MB": {
999
- "accuracy": 1.0,
1000
- "count": 50
1001
- },
1002
- "UB": {
1003
- "accuracy": 0.26,
1004
- "count": 50
1005
- },
1006
- "UD": {
1007
- "accuracy": 0.275,
1008
- "count": 200
1009
- }
1010
- }
1011
- },
1012
- "sub_random": {
1013
- "full_accuracy": 0.745,
1014
- "n_examples": 200,
1015
- "per_subtask": {
1016
- "MD": {
1017
- "accuracy": 0.9948979591836735,
1018
- "count": 588
1019
- },
1020
- "MB": {
1021
- "accuracy": 0.9738805970149254,
1022
- "count": 268
1023
- },
1024
- "ME": {
1025
- "accuracy": 0.9666666666666667,
1026
- "count": 60
1027
- },
1028
- "UB": {
1029
- "accuracy": 0.8903803131991052,
1030
- "count": 447
1031
- },
1032
- "UD": {
1033
- "accuracy": 0.918918918918919,
1034
- "count": 37
1035
- }
1036
- }
1037
- },
1038
- "sub_B3": {
1039
- "full_accuracy": 0.48,
1040
- "n_examples": 50,
1041
- "per_subtask": {
1042
- "MD": {
1043
- "accuracy": 1.0,
1044
- "count": 150
1045
- },
1046
- "MB": {
1047
- "accuracy": 0.96,
1048
- "count": 50
1049
- },
1050
- "UB": {
1051
- "accuracy": 0.7663551401869159,
1052
- "count": 107
1053
- },
1054
- "UD": {
1055
- "accuracy": 0.8604651162790697,
1056
- "count": 43
1057
- }
1058
- }
1059
- },
1060
- "sub_B4": {
1061
- "full_accuracy": 0.3,
1062
- "n_examples": 50,
1063
- "per_subtask": {
1064
- "MD": {
1065
- "accuracy": 1.0,
1066
- "count": 100
1067
- },
1068
- "MB": {
1069
- "accuracy": 1.0,
1070
- "count": 50
1071
- },
1072
- "UB": {
1073
- "accuracy": 0.7192982456140351,
1074
- "count": 114
1075
- },
1076
- "UD": {
1077
- "accuracy": 0.627906976744186,
1078
- "count": 86
1079
- }
1080
- }
1081
- },
1082
- "sub_B5": {
1083
- "full_accuracy": 0.22,
1084
- "n_examples": 50,
1085
- "per_subtask": {
1086
- "MD": {
1087
- "accuracy": 1.0,
1088
- "count": 50
1089
- },
1090
- "MB": {
1091
- "accuracy": 0.98,
1092
- "count": 50
1093
- },
1094
- "UB": {
1095
- "accuracy": 0.7124183006535948,
1096
- "count": 153
1097
- },
1098
- "UD": {
1099
- "accuracy": 0.6391752577319587,
1100
- "count": 97
1101
- }
1102
- }
1103
- }
1104
- },
1105
- "summary": {
1106
- "overall_accuracy": 0.5278571428571428,
1107
- "total_examples": 1400,
1108
- "n_splits": 22
1109
- }
1110
- },
1111
- "sorl_eval": {
1112
- "config": {
1113
- "ops": "add_sub",
1114
- "K": 4,
1115
- "mode": "sorl",
1116
- "n_digits": 6,
1117
- "n_per_split": 50
1118
- },
1119
- "splits": {
1120
- "add_S0": {
1121
- "full_accuracy": 1.0,
1122
- "n_examples": 50,
1123
- "per_subtask": {
1124
- "SA": {
1125
- "accuracy": 1.0,
1126
- "count": 295
1127
- },
1128
- "SS": {
1129
- "accuracy": 1.0,
1130
- "count": 55
1131
- }
1132
- }
1133
- },
1134
- "add_S1": {
1135
- "full_accuracy": 1.0,
1136
- "n_examples": 50,
1137
- "per_subtask": {
1138
- "SA": {
1139
- "accuracy": 1.0,
1140
- "count": 126
1141
- },
1142
- "SC": {
1143
- "accuracy": 1.0,
1144
- "count": 79
1145
- },
1146
- "SS": {
1147
- "accuracy": 1.0,
1148
- "count": 21
1149
- },
1150
- "UC": {
1151
- "accuracy": 1.0,
1152
- "count": 124
1153
- }
1154
- }
1155
- },
1156
- "add_S2": {
1157
- "full_accuracy": 1.0,
1158
- "n_examples": 50,
1159
- "per_subtask": {
1160
- "SA": {
1161
- "accuracy": 1.0,
1162
- "count": 75
1163
- },
1164
- "SC": {
1165
- "accuracy": 1.0,
1166
- "count": 62
1167
- },
1168
- "SS": {
1169
- "accuracy": 1.0,
1170
- "count": 39
1171
- },
1172
- "UC": {
1173
- "accuracy": 1.0,
1174
- "count": 111
1175
- },
1176
- "US": {
1177
- "accuracy": 1.0,
1178
- "count": 63
1179
- }
1180
- }
1181
- },
1182
- "add_S3": {
1183
- "full_accuracy": 0.96,
1184
- "n_examples": 50,
1185
- "per_subtask": {
1186
- "SA": {
1187
- "accuracy": 1.0,
1188
- "count": 60
1189
- },
1190
- "SC": {
1191
- "accuracy": 1.0,
1192
- "count": 57
1193
- },
1194
- "SS": {
1195
- "accuracy": 1.0,
1196
- "count": 19
1197
- },
1198
- "UC": {
1199
- "accuracy": 0.9807692307692307,
1200
- "count": 104
1201
- },
1202
- "US": {
1203
- "accuracy": 1.0,
1204
- "count": 110
1205
- }
1206
- }
1207
- },
1208
- "add_S4": {
1209
- "full_accuracy": 0.96,
1210
- "n_examples": 50,
1211
- "per_subtask": {
1212
- "SA": {
1213
- "accuracy": 1.0,
1214
- "count": 48
1215
- },
1216
- "SC": {
1217
- "accuracy": 1.0,
1218
- "count": 52
1219
- },
1220
- "SS": {
1221
- "accuracy": 1.0,
1222
- "count": 7
1223
- },
1224
- "UC": {
1225
- "accuracy": 0.9775280898876404,
1226
- "count": 89
1227
- },
1228
- "US": {
1229
- "accuracy": 0.9935064935064936,
1230
- "count": 154
1231
- }
1232
- }
1233
- },
1234
- "add_S5": {
1235
- "full_accuracy": 0.68,
1236
- "n_examples": 50,
1237
- "per_subtask": {
1238
- "SA": {
1239
- "accuracy": 1.0,
1240
- "count": 50
1241
- },
1242
- "SC": {
1243
- "accuracy": 1.0,
1244
- "count": 50
1245
- },
1246
- "UC": {
1247
- "accuracy": 0.7,
1248
- "count": 50
1249
- },
1250
- "US": {
1251
- "accuracy": 0.97,
1252
- "count": 200
1253
- }
1254
- }
1255
- },
1256
- "add_S6": {
1257
- "full_accuracy": 0.8,
1258
- "n_examples": 50,
1259
- "per_subtask": {
1260
- "SC": {
1261
- "accuracy": 1.0,
1262
- "count": 50
1263
- },
1264
- "UC": {
1265
- "accuracy": 0.8,
1266
- "count": 50
1267
- },
1268
- "US": {
1269
- "accuracy": 0.944,
1270
- "count": 250
1271
- }
1272
- }
1273
- },
1274
- "add_random": {
1275
- "full_accuracy": 1.0,
1276
- "n_examples": 200,
1277
- "per_subtask": {
1278
- "SA": {
1279
- "accuracy": 1.0,
1280
- "count": 431
1281
- },
1282
- "SC": {
1283
- "accuracy": 1.0,
1284
- "count": 316
1285
- },
1286
- "SS": {
1287
- "accuracy": 1.0,
1288
- "count": 39
1289
- },
1290
- "UC": {
1291
- "accuracy": 1.0,
1292
- "count": 560
1293
- },
1294
- "US": {
1295
- "accuracy": 1.0,
1296
- "count": 54
1297
- }
1298
- }
1299
- },
1300
- "add_C3": {
1301
- "full_accuracy": 1.0,
1302
- "n_examples": 50,
1303
- "per_subtask": {
1304
- "SA": {
1305
- "accuracy": 1.0,
1306
- "count": 150
1307
- },
1308
- "SC": {
1309
- "accuracy": 1.0,
1310
- "count": 50
1311
- },
1312
- "UC": {
1313
- "accuracy": 1.0,
1314
- "count": 104
1315
- },
1316
- "US": {
1317
- "accuracy": 1.0,
1318
- "count": 46
1319
- }
1320
- }
1321
- },
1322
- "add_C4": {
1323
- "full_accuracy": 0.94,
1324
- "n_examples": 50,
1325
- "per_subtask": {
1326
- "SA": {
1327
- "accuracy": 1.0,
1328
- "count": 100
1329
- },
1330
- "SC": {
1331
- "accuracy": 1.0,
1332
- "count": 50
1333
- },
1334
- "UC": {
1335
- "accuracy": 0.975609756097561,
1336
- "count": 123
1337
- },
1338
- "US": {
1339
- "accuracy": 1.0,
1340
- "count": 77
1341
- }
1342
- }
1343
- },
1344
- "add_C5": {
1345
- "full_accuracy": 0.98,
1346
- "n_examples": 50,
1347
- "per_subtask": {
1348
- "SA": {
1349
- "accuracy": 1.0,
1350
- "count": 50
1351
- },
1352
- "SC": {
1353
- "accuracy": 1.0,
1354
- "count": 50
1355
- },
1356
- "UC": {
1357
- "accuracy": 0.9935064935064936,
1358
- "count": 154
1359
- },
1360
- "US": {
1361
- "accuracy": 1.0,
1362
- "count": 96
1363
- }
1364
- }
1365
- },
1366
- "add_C6": {
1367
- "full_accuracy": 0.94,
1368
- "n_examples": 50,
1369
- "per_subtask": {
1370
- "SC": {
1371
- "accuracy": 1.0,
1372
- "count": 50
1373
- },
1374
- "UC": {
1375
- "accuracy": 0.9835164835164835,
1376
- "count": 182
1377
- },
1378
- "US": {
1379
- "accuracy": 1.0,
1380
- "count": 118
1381
- }
1382
- }
1383
- },
1384
- "sub_M0": {
1385
- "full_accuracy": 1.0,
1386
- "n_examples": 50,
1387
- "per_subtask": {
1388
- "MD": {
1389
- "accuracy": 1.0,
1390
- "count": 294
1391
- },
1392
- "ME": {
1393
- "accuracy": 1.0,
1394
- "count": 56
1395
- }
1396
- }
1397
- },
1398
- "sub_M1": {
1399
- "full_accuracy": 1.0,
1400
- "n_examples": 50,
1401
- "per_subtask": {
1402
- "MD": {
1403
- "accuracy": 1.0,
1404
- "count": 143
1405
- },
1406
- "MB": {
1407
- "accuracy": 1.0,
1408
- "count": 69
1409
- },
1410
- "ME": {
1411
- "accuracy": 1.0,
1412
- "count": 15
1413
- },
1414
- "UB": {
1415
- "accuracy": 1.0,
1416
- "count": 123
1417
- }
1418
- }
1419
- },
1420
- "sub_M2": {
1421
- "full_accuracy": 1.0,
1422
- "n_examples": 50,
1423
- "per_subtask": {
1424
- "MD": {
1425
- "accuracy": 1.0,
1426
- "count": 108
1427
- },
1428
- "MB": {
1429
- "accuracy": 1.0,
1430
- "count": 52
1431
- },
1432
- "ME": {
1433
- "accuracy": 1.0,
1434
- "count": 52
1435
- },
1436
- "UB": {
1437
- "accuracy": 1.0,
1438
- "count": 87
1439
- },
1440
- "UD": {
1441
- "accuracy": 1.0,
1442
- "count": 51
1443
- }
1444
- }
1445
- },
1446
- "sub_M3": {
1447
- "full_accuracy": 0.98,
1448
- "n_examples": 50,
1449
- "per_subtask": {
1450
- "MD": {
1451
- "accuracy": 1.0,
1452
- "count": 94
1453
- },
1454
- "MB": {
1455
- "accuracy": 1.0,
1456
- "count": 51
1457
- },
1458
- "ME": {
1459
- "accuracy": 1.0,
1460
- "count": 25
1461
- },
1462
- "UB": {
1463
- "accuracy": 0.9871794871794872,
1464
- "count": 78
1465
- },
1466
- "UD": {
1467
- "accuracy": 1.0,
1468
- "count": 102
1469
- }
1470
- }
1471
- },
1472
- "sub_M4": {
1473
- "full_accuracy": 0.84,
1474
- "n_examples": 50,
1475
- "per_subtask": {
1476
- "MD": {
1477
- "accuracy": 1.0,
1478
- "count": 100
1479
- },
1480
- "MB": {
1481
- "accuracy": 1.0,
1482
- "count": 50
1483
- },
1484
- "UB": {
1485
- "accuracy": 0.84,
1486
- "count": 50
1487
- },
1488
- "UD": {
1489
- "accuracy": 0.9933333333333333,
1490
- "count": 150
1491
- }
1492
- }
1493
- },
1494
- "sub_M5": {
1495
- "full_accuracy": 0.72,
1496
- "n_examples": 50,
1497
- "per_subtask": {
1498
- "MD": {
1499
- "accuracy": 1.0,
1500
- "count": 50
1501
- },
1502
- "MB": {
1503
- "accuracy": 1.0,
1504
- "count": 50
1505
- },
1506
- "UB": {
1507
- "accuracy": 0.72,
1508
- "count": 50
1509
- },
1510
- "UD": {
1511
- "accuracy": 0.995,
1512
- "count": 200
1513
- }
1514
- }
1515
- },
1516
- "sub_random": {
1517
- "full_accuracy": 1.0,
1518
- "n_examples": 200,
1519
- "per_subtask": {
1520
- "MD": {
1521
- "accuracy": 1.0,
1522
- "count": 588
1523
- },
1524
- "MB": {
1525
- "accuracy": 1.0,
1526
- "count": 268
1527
- },
1528
- "ME": {
1529
- "accuracy": 1.0,
1530
- "count": 60
1531
- },
1532
- "UB": {
1533
- "accuracy": 1.0,
1534
- "count": 447
1535
- },
1536
- "UD": {
1537
- "accuracy": 1.0,
1538
- "count": 37
1539
- }
1540
- }
1541
- },
1542
- "sub_B3": {
1543
- "full_accuracy": 0.96,
1544
- "n_examples": 50,
1545
- "per_subtask": {
1546
- "MD": {
1547
- "accuracy": 1.0,
1548
- "count": 150
1549
- },
1550
- "MB": {
1551
- "accuracy": 1.0,
1552
- "count": 50
1553
- },
1554
- "UB": {
1555
- "accuracy": 0.9813084112149533,
1556
- "count": 107
1557
- },
1558
- "UD": {
1559
- "accuracy": 1.0,
1560
- "count": 43
1561
- }
1562
- }
1563
- },
1564
- "sub_B4": {
1565
- "full_accuracy": 0.92,
1566
- "n_examples": 50,
1567
- "per_subtask": {
1568
- "MD": {
1569
- "accuracy": 1.0,
1570
- "count": 100
1571
- },
1572
- "MB": {
1573
- "accuracy": 1.0,
1574
- "count": 50
1575
- },
1576
- "UB": {
1577
- "accuracy": 0.9649122807017544,
1578
- "count": 114
1579
- },
1580
- "UD": {
1581
- "accuracy": 1.0,
1582
- "count": 86
1583
- }
1584
- }
1585
- },
1586
- "sub_B5": {
1587
- "full_accuracy": 0.92,
1588
- "n_examples": 50,
1589
- "per_subtask": {
1590
- "MD": {
1591
- "accuracy": 1.0,
1592
- "count": 50
1593
- },
1594
- "MB": {
1595
- "accuracy": 1.0,
1596
- "count": 50
1597
- },
1598
- "UB": {
1599
- "accuracy": 0.9869281045751634,
1600
- "count": 153
1601
- },
1602
- "UD": {
1603
- "accuracy": 0.979381443298969,
1604
- "count": 97
1605
- }
1606
- }
1607
- }
1608
- },
1609
- "summary": {
1610
- "overall_accuracy": 0.95,
1611
- "total_examples": 1400,
1612
- "n_splits": 22
1613
- }
1614
- },
1615
- "sorl_overall_accuracy": 0.95,
1616
- "sft_overall_accuracy": 0.5278571428571428
1617
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
add_sub_sorl_v1_abs10_25K/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d40fff70ff6d0020577cd516415675f4669c580b180636378d076a80f6caede
3
- size 650303660
 
 
 
 
add_sub_sorl_v1_abs10_25K/train_config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "mode": "sorl",
3
- "ops": "add_sub",
4
- "n_digits": 6,
5
- "n_layer": 2,
6
- "n_head": 3,
7
- "n_embd": 510,
8
- "abs_vocab": 10,
9
- "K": 4,
10
- "alpha_info_gain": 10.0,
11
- "alpha_abs": 0.1,
12
- "alpha_soft_zipf": 1.0,
13
- "batch_size": 64,
14
- "num_epochs": 10,
15
- "dataset_size": 25000,
16
- "lr": 8e-05,
17
- "output_dir": "ckpt/sweep/as_sorl_abs10_K4_25K",
18
- "device": "cuda",
19
- "push_to_hub": true,
20
- "no_wandb": false,
21
- "n_params": 162499262,
22
- "run_name": "add_sub_sorl_v1_abs10_25K",
23
- "git_commit": "800625019270114adcda289bbd550c4f1109a514",
24
- "timestamp": "2026-04-12T02:37:37.086430+00:00",
25
- "tokenizer": "Qwen/Qwen3-0.6B",
26
- "dataset_repo": "thoughtworks/arithmetic-sorl-data",
27
- "dataset_config": "add_sub_6digit",
28
- "model_repo": "thoughtworks/arithmetic-sorl",
29
- "trainer_version": "v1",
30
- "wandb_run_id": "iblkjg9n",
31
- "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/iblkjg9n",
32
- "final_accuracy": 0.95,
33
- "sft_accuracy": 0.5278571428571428,
34
- "eval_method": "ArithmeticEvaluator"
35
- }