amirali1985 commited on
Commit
8a6cdf5
·
verified ·
1 Parent(s): dfd35ae

Upload add_sub_sorl_v1_abs10_K1_25K_1L3H510d

Browse files
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/metrics.json CHANGED
@@ -143,857 +143,857 @@
143
  7779
144
  ],
145
  "loss": [
146
- -2.59329891204834,
147
- 7.806329250335693,
148
- 4.664865016937256,
149
- 3.5807652473449707,
150
- 3.15908145904541,
151
- 2.915864944458008,
152
- 3.148441791534424,
153
- 0.36000847816467285,
154
- -1.757477879524231,
155
- -3.409620523452759,
156
- -5.0319366455078125,
157
- -7.095430374145508,
158
- -8.759235382080078,
159
- -8.767191886901855,
160
- -8.945306777954102,
161
- -5.980461597442627,
162
- -5.657682418823242,
163
- -5.555356502532959,
164
- -6.722297668457031,
165
- -4.748482704162598,
166
- -3.466064929962158,
167
- -0.8357032537460327,
168
- -0.1377798318862915,
169
- -0.3907341957092285,
170
- 0.29442957043647766,
171
- -0.10398596525192261,
172
- -0.4516690969467163,
173
- 0.37417611479759216,
174
- -0.44442543387413025,
175
- -0.3337244689464569,
176
- -0.25515031814575195,
177
- -0.3856278657913208,
178
- -0.22291865944862366,
179
- -0.17850244045257568,
180
- -0.4978151321411133,
181
- -0.4693702161312103,
182
- -0.44592294096946716,
183
- -0.2621464133262634,
184
- -0.382561594247818,
185
- -0.5315982699394226,
186
- -0.3435952961444855,
187
- -0.329256534576416,
188
- -0.7089931964874268,
189
- -0.6072415113449097,
190
- -0.5729288458824158,
191
- -0.4047969579696655,
192
- -0.4896378219127655,
193
- -0.2720121741294861,
194
- -0.518795907497406,
195
- -0.3424687385559082,
196
- -0.6721977591514587,
197
- -0.3405545949935913,
198
- -0.34829750657081604,
199
- -0.25543728470802307,
200
- -0.5398901700973511,
201
- -0.6598642468452454,
202
- -0.459503173828125,
203
- -0.5198010802268982,
204
- -0.38155192136764526,
205
- -0.5738978385925293,
206
- -0.6543811559677124,
207
- -0.6683095693588257,
208
- -0.37674030661582947,
209
- -0.2265310287475586,
210
- -0.8545238971710205,
211
- -0.2876709997653961,
212
- -0.9567681550979614,
213
- -0.35820356011390686,
214
- -0.499785840511322,
215
- -0.33761483430862427,
216
- -0.40017256140708923,
217
- -0.417964905500412,
218
- -0.2706613540649414,
219
- -0.4005718231201172,
220
- -0.5140082240104675,
221
- -0.22472795844078064,
222
- -0.6303619742393494,
223
- -0.380489319562912,
224
- -0.43577492237091064,
225
- 0.025171242654323578,
226
- -0.4277156889438629,
227
- -0.5149685144424438,
228
- -0.3809179961681366,
229
- -0.3976118862628937,
230
- -0.5983619689941406,
231
- -0.6481726765632629,
232
- -0.6451705098152161,
233
- -0.2953732907772064,
234
- -0.3128582835197449,
235
- -0.5169304013252258,
236
- -0.44698846340179443,
237
- -0.6919875144958496,
238
- -0.13136500120162964,
239
- -0.7428823709487915,
240
- -0.26386335492134094,
241
- -0.5457929968833923,
242
- -0.48326900601387024,
243
- -0.354233980178833,
244
- -0.3980136811733246,
245
- -0.4296197295188904,
246
- -0.43872061371803284,
247
- -0.5549288988113403,
248
- -0.5576112270355225,
249
- -0.4448431432247162,
250
- -0.6850564479827881,
251
- -0.08526279032230377,
252
- -0.49758049845695496,
253
- -0.528289794921875,
254
- -0.4233647882938385,
255
- -0.27047842741012573,
256
- -0.5958406925201416,
257
- -0.24503447115421295,
258
- -0.5884737968444824,
259
- -0.2966604232788086,
260
- -0.11186803877353668,
261
- -0.49717044830322266,
262
- -0.5715808868408203,
263
- -0.310215026140213,
264
- -0.2626461088657379,
265
- -0.3557012379169464,
266
- -0.4658515751361847,
267
- -0.07596707344055176,
268
- -0.22857414186000824,
269
- -0.4917992651462555,
270
- 0.018878456205129623,
271
- -0.4699482321739197,
272
- -0.19665612280368805,
273
- -0.45886844396591187,
274
- -0.5627478957176208,
275
- -0.020080439746379852,
276
- -0.28417420387268066,
277
- -0.10383808612823486,
278
- -0.4331938624382019,
279
- -0.027989462018013,
280
- -0.572080135345459,
281
- -0.3479749858379364,
282
- -0.3199583888053894,
283
- -0.14209412038326263,
284
- -0.3254658579826355,
285
- -0.3599850535392761
286
  ],
287
  "base_loss": [
288
- 10.196748733520508,
289
- 6.539699077606201,
290
- 3.8725855350494385,
291
- 2.0681710243225098,
292
- 1.9675366878509521,
293
- 1.9196069240570068,
294
- 1.8533780574798584,
295
- 1.951110601425171,
296
- 2.0042569637298584,
297
- 1.8366247415542603,
298
- 1.7919471263885498,
299
- 1.7820709943771362,
300
- 1.7885769605636597,
301
- 1.7231441736221313,
302
- 1.7234703302383423,
303
- 1.3017929792404175,
304
- 1.300842523574829,
305
- 1.2232086658477783,
306
- 1.3535493612289429,
307
- 1.1036676168441772,
308
- 0.9509031176567078,
309
- 0.5975162386894226,
310
- 0.47567373514175415,
311
- 0.5592564344406128,
312
- 0.518327534198761,
313
- 0.4585016071796417,
314
- 0.4952283203601837,
315
- 0.3824222981929779,
316
- 0.42844533920288086,
317
- 0.45068588852882385,
318
- 0.2935049831867218,
319
- 0.39543598890304565,
320
- 0.3400234878063202,
321
- 0.3020298182964325,
322
- 0.41195863485336304,
323
- 0.3548106253147125,
324
- 0.33770138025283813,
325
- 0.30280718207359314,
326
- 0.325740247964859,
327
- 0.3177395462989807,
328
- 0.28193262219429016,
329
- 0.2978305518627167,
330
- 0.3271159529685974,
331
- 0.33548837900161743,
332
- 0.30633482336997986,
333
- 0.27675923705101013,
334
- 0.21956755220890045,
335
- 0.29375362396240234,
336
- 0.2891789972782135,
337
- 0.32527607679367065,
338
- 0.30730611085891724,
339
- 0.2632617652416229,
340
- 0.25507208704948425,
341
- 0.2266814410686493,
342
- 0.263988196849823,
343
- 0.2839556336402893,
344
- 0.2557508945465088,
345
- 0.24259057641029358,
346
- 0.2714501917362213,
347
- 0.21652953326702118,
348
- 0.27574509382247925,
349
- 0.2669479250907898,
350
- 0.2079036980867386,
351
- 0.19827929139137268,
352
- 0.2644518315792084,
353
- 0.24134400486946106,
354
- 0.3041193187236786,
355
- 0.2472688853740692,
356
- 0.24810060858726501,
357
- 0.2032974809408188,
358
- 0.2518690824508667,
359
- 0.1975613385438919,
360
- 0.19218994677066803,
361
- 0.22987064719200134,
362
- 0.21874548494815826,
363
- 0.20414327085018158,
364
- 0.1903039664030075,
365
- 0.20824790000915527,
366
- 0.25246912240982056,
367
- 0.17222297191619873,
368
- 0.22310911118984222,
369
- 0.19762837886810303,
370
- 0.2397158443927765,
371
- 0.2757701277732849,
372
- 0.2096136063337326,
373
- 0.19665583968162537,
374
- 0.21887147426605225,
375
- 0.25790169835090637,
376
- 0.20052920281887054,
377
- 0.1768275797367096,
378
- 0.20910362899303436,
379
- 0.23822860419750214,
380
- 0.19321534037590027,
381
- 0.2246156632900238,
382
- 0.1836516112089157,
383
- 0.17393173277378082,
384
- 0.1797213852405548,
385
- 0.1975935846567154,
386
- 0.17399263381958008,
387
- 0.16129735112190247,
388
- 0.22429142892360687,
389
- 0.22638773918151855,
390
- 0.14325550198554993,
391
- 0.16796715557575226,
392
- 0.22135309875011444,
393
- 0.16441737115383148,
394
- 0.22248207032680511,
395
- 0.14299893379211426,
396
- 0.1808304786682129,
397
- 0.14688751101493835,
398
- 0.17729976773262024,
399
- 0.15668703615665436,
400
- 0.19968543946743011,
401
- 0.1618441343307495,
402
- 0.13713142275810242,
403
- 0.15012170374393463,
404
- 0.18270301818847656,
405
- 0.13032755255699158,
406
- 0.13019099831581116,
407
- 0.15042230486869812,
408
- 0.18148109316825867,
409
- 0.1746334582567215,
410
- 0.1649618148803711,
411
- 0.15130789577960968,
412
- 0.1465824693441391,
413
- 0.14087851345539093,
414
- 0.14302660524845123,
415
- 0.15380451083183289,
416
- 0.15758132934570312,
417
- 0.12044531852006912,
418
- 0.15550915896892548,
419
- 0.15038736164569855,
420
- 0.1784907430410385,
421
- 0.0905509814620018,
422
- 0.14896859228610992,
423
- 0.14195513725280762,
424
- 0.15365047752857208,
425
- 0.13856948912143707,
426
- 0.11941786110401154,
427
- 0.11631196737289429
428
  ],
429
  "info_loss": [
430
- -2.1446304321289062,
431
- -0.32575225830078125,
432
- -0.13529229164123535,
433
- -0.0395050048828125,
434
- -0.06857085227966309,
435
- -0.0878446102142334,
436
- -0.05781435966491699,
437
- -0.3329533338546753,
438
- -0.5450968742370605,
439
- -0.6930801868438721,
440
- -0.8512004017829895,
441
- -1.0552778244018555,
442
- -1.2221078872680664,
443
- -1.2155961990356445,
444
- -1.2316124439239502,
445
- -0.8885735273361206,
446
- -0.8468337059020996,
447
- -0.8221979141235352,
448
- -0.9432716965675354,
449
- -0.7168070077896118,
450
- -0.5614590644836426,
451
- -0.24384543299674988,
452
- -0.15267881751060486,
453
- -0.17980945110321045,
454
- -0.10525521636009216,
455
- -0.13857501745224,
456
- -0.17203545570373535,
457
- -0.07036077976226807,
458
- -0.1279563307762146,
459
- -0.10729706287384033,
460
- -0.07537534832954407,
461
- -0.09179061651229858,
462
- -0.06815844774246216,
463
- -0.0584297776222229,
464
- -0.10185521841049194,
465
- -0.09001076221466064,
466
- -0.08385393023490906,
467
- -0.06273548305034637,
468
- -0.07663877308368683,
469
- -0.09079703688621521,
470
- -0.06756959855556488,
471
- -0.06724132597446442,
472
- -0.10811074078083038,
473
- -0.09930633008480072,
474
- -0.09256221354007721,
475
- -0.07206308841705322,
476
- -0.07471893727779388,
477
- -0.06005033850669861,
478
- -0.08448712527751923,
479
- -0.0711066722869873,
480
- -0.10168524086475372,
481
- -0.06479458510875702,
482
- -0.06599287688732147,
483
- -0.052838653326034546,
484
- -0.08438156545162201,
485
- -0.09715546667575836,
486
- -0.07497446238994598,
487
- -0.08055919408798218,
488
- -0.06902116537094116,
489
- -0.08227460086345673,
490
- -0.09579434990882874,
491
- -0.09616953134536743,
492
- -0.06199820339679718,
493
- -0.04617848992347717,
494
- -0.11456663906574249,
495
- -0.05601660907268524,
496
- -0.12871989607810974,
497
- -0.06328007578849792,
498
- -0.07753662765026093,
499
- -0.05645681917667389,
500
- -0.0682779997587204,
501
- -0.06401722133159637,
502
- -0.048893705010414124,
503
- -0.06537330150604248,
504
- -0.07560169696807861,
505
- -0.045035943388938904,
506
- -0.08440657705068588,
507
- -0.06127925217151642,
508
- -0.07112739980220795,
509
- -0.016663089394569397,
510
- -0.06749387085437775,
511
- -0.07337355613708496,
512
- -0.06437526643276215,
513
- -0.0701342225074768,
514
- -0.0825263261795044,
515
- -0.08599703758955002,
516
- -0.08863493800163269,
517
- -0.05710361897945404,
518
- -0.05359072983264923,
519
- -0.07135023176670074,
520
- -0.06739573180675507,
521
- -0.09494532644748688,
522
- -0.03425717353820801,
523
- -0.09863488376140594,
524
- -0.04627199470996857,
525
- -0.07348194718360901,
526
- -0.06861130893230438,
527
- -0.056984856724739075,
528
- -0.05861884355545044,
529
- -0.06040181219577789,
530
- -0.06799885630607605,
531
- -0.0795242041349411,
532
- -0.07134532183408737,
533
- -0.06287021934986115,
534
- -0.09188182651996613,
535
- -0.026334315538406372,
536
- -0.07318030297756195,
537
- -0.06862989813089371,
538
- -0.06157950311899185,
539
- -0.04337166249752045,
540
- -0.07880505174398422,
541
- -0.041538387537002563,
542
- -0.07995711266994476,
543
- -0.04752259701490402,
544
- -0.02607489377260208,
545
- -0.06598459929227829,
546
- -0.07677405327558517,
547
- -0.04549660533666611,
548
- -0.040807873010635376,
549
- -0.05197954922914505,
550
- -0.06574590504169464,
551
- -0.02596910297870636,
552
- -0.04056032747030258,
553
- -0.06558245420455933,
554
- -0.013828963041305542,
555
- -0.06204167753458023,
556
- -0.03477794677019119,
557
- -0.061917684972286224,
558
- -0.07310272008180618,
559
- -0.01492491364479065,
560
- -0.04469820111989975,
561
- -0.02633901685476303,
562
- -0.061958588659763336,
563
- -0.012616284191608429,
564
- -0.07271414995193481,
565
- -0.04950237274169922,
566
- -0.04833126813173294,
567
- -0.0290987491607666,
568
- -0.0452541783452034,
569
- -0.04826049506664276
570
  ],
571
  "abs_loss": [
572
- 2.2833845615386963,
573
- 2.1349236965179443,
574
- 1.878380298614502,
575
- 1.8563865423202515,
576
- 1.8354262113571167,
577
- 1.8503063917160034,
578
- 1.8424841165542603,
579
- 1.4707140922546387,
580
- 1.4383527040481567,
581
- 1.38104248046875,
582
- 1.3808784484863281,
583
- 1.3518728017807007,
584
- 1.3353508710861206,
585
- 1.2989592552185059,
586
- 1.2422951459884644,
587
- 1.1344904899597168,
588
- 1.0563679933547974,
589
- 1.0411640405654907,
590
- 1.009015679359436,
591
- 0.9455917477607727,
592
- 0.853651225566864,
593
- 0.6120807528495789,
594
- 0.611920177936554,
595
- 0.5180513858795166,
596
- 0.5024454593658447,
597
- 0.46294403076171875,
598
- 0.4781595766544342,
599
- 0.45887699723243713,
600
- 0.3136500120162964,
601
- 0.28113308548927307,
602
- 0.31741151213645935,
603
- 0.29155611991882324,
604
- 0.2602435350418091,
605
- 0.24715155363082886,
606
- 0.22644563019275665,
607
- 0.1930122822523117,
608
- 0.18013298511505127,
609
- 0.16504378616809845,
610
- 0.1325383186340332,
611
- 0.14479666948318481,
612
- 0.14696170389652252,
613
- 0.1302548050880432,
614
- 0.1305399388074875,
615
- 0.12119248509407043,
616
- 0.12439487129449844,
617
- 0.14270903170108795,
618
- 0.11244238168001175,
619
- 0.09046735614538193,
620
- 0.12737874686717987,
621
- 0.09776115417480469,
622
- 0.11427365988492966,
623
- 0.09447143226861954,
624
- 0.07885978370904922,
625
- 0.0879351869225502,
626
- 0.08410171419382095,
627
- 0.07964954525232315,
628
- 0.0896737352013588,
629
- 0.0699366107583046,
630
- 0.10011555999517441,
631
- 0.0949270948767662,
632
- 0.06844495981931686,
633
- 0.06861484050750732,
634
- 0.06629729270935059,
635
- 0.06975804269313812,
636
- 0.07258649915456772,
637
- 0.0637349858880043,
638
- 0.10536119341850281,
639
- 0.06596854329109192,
640
- 0.05788467451930046,
641
- 0.07418698817491531,
642
- 0.07811196893453598,
643
- 0.08665874600410461,
644
- 0.06501919031143188,
645
- 0.05476010963320732,
646
- 0.0577680766582489,
647
- 0.0607517808675766,
648
- 0.07218018174171448,
649
- 0.05529147386550903,
650
- 0.07476102560758591,
651
- 0.06581460684537888,
652
- 0.055221568793058395,
653
- 0.057920634746551514,
654
- 0.0611116886138916,
655
- 0.05538506433367729,
656
- 0.05207012966275215,
657
- 0.05038874223828316,
658
- 0.050429850816726685,
659
- 0.07674344629049301,
660
- 0.049894899129867554,
661
- 0.0522899366915226,
662
- 0.03763778880238533,
663
- 0.053936153650283813,
664
- 0.04713640734553337,
665
- 0.05666078254580498,
666
- 0.043657347559928894,
667
- 0.04682837426662445,
668
- 0.051996469497680664,
669
- 0.0656459853053093,
670
- 0.05155832692980766,
671
- 0.04402616247534752,
672
- 0.04095534607768059,
673
- 0.033731523901224136,
674
- 0.04032720625400543,
675
- 0.0352306105196476,
676
- 0.047472912818193436,
677
- 0.03717735409736633,
678
- 0.04514874890446663,
679
- 0.034755889326334,
680
- 0.05225687846541405,
681
- 0.06242929399013519,
682
- 0.06236463785171509,
683
- 0.03574991598725319,
684
- 0.03173217549920082,
685
- 0.04282982647418976,
686
- 0.047402460128068924,
687
- 0.04407930746674538,
688
- 0.050904735922813416,
689
- 0.03305795416235924,
690
- 0.049665722995996475,
691
- 0.05792101100087166,
692
- 0.048123691231012344,
693
- 0.0334598682820797,
694
- 0.02673262171447277,
695
- 0.03756292536854744,
696
- 0.04128307104110718,
697
- 0.03372636064887047,
698
- 0.04200674965977669,
699
- 0.03905116394162178,
700
- 0.041437190026044846,
701
- 0.04416963458061218,
702
- 0.03729776665568352,
703
- 0.04110940173268318,
704
- 0.03317956626415253,
705
- 0.03276738524436951,
706
- 0.03161971643567085,
707
- 0.026516050100326538,
708
- 0.04109155386686325,
709
- 0.03907163813710213,
710
- 0.021675271913409233,
711
- 0.04133140295743942
712
  ],
713
  "zipf_loss": [
714
- 8.427918434143066,
715
- 4.310660362243652,
716
- 1.9573644399642944,
717
- 1.7220056056976318,
718
- 1.6937105655670166,
719
- 1.6896733045578003,
720
- 1.6889588832855225,
721
- 1.5913598537445068,
722
- 1.5453985929489136,
723
- 1.546452283859253,
724
- 1.550032138824463,
725
- 1.54008948802948,
726
- 1.5397311449050903,
727
- 1.5357297658920288,
728
- 1.5231165885925293,
729
- 1.4900315999984741,
730
- 1.4041755199432373,
731
- 1.3392977714538574,
732
- 1.2559688091278076,
733
- 1.2213599681854248,
734
- 1.112257480621338,
735
- 0.9440267086029053,
736
- 0.8521426916122437,
737
- 0.7962987422943115,
738
- 0.7784097194671631,
739
- 0.776968240737915,
740
- 0.7256412506103516,
741
- 0.6494739055633545,
742
- 0.37532755732536316,
743
- 0.2604469954967499,
744
- 0.1733570396900177,
745
- 0.10768669098615646,
746
- 0.09261798858642578,
747
- 0.07905035465955734,
748
- 0.08613384515047073,
749
- 0.05662556737661362,
750
- 0.03690169379115105,
751
- 0.04589689522981644,
752
- 0.0448320172727108,
753
- 0.04415290430188179,
754
- 0.03547190874814987,
755
- 0.03230065479874611,
756
- 0.03194422274827957,
757
- 0.03821423277258873,
758
- 0.03391891345381737,
759
- 0.02480378933250904,
760
- 0.026739798486232758,
761
- 0.025690853595733643,
762
- 0.0241585411131382,
763
- 0.03354579582810402,
764
- 0.025921151041984558,
765
- 0.03468235209584236,
766
- 0.04867320880293846,
767
- 0.037474267184734344,
768
- 0.03152717649936676,
769
- 0.019769782200455666,
770
- 0.025523221120238304,
771
- 0.03620664402842522,
772
- 0.027197975665330887,
773
- 0.022825945168733597,
774
- 0.02097274735569954,
775
- 0.019576314836740494,
776
- 0.028708282858133316,
777
- 0.029998784884810448,
778
- 0.01943192072212696,
779
- 0.02477763406932354,
780
- 0.015775376930832863,
781
- 0.02073144167661667,
782
- 0.021691380068659782,
783
- 0.016237128525972366,
784
- 0.022927194833755493,
785
- 0.015980161726474762,
786
- 0.01958383247256279,
787
- 0.017814546823501587,
788
- 0.017486432567238808,
789
- 0.015413029119372368,
790
- 0.016181830316781998,
791
- 0.018526144325733185,
792
- 0.015553897246718407,
793
- 0.012997702695429325,
794
- 0.01859170012176037,
795
- 0.015346556901931763,
796
- 0.017007671296596527,
797
- 0.02242172136902809,
798
- 0.012080667540431023,
799
- 0.010103044100105762,
800
- 0.01726445183157921,
801
- 0.0100869070738554,
802
- 0.01753026247024536,
803
- 0.014515362679958344,
804
- 0.014101480133831501,
805
- 0.013843490742146969,
806
- 0.01327776163816452,
807
- 0.013184715062379837,
808
- 0.010839240625500679,
809
- 0.010411908850073814,
810
- 0.017923086881637573,
811
- 0.011456401087343693,
812
- 0.00902628991752863,
813
- 0.008698401972651482,
814
- 0.012880988419055939,
815
- 0.010552301071584225,
816
- 0.008553767576813698,
817
- 0.012368827126920223,
818
- 0.0076613957062363625,
819
- 0.009945258498191833,
820
- 0.00722561264410615,
821
- 0.011534623801708221,
822
- 0.006374058313667774,
823
- 0.010107763111591339,
824
- 0.0086736511439085,
825
- 0.010087384842336178,
826
- 0.008238681592047215,
827
- 0.012438456527888775,
828
- 0.007009214721620083,
829
- 0.008145928382873535,
830
- 0.008366182446479797,
831
- 0.011117709800601006,
832
- 0.01027502678334713,
833
- 0.0078798308968544,
834
- 0.005313994362950325,
835
- 0.005744512192904949,
836
- 0.009394066408276558,
837
- 0.008961104787886143,
838
- 0.006457311101257801,
839
- 0.006217380054295063,
840
- 0.0038960399106144905,
841
- 0.002598780207335949,
842
- 0.006554239895194769,
843
- 0.0043064141646027565,
844
- 0.003568857442587614,
845
- 0.005053797736763954,
846
- 0.004583287984132767,
847
- 0.0043456582352519035,
848
- 0.0029307929798960686,
849
- 0.0024419911205768585,
850
- 0.00559467077255249,
851
- 0.00641672033816576,
852
- 0.005490543320775032,
853
- 0.002174779772758484
854
  ],
855
  "denoise_loss": [],
856
  "ortho_loss": [
857
- 0.7092215418815613,
858
- 0.46743276715278625,
859
- 0.3644844591617584,
860
- 0.25443753600120544,
861
- 0.23383183777332306,
862
- 0.20093315839767456,
863
- 0.20074476301670074,
864
- 0.23113077878952026,
865
- 0.22607871890068054,
866
- 0.2465149462223053,
867
- 0.2687225341796875,
868
- 0.24249237775802612,
869
- 0.2577590048313141,
870
- 0.266035795211792,
871
- 0.2552751898765564,
872
- 0.2689358592033386,
873
- 0.269536554813385,
874
- 0.2752798795700073,
875
- 0.28143933415412903,
876
- 0.2838402986526489,
877
- 0.27934321761131287,
878
- 0.27792537212371826,
879
- 0.278600811958313,
880
- 0.2771250605583191,
881
- 0.2715357840061188,
882
- 0.269247442483902,
883
- 0.27251896262168884,
884
- 0.26525673270225525,
885
- 0.266304612159729,
886
- 0.2764833867549896,
887
- 0.26652833819389343,
888
- 0.2704552710056305,
889
- 0.2712864279747009,
890
- 0.2649822235107422,
891
- 0.27131664752960205,
892
- 0.26084455847740173,
893
- 0.2620142698287964,
894
- 0.258016437292099,
895
- 0.24880310893058777,
896
- 0.24396663904190063,
897
- 0.23598477244377136,
898
- 0.2386299967765808,
899
- 0.23005273938179016,
900
- 0.22665658593177795,
901
- 0.22420556843280792,
902
- 0.2224186211824417,
903
- 0.22427774965763092,
904
- 0.21841591596603394,
905
- 0.22097480297088623,
906
- 0.21527259051799774,
907
- 0.2151358276605606,
908
- 0.2146364003419876,
909
- 0.20944494009017944,
910
- 0.21144999563694,
911
- 0.20593683421611786,
912
- 0.20386382937431335,
913
- 0.19745765626430511,
914
- 0.19894321262836456,
915
- 0.19742432236671448,
916
- 0.19608557224273682,
917
- 0.18835635483264923,
918
- 0.18813860416412354,
919
- 0.18744516372680664,
920
- 0.18192793428897858,
921
- 0.1776714324951172,
922
- 0.18126605451107025,
923
- 0.18171174824237823,
924
- 0.18377967178821564,
925
- 0.18195678293704987,
926
- 0.18419532477855682,
927
- 0.1809682846069336,
928
- 0.17715398967266083,
929
- 0.1780245155096054,
930
- 0.17764145135879517,
931
- 0.17595191299915314,
932
- 0.17716969549655914,
933
- 0.17705252766609192,
934
- 0.17322954535484314,
935
- 0.1746148020029068,
936
- 0.17197285592556,
937
- 0.17283350229263306,
938
- 0.17129376530647278,
939
- 0.1728857010602951,
940
- 0.17178265750408173,
941
- 0.16882957518100739,
942
- 0.16890282928943634,
943
- 0.17142780125141144,
944
- 0.17491458356380463,
945
- 0.1703752726316452,
946
- 0.16940584778785706,
947
- 0.1734863966703415,
948
- 0.1734330803155899,
949
- 0.17536111176013947,
950
- 0.17814159393310547,
951
- 0.18004973232746124,
952
- 0.1793045997619629,
953
- 0.18151059746742249,
954
- 0.17672353982925415,
955
- 0.17982426285743713,
956
- 0.18007126450538635,
957
- 0.17900310456752777,
958
- 0.18146604299545288,
959
- 0.1807563602924347,
960
- 0.17732296884059906,
961
- 0.18004903197288513,
962
- 0.18047833442687988,
963
- 0.18000200390815735,
964
- 0.1829339563846588,
965
- 0.18131765723228455,
966
- 0.18021774291992188,
967
- 0.17930588126182556,
968
- 0.1804979145526886,
969
- 0.18043546378612518,
970
- 0.18063659965991974,
971
- 0.18216735124588013,
972
- 0.1802050769329071,
973
- 0.1801634132862091,
974
- 0.18029654026031494,
975
- 0.18130487203598022,
976
- 0.18213582038879395,
977
- 0.1824827641248703,
978
- 0.18340104818344116,
979
- 0.18511319160461426,
980
- 0.185243159532547,
981
- 0.18528737127780914,
982
- 0.18479134142398834,
983
- 0.18610772490501404,
984
- 0.18649695813655853,
985
- 0.18489259481430054,
986
- 0.18530786037445068,
987
- 0.18532846868038177,
988
- 0.1857357919216156,
989
- 0.18599264323711395,
990
- 0.18614186346530914,
991
- 0.18604278564453125,
992
- 0.1854981780052185,
993
- 0.18638573586940765,
994
- 0.18637895584106445,
995
- 0.18691058456897736,
996
- 0.18711933493614197
997
  ],
998
  "lr": [
999
  1.6752136752136756e-05,
@@ -1161,7 +1161,7 @@
1161
  7779
1162
  ],
1163
  "eval_accuracy": [
1164
- 0.01,
1165
  0.0,
1166
  0.0,
1167
  0.0,
@@ -1183,573 +1183,573 @@
1183
  0.0
1184
  ]
1185
  },
1186
- "final_accuracy": 0.5895833333333333,
1187
  "sft_eval": {
1188
  "config": {
1189
  "ops": "add_sub",
1190
  "K": null,
1191
  "mode": "sft",
1192
  "n_digits": 6,
1193
- "n_per_split": 50
1194
  },
1195
  "splits": {
1196
  "add_S0": {
1197
- "full_accuracy": 0.88,
1198
- "digit_accuracy": 0.9828571428571429,
1199
- "n_examples": 50,
1200
  "per_subtask": {
1201
  "SA": {
1202
- "accuracy": 0.9796610169491525,
1203
- "count": 295
1204
  },
1205
  "SS": {
1206
  "accuracy": 1.0,
1207
- "count": 55
1208
  }
1209
  }
1210
  },
1211
  "add_S1": {
1212
- "full_accuracy": 0.82,
1213
- "digit_accuracy": 0.9714285714285714,
1214
- "n_examples": 50,
1215
  "per_subtask": {
1216
  "SA": {
1217
- "accuracy": 0.9920634920634921,
1218
- "count": 126
1219
  },
1220
  "SC": {
1221
- "accuracy": 0.9873417721518988,
1222
- "count": 79
1223
  },
1224
  "SS": {
1225
- "accuracy": 0.9523809523809523,
1226
- "count": 21
1227
  },
1228
  "UC": {
1229
- "accuracy": 0.9435483870967742,
1230
- "count": 124
1231
  }
1232
  }
1233
  },
1234
  "add_S2": {
1235
- "full_accuracy": 0.66,
1236
- "digit_accuracy": 0.9371428571428572,
1237
- "n_examples": 50,
1238
  "per_subtask": {
1239
  "SA": {
1240
- "accuracy": 0.9733333333333334,
1241
- "count": 75
1242
  },
1243
  "SC": {
1244
- "accuracy": 0.9516129032258065,
1245
- "count": 62
1246
  },
1247
  "SS": {
1248
- "accuracy": 0.9230769230769231,
1249
- "count": 39
1250
  },
1251
  "UC": {
1252
- "accuracy": 0.8828828828828829,
1253
- "count": 111
1254
  },
1255
  "US": {
1256
- "accuracy": 0.9841269841269841,
1257
- "count": 63
1258
  }
1259
  }
1260
  },
1261
  "add_S3": {
1262
- "full_accuracy": 0.58,
1263
- "digit_accuracy": 0.9285714285714286,
1264
- "n_examples": 50,
1265
  "per_subtask": {
1266
  "SA": {
1267
  "accuracy": 1.0,
1268
- "count": 60
1269
  },
1270
  "SC": {
1271
- "accuracy": 0.9649122807017544,
1272
- "count": 57
1273
  },
1274
  "SS": {
1275
  "accuracy": 1.0,
1276
- "count": 19
1277
  },
1278
  "UC": {
1279
- "accuracy": 0.8076923076923077,
1280
- "count": 104
1281
  },
1282
  "US": {
1283
- "accuracy": 0.9727272727272728,
1284
- "count": 110
1285
  }
1286
  }
1287
  },
1288
  "add_S4": {
1289
- "full_accuracy": 0.46,
1290
- "digit_accuracy": 0.8628571428571429,
1291
- "n_examples": 50,
1292
  "per_subtask": {
1293
  "SA": {
1294
  "accuracy": 1.0,
1295
- "count": 48
1296
  },
1297
  "SC": {
1298
  "accuracy": 1.0,
1299
- "count": 52
1300
  },
1301
  "SS": {
1302
  "accuracy": 1.0,
1303
- "count": 7
1304
  },
1305
  "UC": {
1306
- "accuracy": 0.7528089887640449,
1307
- "count": 89
1308
  },
1309
  "US": {
1310
- "accuracy": 0.8311688311688312,
1311
- "count": 154
1312
  }
1313
  }
1314
  },
1315
  "add_S5": {
1316
- "full_accuracy": 0.28,
1317
- "digit_accuracy": 0.6857142857142857,
1318
- "n_examples": 50,
1319
  "per_subtask": {
1320
  "SA": {
1321
  "accuracy": 1.0,
1322
- "count": 50
1323
  },
1324
  "SC": {
1325
  "accuracy": 1.0,
1326
- "count": 50
1327
  },
1328
  "UC": {
1329
- "accuracy": 0.56,
1330
- "count": 50
1331
  },
1332
  "US": {
1333
- "accuracy": 0.56,
1334
- "count": 200
1335
  }
1336
  }
1337
  },
1338
  "add_S6": {
1339
- "full_accuracy": 0.64,
1340
- "digit_accuracy": 0.7914285714285715,
1341
- "n_examples": 50,
1342
  "per_subtask": {
1343
  "SC": {
1344
  "accuracy": 1.0,
1345
- "count": 50
1346
  },
1347
  "UC": {
1348
- "accuracy": 0.76,
1349
- "count": 50
1350
  },
1351
  "US": {
1352
- "accuracy": 0.756,
1353
- "count": 250
1354
  }
1355
  }
1356
  },
1357
  "add_random": {
1358
- "full_accuracy": 0.85,
1359
- "digit_accuracy": 0.9764285714285714,
1360
  "n_examples": 200,
1361
  "per_subtask": {
1362
  "SA": {
1363
- "accuracy": 0.988399071925754,
1364
- "count": 431
1365
  },
1366
  "SC": {
1367
- "accuracy": 0.9810126582278481,
1368
- "count": 316
1369
  },
1370
  "SS": {
1371
- "accuracy": 0.9743589743589743,
1372
- "count": 39
1373
  },
1374
  "UC": {
1375
- "accuracy": 0.9625,
1376
- "count": 560
1377
  },
1378
  "US": {
1379
- "accuracy": 1.0,
1380
- "count": 54
1381
  }
1382
  }
1383
  },
1384
  "add_C1": {
1385
- "full_accuracy": 0.84,
1386
- "digit_accuracy": 0.9771428571428571,
1387
- "n_examples": 50,
1388
  "per_subtask": {
1389
  "SA": {
1390
  "accuracy": 1.0,
1391
- "count": 250
1392
  },
1393
  "SC": {
1394
  "accuracy": 1.0,
1395
- "count": 50
1396
  },
1397
  "UC": {
1398
- "accuracy": 0.84,
1399
- "count": 50
1400
  }
1401
  }
1402
  },
1403
  "add_C2": {
1404
  "full_accuracy": 0.84,
1405
- "digit_accuracy": 0.9657142857142857,
1406
- "n_examples": 50,
1407
  "per_subtask": {
1408
  "SA": {
1409
  "accuracy": 1.0,
1410
- "count": 200
1411
  },
1412
  "SC": {
1413
  "accuracy": 1.0,
1414
- "count": 50
1415
  },
1416
  "UC": {
1417
- "accuracy": 0.9156626506024096,
1418
- "count": 83
1419
  },
1420
  "US": {
1421
- "accuracy": 0.7058823529411765,
1422
- "count": 17
1423
  }
1424
  }
1425
  },
1426
  "add_C3": {
1427
- "full_accuracy": 0.56,
1428
- "digit_accuracy": 0.9257142857142857,
1429
- "n_examples": 50,
1430
  "per_subtask": {
1431
  "SA": {
1432
  "accuracy": 1.0,
1433
- "count": 150
1434
  },
1435
  "SC": {
1436
  "accuracy": 1.0,
1437
- "count": 50
1438
  },
1439
  "UC": {
1440
- "accuracy": 0.85,
1441
- "count": 100
1442
  },
1443
  "US": {
1444
- "accuracy": 0.78,
1445
- "count": 50
1446
  }
1447
  }
1448
  },
1449
  "add_C4": {
1450
- "full_accuracy": 0.7,
1451
- "digit_accuracy": 0.9514285714285714,
1452
- "n_examples": 50,
1453
  "per_subtask": {
1454
  "SA": {
1455
  "accuracy": 1.0,
1456
- "count": 100
1457
  },
1458
  "SC": {
1459
  "accuracy": 1.0,
1460
- "count": 50
1461
  },
1462
  "UC": {
1463
- "accuracy": 0.9090909090909091,
1464
- "count": 132
1465
  },
1466
  "US": {
1467
- "accuracy": 0.9264705882352942,
1468
- "count": 68
1469
  }
1470
  }
1471
  },
1472
  "add_C5": {
1473
- "full_accuracy": 0.6,
1474
- "digit_accuracy": 0.9228571428571428,
1475
- "n_examples": 50,
1476
  "per_subtask": {
1477
  "SA": {
1478
  "accuracy": 1.0,
1479
- "count": 50
1480
  },
1481
  "SC": {
1482
  "accuracy": 1.0,
1483
- "count": 50
1484
  },
1485
  "UC": {
1486
- "accuracy": 0.9041095890410958,
1487
- "count": 146
1488
  },
1489
  "US": {
1490
- "accuracy": 0.875,
1491
- "count": 104
1492
  }
1493
  }
1494
  },
1495
  "add_C6": {
1496
- "full_accuracy": 0.56,
1497
- "digit_accuracy": 0.9142857142857143,
1498
- "n_examples": 50,
1499
  "per_subtask": {
1500
  "SC": {
1501
  "accuracy": 1.0,
1502
- "count": 50
1503
  },
1504
  "UC": {
1505
- "accuracy": 0.8994708994708994,
1506
- "count": 189
1507
  },
1508
  "US": {
1509
- "accuracy": 0.9009009009009009,
1510
- "count": 111
1511
  }
1512
  }
1513
  },
1514
  "sub_M0": {
1515
- "full_accuracy": 0.86,
1516
- "digit_accuracy": 0.98,
1517
- "n_examples": 50,
1518
  "per_subtask": {
1519
  "MD": {
1520
- "accuracy": 0.976897689768977,
1521
- "count": 303
1522
  },
1523
  "ME": {
1524
  "accuracy": 1.0,
1525
- "count": 47
1526
  }
1527
  }
1528
  },
1529
  "sub_M1": {
1530
- "full_accuracy": 0.72,
1531
- "digit_accuracy": 0.9571428571428572,
1532
- "n_examples": 50,
1533
  "per_subtask": {
1534
  "MD": {
1535
- "accuracy": 1.0,
1536
- "count": 141
1537
  },
1538
  "MB": {
1539
- "accuracy": 0.9722222222222222,
1540
- "count": 72
1541
  },
1542
  "ME": {
1543
- "accuracy": 0.8888888888888888,
1544
- "count": 18
1545
  },
1546
  "UB": {
1547
- "accuracy": 0.907563025210084,
1548
- "count": 119
1549
  }
1550
  }
1551
  },
1552
  "sub_M2": {
1553
- "full_accuracy": 0.3,
1554
- "digit_accuracy": 0.8714285714285714,
1555
- "n_examples": 50,
1556
  "per_subtask": {
1557
  "MD": {
1558
- "accuracy": 0.9732142857142857,
1559
- "count": 112
1560
  },
1561
  "MB": {
1562
- "accuracy": 0.9056603773584906,
1563
- "count": 53
1564
  },
1565
  "ME": {
1566
- "accuracy": 0.9574468085106383,
1567
- "count": 47
1568
  },
1569
  "UB": {
1570
- "accuracy": 0.6470588235294118,
1571
- "count": 85
1572
  },
1573
  "UD": {
1574
- "accuracy": 0.9056603773584906,
1575
- "count": 53
1576
  }
1577
  }
1578
  },
1579
  "sub_M3": {
1580
- "full_accuracy": 0.2,
1581
- "digit_accuracy": 0.8,
1582
- "n_examples": 50,
1583
  "per_subtask": {
1584
  "MD": {
1585
- "accuracy": 0.9896907216494846,
1586
- "count": 97
1587
  },
1588
  "MB": {
1589
- "accuracy": 0.9607843137254902,
1590
- "count": 51
1591
  },
1592
  "ME": {
1593
  "accuracy": 1.0,
1594
- "count": 27
1595
  },
1596
  "UB": {
1597
- "accuracy": 0.5405405405405406,
1598
- "count": 74
1599
  },
1600
  "UD": {
1601
- "accuracy": 0.6732673267326733,
1602
- "count": 101
1603
  }
1604
  }
1605
  },
1606
  "sub_M4": {
1607
- "full_accuracy": 0.0,
1608
- "digit_accuracy": 0.6428571428571429,
1609
- "n_examples": 50,
1610
  "per_subtask": {
1611
  "MD": {
1612
  "accuracy": 1.0,
1613
- "count": 100
1614
  },
1615
  "MB": {
1616
  "accuracy": 1.0,
1617
- "count": 50
1618
  },
1619
  "UB": {
1620
- "accuracy": 0.26,
1621
- "count": 50
1622
  },
1623
  "UD": {
1624
- "accuracy": 0.41333333333333333,
1625
- "count": 150
1626
  }
1627
  }
1628
  },
1629
  "sub_M5": {
1630
- "full_accuracy": 0.02,
1631
- "digit_accuracy": 0.44285714285714284,
1632
- "n_examples": 50,
1633
  "per_subtask": {
1634
  "MD": {
1635
  "accuracy": 1.0,
1636
- "count": 50
1637
  },
1638
  "MB": {
1639
  "accuracy": 1.0,
1640
- "count": 50
1641
  },
1642
  "UB": {
1643
- "accuracy": 0.32,
1644
- "count": 50
1645
  },
1646
  "UD": {
1647
- "accuracy": 0.195,
1648
- "count": 200
1649
  }
1650
  }
1651
  },
1652
  "sub_random": {
1653
- "full_accuracy": 0.765,
1654
- "digit_accuracy": 0.9628571428571429,
1655
  "n_examples": 200,
1656
  "per_subtask": {
1657
  "MD": {
1658
- "accuracy": 0.9894736842105263,
1659
- "count": 570
1660
  },
1661
  "MB": {
1662
- "accuracy": 0.9566787003610109,
1663
- "count": 277
1664
  },
1665
  "ME": {
1666
  "accuracy": 1.0,
1667
  "count": 53
1668
  },
1669
  "UB": {
1670
- "accuracy": 0.9299363057324841,
1671
- "count": 471
1672
  },
1673
  "UD": {
1674
- "accuracy": 0.9655172413793104,
1675
- "count": 29
1676
  }
1677
  }
1678
  },
1679
  "sub_B3": {
1680
- "full_accuracy": 0.48,
1681
- "digit_accuracy": 0.8971428571428571,
1682
- "n_examples": 50,
1683
  "per_subtask": {
1684
  "MD": {
1685
- "accuracy": 1.0,
1686
- "count": 150
1687
  },
1688
  "MB": {
1689
- "accuracy": 1.0,
1690
- "count": 50
1691
  },
1692
  "UB": {
1693
- "accuracy": 0.7623762376237624,
1694
- "count": 101
1695
  },
1696
  "UD": {
1697
- "accuracy": 0.7551020408163265,
1698
- "count": 49
1699
  }
1700
  }
1701
  },
1702
  "sub_B4": {
1703
- "full_accuracy": 0.24,
1704
- "digit_accuracy": 0.82,
1705
- "n_examples": 50,
1706
  "per_subtask": {
1707
  "MD": {
1708
- "accuracy": 1.0,
1709
- "count": 100
1710
  },
1711
  "MB": {
1712
- "accuracy": 1.0,
1713
- "count": 50
1714
  },
1715
  "UB": {
1716
- "accuracy": 0.6942148760330579,
1717
- "count": 121
1718
  },
1719
  "UD": {
1720
- "accuracy": 0.6708860759493671,
1721
- "count": 79
1722
  }
1723
  }
1724
  },
1725
  "sub_B5": {
1726
- "full_accuracy": 0.14,
1727
- "digit_accuracy": 0.7714285714285715,
1728
- "n_examples": 50,
1729
  "per_subtask": {
1730
  "MD": {
1731
  "accuracy": 1.0,
1732
- "count": 50
1733
  },
1734
  "MB": {
1735
  "accuracy": 1.0,
1736
- "count": 50
1737
  },
1738
  "UB": {
1739
- "accuracy": 0.6907894736842105,
1740
- "count": 152
1741
  },
1742
  "UD": {
1743
- "accuracy": 0.6632653061224489,
1744
- "count": 98
1745
  }
1746
  }
1747
  }
1748
  },
1749
  "summary": {
1750
- "overall_accuracy": 0.594,
1751
- "digit_accuracy": 0.8919047619047619,
1752
- "total_examples": 1500,
1753
  "n_splits": 24
1754
  }
1755
  },
@@ -1759,569 +1759,569 @@
1759
  "K": 1,
1760
  "mode": "sorl",
1761
  "n_digits": 6,
1762
- "n_per_split": 50
1763
  },
1764
  "splits": {
1765
  "add_S0": {
1766
- "full_accuracy": 0.94,
1767
- "digit_accuracy": 0.9914285714285714,
1768
- "n_examples": 50,
1769
  "per_subtask": {
1770
  "SA": {
1771
- "accuracy": 0.9898305084745763,
1772
- "count": 295
1773
  },
1774
  "SS": {
1775
- "accuracy": 1.0,
1776
- "count": 55
1777
  }
1778
  }
1779
  },
1780
  "add_S1": {
1781
- "full_accuracy": 0.9,
1782
- "digit_accuracy": 0.9857142857142858,
1783
- "n_examples": 50,
1784
  "per_subtask": {
1785
  "SA": {
1786
- "accuracy": 0.9920634920634921,
1787
- "count": 126
1788
  },
1789
  "SC": {
1790
- "accuracy": 0.9620253164556962,
1791
- "count": 79
1792
  },
1793
  "SS": {
1794
  "accuracy": 1.0,
1795
- "count": 21
1796
  },
1797
  "UC": {
1798
- "accuracy": 0.9919354838709677,
1799
- "count": 124
1800
  }
1801
  }
1802
  },
1803
  "add_S2": {
1804
- "full_accuracy": 0.7,
1805
- "digit_accuracy": 0.9457142857142857,
1806
- "n_examples": 50,
1807
  "per_subtask": {
1808
  "SA": {
1809
- "accuracy": 0.9866666666666667,
1810
- "count": 75
1811
  },
1812
  "SC": {
1813
- "accuracy": 0.9193548387096774,
1814
- "count": 62
1815
  },
1816
  "SS": {
1817
- "accuracy": 0.8974358974358975,
1818
- "count": 39
1819
  },
1820
  "UC": {
1821
- "accuracy": 0.918918918918919,
1822
- "count": 111
1823
  },
1824
  "US": {
1825
  "accuracy": 1.0,
1826
- "count": 63
1827
  }
1828
  }
1829
  },
1830
  "add_S3": {
1831
- "full_accuracy": 0.6,
1832
- "digit_accuracy": 0.9285714285714286,
1833
- "n_examples": 50,
1834
  "per_subtask": {
1835
  "SA": {
1836
- "accuracy": 0.9833333333333333,
1837
- "count": 60
1838
  },
1839
  "SC": {
1840
- "accuracy": 0.9824561403508771,
1841
- "count": 57
1842
  },
1843
  "SS": {
1844
- "accuracy": 0.9473684210526315,
1845
- "count": 19
1846
  },
1847
  "UC": {
1848
- "accuracy": 0.8269230769230769,
1849
- "count": 104
1850
  },
1851
  "US": {
1852
- "accuracy": 0.9636363636363636,
1853
- "count": 110
1854
  }
1855
  }
1856
  },
1857
  "add_S4": {
1858
- "full_accuracy": 0.68,
1859
- "digit_accuracy": 0.8971428571428571,
1860
- "n_examples": 50,
1861
  "per_subtask": {
1862
  "SA": {
1863
  "accuracy": 1.0,
1864
- "count": 48
1865
  },
1866
  "SC": {
1867
  "accuracy": 1.0,
1868
- "count": 52
1869
  },
1870
  "SS": {
1871
  "accuracy": 1.0,
1872
- "count": 7
1873
  },
1874
  "UC": {
1875
- "accuracy": 0.8651685393258427,
1876
- "count": 89
1877
  },
1878
  "US": {
1879
- "accuracy": 0.8441558441558441,
1880
- "count": 154
1881
  }
1882
  }
1883
  },
1884
  "add_S5": {
1885
- "full_accuracy": 0.4,
1886
- "digit_accuracy": 0.7514285714285714,
1887
- "n_examples": 50,
1888
  "per_subtask": {
1889
  "SA": {
1890
  "accuracy": 1.0,
1891
- "count": 50
1892
  },
1893
  "SC": {
1894
  "accuracy": 1.0,
1895
- "count": 50
1896
  },
1897
  "UC": {
1898
- "accuracy": 0.62,
1899
- "count": 50
1900
  },
1901
  "US": {
1902
- "accuracy": 0.66,
1903
- "count": 200
1904
  }
1905
  }
1906
  },
1907
  "add_S6": {
1908
- "full_accuracy": 0.48,
1909
- "digit_accuracy": 0.76,
1910
- "n_examples": 50,
1911
  "per_subtask": {
1912
  "SC": {
1913
  "accuracy": 1.0,
1914
- "count": 50
1915
  },
1916
  "UC": {
1917
- "accuracy": 0.64,
1918
- "count": 50
1919
  },
1920
  "US": {
1921
- "accuracy": 0.736,
1922
- "count": 250
1923
  }
1924
  }
1925
  },
1926
  "add_random": {
1927
- "full_accuracy": 0.9,
1928
- "digit_accuracy": 0.9842857142857143,
1929
  "n_examples": 200,
1930
  "per_subtask": {
1931
  "SA": {
1932
- "accuracy": 0.9953596287703016,
1933
- "count": 431
1934
  },
1935
  "SC": {
1936
- "accuracy": 0.990506329113924,
1937
- "count": 316
1938
  },
1939
  "SS": {
1940
- "accuracy": 0.9743589743589743,
1941
- "count": 39
1942
  },
1943
  "UC": {
1944
- "accuracy": 0.9732142857142857,
1945
- "count": 560
1946
  },
1947
  "US": {
1948
- "accuracy": 0.9814814814814815,
1949
- "count": 54
1950
  }
1951
  }
1952
  },
1953
  "add_C1": {
1954
- "full_accuracy": 0.96,
1955
- "digit_accuracy": 0.9942857142857143,
1956
- "n_examples": 50,
1957
  "per_subtask": {
1958
  "SA": {
1959
  "accuracy": 1.0,
1960
- "count": 250
1961
  },
1962
  "SC": {
1963
  "accuracy": 1.0,
1964
- "count": 50
1965
  },
1966
  "UC": {
1967
- "accuracy": 0.96,
1968
- "count": 50
1969
  }
1970
  }
1971
  },
1972
  "add_C2": {
1973
- "full_accuracy": 0.9,
1974
- "digit_accuracy": 0.98,
1975
- "n_examples": 50,
1976
  "per_subtask": {
1977
  "SA": {
1978
  "accuracy": 1.0,
1979
- "count": 200
1980
  },
1981
  "SC": {
1982
  "accuracy": 1.0,
1983
- "count": 50
1984
  },
1985
  "UC": {
1986
- "accuracy": 0.963855421686747,
1987
- "count": 83
1988
  },
1989
  "US": {
1990
- "accuracy": 0.7647058823529411,
1991
- "count": 17
1992
  }
1993
  }
1994
  },
1995
  "add_C3": {
1996
  "full_accuracy": 0.58,
1997
- "digit_accuracy": 0.9142857142857143,
1998
- "n_examples": 50,
1999
  "per_subtask": {
2000
  "SA": {
2001
  "accuracy": 1.0,
2002
- "count": 150
2003
  },
2004
  "SC": {
2005
  "accuracy": 1.0,
2006
- "count": 50
2007
  },
2008
  "UC": {
2009
- "accuracy": 0.85,
2010
- "count": 100
2011
  },
2012
  "US": {
2013
- "accuracy": 0.7,
2014
- "count": 50
2015
  }
2016
  }
2017
  },
2018
  "add_C4": {
2019
- "full_accuracy": 0.6,
2020
- "digit_accuracy": 0.9285714285714286,
2021
- "n_examples": 50,
2022
  "per_subtask": {
2023
  "SA": {
2024
  "accuracy": 1.0,
2025
- "count": 100
2026
  },
2027
  "SC": {
2028
  "accuracy": 1.0,
2029
- "count": 50
2030
  },
2031
  "UC": {
2032
- "accuracy": 0.8712121212121212,
2033
- "count": 132
2034
  },
2035
  "US": {
2036
- "accuracy": 0.8823529411764706,
2037
- "count": 68
2038
  }
2039
  }
2040
  },
2041
  "add_C5": {
2042
- "full_accuracy": 0.62,
2043
- "digit_accuracy": 0.9085714285714286,
2044
- "n_examples": 50,
2045
  "per_subtask": {
2046
  "SA": {
2047
  "accuracy": 1.0,
2048
- "count": 50
2049
  },
2050
  "SC": {
2051
  "accuracy": 1.0,
2052
- "count": 50
2053
  },
2054
  "UC": {
2055
- "accuracy": 0.8698630136986302,
2056
- "count": 146
2057
  },
2058
  "US": {
2059
- "accuracy": 0.875,
2060
- "count": 104
2061
  }
2062
  }
2063
  },
2064
  "add_C6": {
2065
- "full_accuracy": 0.62,
2066
- "digit_accuracy": 0.9171428571428571,
2067
- "n_examples": 50,
2068
  "per_subtask": {
2069
  "SC": {
2070
  "accuracy": 1.0,
2071
- "count": 50
2072
  },
2073
  "UC": {
2074
- "accuracy": 0.8994708994708994,
2075
- "count": 189
2076
  },
2077
  "US": {
2078
- "accuracy": 0.9099099099099099,
2079
- "count": 111
2080
  }
2081
  }
2082
  },
2083
  "sub_M0": {
2084
- "full_accuracy": 0.76,
2085
- "digit_accuracy": 0.96,
2086
- "n_examples": 50,
2087
  "per_subtask": {
2088
  "MD": {
2089
- "accuracy": 0.9570957095709571,
2090
- "count": 303
2091
  },
2092
  "ME": {
2093
- "accuracy": 0.9787234042553191,
2094
- "count": 47
2095
  }
2096
  }
2097
  },
2098
  "sub_M1": {
2099
- "full_accuracy": 0.82,
2100
- "digit_accuracy": 0.9657142857142857,
2101
- "n_examples": 50,
2102
  "per_subtask": {
2103
  "MD": {
2104
- "accuracy": 0.9858156028368794,
2105
- "count": 141
2106
  },
2107
  "MB": {
2108
  "accuracy": 0.9861111111111112,
2109
- "count": 72
2110
  },
2111
  "ME": {
2112
- "accuracy": 0.8888888888888888,
2113
- "count": 18
2114
  },
2115
  "UB": {
2116
- "accuracy": 0.9411764705882353,
2117
- "count": 119
2118
  }
2119
  }
2120
  },
2121
  "sub_M2": {
2122
- "full_accuracy": 0.32,
2123
- "digit_accuracy": 0.8828571428571429,
2124
- "n_examples": 50,
2125
  "per_subtask": {
2126
  "MD": {
2127
- "accuracy": 0.9464285714285714,
2128
- "count": 112
2129
  },
2130
  "MB": {
2131
- "accuracy": 0.9433962264150944,
2132
- "count": 53
2133
  },
2134
  "ME": {
2135
- "accuracy": 0.9787234042553191,
2136
- "count": 47
2137
  },
2138
  "UB": {
2139
- "accuracy": 0.6705882352941176,
2140
- "count": 85
2141
  },
2142
  "UD": {
2143
- "accuracy": 0.9433962264150944,
2144
- "count": 53
2145
  }
2146
  }
2147
  },
2148
  "sub_M3": {
2149
- "full_accuracy": 0.22,
2150
- "digit_accuracy": 0.7885714285714286,
2151
- "n_examples": 50,
2152
  "per_subtask": {
2153
  "MD": {
2154
- "accuracy": 0.9896907216494846,
2155
- "count": 97
2156
  },
2157
  "MB": {
2158
- "accuracy": 0.9607843137254902,
2159
- "count": 51
2160
  },
2161
  "ME": {
2162
- "accuracy": 0.8888888888888888,
2163
- "count": 27
2164
  },
2165
  "UB": {
2166
- "accuracy": 0.5405405405405406,
2167
- "count": 74
2168
  },
2169
  "UD": {
2170
- "accuracy": 0.6633663366336634,
2171
- "count": 101
2172
  }
2173
  }
2174
  },
2175
  "sub_M4": {
2176
- "full_accuracy": 0.08,
2177
- "digit_accuracy": 0.6914285714285714,
2178
- "n_examples": 50,
2179
  "per_subtask": {
2180
  "MD": {
2181
  "accuracy": 1.0,
2182
- "count": 100
2183
  },
2184
  "MB": {
2185
  "accuracy": 1.0,
2186
- "count": 50
2187
  },
2188
  "UB": {
2189
- "accuracy": 0.44,
2190
- "count": 50
2191
  },
2192
  "UD": {
2193
- "accuracy": 0.4666666666666667,
2194
- "count": 150
2195
  }
2196
  }
2197
  },
2198
  "sub_M5": {
2199
- "full_accuracy": 0.24,
2200
- "digit_accuracy": 0.6028571428571429,
2201
- "n_examples": 50,
2202
  "per_subtask": {
2203
  "MD": {
2204
  "accuracy": 1.0,
2205
- "count": 50
2206
  },
2207
  "MB": {
2208
  "accuracy": 1.0,
2209
- "count": 50
2210
  },
2211
  "UB": {
2212
- "accuracy": 0.56,
2213
- "count": 50
2214
  },
2215
  "UD": {
2216
- "accuracy": 0.415,
2217
- "count": 200
2218
  }
2219
  }
2220
  },
2221
  "sub_random": {
2222
- "full_accuracy": 0.825,
2223
- "digit_accuracy": 0.9735714285714285,
2224
  "n_examples": 200,
2225
  "per_subtask": {
2226
  "MD": {
2227
- "accuracy": 0.9894736842105263,
2228
- "count": 570
2229
  },
2230
  "MB": {
2231
- "accuracy": 0.9819494584837545,
2232
- "count": 277
2233
  },
2234
  "ME": {
2235
  "accuracy": 1.0,
2236
  "count": 53
2237
  },
2238
  "UB": {
2239
- "accuracy": 0.9490445859872612,
2240
- "count": 471
2241
  },
2242
  "UD": {
2243
- "accuracy": 0.9310344827586207,
2244
- "count": 29
2245
  }
2246
  }
2247
  },
2248
  "sub_B3": {
2249
- "full_accuracy": 0.46,
2250
- "digit_accuracy": 0.9,
2251
- "n_examples": 50,
2252
  "per_subtask": {
2253
  "MD": {
2254
  "accuracy": 0.9933333333333333,
2255
- "count": 150
2256
  },
2257
  "MB": {
2258
  "accuracy": 1.0,
2259
- "count": 50
2260
  },
2261
  "UB": {
2262
- "accuracy": 0.7722772277227723,
2263
- "count": 101
2264
  },
2265
  "UD": {
2266
- "accuracy": 0.7755102040816326,
2267
- "count": 49
2268
  }
2269
  }
2270
  },
2271
  "sub_B4": {
2272
- "full_accuracy": 0.32,
2273
- "digit_accuracy": 0.8285714285714286,
2274
- "n_examples": 50,
2275
  "per_subtask": {
2276
  "MD": {
2277
- "accuracy": 1.0,
2278
- "count": 100
2279
  },
2280
  "MB": {
2281
- "accuracy": 0.98,
2282
- "count": 50
2283
  },
2284
  "UB": {
2285
- "accuracy": 0.6859504132231405,
2286
- "count": 121
2287
  },
2288
  "UD": {
2289
- "accuracy": 0.7341772151898734,
2290
- "count": 79
2291
  }
2292
  }
2293
  },
2294
  "sub_B5": {
2295
- "full_accuracy": 0.18,
2296
- "digit_accuracy": 0.7657142857142857,
2297
- "n_examples": 50,
2298
  "per_subtask": {
2299
  "MD": {
2300
  "accuracy": 1.0,
2301
- "count": 50
2302
  },
2303
  "MB": {
2304
  "accuracy": 1.0,
2305
- "count": 50
2306
  },
2307
  "UB": {
2308
- "accuracy": 0.6578947368421053,
2309
- "count": 152
2310
  },
2311
  "UD": {
2312
- "accuracy": 0.6938775510204082,
2313
- "count": 98
2314
  }
2315
  }
2316
  }
2317
  },
2318
  "summary": {
2319
- "overall_accuracy": 0.642,
2320
- "digit_accuracy": 0.9039047619047619,
2321
- "total_examples": 1500,
2322
  "n_splits": 24
2323
  }
2324
  },
2325
- "sorl_overall_accuracy": 0.5895833333333333,
2326
- "sft_overall_accuracy": 0.5416666666666666
2327
  }
 
143
  7779
144
  ],
145
  "loss": [
146
+ -1.4991626739501953,
147
+ 7.734500885009766,
148
+ 5.436001777648926,
149
+ 3.4542782306671143,
150
+ 3.120439052581787,
151
+ 3.0616321563720703,
152
+ 3.3218164443969727,
153
+ 3.004723072052002,
154
+ 1.9910811185836792,
155
+ 0.6555660963058472,
156
+ -0.9676400423049927,
157
+ -2.3662240505218506,
158
+ -5.7794880867004395,
159
+ -7.444696426391602,
160
+ -9.871407508850098,
161
+ -9.68606948852539,
162
+ -10.215792655944824,
163
+ -10.611030578613281,
164
+ -10.236133575439453,
165
+ -11.204968452453613,
166
+ -11.310162544250488,
167
+ -12.506915092468262,
168
+ -12.20246410369873,
169
+ -11.986822128295898,
170
+ -12.56383991241455,
171
+ -13.523144721984863,
172
+ -12.998540878295898,
173
+ -12.551019668579102,
174
+ -14.125226974487305,
175
+ -14.020819664001465,
176
+ -13.266901016235352,
177
+ -13.1586275100708,
178
+ -13.030251502990723,
179
+ -12.78680419921875,
180
+ -12.259639739990234,
181
+ -12.242656707763672,
182
+ -11.402668952941895,
183
+ -12.361416816711426,
184
+ -11.902348518371582,
185
+ -11.421567916870117,
186
+ -10.673042297363281,
187
+ -11.382230758666992,
188
+ -6.558827877044678,
189
+ -6.498293876647949,
190
+ -4.6974992752075195,
191
+ -5.150529861450195,
192
+ -4.423306941986084,
193
+ -3.7705271244049072,
194
+ -3.8951430320739746,
195
+ -2.417325735092163,
196
+ -2.104322671890259,
197
+ -2.5029518604278564,
198
+ -1.6507831811904907,
199
+ -1.296630620956421,
200
+ -1.3789325952529907,
201
+ -1.2700506448745728,
202
+ -0.47501254081726074,
203
+ -1.239183783531189,
204
+ -1.1124522686004639,
205
+ -1.035009741783142,
206
+ -0.9158291816711426,
207
+ -0.7348793745040894,
208
+ -0.6639032959938049,
209
+ -0.8863333463668823,
210
+ -1.0262635946273804,
211
+ -0.49884942173957825,
212
+ -0.6950386166572571,
213
+ -0.4942106306552887,
214
+ -0.2716549038887024,
215
+ -0.5381484031677246,
216
+ -0.8825717568397522,
217
+ -0.6018041968345642,
218
+ -0.33367589116096497,
219
+ -0.5229771137237549,
220
+ -0.2667657732963562,
221
+ -0.5897171497344971,
222
+ -0.33587557077407837,
223
+ -0.4438111484050751,
224
+ -0.29231590032577515,
225
+ -0.37558865547180176,
226
+ -0.7385783195495605,
227
+ -0.29125189781188965,
228
+ -0.13102558255195618,
229
+ -0.24600398540496826,
230
+ -0.23949313163757324,
231
+ -0.4070977568626404,
232
+ -0.2605511546134949,
233
+ -0.3812254071235657,
234
+ -0.22385181486606598,
235
+ -0.3658450245857239,
236
+ -0.467978298664093,
237
+ -0.14255213737487793,
238
+ -0.173037588596344,
239
+ -0.43948355317115784,
240
+ -0.01865462213754654,
241
+ -0.3687412738800049,
242
+ -0.023503027856349945,
243
+ -0.7068221569061279,
244
+ -0.25219908356666565,
245
+ 0.1554774045944214,
246
+ -0.5628666877746582,
247
+ -0.577951192855835,
248
+ -0.44770199060440063,
249
+ 0.13656851649284363,
250
+ -0.4713708758354187,
251
+ -0.13712182641029358,
252
+ -0.20912747085094452,
253
+ -0.480000376701355,
254
+ -0.4113278090953827,
255
+ -0.4733881652355194,
256
+ -0.27355921268463135,
257
+ -0.3161522448062897,
258
+ -0.3382975459098816,
259
+ 0.14343056082725525,
260
+ -0.26643693447113037,
261
+ -0.10106643289327621,
262
+ -0.18165260553359985,
263
+ -0.3992097079753876,
264
+ -0.0171702578663826,
265
+ -0.1860802173614502,
266
+ -0.3103711009025574,
267
+ -0.4309031665325165,
268
+ -0.13910165429115295,
269
+ -0.3424716591835022,
270
+ 0.11052711308002472,
271
+ -0.1522039771080017,
272
+ -0.19839590787887573,
273
+ -0.12273947149515152,
274
+ -0.0678120106458664,
275
+ -0.25733813643455505,
276
+ -0.07050967961549759,
277
+ -0.19385433197021484,
278
+ -0.12846776843070984,
279
+ -0.06633119285106659,
280
+ -0.24687248468399048,
281
+ -0.20333541929721832,
282
+ -0.18603983521461487,
283
+ -0.381289005279541,
284
+ -0.3737454414367676,
285
+ -0.32912370562553406
286
  ],
287
  "base_loss": [
288
+ 10.222891807556152,
289
+ 6.434610843658447,
290
+ 3.7198283672332764,
291
+ 1.9681453704833984,
292
+ 1.959210753440857,
293
+ 1.929343342781067,
294
+ 1.9306913614273071,
295
+ 1.8596428632736206,
296
+ 1.8303827047348022,
297
+ 1.9475268125534058,
298
+ 1.855019211769104,
299
+ 1.9119843244552612,
300
+ 1.8700278997421265,
301
+ 1.8823758363723755,
302
+ 1.8745900392532349,
303
+ 1.836315393447876,
304
+ 1.8635133504867554,
305
+ 1.8799570798873901,
306
+ 1.838736653327942,
307
+ 1.8712080717086792,
308
+ 1.8709423542022705,
309
+ 1.875689148902893,
310
+ 1.7823193073272705,
311
+ 1.7674901485443115,
312
+ 1.8198670148849487,
313
+ 1.844150424003601,
314
+ 1.8199037313461304,
315
+ 1.76785409450531,
316
+ 1.8722307682037354,
317
+ 1.8727575540542603,
318
+ 1.7651337385177612,
319
+ 1.7576688528060913,
320
+ 1.7199641466140747,
321
+ 1.6449308395385742,
322
+ 1.640900731086731,
323
+ 1.617160677909851,
324
+ 1.5190116167068481,
325
+ 1.594099998474121,
326
+ 1.506998062133789,
327
+ 1.4643415212631226,
328
+ 1.4485048055648804,
329
+ 1.45021653175354,
330
+ 0.9755846858024597,
331
+ 0.9805561900138855,
332
+ 0.8023272752761841,
333
+ 0.7398602366447449,
334
+ 0.6865811944007874,
335
+ 0.6157902479171753,
336
+ 0.6458702683448792,
337
+ 0.47322097420692444,
338
+ 0.41887447237968445,
339
+ 0.455700546503067,
340
+ 0.36339297890663147,
341
+ 0.31774353981018066,
342
+ 0.3184225559234619,
343
+ 0.2997283637523651,
344
+ 0.2608298361301422,
345
+ 0.2871330678462982,
346
+ 0.2896370589733124,
347
+ 0.25344473123550415,
348
+ 0.2470230907201767,
349
+ 0.20899748802185059,
350
+ 0.20870697498321533,
351
+ 0.23385517299175262,
352
+ 0.265758752822876,
353
+ 0.1959606260061264,
354
+ 0.23661544919013977,
355
+ 0.2046181857585907,
356
+ 0.2053668349981308,
357
+ 0.211956188082695,
358
+ 0.2493533343076706,
359
+ 0.2304404079914093,
360
+ 0.20379698276519775,
361
+ 0.2550745904445648,
362
+ 0.21894343197345734,
363
+ 0.24354372918605804,
364
+ 0.16568894684314728,
365
+ 0.22153237462043762,
366
+ 0.17909720540046692,
367
+ 0.14946947991847992,
368
+ 0.1980229616165161,
369
+ 0.16256201267242432,
370
+ 0.2212965339422226,
371
+ 0.14926958084106445,
372
+ 0.16306713223457336,
373
+ 0.14940468966960907,
374
+ 0.1395643949508667,
375
+ 0.18206553161144257,
376
+ 0.15634550154209137,
377
+ 0.16714580357074738,
378
+ 0.16745184361934662,
379
+ 0.11398492753505707,
380
+ 0.11608642339706421,
381
+ 0.1713183969259262,
382
+ 0.10937852412462234,
383
+ 0.1577722728252411,
384
+ 0.11792578548192978,
385
+ 0.1740894764661789,
386
+ 0.13912317156791687,
387
+ 0.16688987612724304,
388
+ 0.15285928547382355,
389
+ 0.16415275633335114,
390
+ 0.12775208055973053,
391
+ 0.1447954922914505,
392
+ 0.14248152077198029,
393
+ 0.11021719872951508,
394
+ 0.11120182275772095,
395
+ 0.13144712150096893,
396
+ 0.11101575940847397,
397
+ 0.15845659375190735,
398
+ 0.12258584052324295,
399
+ 0.1119571402668953,
400
+ 0.12098769098520279,
401
+ 0.12010190635919571,
402
+ 0.07548805326223373,
403
+ 0.0853903666138649,
404
+ 0.10983582586050034,
405
+ 0.10240059345960617,
406
+ 0.12491006404161453,
407
+ 0.09631884098052979,
408
+ 0.1358448565006256,
409
+ 0.10576845705509186,
410
+ 0.11273549497127533,
411
+ 0.15282641351222992,
412
+ 0.08400528877973557,
413
+ 0.12438462674617767,
414
+ 0.10317623615264893,
415
+ 0.09730804711580276,
416
+ 0.08032923936843872,
417
+ 0.08921071141958237,
418
+ 0.09194045513868332,
419
+ 0.07488199323415756,
420
+ 0.06639757007360458,
421
+ 0.0887700691819191,
422
+ 0.0883544310927391,
423
+ 0.09428595006465912,
424
+ 0.07859671115875244,
425
+ 0.10635203123092651,
426
+ 0.10783649235963821,
427
+ 0.08185470104217529
428
  ],
429
  "info_loss": [
430
+ -2.037433624267578,
431
+ -0.31572675704956055,
432
+ -0.05017352104187012,
433
+ -0.04210937023162842,
434
+ -0.07202112674713135,
435
+ -0.07436704635620117,
436
+ -0.04826033115386963,
437
+ -0.07273316383361816,
438
+ -0.1699002981185913,
439
+ -0.30787158012390137,
440
+ -0.4511171579360962,
441
+ -0.5922137498855591,
442
+ -0.9263088703155518,
443
+ -1.0923757553100586,
444
+ -1.3311564922332764,
445
+ -1.307382583618164,
446
+ -1.3608746528625488,
447
+ -1.3961358070373535,
448
+ -1.3470678329467773,
449
+ -1.4397988319396973,
450
+ -1.4440470933914185,
451
+ -1.5294926166534424,
452
+ -1.4736708402633667,
453
+ -1.438417673110962,
454
+ -1.4990330934524536,
455
+ -1.5943763256072998,
456
+ -1.5328370332717896,
457
+ -1.4824275970458984,
458
+ -1.6502817869186401,
459
+ -1.6358681917190552,
460
+ -1.54860520362854,
461
+ -1.5320099592208862,
462
+ -1.51332688331604,
463
+ -1.4763262271881104,
464
+ -1.4228277206420898,
465
+ -1.4195810556411743,
466
+ -1.3227262496948242,
467
+ -1.4174060821533203,
468
+ -1.360844612121582,
469
+ -1.3054325580596924,
470
+ -1.226885437965393,
471
+ -1.2967873811721802,
472
+ -0.7679556012153625,
473
+ -0.7592524290084839,
474
+ -0.5631743669509888,
475
+ -0.5992276072502136,
476
+ -0.5220549702644348,
477
+ -0.44866302609443665,
478
+ -0.4627205729484558,
479
+ -0.2990763783454895,
480
+ -0.26095426082611084,
481
+ -0.30547964572906494,
482
+ -0.20989865064620972,
483
+ -0.17013928294181824,
484
+ -0.1788894385099411,
485
+ -0.16711220145225525,
486
+ -0.08040527999401093,
487
+ -0.15986192226409912,
488
+ -0.1477246880531311,
489
+ -0.13738852739334106,
490
+ -0.12397092580795288,
491
+ -0.10177233070135117,
492
+ -0.09493035823106766,
493
+ -0.11996994912624359,
494
+ -0.1356387436389923,
495
+ -0.07737710326910019,
496
+ -0.10097216069698334,
497
+ -0.07617561519145966,
498
+ -0.05582185089588165,
499
+ -0.08229339122772217,
500
+ -0.11973747611045837,
501
+ -0.08974818885326385,
502
+ -0.060351938009262085,
503
+ -0.08523020148277283,
504
+ -0.0554652065038681,
505
+ -0.08915802836418152,
506
+ -0.05697622150182724,
507
+ -0.07321585714817047,
508
+ -0.05324859917163849,
509
+ -0.05828547477722168,
510
+ -0.10016466677188873,
511
+ -0.051857367157936096,
512
+ -0.040182337164878845,
513
+ -0.04722199589014053,
514
+ -0.04640142619609833,
515
+ -0.06200437247753143,
516
+ -0.04559026658535004,
517
+ -0.0621969997882843,
518
+ -0.04386666417121887,
519
+ -0.058832526206970215,
520
+ -0.06905774027109146,
521
+ -0.03173664212226868,
522
+ -0.034987613558769226,
523
+ -0.06714990735054016,
524
+ -0.018569253385066986,
525
+ -0.05842562019824982,
526
+ -0.02083367109298706,
527
+ -0.09449857473373413,
528
+ -0.04437990486621857,
529
+ -0.007008716464042664,
530
+ -0.07735760509967804,
531
+ -0.08071395009756088,
532
+ -0.06259468197822571,
533
+ -0.006447583436965942,
534
+ -0.06709475815296173,
535
+ -0.031158633530139923,
536
+ -0.03744358569383621,
537
+ -0.06632976233959198,
538
+ -0.057221584022045135,
539
+ -0.06812497973442078,
540
+ -0.04631491005420685,
541
+ -0.04810434579849243,
542
+ -0.05078253149986267,
543
+ -0.003159046173095703,
544
+ -0.03911986202001572,
545
+ -0.024468280375003815,
546
+ -0.03566398471593857,
547
+ -0.05539749562740326,
548
+ -0.01884063333272934,
549
+ -0.03409624472260475,
550
+ -0.05011004954576492,
551
+ -0.05908038467168808,
552
+ -0.03038986772298813,
553
+ -0.054623715579509735,
554
+ -0.0025235339999198914,
555
+ -0.03323037177324295,
556
+ -0.03580985218286514,
557
+ -0.0279695987701416,
558
+ -0.019559767097234726,
559
+ -0.039626412093639374,
560
+ -0.021337643265724182,
561
+ -0.03141063079237938,
562
+ -0.024501889944076538,
563
+ -0.020283721387386322,
564
+ -0.03983597829937935,
565
+ -0.03490420803427696,
566
+ -0.032450538128614426,
567
+ -0.05533437058329582,
568
+ -0.053110718727111816,
569
+ -0.046020276844501495
570
  ],
571
  "abs_loss": [
572
+ 2.2782208919525146,
573
+ 2.0877914428710938,
574
+ 1.8768965005874634,
575
+ 1.8503764867782593,
576
+ 1.860660433769226,
577
+ 1.8386014699935913,
578
+ 1.8303370475769043,
579
+ 1.837899088859558,
580
+ 1.8108211755752563,
581
+ 1.5847219228744507,
582
+ 1.391497015953064,
583
+ 1.3384944200515747,
584
+ 1.329427719116211,
585
+ 1.3017956018447876,
586
+ 1.2399191856384277,
587
+ 1.2146539688110352,
588
+ 1.135986328125,
589
+ 1.0552599430084229,
590
+ 0.887962818145752,
591
+ 0.8350238800048828,
592
+ 0.6903941631317139,
593
+ 0.5229440331459045,
594
+ 0.4153222143650055,
595
+ 0.3750406801700592,
596
+ 0.3795664310455322,
597
+ 0.30999478697776794,
598
+ 0.2965479791164398,
599
+ 0.3220847547054291,
600
+ 0.29502496123313904,
601
+ 0.29283303022384644,
602
+ 0.29946067929267883,
603
+ 0.2762465178966522,
604
+ 0.27067646384239197,
605
+ 0.2124723196029663,
606
+ 0.2123774290084839,
607
+ 0.27615517377853394,
608
+ 0.2274278849363327,
609
+ 0.1947956085205078,
610
+ 0.1658640205860138,
611
+ 0.14536798000335693,
612
+ 0.13485930860042572,
613
+ 0.1487635374069214,
614
+ 0.09887402504682541,
615
+ 0.08551109582185745,
616
+ 0.11851557344198227,
617
+ 0.07192469388246536,
618
+ 0.09643194824457169,
619
+ 0.07271024584770203,
620
+ 0.08401944488286972,
621
+ 0.0711170956492424,
622
+ 0.10404173284769058,
623
+ 0.07230190932750702,
624
+ 0.07444865256547928,
625
+ 0.057993996888399124,
626
+ 0.06331399083137512,
627
+ 0.06968239694833755,
628
+ 0.058420877903699875,
629
+ 0.05234828591346741,
630
+ 0.04469572380185127,
631
+ 0.060892462730407715,
632
+ 0.06377529352903366,
633
+ 0.05298902466893196,
634
+ 0.06325497478246689,
635
+ 0.0557883195579052,
636
+ 0.05224437639117241,
637
+ 0.04783019796013832,
638
+ 0.0495179109275341,
639
+ 0.0373246930539608,
640
+ 0.05701762065291405,
641
+ 0.03870227187871933,
642
+ 0.059414517134428024,
643
+ 0.029631739482283592,
644
+ 0.04484502971172333,
645
+ 0.042912598699331284,
646
+ 0.05295603349804878,
647
+ 0.04987824335694313,
648
+ 0.03641092777252197,
649
+ 0.040556978434324265,
650
+ 0.03168303146958351,
651
+ 0.028551781550049782,
652
+ 0.02967704087495804,
653
+ 0.027214542031288147,
654
+ 0.017562584951519966,
655
+ 0.024341275915503502,
656
+ 0.032353684306144714,
657
+ 0.037270668894052505,
658
+ 0.027527958154678345,
659
+ 0.025686120614409447,
660
+ 0.021791374310851097,
661
+ 0.02155947871506214,
662
+ 0.017588598653674126,
663
+ 0.019590383395552635,
664
+ 0.024403566494584084,
665
+ 0.040734563022851944,
666
+ 0.01853681355714798,
667
+ 0.022299962118268013,
668
+ 0.020661523565649986,
669
+ 0.018253402784466743,
670
+ 0.014955420047044754,
671
+ 0.01722128875553608,
672
+ 0.014421396888792515,
673
+ 0.022876715287566185,
674
+ 0.022903740406036377,
675
+ 0.019680185243487358,
676
+ 0.023799598217010498,
677
+ 0.02028706483542919,
678
+ 0.011872164905071259,
679
+ 0.019997278228402138,
680
+ 0.014807574450969696,
681
+ 0.01684972085058689,
682
+ 0.014521368779242039,
683
+ 0.014271489344537258,
684
+ 0.013847983442246914,
685
+ 0.02395080029964447,
686
+ 0.021941566839814186,
687
+ 0.017697757109999657,
688
+ 0.013051857240498066,
689
+ 0.01706533133983612,
690
+ 0.014984418638050556,
691
+ 0.012025459669530392,
692
+ 0.02180357836186886,
693
+ 0.007351210806518793,
694
+ 0.012332173995673656,
695
+ 0.0186754297465086,
696
+ 0.009667596779763699,
697
+ 0.01859714835882187,
698
+ 0.0180825162678957,
699
+ 0.009598099626600742,
700
+ 0.012128149159252644,
701
+ 0.009924765676259995,
702
+ 0.005770361050963402,
703
+ 0.017123397439718246,
704
+ 0.01288231834769249,
705
+ 0.006488412618637085,
706
+ 0.015862809494137764,
707
+ 0.016753973439335823,
708
+ 0.011881467886269093,
709
+ 0.014946346171200275,
710
+ 0.023875480517745018,
711
+ 0.01311760675162077
712
  ],
713
  "zipf_loss": [
714
+ 8.424459457397461,
715
+ 4.248378753662109,
716
+ 2.030219078063965,
717
+ 1.722188949584961,
718
+ 1.69537353515625,
719
+ 1.6920992136001587,
720
+ 1.6906945705413818,
721
+ 1.688621997833252,
722
+ 1.6786192655563354,
723
+ 1.628282904624939,
724
+ 1.5493625402450562,
725
+ 1.5100793838500977,
726
+ 1.4806294441223145,
727
+ 1.4665058851242065,
728
+ 1.4415757656097412,
729
+ 1.4299758672714233,
730
+ 1.415842056274414,
731
+ 1.3648438453674316,
732
+ 1.3070120811462402,
733
+ 1.2383089065551758,
734
+ 1.1903269290924072,
735
+ 0.8600262999534607,
736
+ 0.7103928327560425,
737
+ 0.5923600196838379,
738
+ 0.5686673521995544,
739
+ 0.5454696416854858,
740
+ 0.48027122020721436,
741
+ 0.47319427132606506,
742
+ 0.47585660219192505,
743
+ 0.435821533203125,
744
+ 0.42406991124153137,
745
+ 0.3761786222457886,
746
+ 0.3559854030609131,
747
+ 0.31027960777282715,
748
+ 0.3064996898174286,
749
+ 0.3083776831626892,
750
+ 0.2828387916088104,
751
+ 0.19906440377235413,
752
+ 0.18251287937164307,
753
+ 0.15387912094593048,
754
+ 0.13382180035114288,
755
+ 0.12054996937513351,
756
+ 0.13525637984275818,
757
+ 0.10512371361255646,
758
+ 0.1200651153922081,
759
+ 0.09469343721866608,
760
+ 0.10101855546236038,
761
+ 0.09304188936948776,
762
+ 0.07779079675674438,
763
+ 0.09310503304004669,
764
+ 0.07594139128923416,
765
+ 0.08891379088163376,
766
+ 0.0773656815290451,
767
+ 0.08121927082538605,
768
+ 0.08520787209272385,
769
+ 0.09437482804059982,
770
+ 0.06236838549375534,
771
+ 0.067067451775074,
772
+ 0.07068801671266556,
773
+ 0.07934156805276871,
774
+ 0.07047945261001587,
775
+ 0.06854751706123352,
776
+ 0.07036779075860977,
777
+ 0.07393214106559753,
778
+ 0.05914054065942764,
779
+ 0.07417795062065125,
780
+ 0.07311580330133438,
781
+ 0.0591949000954628,
782
+ 0.07549495995044708,
783
+ 0.06895903497934341,
784
+ 0.05950825661420822,
785
+ 0.062274135649204254,
786
+ 0.061562009155750275,
787
+ 0.06995910406112671,
788
+ 0.0636473149061203,
789
+ 0.053331539034843445,
790
+ 0.06455662846565247,
791
+ 0.06275936961174011,
792
+ 0.05790455639362335,
793
+ 0.05494140088558197,
794
+ 0.06207764893770218,
795
+ 0.06203828006982803,
796
+ 0.0477449893951416,
797
+ 0.07451224327087402,
798
+ 0.05821862816810608,
799
+ 0.059814199805259705,
800
+ 0.05303432047367096,
801
+ 0.0561104416847229,
802
+ 0.056290194392204285,
803
+ 0.05317850410938263,
804
+ 0.053388360887765884,
805
+ 0.05887032300233841,
806
+ 0.05831175297498703,
807
+ 0.05662363022565842,
808
+ 0.05580570548772812,
809
+ 0.05551263689994812,
810
+ 0.06484174728393555,
811
+ 0.06224876269698143,
812
+ 0.05098125338554382,
813
+ 0.056952573359012604,
814
+ 0.056407906115055084,
815
+ 0.06274788081645966,
816
+ 0.04820239916443825,
817
+ 0.054280832409858704,
818
+ 0.05471523106098175,
819
+ 0.062218599021434784,
820
+ 0.05291934311389923,
821
+ 0.04985041171312332,
822
+ 0.04839150980114937,
823
+ 0.0477200448513031,
824
+ 0.06555189192295074,
825
+ 0.05150694400072098,
826
+ 0.04715529829263687,
827
+ 0.05252403765916824,
828
+ 0.04707948490977287,
829
+ 0.05645621567964554,
830
+ 0.06384622305631638,
831
+ 0.050658129155635834,
832
+ 0.04482756927609444,
833
+ 0.05736084282398224,
834
+ 0.05270415171980858,
835
+ 0.05339711531996727,
836
+ 0.050828322768211365,
837
+ 0.0490715391933918,
838
+ 0.05079040676355362,
839
+ 0.053855396807193756,
840
+ 0.054718103259801865,
841
+ 0.05868866294622421,
842
+ 0.046243615448474884,
843
+ 0.04872281104326248,
844
+ 0.050349265336990356,
845
+ 0.043657638132572174,
846
+ 0.04886531084775925,
847
+ 0.047087110579013824,
848
+ 0.06154657155275345,
849
+ 0.049745336174964905,
850
+ 0.0586807057261467,
851
+ 0.06420803815126419,
852
+ 0.04713771492242813,
853
+ 0.047912608832120895
854
  ],
855
  "denoise_loss": [],
856
  "ortho_loss": [
857
+ 0.7175698280334473,
858
+ 0.42890316247940063,
859
+ 0.33829447627067566,
860
+ 0.22440843284130096,
861
+ 0.18137924373149872,
862
+ 0.2310202568769455,
863
+ 0.22335532307624817,
864
+ 0.21682365238666534,
865
+ 0.25529515743255615,
866
+ 0.30100017786026,
867
+ 0.3371667265892029,
868
+ 0.3791210651397705,
869
+ 0.3911088705062866,
870
+ 0.3969413936138153,
871
+ 0.4060496985912323,
872
+ 0.40077337622642517,
873
+ 0.39444079995155334,
874
+ 0.3958539366722107,
875
+ 0.4089014232158661,
876
+ 0.40195876359939575,
877
+ 0.3901920020580292,
878
+ 0.3879682719707489,
879
+ 0.37536731362342834,
880
+ 0.36621353030204773,
881
+ 0.3571617305278778,
882
+ 0.34676823019981384,
883
+ 0.3404388427734375,
884
+ 0.3386509418487549,
885
+ 0.3293488025665283,
886
+ 0.3230898678302765,
887
+ 0.31814470887184143,
888
+ 0.31388574838638306,
889
+ 0.30950528383255005,
890
+ 0.3056495189666748,
891
+ 0.30528897047042847,
892
+ 0.3057379424571991,
893
+ 0.3013770282268524,
894
+ 0.29826030135154724,
895
+ 0.29658982157707214,
896
+ 0.2873019576072693,
897
+ 0.2833050787448883,
898
+ 0.27994805574417114,
899
+ 0.2750612199306488,
900
+ 0.2668859660625458,
901
+ 0.25714653730392456,
902
+ 0.25182783603668213,
903
+ 0.25069114565849304,
904
+ 0.24311591684818268,
905
+ 0.24343299865722656,
906
+ 0.23794493079185486,
907
+ 0.23388665914535522,
908
+ 0.22817164659500122,
909
+ 0.22779357433319092,
910
+ 0.2294483184814453,
911
+ 0.22844713926315308,
912
+ 0.23184555768966675,
913
+ 0.22146819531917572,
914
+ 0.22426211833953857,
915
+ 0.22186093032360077,
916
+ 0.2249176949262619,
917
+ 0.22080540657043457,
918
+ 0.22399508953094482,
919
+ 0.2173675149679184,
920
+ 0.2208639681339264,
921
+ 0.22077171504497528,
922
+ 0.22138890624046326,
923
+ 0.22595320641994476,
924
+ 0.22017161548137665,
925
+ 0.21775898337364197,
926
+ 0.22427847981452942,
927
+ 0.22222276031970978,
928
+ 0.22420811653137207,
929
+ 0.22324113547801971,
930
+ 0.22908048331737518,
931
+ 0.22781133651733398,
932
+ 0.23045624792575836,
933
+ 0.23191936314105988,
934
+ 0.23010753095149994,
935
+ 0.2286965698003769,
936
+ 0.2256595641374588,
937
+ 0.22391031682491302,
938
+ 0.22720178961753845,
939
+ 0.22458872199058533,
940
+ 0.21908141672611237,
941
+ 0.21781863272190094,
942
+ 0.22157607972621918,
943
+ 0.22329872846603394,
944
+ 0.22436219453811646,
945
+ 0.22031615674495697,
946
+ 0.22133824229240417,
947
+ 0.21895863115787506,
948
+ 0.21569904685020447,
949
+ 0.21912707388401031,
950
+ 0.2237694412469864,
951
+ 0.22242595255374908,
952
+ 0.22354666888713837,
953
+ 0.22155342996120453,
954
+ 0.2200615555047989,
955
+ 0.22214236855506897,
956
+ 0.22286343574523926,
957
+ 0.21898429095745087,
958
+ 0.21795101463794708,
959
+ 0.2166079729795456,
960
+ 0.21775244176387787,
961
+ 0.21914467215538025,
962
+ 0.22057823836803436,
963
+ 0.21595270931720734,
964
+ 0.21475619077682495,
965
+ 0.2145148068666458,
966
+ 0.2125515639781952,
967
+ 0.21255271136760712,
968
+ 0.21542948484420776,
969
+ 0.21557892858982086,
970
+ 0.22325363755226135,
971
+ 0.22297166287899017,
972
+ 0.2213776558637619,
973
+ 0.22247739136219025,
974
+ 0.22220821678638458,
975
+ 0.2229536771774292,
976
+ 0.2249082773923874,
977
+ 0.22506743669509888,
978
+ 0.223460391163826,
979
+ 0.22405298054218292,
980
+ 0.22543157637119293,
981
+ 0.22438181936740875,
982
+ 0.22402642667293549,
983
+ 0.23021270334720612,
984
+ 0.22955596446990967,
985
+ 0.22911269962787628,
986
+ 0.23038555681705475,
987
+ 0.23054704070091248,
988
+ 0.23122508823871613,
989
+ 0.2306143045425415,
990
+ 0.23163563013076782,
991
+ 0.23052428662776947,
992
+ 0.2300809919834137,
993
+ 0.22999243438243866,
994
+ 0.23001831769943237,
995
+ 0.23135921359062195,
996
+ 0.23247401416301727
997
  ],
998
  "lr": [
999
  1.6752136752136756e-05,
 
1161
  7779
1162
  ],
1163
  "eval_accuracy": [
1164
+ 0.0,
1165
  0.0,
1166
  0.0,
1167
  0.0,
 
1183
  0.0
1184
  ]
1185
  },
1186
+ "final_accuracy": 0.6407692307692308,
1187
  "sft_eval": {
1188
  "config": {
1189
  "ops": "add_sub",
1190
  "K": null,
1191
  "mode": "sft",
1192
  "n_digits": 6,
1193
+ "n_per_split": 100
1194
  },
1195
  "splits": {
1196
  "add_S0": {
1197
+ "full_accuracy": 0.95,
1198
+ "digit_accuracy": 0.9928571428571429,
1199
+ "n_examples": 100,
1200
  "per_subtask": {
1201
  "SA": {
1202
+ "accuracy": 0.9917355371900827,
1203
+ "count": 605
1204
  },
1205
  "SS": {
1206
  "accuracy": 1.0,
1207
+ "count": 95
1208
  }
1209
  }
1210
  },
1211
  "add_S1": {
1212
+ "full_accuracy": 0.88,
1213
+ "digit_accuracy": 0.9828571428571429,
1214
+ "n_examples": 100,
1215
  "per_subtask": {
1216
  "SA": {
1217
+ "accuracy": 0.9901960784313726,
1218
+ "count": 204
1219
  },
1220
  "SC": {
1221
+ "accuracy": 0.9881656804733728,
1222
+ "count": 169
1223
  },
1224
  "SS": {
1225
+ "accuracy": 1.0,
1226
+ "count": 31
1227
  },
1228
  "UC": {
1229
+ "accuracy": 0.972972972972973,
1230
+ "count": 296
1231
  }
1232
  }
1233
  },
1234
  "add_S2": {
1235
+ "full_accuracy": 0.79,
1236
+ "digit_accuracy": 0.97,
1237
+ "n_examples": 100,
1238
  "per_subtask": {
1239
  "SA": {
1240
+ "accuracy": 0.9938650306748467,
1241
+ "count": 163
1242
  },
1243
  "SC": {
1244
+ "accuracy": 0.9615384615384616,
1245
+ "count": 130
1246
  },
1247
  "SS": {
1248
+ "accuracy": 0.9655172413793104,
1249
+ "count": 87
1250
  },
1251
  "UC": {
1252
+ "accuracy": 0.9556650246305419,
1253
+ "count": 203
1254
  },
1255
  "US": {
1256
+ "accuracy": 0.9743589743589743,
1257
+ "count": 117
1258
  }
1259
  }
1260
  },
1261
  "add_S3": {
1262
+ "full_accuracy": 0.47,
1263
+ "digit_accuracy": 0.91,
1264
+ "n_examples": 100,
1265
  "per_subtask": {
1266
  "SA": {
1267
  "accuracy": 1.0,
1268
+ "count": 121
1269
  },
1270
  "SC": {
1271
+ "accuracy": 0.9834710743801653,
1272
+ "count": 121
1273
  },
1274
  "SS": {
1275
  "accuracy": 1.0,
1276
+ "count": 49
1277
  },
1278
  "UC": {
1279
+ "accuracy": 0.7365591397849462,
1280
+ "count": 186
1281
  },
1282
  "US": {
1283
+ "accuracy": 0.9461883408071748,
1284
+ "count": 223
1285
  }
1286
  }
1287
  },
1288
  "add_S4": {
1289
+ "full_accuracy": 0.25,
1290
+ "digit_accuracy": 0.7671428571428571,
1291
+ "n_examples": 100,
1292
  "per_subtask": {
1293
  "SA": {
1294
  "accuracy": 1.0,
1295
+ "count": 104
1296
  },
1297
  "SC": {
1298
  "accuracy": 1.0,
1299
+ "count": 106
1300
  },
1301
  "SS": {
1302
  "accuracy": 1.0,
1303
+ "count": 23
1304
  },
1305
  "UC": {
1306
+ "accuracy": 0.60625,
1307
+ "count": 160
1308
  },
1309
  "US": {
1310
+ "accuracy": 0.6742671009771987,
1311
+ "count": 307
1312
  }
1313
  }
1314
  },
1315
  "add_S5": {
1316
+ "full_accuracy": 0.0,
1317
+ "digit_accuracy": 0.5,
1318
+ "n_examples": 100,
1319
  "per_subtask": {
1320
  "SA": {
1321
  "accuracy": 1.0,
1322
+ "count": 100
1323
  },
1324
  "SC": {
1325
  "accuracy": 1.0,
1326
+ "count": 100
1327
  },
1328
  "UC": {
1329
+ "accuracy": 0.08,
1330
+ "count": 100
1331
  },
1332
  "US": {
1333
+ "accuracy": 0.355,
1334
+ "count": 400
1335
  }
1336
  }
1337
  },
1338
  "add_S6": {
1339
+ "full_accuracy": 0.07,
1340
+ "digit_accuracy": 0.46714285714285714,
1341
+ "n_examples": 100,
1342
  "per_subtask": {
1343
  "SC": {
1344
  "accuracy": 1.0,
1345
+ "count": 100
1346
  },
1347
  "UC": {
1348
+ "accuracy": 0.27,
1349
+ "count": 100
1350
  },
1351
  "US": {
1352
+ "accuracy": 0.4,
1353
+ "count": 500
1354
  }
1355
  }
1356
  },
1357
  "add_random": {
1358
+ "full_accuracy": 0.905,
1359
+ "digit_accuracy": 0.9835714285714285,
1360
  "n_examples": 200,
1361
  "per_subtask": {
1362
  "SA": {
1363
+ "accuracy": 0.9932885906040269,
1364
+ "count": 447
1365
  },
1366
  "SC": {
1367
+ "accuracy": 0.996875,
1368
+ "count": 320
1369
  },
1370
  "SS": {
1371
+ "accuracy": 0.9821428571428571,
1372
+ "count": 56
1373
  },
1374
  "UC": {
1375
+ "accuracy": 0.9716446124763705,
1376
+ "count": 529
1377
  },
1378
  "US": {
1379
+ "accuracy": 0.9375,
1380
+ "count": 48
1381
  }
1382
  }
1383
  },
1384
  "add_C1": {
1385
+ "full_accuracy": 0.85,
1386
+ "digit_accuracy": 0.9785714285714285,
1387
+ "n_examples": 100,
1388
  "per_subtask": {
1389
  "SA": {
1390
  "accuracy": 1.0,
1391
+ "count": 500
1392
  },
1393
  "SC": {
1394
  "accuracy": 1.0,
1395
+ "count": 100
1396
  },
1397
  "UC": {
1398
+ "accuracy": 0.85,
1399
+ "count": 100
1400
  }
1401
  }
1402
  },
1403
  "add_C2": {
1404
  "full_accuracy": 0.84,
1405
+ "digit_accuracy": 0.9771428571428571,
1406
+ "n_examples": 100,
1407
  "per_subtask": {
1408
  "SA": {
1409
  "accuracy": 1.0,
1410
+ "count": 400
1411
  },
1412
  "SC": {
1413
  "accuracy": 1.0,
1414
+ "count": 100
1415
  },
1416
  "UC": {
1417
+ "accuracy": 0.9102564102564102,
1418
+ "count": 156
1419
  },
1420
  "US": {
1421
+ "accuracy": 0.9545454545454546,
1422
+ "count": 44
1423
  }
1424
  }
1425
  },
1426
  "add_C3": {
1427
+ "full_accuracy": 0.58,
1428
+ "digit_accuracy": 0.9271428571428572,
1429
+ "n_examples": 100,
1430
  "per_subtask": {
1431
  "SA": {
1432
  "accuracy": 1.0,
1433
+ "count": 300
1434
  },
1435
  "SC": {
1436
  "accuracy": 1.0,
1437
+ "count": 100
1438
  },
1439
  "UC": {
1440
+ "accuracy": 0.7889447236180904,
1441
+ "count": 199
1442
  },
1443
  "US": {
1444
+ "accuracy": 0.9108910891089109,
1445
+ "count": 101
1446
  }
1447
  }
1448
  },
1449
  "add_C4": {
1450
+ "full_accuracy": 0.53,
1451
+ "digit_accuracy": 0.9128571428571428,
1452
+ "n_examples": 100,
1453
  "per_subtask": {
1454
  "SA": {
1455
  "accuracy": 1.0,
1456
+ "count": 200
1457
  },
1458
  "SC": {
1459
  "accuracy": 1.0,
1460
+ "count": 100
1461
  },
1462
  "UC": {
1463
+ "accuracy": 0.8409090909090909,
1464
+ "count": 264
1465
  },
1466
  "US": {
1467
+ "accuracy": 0.8602941176470589,
1468
+ "count": 136
1469
  }
1470
  }
1471
  },
1472
  "add_C5": {
1473
+ "full_accuracy": 0.56,
1474
+ "digit_accuracy": 0.8928571428571429,
1475
+ "n_examples": 100,
1476
  "per_subtask": {
1477
  "SA": {
1478
  "accuracy": 1.0,
1479
+ "count": 100
1480
  },
1481
  "SC": {
1482
  "accuracy": 1.0,
1483
+ "count": 100
1484
  },
1485
  "UC": {
1486
+ "accuracy": 0.8580645161290322,
1487
+ "count": 310
1488
  },
1489
  "US": {
1490
+ "accuracy": 0.8368421052631579,
1491
+ "count": 190
1492
  }
1493
  }
1494
  },
1495
  "add_C6": {
1496
+ "full_accuracy": 0.63,
1497
+ "digit_accuracy": 0.8971428571428571,
1498
+ "n_examples": 100,
1499
  "per_subtask": {
1500
  "SC": {
1501
  "accuracy": 1.0,
1502
+ "count": 100
1503
  },
1504
  "UC": {
1505
+ "accuracy": 0.9027027027027027,
1506
+ "count": 370
1507
  },
1508
  "US": {
1509
+ "accuracy": 0.8434782608695652,
1510
+ "count": 230
1511
  }
1512
  }
1513
  },
1514
  "sub_M0": {
1515
+ "full_accuracy": 0.96,
1516
+ "digit_accuracy": 0.9942857142857143,
1517
+ "n_examples": 100,
1518
  "per_subtask": {
1519
  "MD": {
1520
+ "accuracy": 0.9934959349593496,
1521
+ "count": 615
1522
  },
1523
  "ME": {
1524
  "accuracy": 1.0,
1525
+ "count": 85
1526
  }
1527
  }
1528
  },
1529
  "sub_M1": {
1530
+ "full_accuracy": 0.85,
1531
+ "digit_accuracy": 0.9771428571428571,
1532
+ "n_examples": 100,
1533
  "per_subtask": {
1534
  "MD": {
1535
+ "accuracy": 0.9863013698630136,
1536
+ "count": 292
1537
  },
1538
  "MB": {
1539
+ "accuracy": 0.9861111111111112,
1540
+ "count": 144
1541
  },
1542
  "ME": {
1543
+ "accuracy": 1.0,
1544
+ "count": 25
1545
  },
1546
  "UB": {
1547
+ "accuracy": 0.9581589958158996,
1548
+ "count": 239
1549
  }
1550
  }
1551
  },
1552
  "sub_M2": {
1553
+ "full_accuracy": 0.59,
1554
+ "digit_accuracy": 0.9371428571428572,
1555
+ "n_examples": 100,
1556
  "per_subtask": {
1557
  "MD": {
1558
+ "accuracy": 0.990521327014218,
1559
+ "count": 211
1560
  },
1561
  "MB": {
1562
+ "accuracy": 0.9391304347826087,
1563
+ "count": 115
1564
  },
1565
  "ME": {
1566
+ "accuracy": 0.9882352941176471,
1567
+ "count": 85
1568
  },
1569
  "UB": {
1570
+ "accuracy": 0.8121546961325967,
1571
+ "count": 181
1572
  },
1573
  "UD": {
1574
+ "accuracy": 1.0,
1575
+ "count": 108
1576
  }
1577
  }
1578
  },
1579
  "sub_M3": {
1580
+ "full_accuracy": 0.3,
1581
+ "digit_accuracy": 0.8628571428571429,
1582
+ "n_examples": 100,
1583
  "per_subtask": {
1584
  "MD": {
1585
+ "accuracy": 0.9888268156424581,
1586
+ "count": 179
1587
  },
1588
  "MB": {
1589
+ "accuracy": 0.970873786407767,
1590
+ "count": 103
1591
  },
1592
  "ME": {
1593
  "accuracy": 1.0,
1594
+ "count": 56
1595
  },
1596
  "UB": {
1597
+ "accuracy": 0.5906040268456376,
1598
+ "count": 149
1599
  },
1600
  "UD": {
1601
+ "accuracy": 0.8591549295774648,
1602
+ "count": 213
1603
  }
1604
  }
1605
  },
1606
  "sub_M4": {
1607
+ "full_accuracy": 0.05,
1608
+ "digit_accuracy": 0.7171428571428572,
1609
+ "n_examples": 100,
1610
  "per_subtask": {
1611
  "MD": {
1612
  "accuracy": 1.0,
1613
+ "count": 200
1614
  },
1615
  "MB": {
1616
  "accuracy": 1.0,
1617
+ "count": 100
1618
  },
1619
  "UB": {
1620
+ "accuracy": 0.5,
1621
+ "count": 100
1622
  },
1623
  "UD": {
1624
+ "accuracy": 0.5066666666666667,
1625
+ "count": 300
1626
  }
1627
  }
1628
  },
1629
  "sub_M5": {
1630
+ "full_accuracy": 0.06,
1631
+ "digit_accuracy": 0.6242857142857143,
1632
+ "n_examples": 100,
1633
  "per_subtask": {
1634
  "MD": {
1635
  "accuracy": 1.0,
1636
+ "count": 100
1637
  },
1638
  "MB": {
1639
  "accuracy": 1.0,
1640
+ "count": 100
1641
  },
1642
  "UB": {
1643
+ "accuracy": 0.52,
1644
+ "count": 100
1645
  },
1646
  "UD": {
1647
+ "accuracy": 0.4625,
1648
+ "count": 400
1649
  }
1650
  }
1651
  },
1652
  "sub_random": {
1653
+ "full_accuracy": 0.86,
1654
+ "digit_accuracy": 0.9792857142857143,
1655
  "n_examples": 200,
1656
  "per_subtask": {
1657
  "MD": {
1658
+ "accuracy": 0.9966666666666667,
1659
+ "count": 600
1660
  },
1661
  "MB": {
1662
+ "accuracy": 0.9812734082397003,
1663
+ "count": 267
1664
  },
1665
  "ME": {
1666
  "accuracy": 1.0,
1667
  "count": 53
1668
  },
1669
  "UB": {
1670
+ "accuracy": 0.9521640091116174,
1671
+ "count": 439
1672
  },
1673
  "UD": {
1674
+ "accuracy": 0.975609756097561,
1675
+ "count": 41
1676
  }
1677
  }
1678
  },
1679
  "sub_B3": {
1680
+ "full_accuracy": 0.61,
1681
+ "digit_accuracy": 0.9314285714285714,
1682
+ "n_examples": 100,
1683
  "per_subtask": {
1684
  "MD": {
1685
+ "accuracy": 0.99,
1686
+ "count": 300
1687
  },
1688
  "MB": {
1689
+ "accuracy": 0.98,
1690
+ "count": 100
1691
  },
1692
  "UB": {
1693
+ "accuracy": 0.8527918781725888,
1694
+ "count": 197
1695
  },
1696
  "UD": {
1697
+ "accuracy": 0.8640776699029126,
1698
+ "count": 103
1699
  }
1700
  }
1701
  },
1702
  "sub_B4": {
1703
+ "full_accuracy": 0.5,
1704
+ "digit_accuracy": 0.9,
1705
+ "n_examples": 100,
1706
  "per_subtask": {
1707
  "MD": {
1708
+ "accuracy": 0.995,
1709
+ "count": 200
1710
  },
1711
  "MB": {
1712
+ "accuracy": 0.99,
1713
+ "count": 100
1714
  },
1715
  "UB": {
1716
+ "accuracy": 0.8299595141700404,
1717
+ "count": 247
1718
  },
1719
  "UD": {
1720
+ "accuracy": 0.8300653594771242,
1721
+ "count": 153
1722
  }
1723
  }
1724
  },
1725
  "sub_B5": {
1726
+ "full_accuracy": 0.5,
1727
+ "digit_accuracy": 0.8842857142857142,
1728
+ "n_examples": 100,
1729
  "per_subtask": {
1730
  "MD": {
1731
  "accuracy": 1.0,
1732
+ "count": 100
1733
  },
1734
  "MB": {
1735
  "accuracy": 1.0,
1736
+ "count": 100
1737
  },
1738
  "UB": {
1739
+ "accuracy": 0.87248322147651,
1740
+ "count": 298
1741
  },
1742
  "UD": {
1743
+ "accuracy": 0.7871287128712872,
1744
+ "count": 202
1745
  }
1746
  }
1747
  }
1748
  },
1749
  "summary": {
1750
+ "overall_accuracy": 0.59,
1751
+ "digit_accuracy": 0.8818131868131868,
1752
+ "total_examples": 2600,
1753
  "n_splits": 24
1754
  }
1755
  },
 
1759
  "K": 1,
1760
  "mode": "sorl",
1761
  "n_digits": 6,
1762
+ "n_per_split": 100
1763
  },
1764
  "splits": {
1765
  "add_S0": {
1766
+ "full_accuracy": 0.93,
1767
+ "digit_accuracy": 0.9885714285714285,
1768
+ "n_examples": 100,
1769
  "per_subtask": {
1770
  "SA": {
1771
+ "accuracy": 0.9884297520661157,
1772
+ "count": 605
1773
  },
1774
  "SS": {
1775
+ "accuracy": 0.9894736842105263,
1776
+ "count": 95
1777
  }
1778
  }
1779
  },
1780
  "add_S1": {
1781
+ "full_accuracy": 0.87,
1782
+ "digit_accuracy": 0.98,
1783
+ "n_examples": 100,
1784
  "per_subtask": {
1785
  "SA": {
1786
+ "accuracy": 0.9950980392156863,
1787
+ "count": 204
1788
  },
1789
  "SC": {
1790
+ "accuracy": 0.9763313609467456,
1791
+ "count": 169
1792
  },
1793
  "SS": {
1794
  "accuracy": 1.0,
1795
+ "count": 31
1796
  },
1797
  "UC": {
1798
+ "accuracy": 0.9695945945945946,
1799
+ "count": 296
1800
  }
1801
  }
1802
  },
1803
  "add_S2": {
1804
+ "full_accuracy": 0.85,
1805
+ "digit_accuracy": 0.9757142857142858,
1806
+ "n_examples": 100,
1807
  "per_subtask": {
1808
  "SA": {
1809
+ "accuracy": 0.9815950920245399,
1810
+ "count": 163
1811
  },
1812
  "SC": {
1813
+ "accuracy": 0.9769230769230769,
1814
+ "count": 130
1815
  },
1816
  "SS": {
1817
+ "accuracy": 0.9770114942528736,
1818
+ "count": 87
1819
  },
1820
  "UC": {
1821
+ "accuracy": 0.9556650246305419,
1822
+ "count": 203
1823
  },
1824
  "US": {
1825
  "accuracy": 1.0,
1826
+ "count": 117
1827
  }
1828
  }
1829
  },
1830
  "add_S3": {
1831
+ "full_accuracy": 0.64,
1832
+ "digit_accuracy": 0.9314285714285714,
1833
+ "n_examples": 100,
1834
  "per_subtask": {
1835
  "SA": {
1836
+ "accuracy": 0.9917355371900827,
1837
+ "count": 121
1838
  },
1839
  "SC": {
1840
+ "accuracy": 0.9752066115702479,
1841
+ "count": 121
1842
  },
1843
  "SS": {
1844
+ "accuracy": 1.0,
1845
+ "count": 49
1846
  },
1847
  "UC": {
1848
+ "accuracy": 0.8440860215053764,
1849
+ "count": 186
1850
  },
1851
  "US": {
1852
+ "accuracy": 0.9327354260089686,
1853
+ "count": 223
1854
  }
1855
  }
1856
  },
1857
  "add_S4": {
1858
+ "full_accuracy": 0.41,
1859
+ "digit_accuracy": 0.8485714285714285,
1860
+ "n_examples": 100,
1861
  "per_subtask": {
1862
  "SA": {
1863
  "accuracy": 1.0,
1864
+ "count": 104
1865
  },
1866
  "SC": {
1867
  "accuracy": 1.0,
1868
+ "count": 106
1869
  },
1870
  "SS": {
1871
  "accuracy": 1.0,
1872
+ "count": 23
1873
  },
1874
  "UC": {
1875
+ "accuracy": 0.7375,
1876
+ "count": 160
1877
  },
1878
  "US": {
1879
+ "accuracy": 0.7915309446254072,
1880
+ "count": 307
1881
  }
1882
  }
1883
  },
1884
  "add_S5": {
1885
+ "full_accuracy": 0.07,
1886
+ "digit_accuracy": 0.64,
1887
+ "n_examples": 100,
1888
  "per_subtask": {
1889
  "SA": {
1890
  "accuracy": 1.0,
1891
+ "count": 100
1892
  },
1893
  "SC": {
1894
  "accuracy": 1.0,
1895
+ "count": 100
1896
  },
1897
  "UC": {
1898
+ "accuracy": 0.3,
1899
+ "count": 100
1900
  },
1901
  "US": {
1902
+ "accuracy": 0.545,
1903
+ "count": 400
1904
  }
1905
  }
1906
  },
1907
  "add_S6": {
1908
+ "full_accuracy": 0.12,
1909
+ "digit_accuracy": 0.5871428571428572,
1910
+ "n_examples": 100,
1911
  "per_subtask": {
1912
  "SC": {
1913
  "accuracy": 1.0,
1914
+ "count": 100
1915
  },
1916
  "UC": {
1917
+ "accuracy": 0.3,
1918
+ "count": 100
1919
  },
1920
  "US": {
1921
+ "accuracy": 0.562,
1922
+ "count": 500
1923
  }
1924
  }
1925
  },
1926
  "add_random": {
1927
+ "full_accuracy": 0.93,
1928
+ "digit_accuracy": 0.9885714285714285,
1929
  "n_examples": 200,
1930
  "per_subtask": {
1931
  "SA": {
1932
+ "accuracy": 0.9955257270693513,
1933
+ "count": 447
1934
  },
1935
  "SC": {
1936
+ "accuracy": 0.99375,
1937
+ "count": 320
1938
  },
1939
  "SS": {
1940
+ "accuracy": 0.9821428571428571,
1941
+ "count": 56
1942
  },
1943
  "UC": {
1944
+ "accuracy": 0.9810964083175804,
1945
+ "count": 529
1946
  },
1947
  "US": {
1948
+ "accuracy": 0.9791666666666666,
1949
+ "count": 48
1950
  }
1951
  }
1952
  },
1953
  "add_C1": {
1954
+ "full_accuracy": 0.87,
1955
+ "digit_accuracy": 0.9814285714285714,
1956
+ "n_examples": 100,
1957
  "per_subtask": {
1958
  "SA": {
1959
  "accuracy": 1.0,
1960
+ "count": 500
1961
  },
1962
  "SC": {
1963
  "accuracy": 1.0,
1964
+ "count": 100
1965
  },
1966
  "UC": {
1967
+ "accuracy": 0.87,
1968
+ "count": 100
1969
  }
1970
  }
1971
  },
1972
  "add_C2": {
1973
+ "full_accuracy": 0.79,
1974
+ "digit_accuracy": 0.9685714285714285,
1975
+ "n_examples": 100,
1976
  "per_subtask": {
1977
  "SA": {
1978
  "accuracy": 1.0,
1979
+ "count": 400
1980
  },
1981
  "SC": {
1982
  "accuracy": 1.0,
1983
+ "count": 100
1984
  },
1985
  "UC": {
1986
+ "accuracy": 0.8653846153846154,
1987
+ "count": 156
1988
  },
1989
  "US": {
1990
+ "accuracy": 0.9772727272727273,
1991
+ "count": 44
1992
  }
1993
  }
1994
  },
1995
  "add_C3": {
1996
  "full_accuracy": 0.58,
1997
+ "digit_accuracy": 0.92,
1998
+ "n_examples": 100,
1999
  "per_subtask": {
2000
  "SA": {
2001
  "accuracy": 1.0,
2002
+ "count": 300
2003
  },
2004
  "SC": {
2005
  "accuracy": 1.0,
2006
+ "count": 100
2007
  },
2008
  "UC": {
2009
+ "accuracy": 0.7788944723618091,
2010
+ "count": 199
2011
  },
2012
  "US": {
2013
+ "accuracy": 0.8811881188118812,
2014
+ "count": 101
2015
  }
2016
  }
2017
  },
2018
  "add_C4": {
2019
+ "full_accuracy": 0.64,
2020
+ "digit_accuracy": 0.9214285714285714,
2021
+ "n_examples": 100,
2022
  "per_subtask": {
2023
  "SA": {
2024
  "accuracy": 1.0,
2025
+ "count": 200
2026
  },
2027
  "SC": {
2028
  "accuracy": 1.0,
2029
+ "count": 100
2030
  },
2031
  "UC": {
2032
+ "accuracy": 0.8636363636363636,
2033
+ "count": 264
2034
  },
2035
  "US": {
2036
+ "accuracy": 0.8602941176470589,
2037
+ "count": 136
2038
  }
2039
  }
2040
  },
2041
  "add_C5": {
2042
+ "full_accuracy": 0.59,
2043
+ "digit_accuracy": 0.9114285714285715,
2044
+ "n_examples": 100,
2045
  "per_subtask": {
2046
  "SA": {
2047
  "accuracy": 1.0,
2048
+ "count": 100
2049
  },
2050
  "SC": {
2051
  "accuracy": 1.0,
2052
+ "count": 100
2053
  },
2054
  "UC": {
2055
+ "accuracy": 0.8838709677419355,
2056
+ "count": 310
2057
  },
2058
  "US": {
2059
+ "accuracy": 0.8631578947368421,
2060
+ "count": 190
2061
  }
2062
  }
2063
  },
2064
  "add_C6": {
2065
+ "full_accuracy": 0.65,
2066
+ "digit_accuracy": 0.9014285714285715,
2067
+ "n_examples": 100,
2068
  "per_subtask": {
2069
  "SC": {
2070
  "accuracy": 1.0,
2071
+ "count": 100
2072
  },
2073
  "UC": {
2074
+ "accuracy": 0.8945945945945946,
2075
+ "count": 370
2076
  },
2077
  "US": {
2078
+ "accuracy": 0.8695652173913043,
2079
+ "count": 230
2080
  }
2081
  }
2082
  },
2083
  "sub_M0": {
2084
+ "full_accuracy": 0.93,
2085
+ "digit_accuracy": 0.99,
2086
+ "n_examples": 100,
2087
  "per_subtask": {
2088
  "MD": {
2089
+ "accuracy": 0.9886178861788618,
2090
+ "count": 615
2091
  },
2092
  "ME": {
2093
+ "accuracy": 1.0,
2094
+ "count": 85
2095
  }
2096
  }
2097
  },
2098
  "sub_M1": {
2099
+ "full_accuracy": 0.89,
2100
+ "digit_accuracy": 0.9842857142857143,
2101
+ "n_examples": 100,
2102
  "per_subtask": {
2103
  "MD": {
2104
+ "accuracy": 0.9965753424657534,
2105
+ "count": 292
2106
  },
2107
  "MB": {
2108
  "accuracy": 0.9861111111111112,
2109
+ "count": 144
2110
  },
2111
  "ME": {
2112
+ "accuracy": 1.0,
2113
+ "count": 25
2114
  },
2115
  "UB": {
2116
+ "accuracy": 0.9665271966527197,
2117
+ "count": 239
2118
  }
2119
  }
2120
  },
2121
  "sub_M2": {
2122
+ "full_accuracy": 0.7,
2123
+ "digit_accuracy": 0.9542857142857143,
2124
+ "n_examples": 100,
2125
  "per_subtask": {
2126
  "MD": {
2127
+ "accuracy": 0.995260663507109,
2128
+ "count": 211
2129
  },
2130
  "MB": {
2131
+ "accuracy": 0.9739130434782609,
2132
+ "count": 115
2133
  },
2134
  "ME": {
2135
+ "accuracy": 0.9882352941176471,
2136
+ "count": 85
2137
  },
2138
  "UB": {
2139
+ "accuracy": 0.850828729281768,
2140
+ "count": 181
2141
  },
2142
  "UD": {
2143
+ "accuracy": 1.0,
2144
+ "count": 108
2145
  }
2146
  }
2147
  },
2148
  "sub_M3": {
2149
+ "full_accuracy": 0.35,
2150
+ "digit_accuracy": 0.8571428571428571,
2151
+ "n_examples": 100,
2152
  "per_subtask": {
2153
  "MD": {
2154
+ "accuracy": 0.9888268156424581,
2155
+ "count": 179
2156
  },
2157
  "MB": {
2158
+ "accuracy": 0.970873786407767,
2159
+ "count": 103
2160
  },
2161
  "ME": {
2162
+ "accuracy": 1.0,
2163
+ "count": 56
2164
  },
2165
  "UB": {
2166
+ "accuracy": 0.610738255033557,
2167
+ "count": 149
2168
  },
2169
  "UD": {
2170
+ "accuracy": 0.8262910798122066,
2171
+ "count": 213
2172
  }
2173
  }
2174
  },
2175
  "sub_M4": {
2176
+ "full_accuracy": 0.23,
2177
+ "digit_accuracy": 0.77,
2178
+ "n_examples": 100,
2179
  "per_subtask": {
2180
  "MD": {
2181
  "accuracy": 1.0,
2182
+ "count": 200
2183
  },
2184
  "MB": {
2185
  "accuracy": 1.0,
2186
+ "count": 100
2187
  },
2188
  "UB": {
2189
+ "accuracy": 0.45,
2190
+ "count": 100
2191
  },
2192
  "UD": {
2193
+ "accuracy": 0.6466666666666666,
2194
+ "count": 300
2195
  }
2196
  }
2197
  },
2198
  "sub_M5": {
2199
+ "full_accuracy": 0.12,
2200
+ "digit_accuracy": 0.6314285714285715,
2201
+ "n_examples": 100,
2202
  "per_subtask": {
2203
  "MD": {
2204
  "accuracy": 1.0,
2205
+ "count": 100
2206
  },
2207
  "MB": {
2208
  "accuracy": 1.0,
2209
+ "count": 100
2210
  },
2211
  "UB": {
2212
+ "accuracy": 0.48,
2213
+ "count": 100
2214
  },
2215
  "UD": {
2216
+ "accuracy": 0.485,
2217
+ "count": 400
2218
  }
2219
  }
2220
  },
2221
  "sub_random": {
2222
+ "full_accuracy": 0.91,
2223
+ "digit_accuracy": 0.9864285714285714,
2224
  "n_examples": 200,
2225
  "per_subtask": {
2226
  "MD": {
2227
+ "accuracy": 0.9983333333333333,
2228
+ "count": 600
2229
  },
2230
  "MB": {
2231
+ "accuracy": 0.9850187265917603,
2232
+ "count": 267
2233
  },
2234
  "ME": {
2235
  "accuracy": 1.0,
2236
  "count": 53
2237
  },
2238
  "UB": {
2239
+ "accuracy": 0.9703872437357631,
2240
+ "count": 439
2241
  },
2242
  "UD": {
2243
+ "accuracy": 0.975609756097561,
2244
+ "count": 41
2245
  }
2246
  }
2247
  },
2248
  "sub_B3": {
2249
+ "full_accuracy": 0.6,
2250
+ "digit_accuracy": 0.9214285714285714,
2251
+ "n_examples": 100,
2252
  "per_subtask": {
2253
  "MD": {
2254
  "accuracy": 0.9933333333333333,
2255
+ "count": 300
2256
  },
2257
  "MB": {
2258
  "accuracy": 1.0,
2259
+ "count": 100
2260
  },
2261
  "UB": {
2262
+ "accuracy": 0.817258883248731,
2263
+ "count": 197
2264
  },
2265
  "UD": {
2266
+ "accuracy": 0.8349514563106796,
2267
+ "count": 103
2268
  }
2269
  }
2270
  },
2271
  "sub_B4": {
2272
+ "full_accuracy": 0.62,
2273
+ "digit_accuracy": 0.9171428571428571,
2274
+ "n_examples": 100,
2275
  "per_subtask": {
2276
  "MD": {
2277
+ "accuracy": 0.995,
2278
+ "count": 200
2279
  },
2280
  "MB": {
2281
+ "accuracy": 0.99,
2282
+ "count": 100
2283
  },
2284
  "UB": {
2285
+ "accuracy": 0.8704453441295547,
2286
+ "count": 247
2287
  },
2288
  "UD": {
2289
+ "accuracy": 0.8431372549019608,
2290
+ "count": 153
2291
  }
2292
  }
2293
  },
2294
  "sub_B5": {
2295
+ "full_accuracy": 0.54,
2296
+ "digit_accuracy": 0.8971428571428571,
2297
+ "n_examples": 100,
2298
  "per_subtask": {
2299
  "MD": {
2300
  "accuracy": 1.0,
2301
+ "count": 100
2302
  },
2303
  "MB": {
2304
  "accuracy": 1.0,
2305
+ "count": 100
2306
  },
2307
  "UB": {
2308
+ "accuracy": 0.8657718120805369,
2309
+ "count": 298
2310
  },
2311
  "UD": {
2312
+ "accuracy": 0.8415841584158416,
2313
+ "count": 202
2314
  }
2315
  }
2316
  }
2317
  },
2318
  "summary": {
2319
+ "overall_accuracy": 0.6407692307692308,
2320
+ "digit_accuracy": 0.900989010989011,
2321
+ "total_examples": 2600,
2322
  "n_splits": 24
2323
  }
2324
  },
2325
+ "sorl_overall_accuracy": 0.6407692307692308,
2326
+ "sft_overall_accuracy": 0.59
2327
  }
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76aa98787a44b36dd1e9fd52457c04af3c9ebd3f0bc0c822d63b94dee465388a
3
  size 634679036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abedcbc3a527a5ac60f33047ce6a4eb7c6124a4b9647aa1463c93dd8b722d329
3
  size 634679036
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/train_config.json CHANGED
@@ -69,16 +69,20 @@
69
  "no_wandb": false,
70
  "n_params": 158593426,
71
  "run_name": "add_sub_sorl_v1_abs10_K1_25K_1L3H510d",
72
- "git_commit": "57deaa28d9c21e39ddac5ef448d6e1be992fba91",
73
- "timestamp": "2026-04-13T09:55:13.754039+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
 
77
  "model_repo": "thoughtworks/arithmetic-sorl",
78
  "trainer_version": "v1",
79
- "wandb_run_id": "u2cu9qs9",
80
- "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/u2cu9qs9",
81
- "final_accuracy": 0.5895833333333333,
82
- "sft_accuracy": 0.5416666666666666,
 
 
 
83
  "eval_method": "ArithmeticEvaluator"
84
  }
 
69
  "no_wandb": false,
70
  "n_params": 158593426,
71
  "run_name": "add_sub_sorl_v1_abs10_K1_25K_1L3H510d",
72
+ "git_commit": "f835493c19eb98267697007042c9d440cad2afbb",
73
+ "timestamp": "2026-04-15T21:12:50.528525+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
77
+ "train_dataset": "fixed_train/train_25K_seed42.pt",
78
  "model_repo": "thoughtworks/arithmetic-sorl",
79
  "trainer_version": "v1",
80
+ "wandb_run_id": "yh5kauv2",
81
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/yh5kauv2",
82
+ "eval_final_dataset": "eval_sets/eval_add_sub_6d_N100_seed42.json",
83
+ "eval_epoch_dataset": "eval_sets/eval_add_sub_6d_N25_seed42.json",
84
+ "eval_hf_repo": "thoughtworks/arithmetic-sorl-data",
85
+ "final_accuracy": 0.6407692307692308,
86
+ "sft_accuracy": 0.59,
87
  "eval_method": "ArithmeticEvaluator"
88
  }