amirali1985 commited on
Commit
459a6e3
·
verified ·
1 Parent(s): 7647fd5

Upload add_sub_baseline_50K

Browse files
add_sub_baseline_50K/metrics.json CHANGED
@@ -315,946 +315,946 @@
315
  15600
316
  ],
317
  "loss": [
318
- 11.5919189453125,
319
- 10.22839641571045,
320
- 8.68958568572998,
321
- 7.743628025054932,
322
- 7.083317279815674,
323
- 6.716556072235107,
324
- 6.151114463806152,
325
- 5.5387115478515625,
326
- 5.014580726623535,
327
- 4.768369197845459,
328
- 4.4252824783325195,
329
- 3.8769280910491943,
330
- 3.398421049118042,
331
- 2.7744126319885254,
332
- 2.505039691925049,
333
- 2.063697099685669,
334
- 1.9456151723861694,
335
- 1.927776575088501,
336
- 1.8299354314804077,
337
- 1.7789926528930664,
338
- 1.754504919052124,
339
- 1.6515535116195679,
340
- 1.4006385803222656,
341
- 1.2763017416000366,
342
- 0.943639874458313,
343
- 0.7492864727973938,
344
- 0.6557101011276245,
345
- 0.6082054972648621,
346
- 0.5593416094779968,
347
- 0.5054003000259399,
348
- 0.46518784761428833,
349
- 0.4824378490447998,
350
- 0.43475887179374695,
351
- 0.32205861806869507,
352
- 0.33166664838790894,
353
- 0.3009236752986908,
354
- 0.2400941401720047,
355
- 0.2248675525188446,
356
- 0.20954179763793945,
357
- 0.22428837418556213,
358
- 0.190036341547966,
359
- 0.1720418483018875,
360
- 0.21155861020088196,
361
- 0.18321479856967926,
362
- 0.14275167882442474,
363
- 0.1398281455039978,
364
- 0.14909550547599792,
365
- 0.13938067853450775,
366
- 0.11098699271678925,
367
- 0.12039850652217865,
368
- 0.12048500776290894,
369
- 0.13665030896663666,
370
- 0.11882513016462326,
371
- 0.09447120875120163,
372
- 0.12682169675827026,
373
- 0.12167376279830933,
374
- 0.09204739332199097,
375
- 0.08896524459123611,
376
- 0.10290355980396271,
377
- 0.13718438148498535,
378
- 0.08635856211185455,
379
- 0.07511933892965317,
380
- 0.08868758380413055,
381
- 0.10676174610853195,
382
- 0.09026769548654556,
383
- 0.07424621284008026,
384
- 0.05626862496137619,
385
- 0.08290921896696091,
386
- 0.054371219128370285,
387
- 0.0942576676607132,
388
- 0.05324546620249748,
389
- 0.08221446722745895,
390
- 0.06750369071960449,
391
- 0.06105080246925354,
392
- 0.05173609405755997,
393
- 0.04777436703443527,
394
- 0.045519717037677765,
395
- 0.07168582826852798,
396
- 0.054644014686346054,
397
- 0.03486175090074539,
398
- 0.07548998296260834,
399
- 0.09990178048610687,
400
- 0.04731791466474533,
401
- 0.041692253202199936,
402
- 0.01247780118137598,
403
- 0.041060905903577805,
404
- 0.027885200455784798,
405
- 0.04982740804553032,
406
- 0.050252627581357956,
407
- 0.024812696501612663,
408
- 0.05744260549545288,
409
- 0.02477910742163658,
410
- 0.02953481860458851,
411
- 0.04084356501698494,
412
- 0.03549288958311081,
413
- 0.05441907420754433,
414
- 0.05569736286997795,
415
- 0.04559105634689331,
416
- 0.027803650125861168,
417
- 0.030328769236803055,
418
- 0.02645830251276493,
419
- 0.021973788738250732,
420
- 0.05375004932284355,
421
- 0.047089457511901855,
422
- 0.04042302817106247,
423
- 0.031208833679556847,
424
- 0.029294179752469063,
425
- 0.032948557287454605,
426
- 0.016409551724791527,
427
- 0.026710275560617447,
428
- 0.023246869444847107,
429
- 0.022564152255654335,
430
- 0.02527036890387535,
431
- 0.04409774765372276,
432
- 0.02898082137107849,
433
- 0.02660987712442875,
434
- 0.03903169184923172,
435
- 0.01708422601222992,
436
- 0.02677464857697487,
437
- 0.026540687307715416,
438
- 0.05060029402375221,
439
- 0.025322888046503067,
440
- 0.03563829883933067,
441
- 0.02430422604084015,
442
- 0.017139693722128868,
443
- 0.03383349999785423,
444
- 0.03365055471658707,
445
- 0.03090469352900982,
446
- 0.0248873271048069,
447
- 0.00942988507449627,
448
- 0.0382811613380909,
449
- 0.01843721978366375,
450
- 0.047764234244823456,
451
- 0.011214124038815498,
452
- 0.013615109957754612,
453
- 0.006560151465237141,
454
- 0.03759806603193283,
455
- 0.0233162734657526,
456
- 0.014769653789699078,
457
- 0.016985494643449783,
458
- 0.013785802759230137,
459
- 0.007728015538305044,
460
- 0.025853386148810387,
461
- 0.03066740557551384,
462
- 0.02839043363928795,
463
- 0.026894863694906235,
464
- 0.0035975042264908552,
465
- 0.008512981235980988,
466
- 0.016585547477006912,
467
- 0.019298216328024864,
468
- 0.00979845691472292,
469
- 0.017035605385899544,
470
- 0.01915735937654972,
471
- 0.010648570954799652,
472
- 0.008926411159336567,
473
- 0.02289682626724243,
474
- 0.008877619169652462,
475
- 0.03562209755182266,
476
- 0.021168742328882217,
477
- 0.03884739428758621,
478
- 0.0040595331229269505,
479
- 0.032947879284620285,
480
- 0.016587775200605392,
481
- 0.031067589297890663,
482
- 0.013870512135326862,
483
- 0.017240136861801147,
484
- 0.013693884946405888,
485
- 0.012889769859611988,
486
- 0.012294416315853596,
487
- 0.006101136561483145,
488
- 0.007711687590926886,
489
- 0.016905682161450386,
490
- 0.021178143098950386,
491
- 0.009246415458619595,
492
- 0.023661259561777115,
493
- 0.017456870526075363,
494
- 0.011296899057924747,
495
- 0.010769916698336601,
496
- 0.01810161955654621,
497
- 0.012454019859433174,
498
- 0.024045029655098915,
499
- 0.011515917256474495,
500
- 0.015759386122226715,
501
- 0.008458860218524933,
502
- 0.015074710361659527,
503
- 0.013072207570075989,
504
- 0.020217230543494225,
505
- 0.017456945031881332,
506
- 0.007008220534771681,
507
- 0.010522151365876198,
508
- 0.04273875430226326,
509
- 0.007380801718682051,
510
- 0.007863359525799751,
511
- 0.022642916068434715,
512
- 0.005536367651075125,
513
- 0.011040055193006992,
514
- 0.02402535080909729,
515
- 0.03268098458647728,
516
- 0.007983776740729809,
517
- 0.0009367117309011519,
518
- 0.003367166267707944,
519
- 0.012411215342581272,
520
- 0.005119930952787399,
521
- 0.0009180908091366291,
522
- 0.01675061695277691,
523
- 0.010525167919695377,
524
- 0.0020072690676897764,
525
- 0.006876842118799686,
526
- 0.00596556905657053,
527
- 0.030373509973287582,
528
- 0.004591197706758976,
529
- 0.002322323154658079,
530
- 0.007020985241979361,
531
- 0.007853214628994465,
532
- 0.008730685338377953,
533
- 0.009200234897434711,
534
- 0.0027808325830847025,
535
- 0.0028766586910933256,
536
- 0.018433216959238052,
537
- 0.012993854470551014,
538
- 0.0012444989988580346,
539
- 0.00428613880649209,
540
- 0.009759555570781231,
541
- 0.0013757236301898956,
542
- 0.004756819922477007,
543
- 0.00030340059311129153,
544
- 0.0028560298960655928,
545
- 0.003237888216972351,
546
- 0.001639465568587184,
547
- 0.0027289309073239565,
548
- 0.008244549855589867,
549
- 0.0010426127118989825,
550
- 0.0010081265354529023,
551
- 0.003677833592519164,
552
- 0.002393932780250907,
553
- 0.0010960882063955069,
554
- 0.001997825223952532,
555
- 0.002053831471130252,
556
- 0.007450861390680075,
557
- 0.009496214799582958,
558
- 0.0037776045501232147,
559
- 0.0011938402894884348,
560
- 0.001780184917151928,
561
- 0.0031698753591626883,
562
- 0.0034218356013298035,
563
- 0.006750286091119051,
564
- 0.010689224116504192,
565
- 0.0021324653644114733,
566
- 0.003068158868700266,
567
- 0.005551895592361689,
568
- 0.0045491051860153675,
569
- 0.007758208084851503,
570
- 0.015876639634370804,
571
- 0.0027363928966224194,
572
- 0.0003699160588439554,
573
- 0.0012326623545959592,
574
- 0.0022686368320137262,
575
- 0.009696224704384804,
576
- 0.0006875990075059235,
577
- 0.007082183379679918,
578
- 0.012856023386120796,
579
- 0.003516225842759013,
580
- 0.0003926549688912928,
581
- 0.012320111505687237,
582
- 0.003966556396335363,
583
- 0.00729447603225708,
584
- 0.005913934204727411,
585
- 0.0009811369236558676,
586
- 0.001765951863490045,
587
- 0.0005217973375692964,
588
- 0.0009772671619430184,
589
- 0.0007188076269812882,
590
- 0.00019376876298338175,
591
- 0.014422808773815632,
592
- 0.0016550617292523384,
593
- 0.0031732916831970215,
594
- 0.0004921614890918136,
595
- 0.001544756582006812,
596
- 0.0014325276715680957,
597
- 0.001100499415770173,
598
- 0.00010810012463480234,
599
- 0.0007530619041062891,
600
- 0.00041833793511614203,
601
- 0.00023349902767222375,
602
- 0.0006965526263229549,
603
- 0.008410082198679447,
604
- 0.006447001360356808,
605
- 0.0002961367426905781,
606
- 0.0012474155519157648,
607
- 0.0024836647789925337,
608
- 0.0010210775071755052,
609
- 0.00011425733100622892,
610
- 0.0004679971025325358,
611
- 0.0003039550210814923,
612
- 0.0014614228857681155,
613
- 0.0010699779959395528,
614
- 0.0019428139785304666,
615
- 0.0011421445524320006,
616
- 0.00028592467424459755,
617
- 0.00010841414041351527,
618
- 0.0005279700853861868,
619
- 0.0015236141625791788,
620
- 0.0008620338630862534,
621
- 0.00045752586447633803,
622
- 0.004491482861340046,
623
- 0.000216478350921534,
624
- 0.0006986210937611759,
625
- 0.0001972295722225681,
626
- 0.0008027682197280228,
627
- 0.00018018601986113936,
628
- 0.00011814179742941633,
629
- 0.002507515251636505
630
  ],
631
  "base_loss": [
632
- 11.5919189453125,
633
- 10.22839641571045,
634
- 8.68958568572998,
635
- 7.743628025054932,
636
- 7.083317279815674,
637
- 6.716556072235107,
638
- 6.151114463806152,
639
- 5.5387115478515625,
640
- 5.014580726623535,
641
- 4.768369197845459,
642
- 4.4252824783325195,
643
- 3.8769280910491943,
644
- 3.398421049118042,
645
- 2.7744126319885254,
646
- 2.505039691925049,
647
- 2.063697099685669,
648
- 1.9456151723861694,
649
- 1.927776575088501,
650
- 1.8299354314804077,
651
- 1.7789926528930664,
652
- 1.754504919052124,
653
- 1.6515535116195679,
654
- 1.4006385803222656,
655
- 1.2763017416000366,
656
- 0.943639874458313,
657
- 0.7492864727973938,
658
- 0.6557101011276245,
659
- 0.6082054972648621,
660
- 0.5593416094779968,
661
- 0.5054003000259399,
662
- 0.46518784761428833,
663
- 0.4824378490447998,
664
- 0.43475887179374695,
665
- 0.32205861806869507,
666
- 0.33166664838790894,
667
- 0.3009236752986908,
668
- 0.2400941401720047,
669
- 0.2248675525188446,
670
- 0.20954179763793945,
671
- 0.22428837418556213,
672
- 0.190036341547966,
673
- 0.1720418483018875,
674
- 0.21155861020088196,
675
- 0.18321479856967926,
676
- 0.14275167882442474,
677
- 0.1398281455039978,
678
- 0.14909550547599792,
679
- 0.13938067853450775,
680
- 0.11098699271678925,
681
- 0.12039850652217865,
682
- 0.12048500776290894,
683
- 0.13665030896663666,
684
- 0.11882513016462326,
685
- 0.09447120875120163,
686
- 0.12682169675827026,
687
- 0.12167376279830933,
688
- 0.09204739332199097,
689
- 0.08896524459123611,
690
- 0.10290355980396271,
691
- 0.13718438148498535,
692
- 0.08635856211185455,
693
- 0.07511933892965317,
694
- 0.08868758380413055,
695
- 0.10676174610853195,
696
- 0.09026769548654556,
697
- 0.07424621284008026,
698
- 0.05626862496137619,
699
- 0.08290921896696091,
700
- 0.054371219128370285,
701
- 0.0942576676607132,
702
- 0.05324546620249748,
703
- 0.08221446722745895,
704
- 0.06750369071960449,
705
- 0.06105080246925354,
706
- 0.05173609405755997,
707
- 0.04777436703443527,
708
- 0.045519717037677765,
709
- 0.07168582826852798,
710
- 0.054644014686346054,
711
- 0.03486175090074539,
712
- 0.07548998296260834,
713
- 0.09990178048610687,
714
- 0.04731791466474533,
715
- 0.041692253202199936,
716
- 0.01247780118137598,
717
- 0.041060905903577805,
718
- 0.027885200455784798,
719
- 0.04982740804553032,
720
- 0.050252627581357956,
721
- 0.024812696501612663,
722
- 0.05744260549545288,
723
- 0.02477910742163658,
724
- 0.02953481860458851,
725
- 0.04084356501698494,
726
- 0.03549288958311081,
727
- 0.05441907420754433,
728
- 0.05569736286997795,
729
- 0.04559105634689331,
730
- 0.027803650125861168,
731
- 0.030328769236803055,
732
- 0.02645830251276493,
733
- 0.021973788738250732,
734
- 0.05375004932284355,
735
- 0.047089457511901855,
736
- 0.04042302817106247,
737
- 0.031208833679556847,
738
- 0.029294179752469063,
739
- 0.032948557287454605,
740
- 0.016409551724791527,
741
- 0.026710275560617447,
742
- 0.023246869444847107,
743
- 0.022564152255654335,
744
- 0.02527036890387535,
745
- 0.04409774765372276,
746
- 0.02898082137107849,
747
- 0.02660987712442875,
748
- 0.03903169184923172,
749
- 0.01708422601222992,
750
- 0.02677464857697487,
751
- 0.026540687307715416,
752
- 0.05060029402375221,
753
- 0.025322888046503067,
754
- 0.03563829883933067,
755
- 0.02430422604084015,
756
- 0.017139693722128868,
757
- 0.03383349999785423,
758
- 0.03365055471658707,
759
- 0.03090469352900982,
760
- 0.0248873271048069,
761
- 0.00942988507449627,
762
- 0.0382811613380909,
763
- 0.01843721978366375,
764
- 0.047764234244823456,
765
- 0.011214124038815498,
766
- 0.013615109957754612,
767
- 0.006560151465237141,
768
- 0.03759806603193283,
769
- 0.0233162734657526,
770
- 0.014769653789699078,
771
- 0.016985494643449783,
772
- 0.013785802759230137,
773
- 0.007728015538305044,
774
- 0.025853386148810387,
775
- 0.03066740557551384,
776
- 0.02839043363928795,
777
- 0.026894863694906235,
778
- 0.0035975042264908552,
779
- 0.008512981235980988,
780
- 0.016585547477006912,
781
- 0.019298216328024864,
782
- 0.00979845691472292,
783
- 0.017035605385899544,
784
- 0.01915735937654972,
785
- 0.010648570954799652,
786
- 0.008926411159336567,
787
- 0.02289682626724243,
788
- 0.008877619169652462,
789
- 0.03562209755182266,
790
- 0.021168742328882217,
791
- 0.03884739428758621,
792
- 0.0040595331229269505,
793
- 0.032947879284620285,
794
- 0.016587775200605392,
795
- 0.031067589297890663,
796
- 0.013870512135326862,
797
- 0.017240136861801147,
798
- 0.013693884946405888,
799
- 0.012889769859611988,
800
- 0.012294416315853596,
801
- 0.006101136561483145,
802
- 0.007711687590926886,
803
- 0.016905682161450386,
804
- 0.021178143098950386,
805
- 0.009246415458619595,
806
- 0.023661259561777115,
807
- 0.017456870526075363,
808
- 0.011296899057924747,
809
- 0.010769916698336601,
810
- 0.01810161955654621,
811
- 0.012454019859433174,
812
- 0.024045029655098915,
813
- 0.011515917256474495,
814
- 0.015759386122226715,
815
- 0.008458860218524933,
816
- 0.015074710361659527,
817
- 0.013072207570075989,
818
- 0.020217230543494225,
819
- 0.017456945031881332,
820
- 0.007008220534771681,
821
- 0.010522151365876198,
822
- 0.04273875430226326,
823
- 0.007380801718682051,
824
- 0.007863359525799751,
825
- 0.022642916068434715,
826
- 0.005536367651075125,
827
- 0.011040055193006992,
828
- 0.02402535080909729,
829
- 0.03268098458647728,
830
- 0.007983776740729809,
831
- 0.0009367117309011519,
832
- 0.003367166267707944,
833
- 0.012411215342581272,
834
- 0.005119930952787399,
835
- 0.0009180908091366291,
836
- 0.01675061695277691,
837
- 0.010525167919695377,
838
- 0.0020072690676897764,
839
- 0.006876842118799686,
840
- 0.00596556905657053,
841
- 0.030373509973287582,
842
- 0.004591197706758976,
843
- 0.002322323154658079,
844
- 0.007020985241979361,
845
- 0.007853214628994465,
846
- 0.008730685338377953,
847
- 0.009200234897434711,
848
- 0.0027808325830847025,
849
- 0.0028766586910933256,
850
- 0.018433216959238052,
851
- 0.012993854470551014,
852
- 0.0012444989988580346,
853
- 0.00428613880649209,
854
- 0.009759555570781231,
855
- 0.0013757236301898956,
856
- 0.004756819922477007,
857
- 0.00030340059311129153,
858
- 0.0028560298960655928,
859
- 0.003237888216972351,
860
- 0.001639465568587184,
861
- 0.0027289309073239565,
862
- 0.008244549855589867,
863
- 0.0010426127118989825,
864
- 0.0010081265354529023,
865
- 0.003677833592519164,
866
- 0.002393932780250907,
867
- 0.0010960882063955069,
868
- 0.001997825223952532,
869
- 0.002053831471130252,
870
- 0.007450861390680075,
871
- 0.009496214799582958,
872
- 0.0037776045501232147,
873
- 0.0011938402894884348,
874
- 0.001780184917151928,
875
- 0.0031698753591626883,
876
- 0.0034218356013298035,
877
- 0.006750286091119051,
878
- 0.010689224116504192,
879
- 0.0021324653644114733,
880
- 0.003068158868700266,
881
- 0.005551895592361689,
882
- 0.0045491051860153675,
883
- 0.007758208084851503,
884
- 0.015876639634370804,
885
- 0.0027363928966224194,
886
- 0.0003699160588439554,
887
- 0.0012326623545959592,
888
- 0.0022686368320137262,
889
- 0.009696224704384804,
890
- 0.0006875990075059235,
891
- 0.007082183379679918,
892
- 0.012856023386120796,
893
- 0.003516225842759013,
894
- 0.0003926549688912928,
895
- 0.012320111505687237,
896
- 0.003966556396335363,
897
- 0.00729447603225708,
898
- 0.005913934204727411,
899
- 0.0009811369236558676,
900
- 0.001765951863490045,
901
- 0.0005217973375692964,
902
- 0.0009772671619430184,
903
- 0.0007188076269812882,
904
- 0.00019376876298338175,
905
- 0.014422808773815632,
906
- 0.0016550617292523384,
907
- 0.0031732916831970215,
908
- 0.0004921614890918136,
909
- 0.001544756582006812,
910
- 0.0014325276715680957,
911
- 0.001100499415770173,
912
- 0.00010810012463480234,
913
- 0.0007530619041062891,
914
- 0.00041833793511614203,
915
- 0.00023349902767222375,
916
- 0.0006965526263229549,
917
- 0.008410082198679447,
918
- 0.006447001360356808,
919
- 0.0002961367426905781,
920
- 0.0012474155519157648,
921
- 0.0024836647789925337,
922
- 0.0010210775071755052,
923
- 0.00011425733100622892,
924
- 0.0004679971025325358,
925
- 0.0003039550210814923,
926
- 0.0014614228857681155,
927
- 0.0010699779959395528,
928
- 0.0019428139785304666,
929
- 0.0011421445524320006,
930
- 0.00028592467424459755,
931
- 0.00010841414041351527,
932
- 0.0005279700853861868,
933
- 0.0015236141625791788,
934
- 0.0008620338630862534,
935
- 0.00045752586447633803,
936
- 0.004491482861340046,
937
- 0.000216478350921534,
938
- 0.0006986210937611759,
939
- 0.0001972295722225681,
940
- 0.0008027682197280228,
941
- 0.00018018601986113936,
942
- 0.00011814179742941633,
943
- 0.002507515251636505
944
  ],
945
  "lr": [
946
- 1.2531969309462919e-06,
947
- 2.5319693094629154e-06,
948
- 3.81074168797954e-06,
949
- 5.089514066496164e-06,
950
- 6.368286445012788e-06,
951
- 7.647058823529411e-06,
952
- 8.925831202046037e-06,
953
- 1.020460358056266e-05,
954
- 1.1483375959079285e-05,
955
- 1.2762148337595909e-05,
956
- 1.4040920716112532e-05,
957
- 1.5319693094629158e-05,
958
- 1.6598465473145782e-05,
959
- 1.7877237851662406e-05,
960
- 1.915601023017903e-05,
961
- 2.0434782608695654e-05,
962
- 2.1713554987212278e-05,
963
- 2.2992327365728905e-05,
964
- 2.4271099744245526e-05,
965
- 2.554987212276215e-05,
966
- 2.6828644501278773e-05,
967
- 2.81074168797954e-05,
968
- 2.938618925831202e-05,
969
- 3.066496163682865e-05,
970
- 3.194373401534527e-05,
971
- 3.3222506393861896e-05,
972
- 3.450127877237852e-05,
973
- 3.5780051150895144e-05,
974
- 3.705882352941177e-05,
975
- 3.833759590792839e-05,
976
- 3.9616368286445016e-05,
977
- 4.089514066496164e-05,
978
- 4.2173913043478264e-05,
979
- 4.345268542199489e-05,
980
- 4.473145780051151e-05,
981
- 4.601023017902814e-05,
982
- 4.7289002557544766e-05,
983
- 4.8567774936061384e-05,
984
- 4.984654731457801e-05,
985
- 5.112531969309463e-05,
986
- 5.2404092071611255e-05,
987
- 5.368286445012788e-05,
988
- 5.496163682864451e-05,
989
- 5.6240409207161134e-05,
990
- 5.751918158567776e-05,
991
- 5.879795396419438e-05,
992
- 6.0076726342711e-05,
993
- 6.135549872122763e-05,
994
- 6.263427109974425e-05,
995
- 6.391304347826088e-05,
996
- 6.51918158567775e-05,
997
- 6.647058823529413e-05,
998
- 6.774936061381075e-05,
999
- 6.902813299232737e-05,
1000
- 7.0306905370844e-05,
1001
- 7.158567774936062e-05,
1002
- 7.286445012787725e-05,
1003
- 7.414322250639387e-05,
1004
- 7.542199488491049e-05,
1005
- 7.670076726342712e-05,
1006
- 7.797953964194374e-05,
1007
- 7.925831202046036e-05,
1008
- 7.999944394999003e-05,
1009
- 7.999364403546566e-05,
1010
- 7.998154076901258e-05,
1011
- 7.996313605821262e-05,
1012
- 7.993843280381094e-05,
1013
- 7.990743489925885e-05,
1014
- 7.987014723010021e-05,
1015
- 7.982657567320137e-05,
1016
- 7.977672709582497e-05,
1017
- 7.972060935454756e-05,
1018
- 7.965823129402139e-05,
1019
- 7.958960274558032e-05,
1020
- 7.951473452569042e-05,
1021
- 7.943363843424515e-05,
1022
- 7.934632725270557e-05,
1023
- 7.925281474208592e-05,
1024
- 7.915311564078471e-05,
1025
- 7.904724566226186e-05,
1026
- 7.89352214925621e-05,
1027
- 7.88170607876851e-05,
1028
- 7.869278217080276e-05,
1029
- 7.856240522932396e-05,
1030
- 7.842595051180752e-05,
1031
- 7.828343952472344e-05,
1032
- 7.813489472906339e-05,
1033
- 7.798033953680063e-05,
1034
- 7.781979830720006e-05,
1035
- 7.765329634297896e-05,
1036
- 7.748085988631916e-05,
1037
- 7.73025161147309e-05,
1038
- 7.711829313676953e-05,
1039
- 7.692821998760531e-05,
1040
- 7.673232662444721e-05,
1041
- 7.653064392182133e-05,
1042
- 7.632320366670496e-05,
1043
- 7.611003855351649e-05,
1044
- 7.589118217896258e-05,
1045
- 7.566666903674304e-05,
1046
- 7.543653451211426e-05,
1047
- 7.520081487631215e-05,
1048
- 7.495954728083564e-05,
1049
- 7.471276975159105e-05,
1050
- 7.446052118289905e-05,
1051
- 7.420284133136447e-05,
1052
- 7.393977080961035e-05,
1053
- 7.367135107987701e-05,
1054
- 7.339762444748726e-05,
1055
- 7.311863405417871e-05,
1056
- 7.283442387130419e-05,
1057
- 7.254503869290155e-05,
1058
- 7.225052412863366e-05,
1059
- 7.195092659660004e-05,
1060
- 7.164629331602078e-05,
1061
- 7.13366722997945e-05,
1062
- 7.102211234693107e-05,
1063
- 7.070266303486044e-05,
1064
- 7.03783747116188e-05,
1065
- 7.00492984879133e-05,
1066
- 6.971548622906651e-05,
1067
- 6.937699054684207e-05,
1068
- 6.903386479115247e-05,
1069
- 6.86861630416508e-05,
1070
- 6.833394009920719e-05,
1071
- 6.797725147727176e-05,
1072
- 6.761615339312524e-05,
1073
- 6.725070275901855e-05,
1074
- 6.688095717320297e-05,
1075
- 6.650697491085218e-05,
1076
- 6.612881491487751e-05,
1077
- 6.574653678663806e-05,
1078
- 6.536020077654701e-05,
1079
- 6.496986777457557e-05,
1080
- 6.457559930065623e-05,
1081
- 6.417745749498663e-05,
1082
- 6.377550510823579e-05,
1083
- 6.336980549165394e-05,
1084
- 6.296042258708793e-05,
1085
- 6.254742091690332e-05,
1086
- 6.213086557381514e-05,
1087
- 6.171082221062871e-05,
1088
- 6.128735702989213e-05,
1089
- 6.08605367734622e-05,
1090
- 6.043042871198537e-05,
1091
- 5.9997100634295156e-05,
1092
- 5.956062083672817e-05,
1093
- 5.9121058112359893e-05,
1094
- 5.8678481740162386e-05,
1095
- 5.823296147408519e-05,
1096
- 5.77845675320616e-05,
1097
- 5.733337058494163e-05,
1098
- 5.687944174535368e-05,
1099
- 5.642285255649657e-05,
1100
- 5.596367498086368e-05,
1101
- 5.5501981388901035e-05,
1102
- 5.503784454760108e-05,
1103
- 5.4571337609033947e-05,
1104
- 5.410253409881808e-05,
1105
- 5.363150790453191e-05,
1106
- 5.315833326406852e-05,
1107
- 5.268308475393511e-05,
1108
- 5.2205837277499095e-05,
1109
- 5.1726666053182625e-05,
1110
- 5.124564660260756e-05,
1111
- 5.0762854738692536e-05,
1112
- 5.0278366553704214e-05,
1113
- 4.979225840726445e-05,
1114
- 4.9304606914315355e-05,
1115
- 4.8815488933044075e-05,
1116
- 4.832498155276932e-05,
1117
- 4.783316208179134e-05,
1118
- 4.734010803520749e-05,
1119
- 4.6845897122695165e-05,
1120
- 4.635060723626403e-05,
1121
- 4.585431643797962e-05,
1122
- 4.5357102947659986e-05,
1123
- 4.485904513054766e-05,
1124
- 4.4360221484958475e-05,
1125
- 4.3860710629909643e-05,
1126
- 4.336059129272866e-05,
1127
- 4.2859942296645196e-05,
1128
- 4.23588425483678e-05,
1129
- 4.1857371025647626e-05,
1130
- 4.135560676483069e-05,
1131
- 4.0853628848401185e-05,
1132
- 4.035151639251729e-05,
1133
- 3.984934853454177e-05,
1134
- 3.934720442056928e-05,
1135
- 3.8845163192952226e-05,
1136
- 3.834330397782716e-05,
1137
- 3.7841705872643884e-05,
1138
- 3.734044793369901e-05,
1139
- 3.683960916367585e-05,
1140
- 3.633926849919306e-05,
1141
- 3.583950479836345e-05,
1142
- 3.534039682836521e-05,
1143
- 3.48420232530276e-05,
1144
- 3.434446262043278e-05,
1145
- 3.384779335053595e-05,
1146
- 3.335209372280563e-05,
1147
- 3.2857441863886186e-05,
1148
- 3.23639157352844e-05,
1149
- 3.18715931210819e-05,
1150
- 3.138055161567589e-05,
1151
- 3.0890868611549485e-05,
1152
- 3.0402621287073947e-05,
1153
- 2.991588659434476e-05,
1154
- 2.943074124705329e-05,
1155
- 2.894726170839595e-05,
1156
- 2.8465524179023035e-05,
1157
- 2.798560458502876e-05,
1158
- 2.7507578565984655e-05,
1159
- 2.7031521463018056e-05,
1160
- 2.6557508306937763e-05,
1161
- 2.608561380640848e-05,
1162
- 2.5615912336175992e-05,
1163
- 2.5148477925345197e-05,
1164
- 2.4683384245712353e-05,
1165
- 2.422070460015378e-05,
1166
- 2.3760511911072684e-05,
1167
- 2.3302878708905953e-05,
1168
- 2.2847877120692683e-05,
1169
- 2.2395578858706372e-05,
1170
- 2.1946055209152374e-05,
1171
- 2.1499377020932627e-05,
1172
- 2.1055614694479143e-05,
1173
- 2.061483817065839e-05,
1174
- 2.0177116919747935e-05,
1175
- 1.9742519930487273e-05,
1176
- 1.931111569920469e-05,
1177
- 1.8882972219021584e-05,
1178
- 1.8458156969136093e-05,
1179
- 1.8036736904187883e-05,
1180
- 1.7618778443705458e-05,
1181
- 1.7204347461637797e-05,
1182
- 1.679350927597214e-05,
1183
- 1.638632863843922e-05,
1184
- 1.598286972430788e-05,
1185
- 1.5583196122270384e-05,
1186
- 1.5187370824420397e-05,
1187
- 1.4795456216324805e-05,
1188
- 1.4407514067191177e-05,
1189
- 1.4023605520132458e-05,
1190
- 1.3643791082530244e-05,
1191
- 1.3268130616498244e-05,
1192
- 1.2896683329447552e-05,
1193
- 1.2529507764755003e-05,
1194
- 1.2166661792536174e-05,
1195
- 1.1808202600524639e-05,
1196
- 1.1454186685058585e-05,
1197
- 1.110466984217657e-05,
1198
- 1.0759707158823462e-05,
1199
- 1.0419353004168382e-05,
1200
- 1.0083661021035568e-05,
1201
- 9.752684117449767e-06,
1202
- 9.426474458297528e-06,
1203
- 9.105083457105542e-06,
1204
- 8.788561767937342e-06,
1205
- 8.476959277409883e-06,
1206
- 8.170325096830898e-06,
1207
- 7.868707554458552e-06,
1208
- 7.572154187884444e-06,
1209
- 7.280711736541315e-06,
1210
- 6.994426134336478e-06,
1211
- 6.7133425024121925e-06,
1212
- 6.437505142034237e-06,
1213
- 6.166957527609598e-06,
1214
- 5.901742299834516e-06,
1215
- 5.641901258973956e-06,
1216
- 5.387475358273513e-06,
1217
- 5.138504697504787e-06,
1218
- 4.895028516645353e-06,
1219
- 4.6570851896941615e-06,
1220
- 4.424712218623466e-06,
1221
- 4.197946227468168e-06,
1222
- 3.976822956553572e-06,
1223
- 3.7613772568623865e-06,
1224
- 3.5516430845418517e-06,
1225
- 3.347653495552039e-06,
1226
- 3.1494406404558807e-06,
1227
- 2.9570357593519473e-06,
1228
- 2.7704691769507674e-06,
1229
- 2.589770297795391e-06,
1230
- 2.414967601626912e-06,
1231
- 2.246088638895887e-06,
1232
- 2.0831600264200837e-06,
1233
- 1.926207443189476e-06,
1234
- 1.775255626318959e-06,
1235
- 1.6303283671496163e-06,
1236
- 1.4914485074989559e-06,
1237
- 1.3586379360608449e-06,
1238
- 1.2319175849556754e-06,
1239
- 1.111307426431263e-06,
1240
- 9.968264697150266e-07,
1241
- 8.884927580180113e-07,
1242
- 7.863233656910796e-07,
1243
- 6.903343955338538e-07,
1244
- 6.005409762567738e-07,
1245
- 5.169572600966888e-07,
1246
- 4.395964205863212e-07,
1247
- 3.684706504780078e-07,
1248
- 3.0359115982202314e-07,
1249
- 2.449681741997667e-07,
1250
- 1.926109331121273e-07,
1251
- 1.4652768852324362e-07,
1252
- 1.0672570355994006e-07,
1253
- 7.32112513669847e-08,
1254
- 4.598961411837355e-08,
1255
- 2.5065082184840826e-08,
1256
- 1.0440953457631075e-08,
1257
- 2.119532828745996e-09
1258
  ],
1259
  "eval_step": [
1260
  781,
@@ -1301,189 +1301,189 @@
1301
  20
1302
  ],
1303
  "eval_accuracy": [
1304
- 0.005714285714285714,
1305
- 0.2042857142857143,
1306
- 0.5407142857142857,
1307
- 0.6728571428571428,
1308
- 0.7278571428571429,
1309
- 0.7321428571428571,
1310
- 0.7878571428571428,
1311
- 0.8514285714285714,
1312
- 0.865,
1313
- 0.8557142857142858,
1314
- 0.8678571428571429,
1315
- 0.9171428571428571,
1316
- 0.9,
1317
- 0.9235714285714286,
1318
- 0.925,
1319
- 0.9557142857142857,
1320
- 0.965,
1321
- 0.9864285714285714,
1322
- 0.9842857142857143,
1323
- 0.9914285714285714
1324
  ]
1325
  },
1326
- "final_accuracy": 0.9914285714285714,
1327
  "sft_eval": {
1328
  "config": {
1329
  "ops": "add_sub",
1330
  "K": null,
1331
  "mode": "sft",
1332
  "n_digits": 6,
1333
- "n_per_split": 50
1334
  },
1335
  "splits": {
1336
  "add_S0": {
1337
  "full_accuracy": 1.0,
1338
- "n_examples": 50,
1339
  "per_subtask": {
1340
  "SA": {
1341
  "accuracy": 1.0,
1342
- "count": 295
1343
  },
1344
  "SS": {
1345
  "accuracy": 1.0,
1346
- "count": 55
1347
  }
1348
  }
1349
  },
1350
  "add_S1": {
1351
  "full_accuracy": 1.0,
1352
- "n_examples": 50,
1353
  "per_subtask": {
1354
  "SA": {
1355
  "accuracy": 1.0,
1356
- "count": 126
1357
  },
1358
  "SC": {
1359
  "accuracy": 1.0,
1360
- "count": 79
1361
  },
1362
  "SS": {
1363
  "accuracy": 1.0,
1364
- "count": 21
1365
  },
1366
  "UC": {
1367
  "accuracy": 1.0,
1368
- "count": 124
1369
  }
1370
  }
1371
  },
1372
  "add_S2": {
1373
  "full_accuracy": 1.0,
1374
- "n_examples": 50,
1375
  "per_subtask": {
1376
  "SA": {
1377
  "accuracy": 1.0,
1378
- "count": 75
1379
  },
1380
  "SC": {
1381
  "accuracy": 1.0,
1382
- "count": 62
1383
  },
1384
  "SS": {
1385
  "accuracy": 1.0,
1386
- "count": 39
1387
  },
1388
  "UC": {
1389
  "accuracy": 1.0,
1390
- "count": 111
1391
  },
1392
  "US": {
1393
  "accuracy": 1.0,
1394
- "count": 63
1395
  }
1396
  }
1397
  },
1398
  "add_S3": {
1399
  "full_accuracy": 1.0,
1400
- "n_examples": 50,
1401
  "per_subtask": {
1402
  "SA": {
1403
  "accuracy": 1.0,
1404
- "count": 60
1405
  },
1406
  "SC": {
1407
  "accuracy": 1.0,
1408
- "count": 57
1409
  },
1410
  "SS": {
1411
  "accuracy": 1.0,
1412
- "count": 19
1413
  },
1414
  "UC": {
1415
  "accuracy": 1.0,
1416
- "count": 104
1417
  },
1418
  "US": {
1419
  "accuracy": 1.0,
1420
- "count": 110
1421
  }
1422
  }
1423
  },
1424
  "add_S4": {
1425
  "full_accuracy": 1.0,
1426
- "n_examples": 50,
1427
  "per_subtask": {
1428
  "SA": {
1429
  "accuracy": 1.0,
1430
- "count": 48
1431
  },
1432
  "SC": {
1433
  "accuracy": 1.0,
1434
- "count": 52
1435
  },
1436
  "SS": {
1437
  "accuracy": 1.0,
1438
- "count": 7
1439
  },
1440
  "UC": {
1441
  "accuracy": 1.0,
1442
- "count": 89
1443
  },
1444
  "US": {
1445
  "accuracy": 1.0,
1446
- "count": 154
1447
  }
1448
  }
1449
  },
1450
  "add_S5": {
1451
- "full_accuracy": 0.92,
1452
- "n_examples": 50,
1453
  "per_subtask": {
1454
  "SA": {
1455
  "accuracy": 1.0,
1456
- "count": 50
1457
  },
1458
  "SC": {
1459
  "accuracy": 1.0,
1460
- "count": 50
1461
  },
1462
  "UC": {
1463
- "accuracy": 0.94,
1464
- "count": 50
1465
  },
1466
  "US": {
1467
- "accuracy": 0.99,
1468
- "count": 200
1469
  }
1470
  }
1471
  },
1472
  "add_S6": {
1473
- "full_accuracy": 0.96,
1474
- "n_examples": 50,
1475
  "per_subtask": {
1476
  "SC": {
1477
  "accuracy": 1.0,
1478
- "count": 50
1479
  },
1480
  "UC": {
1481
- "accuracy": 0.96,
1482
- "count": 50
1483
  },
1484
  "US": {
1485
- "accuracy": 0.992,
1486
- "count": 250
1487
  }
1488
  }
1489
  },
@@ -1493,239 +1493,239 @@
1493
  "per_subtask": {
1494
  "SA": {
1495
  "accuracy": 1.0,
1496
- "count": 431
1497
  },
1498
  "SC": {
1499
  "accuracy": 1.0,
1500
- "count": 316
1501
  },
1502
  "SS": {
1503
  "accuracy": 1.0,
1504
- "count": 39
1505
  },
1506
  "UC": {
1507
  "accuracy": 1.0,
1508
- "count": 560
1509
  },
1510
  "US": {
1511
  "accuracy": 1.0,
1512
- "count": 54
1513
  }
1514
  }
1515
  },
1516
  "add_C3": {
1517
  "full_accuracy": 1.0,
1518
- "n_examples": 50,
1519
  "per_subtask": {
1520
  "SA": {
1521
  "accuracy": 1.0,
1522
- "count": 150
1523
  },
1524
  "SC": {
1525
  "accuracy": 1.0,
1526
- "count": 50
1527
  },
1528
  "UC": {
1529
  "accuracy": 1.0,
1530
- "count": 104
1531
  },
1532
  "US": {
1533
  "accuracy": 1.0,
1534
- "count": 46
1535
  }
1536
  }
1537
  },
1538
  "add_C4": {
1539
  "full_accuracy": 1.0,
1540
- "n_examples": 50,
1541
  "per_subtask": {
1542
  "SA": {
1543
  "accuracy": 1.0,
1544
- "count": 100
1545
  },
1546
  "SC": {
1547
  "accuracy": 1.0,
1548
- "count": 50
1549
  },
1550
  "UC": {
1551
  "accuracy": 1.0,
1552
- "count": 123
1553
  },
1554
  "US": {
1555
  "accuracy": 1.0,
1556
- "count": 77
1557
  }
1558
  }
1559
  },
1560
  "add_C5": {
1561
- "full_accuracy": 0.98,
1562
- "n_examples": 50,
1563
  "per_subtask": {
1564
  "SA": {
1565
  "accuracy": 1.0,
1566
- "count": 50
1567
  },
1568
  "SC": {
1569
  "accuracy": 1.0,
1570
- "count": 50
1571
  },
1572
  "UC": {
1573
- "accuracy": 0.9935064935064936,
1574
- "count": 154
1575
  },
1576
  "US": {
1577
  "accuracy": 1.0,
1578
- "count": 96
1579
  }
1580
  }
1581
  },
1582
  "add_C6": {
1583
  "full_accuracy": 1.0,
1584
- "n_examples": 50,
1585
  "per_subtask": {
1586
  "SC": {
1587
  "accuracy": 1.0,
1588
- "count": 50
1589
  },
1590
  "UC": {
1591
  "accuracy": 1.0,
1592
- "count": 182
1593
  },
1594
  "US": {
1595
  "accuracy": 1.0,
1596
- "count": 118
1597
  }
1598
  }
1599
  },
1600
  "sub_M0": {
1601
  "full_accuracy": 1.0,
1602
- "n_examples": 50,
1603
  "per_subtask": {
1604
  "MD": {
1605
  "accuracy": 1.0,
1606
- "count": 294
1607
  },
1608
  "ME": {
1609
  "accuracy": 1.0,
1610
- "count": 56
1611
  }
1612
  }
1613
  },
1614
  "sub_M1": {
1615
  "full_accuracy": 1.0,
1616
- "n_examples": 50,
1617
  "per_subtask": {
1618
  "MD": {
1619
  "accuracy": 1.0,
1620
- "count": 143
1621
  },
1622
  "MB": {
1623
  "accuracy": 1.0,
1624
- "count": 69
1625
  },
1626
  "ME": {
1627
  "accuracy": 1.0,
1628
- "count": 15
1629
  },
1630
  "UB": {
1631
  "accuracy": 1.0,
1632
- "count": 123
1633
  }
1634
  }
1635
  },
1636
  "sub_M2": {
1637
  "full_accuracy": 1.0,
1638
- "n_examples": 50,
1639
  "per_subtask": {
1640
  "MD": {
1641
  "accuracy": 1.0,
1642
- "count": 108
1643
  },
1644
  "MB": {
1645
  "accuracy": 1.0,
1646
- "count": 52
1647
  },
1648
  "ME": {
1649
  "accuracy": 1.0,
1650
- "count": 52
1651
  },
1652
  "UB": {
1653
  "accuracy": 1.0,
1654
- "count": 87
1655
  },
1656
  "UD": {
1657
  "accuracy": 1.0,
1658
- "count": 51
1659
  }
1660
  }
1661
  },
1662
  "sub_M3": {
1663
  "full_accuracy": 1.0,
1664
- "n_examples": 50,
1665
  "per_subtask": {
1666
  "MD": {
1667
  "accuracy": 1.0,
1668
- "count": 94
1669
  },
1670
  "MB": {
1671
  "accuracy": 1.0,
1672
- "count": 51
1673
  },
1674
  "ME": {
1675
  "accuracy": 1.0,
1676
- "count": 25
1677
  },
1678
  "UB": {
1679
  "accuracy": 1.0,
1680
- "count": 78
1681
  },
1682
  "UD": {
1683
  "accuracy": 1.0,
1684
- "count": 102
1685
  }
1686
  }
1687
  },
1688
  "sub_M4": {
1689
- "full_accuracy": 0.94,
1690
- "n_examples": 50,
1691
  "per_subtask": {
1692
  "MD": {
1693
  "accuracy": 1.0,
1694
- "count": 100
1695
  },
1696
  "MB": {
1697
  "accuracy": 1.0,
1698
- "count": 50
1699
  },
1700
  "UB": {
1701
- "accuracy": 0.94,
1702
- "count": 50
1703
  },
1704
  "UD": {
1705
  "accuracy": 1.0,
1706
- "count": 150
1707
  }
1708
  }
1709
  },
1710
  "sub_M5": {
1711
- "full_accuracy": 0.96,
1712
- "n_examples": 50,
1713
  "per_subtask": {
1714
  "MD": {
1715
  "accuracy": 1.0,
1716
- "count": 50
1717
  },
1718
  "MB": {
1719
  "accuracy": 1.0,
1720
- "count": 50
1721
  },
1722
  "UB": {
1723
  "accuracy": 1.0,
1724
- "count": 50
1725
  },
1726
  "UD": {
1727
- "accuracy": 0.99,
1728
- "count": 200
1729
  }
1730
  }
1731
  },
@@ -1735,96 +1735,96 @@
1735
  "per_subtask": {
1736
  "MD": {
1737
  "accuracy": 1.0,
1738
- "count": 588
1739
  },
1740
  "MB": {
1741
  "accuracy": 1.0,
1742
- "count": 268
1743
  },
1744
  "ME": {
1745
  "accuracy": 1.0,
1746
- "count": 60
1747
  },
1748
  "UB": {
1749
  "accuracy": 1.0,
1750
- "count": 447
1751
  },
1752
  "UD": {
1753
  "accuracy": 1.0,
1754
- "count": 37
1755
  }
1756
  }
1757
  },
1758
  "sub_B3": {
1759
  "full_accuracy": 1.0,
1760
- "n_examples": 50,
1761
  "per_subtask": {
1762
  "MD": {
1763
  "accuracy": 1.0,
1764
- "count": 150
1765
  },
1766
  "MB": {
1767
  "accuracy": 1.0,
1768
- "count": 50
1769
  },
1770
  "UB": {
1771
  "accuracy": 1.0,
1772
- "count": 107
1773
  },
1774
  "UD": {
1775
  "accuracy": 1.0,
1776
- "count": 43
1777
  }
1778
  }
1779
  },
1780
  "sub_B4": {
1781
  "full_accuracy": 1.0,
1782
- "n_examples": 50,
1783
  "per_subtask": {
1784
  "MD": {
1785
  "accuracy": 1.0,
1786
- "count": 100
1787
  },
1788
  "MB": {
1789
  "accuracy": 1.0,
1790
- "count": 50
1791
  },
1792
  "UB": {
1793
  "accuracy": 1.0,
1794
- "count": 114
1795
  },
1796
  "UD": {
1797
  "accuracy": 1.0,
1798
- "count": 86
1799
  }
1800
  }
1801
  },
1802
  "sub_B5": {
1803
  "full_accuracy": 1.0,
1804
- "n_examples": 50,
1805
  "per_subtask": {
1806
  "MD": {
1807
  "accuracy": 1.0,
1808
- "count": 50
1809
  },
1810
  "MB": {
1811
  "accuracy": 1.0,
1812
- "count": 50
1813
  },
1814
  "UB": {
1815
  "accuracy": 1.0,
1816
- "count": 153
1817
  },
1818
  "UD": {
1819
  "accuracy": 1.0,
1820
- "count": 97
1821
  }
1822
  }
1823
  }
1824
  },
1825
  "summary": {
1826
- "overall_accuracy": 0.9914285714285714,
1827
- "total_examples": 1400,
1828
  "n_splits": 22
1829
  }
1830
  }
 
315
  15600
316
  ],
317
  "loss": [
318
+ 10.58569622039795,
319
+ 7.936840534210205,
320
+ 7.2051615715026855,
321
+ 6.292751789093018,
322
+ 5.380630016326904,
323
+ 4.158393383026123,
324
+ 2.91589093208313,
325
+ 2.2648446559906006,
326
+ 1.9414230585098267,
327
+ 1.8107651472091675,
328
+ 1.800335168838501,
329
+ 1.7123852968215942,
330
+ 1.710773229598999,
331
+ 1.6536633968353271,
332
+ 1.5563157796859741,
333
+ 1.4610322713851929,
334
+ 1.153622031211853,
335
+ 0.8404688835144043,
336
+ 0.6217839121818542,
337
+ 0.5448878407478333,
338
+ 0.4241843819618225,
339
+ 0.34047311544418335,
340
+ 0.2867976725101471,
341
+ 0.2169194221496582,
342
+ 0.13546958565711975,
343
+ 0.12438952922821045,
344
+ 0.11186210811138153,
345
+ 0.0944901555776596,
346
+ 0.1039363369345665,
347
+ 0.08501303195953369,
348
+ 0.11829841136932373,
349
+ 0.04217684641480446,
350
+ 0.09959305822849274,
351
+ 0.030621467158198357,
352
+ 0.047111596912145615,
353
+ 0.06351909786462784,
354
+ 0.050098877400159836,
355
+ 0.03927459940314293,
356
+ 0.0296621173620224,
357
+ 0.05172724649310112,
358
+ 0.025398079305887222,
359
+ 0.0191025547683239,
360
+ 0.02552829682826996,
361
+ 0.014332198537886143,
362
+ 0.020101871341466904,
363
+ 0.016612058505415916,
364
+ 0.02385997399687767,
365
+ 0.008157567121088505,
366
+ 0.026038693264126778,
367
+ 0.01184095162898302,
368
+ 0.02082441933453083,
369
+ 0.038108620792627335,
370
+ 0.031920164823532104,
371
+ 0.023155227303504944,
372
+ 0.029825836420059204,
373
+ 0.009044607169926167,
374
+ 0.027161160483956337,
375
+ 0.007930578663945198,
376
+ 0.021433621644973755,
377
+ 0.005527331493794918,
378
+ 0.007499660365283489,
379
+ 0.017420727759599686,
380
+ 0.014103187248110771,
381
+ 0.011276716366410255,
382
+ 0.012983249500393867,
383
+ 0.030153032392263412,
384
+ 0.008853526785969734,
385
+ 0.03762682154774666,
386
+ 0.01117522269487381,
387
+ 0.023195743560791016,
388
+ 0.013249148614704609,
389
+ 0.014523004181683064,
390
+ 0.0055778538808226585,
391
+ 0.020841525867581367,
392
+ 0.014729819260537624,
393
+ 0.013070695102214813,
394
+ 0.00855342485010624,
395
+ 0.011591176502406597,
396
+ 0.002833534497767687,
397
+ 0.025547100231051445,
398
+ 0.012554551474750042,
399
+ 0.009271000511944294,
400
+ 0.01295425184071064,
401
+ 0.010510939173400402,
402
+ 0.007462501060217619,
403
+ 0.014900215901434422,
404
+ 0.014310033991932869,
405
+ 0.008596707135438919,
406
+ 0.001958919456228614,
407
+ 0.0046486821956932545,
408
+ 0.018444955348968506,
409
+ 0.008556962944567204,
410
+ 0.004552661441266537,
411
+ 0.027067942544817924,
412
+ 0.00958697684109211,
413
+ 0.009366557002067566,
414
+ 0.004811967723071575,
415
+ 0.013344655744731426,
416
+ 0.008742153644561768,
417
+ 0.008907586336135864,
418
+ 0.007083355449140072,
419
+ 0.015731360763311386,
420
+ 0.00568253081291914,
421
+ 0.001037583569996059,
422
+ 0.0031601584050804377,
423
+ 0.0036910015624016523,
424
+ 0.006643303204327822,
425
+ 0.020221156999468803,
426
+ 0.011893088929355145,
427
+ 0.005519048310816288,
428
+ 0.023880789056420326,
429
+ 0.01644163765013218,
430
+ 0.026987669989466667,
431
+ 0.0011081791017204523,
432
+ 0.0033056442625820637,
433
+ 0.000858158920891583,
434
+ 0.009378315880894661,
435
+ 0.005004131700843573,
436
+ 0.002976806601509452,
437
+ 0.007938246242702007,
438
+ 0.00408149091526866,
439
+ 0.010652265511453152,
440
+ 0.006073917727917433,
441
+ 0.018400464206933975,
442
+ 0.0039037999231368303,
443
+ 0.01426150742918253,
444
+ 0.003052217187359929,
445
+ 0.016827749088406563,
446
+ 0.0037939571775496006,
447
+ 0.000831973273307085,
448
+ 0.002118675271049142,
449
+ 0.004233058542013168,
450
+ 0.0016138197388499975,
451
+ 0.010185384191572666,
452
+ 0.0007464668597094715,
453
+ 0.00029250283841975033,
454
+ 0.01994812674820423,
455
+ 0.0005010344320908189,
456
+ 0.006236571352928877,
457
+ 0.002717872615903616,
458
+ 0.002377332653850317,
459
+ 0.007453155238181353,
460
+ 0.0005609996733255684,
461
+ 0.0011483365669846535,
462
+ 0.00160598277579993,
463
+ 0.0012431765208020806,
464
+ 0.000852460041642189,
465
+ 0.0023972985800355673,
466
+ 0.0035680646542459726,
467
+ 0.004298224579542875,
468
+ 0.0015388120664283633,
469
+ 0.0022699700202792883,
470
+ 0.0015617648605257273,
471
+ 0.0017532998463138938,
472
+ 0.0030310507863759995,
473
+ 0.002370015950873494,
474
+ 0.0003038027498405427,
475
+ 0.0020328103564679623,
476
+ 0.0005833978648297489,
477
+ 0.0011084805009886622,
478
+ 0.0037475605495274067,
479
+ 0.001984222559258342,
480
+ 0.0002572322264313698,
481
+ 0.012740428559482098,
482
+ 0.013420491479337215,
483
+ 0.0002775133471004665,
484
+ 0.0007172977202571929,
485
+ 0.0003056035202462226,
486
+ 0.006681203842163086,
487
+ 0.0008299726760014892,
488
+ 0.002304975176230073,
489
+ 0.004181632772088051,
490
+ 0.00016523328667972237,
491
+ 0.00445162458345294,
492
+ 0.0003404470335226506,
493
+ 0.00019811275706160814,
494
+ 0.00017772385035641491,
495
+ 0.00016012518608476967,
496
+ 0.0027634704019874334,
497
+ 0.00020777643658220768,
498
+ 0.014718319289386272,
499
+ 0.000357446086127311,
500
+ 0.002793132560327649,
501
+ 0.001239171950146556,
502
+ 0.0007129187579266727,
503
+ 0.0010272933868691325,
504
+ 0.00018213230941910297,
505
+ 0.000530033721588552,
506
+ 0.0005569524364545941,
507
+ 0.003903051372617483,
508
+ 0.0002563659509178251,
509
+ 0.00018147526134271175,
510
+ 0.0007504919194616377,
511
+ 0.0001638331450521946,
512
+ 0.0011349172564223409,
513
+ 0.0035133049823343754,
514
+ 0.0009539271704852581,
515
+ 0.002135826274752617,
516
+ 0.0004690276109613478,
517
+ 0.005972139071673155,
518
+ 0.00011733471183106303,
519
+ 0.001328730140812695,
520
+ 0.0010840508621186018,
521
+ 0.0002192206884501502,
522
+ 0.00018484889005776495,
523
+ 0.0002260169858345762,
524
+ 0.0002009521413128823,
525
+ 0.00014640075096394867,
526
+ 0.005435463972389698,
527
+ 0.00014929195458535105,
528
+ 0.00013214684440754354,
529
+ 0.003676820080727339,
530
+ 0.00011442940012784675,
531
+ 0.000258677318925038,
532
+ 0.0004960866062901914,
533
+ 0.0001674027444096282,
534
+ 8.12130092526786e-05,
535
+ 0.0003417516709305346,
536
+ 6.978169403737411e-05,
537
+ 8.650257223052904e-05,
538
+ 0.0002306181559106335,
539
+ 0.00016351799422409385,
540
+ 0.009652246721088886,
541
+ 0.0026225128676742315,
542
+ 0.00016337752458639443,
543
+ 8.328648254973814e-05,
544
+ 7.744420872768387e-05,
545
+ 9.118799061980098e-05,
546
+ 6.463653699029237e-05,
547
+ 7.599593664053828e-05,
548
+ 8.013672049855813e-05,
549
+ 7.97944376245141e-05,
550
+ 0.00010155878408113495,
551
+ 6.792811473133042e-05,
552
+ 6.025990660418756e-05,
553
+ 7.2588307375554e-05,
554
+ 6.611739081563428e-05,
555
+ 0.0003789363254327327,
556
+ 0.0001938583591254428,
557
+ 6.89834268996492e-05,
558
+ 0.00017879356164485216,
559
+ 5.964725278317928e-05,
560
+ 0.00018611035193316638,
561
+ 6.1028571508359164e-05,
562
+ 6.326786387944594e-05,
563
+ 5.722946298192255e-05,
564
+ 6.754681089660153e-05,
565
+ 5.179006984690204e-05,
566
+ 6.45291293039918e-05,
567
+ 6.92441753926687e-05,
568
+ 5.522249921341427e-05,
569
+ 6.423047307180241e-05,
570
+ 5.416608109953813e-05,
571
+ 5.762785440310836e-05,
572
+ 0.0005954610533080995,
573
+ 0.00022168297437019646,
574
+ 7.994456973392516e-05,
575
+ 5.1874576456611976e-05,
576
+ 8.403260289924219e-05,
577
+ 6.857109838165343e-05,
578
+ 5.5658929341007024e-05,
579
+ 4.719209027825855e-05,
580
+ 5.400797090260312e-05,
581
+ 6.980347825447097e-05,
582
+ 6.258647044887766e-05,
583
+ 5.260522084427066e-05,
584
+ 5.064627112005837e-05,
585
+ 4.657195677282289e-05,
586
+ 4.758452996611595e-05,
587
+ 5.012748806620948e-05,
588
+ 4.798350346391089e-05,
589
+ 4.9020447477232665e-05,
590
+ 4.789638114743866e-05,
591
+ 4.8645579227013513e-05,
592
+ 8.214709669118747e-05,
593
+ 6.254074833123013e-05,
594
+ 4.200910188956186e-05,
595
+ 4.811974213225767e-05,
596
+ 4.6095901780063286e-05,
597
+ 4.615750003722496e-05,
598
+ 4.747844286612235e-05,
599
+ 5.03574192407541e-05,
600
+ 4.847695890930481e-05,
601
+ 4.824926145374775e-05,
602
+ 4.547606658888981e-05,
603
+ 6.05795175943058e-05,
604
+ 4.456207170733251e-05,
605
+ 0.00015212806465569884,
606
+ 4.549638106254861e-05,
607
+ 5.563519516726956e-05,
608
+ 4.517229172051884e-05,
609
+ 5.8811048802454025e-05,
610
+ 5.0130507588619366e-05,
611
+ 5.230798706179485e-05,
612
+ 4.3880845623789355e-05,
613
+ 4.618477032636292e-05,
614
+ 4.3583720980677754e-05,
615
+ 5.8016888942802325e-05,
616
+ 4.766209167428315e-05,
617
+ 5.783405140391551e-05,
618
+ 0.002386096864938736,
619
+ 4.821366383112036e-05,
620
+ 4.6168104745447636e-05,
621
+ 4.6418874262599275e-05,
622
+ 4.2866351577686146e-05,
623
+ 4.370551687316038e-05,
624
+ 4.045507375849411e-05,
625
+ 4.6614575694547966e-05,
626
+ 5.645145211019553e-05,
627
+ 4.3131029087817296e-05,
628
+ 4.016207094537094e-05,
629
+ 0.0004487961414270103
630
  ],
631
  "base_loss": [
632
+ 10.58569622039795,
633
+ 7.936840534210205,
634
+ 7.2051615715026855,
635
+ 6.292751789093018,
636
+ 5.380630016326904,
637
+ 4.158393383026123,
638
+ 2.91589093208313,
639
+ 2.2648446559906006,
640
+ 1.9414230585098267,
641
+ 1.8107651472091675,
642
+ 1.800335168838501,
643
+ 1.7123852968215942,
644
+ 1.710773229598999,
645
+ 1.6536633968353271,
646
+ 1.5563157796859741,
647
+ 1.4610322713851929,
648
+ 1.153622031211853,
649
+ 0.8404688835144043,
650
+ 0.6217839121818542,
651
+ 0.5448878407478333,
652
+ 0.4241843819618225,
653
+ 0.34047311544418335,
654
+ 0.2867976725101471,
655
+ 0.2169194221496582,
656
+ 0.13546958565711975,
657
+ 0.12438952922821045,
658
+ 0.11186210811138153,
659
+ 0.0944901555776596,
660
+ 0.1039363369345665,
661
+ 0.08501303195953369,
662
+ 0.11829841136932373,
663
+ 0.04217684641480446,
664
+ 0.09959305822849274,
665
+ 0.030621467158198357,
666
+ 0.047111596912145615,
667
+ 0.06351909786462784,
668
+ 0.050098877400159836,
669
+ 0.03927459940314293,
670
+ 0.0296621173620224,
671
+ 0.05172724649310112,
672
+ 0.025398079305887222,
673
+ 0.0191025547683239,
674
+ 0.02552829682826996,
675
+ 0.014332198537886143,
676
+ 0.020101871341466904,
677
+ 0.016612058505415916,
678
+ 0.02385997399687767,
679
+ 0.008157567121088505,
680
+ 0.026038693264126778,
681
+ 0.01184095162898302,
682
+ 0.02082441933453083,
683
+ 0.038108620792627335,
684
+ 0.031920164823532104,
685
+ 0.023155227303504944,
686
+ 0.029825836420059204,
687
+ 0.009044607169926167,
688
+ 0.027161160483956337,
689
+ 0.007930578663945198,
690
+ 0.021433621644973755,
691
+ 0.005527331493794918,
692
+ 0.007499660365283489,
693
+ 0.017420727759599686,
694
+ 0.014103187248110771,
695
+ 0.011276716366410255,
696
+ 0.012983249500393867,
697
+ 0.030153032392263412,
698
+ 0.008853526785969734,
699
+ 0.03762682154774666,
700
+ 0.01117522269487381,
701
+ 0.023195743560791016,
702
+ 0.013249148614704609,
703
+ 0.014523004181683064,
704
+ 0.0055778538808226585,
705
+ 0.020841525867581367,
706
+ 0.014729819260537624,
707
+ 0.013070695102214813,
708
+ 0.00855342485010624,
709
+ 0.011591176502406597,
710
+ 0.002833534497767687,
711
+ 0.025547100231051445,
712
+ 0.012554551474750042,
713
+ 0.009271000511944294,
714
+ 0.01295425184071064,
715
+ 0.010510939173400402,
716
+ 0.007462501060217619,
717
+ 0.014900215901434422,
718
+ 0.014310033991932869,
719
+ 0.008596707135438919,
720
+ 0.001958919456228614,
721
+ 0.0046486821956932545,
722
+ 0.018444955348968506,
723
+ 0.008556962944567204,
724
+ 0.004552661441266537,
725
+ 0.027067942544817924,
726
+ 0.00958697684109211,
727
+ 0.009366557002067566,
728
+ 0.004811967723071575,
729
+ 0.013344655744731426,
730
+ 0.008742153644561768,
731
+ 0.008907586336135864,
732
+ 0.007083355449140072,
733
+ 0.015731360763311386,
734
+ 0.00568253081291914,
735
+ 0.001037583569996059,
736
+ 0.0031601584050804377,
737
+ 0.0036910015624016523,
738
+ 0.006643303204327822,
739
+ 0.020221156999468803,
740
+ 0.011893088929355145,
741
+ 0.005519048310816288,
742
+ 0.023880789056420326,
743
+ 0.01644163765013218,
744
+ 0.026987669989466667,
745
+ 0.0011081791017204523,
746
+ 0.0033056442625820637,
747
+ 0.000858158920891583,
748
+ 0.009378315880894661,
749
+ 0.005004131700843573,
750
+ 0.002976806601509452,
751
+ 0.007938246242702007,
752
+ 0.00408149091526866,
753
+ 0.010652265511453152,
754
+ 0.006073917727917433,
755
+ 0.018400464206933975,
756
+ 0.0039037999231368303,
757
+ 0.01426150742918253,
758
+ 0.003052217187359929,
759
+ 0.016827749088406563,
760
+ 0.0037939571775496006,
761
+ 0.000831973273307085,
762
+ 0.002118675271049142,
763
+ 0.004233058542013168,
764
+ 0.0016138197388499975,
765
+ 0.010185384191572666,
766
+ 0.0007464668597094715,
767
+ 0.00029250283841975033,
768
+ 0.01994812674820423,
769
+ 0.0005010344320908189,
770
+ 0.006236571352928877,
771
+ 0.002717872615903616,
772
+ 0.002377332653850317,
773
+ 0.007453155238181353,
774
+ 0.0005609996733255684,
775
+ 0.0011483365669846535,
776
+ 0.00160598277579993,
777
+ 0.0012431765208020806,
778
+ 0.000852460041642189,
779
+ 0.0023972985800355673,
780
+ 0.0035680646542459726,
781
+ 0.004298224579542875,
782
+ 0.0015388120664283633,
783
+ 0.0022699700202792883,
784
+ 0.0015617648605257273,
785
+ 0.0017532998463138938,
786
+ 0.0030310507863759995,
787
+ 0.002370015950873494,
788
+ 0.0003038027498405427,
789
+ 0.0020328103564679623,
790
+ 0.0005833978648297489,
791
+ 0.0011084805009886622,
792
+ 0.0037475605495274067,
793
+ 0.001984222559258342,
794
+ 0.0002572322264313698,
795
+ 0.012740428559482098,
796
+ 0.013420491479337215,
797
+ 0.0002775133471004665,
798
+ 0.0007172977202571929,
799
+ 0.0003056035202462226,
800
+ 0.006681203842163086,
801
+ 0.0008299726760014892,
802
+ 0.002304975176230073,
803
+ 0.004181632772088051,
804
+ 0.00016523328667972237,
805
+ 0.00445162458345294,
806
+ 0.0003404470335226506,
807
+ 0.00019811275706160814,
808
+ 0.00017772385035641491,
809
+ 0.00016012518608476967,
810
+ 0.0027634704019874334,
811
+ 0.00020777643658220768,
812
+ 0.014718319289386272,
813
+ 0.000357446086127311,
814
+ 0.002793132560327649,
815
+ 0.001239171950146556,
816
+ 0.0007129187579266727,
817
+ 0.0010272933868691325,
818
+ 0.00018213230941910297,
819
+ 0.000530033721588552,
820
+ 0.0005569524364545941,
821
+ 0.003903051372617483,
822
+ 0.0002563659509178251,
823
+ 0.00018147526134271175,
824
+ 0.0007504919194616377,
825
+ 0.0001638331450521946,
826
+ 0.0011349172564223409,
827
+ 0.0035133049823343754,
828
+ 0.0009539271704852581,
829
+ 0.002135826274752617,
830
+ 0.0004690276109613478,
831
+ 0.005972139071673155,
832
+ 0.00011733471183106303,
833
+ 0.001328730140812695,
834
+ 0.0010840508621186018,
835
+ 0.0002192206884501502,
836
+ 0.00018484889005776495,
837
+ 0.0002260169858345762,
838
+ 0.0002009521413128823,
839
+ 0.00014640075096394867,
840
+ 0.005435463972389698,
841
+ 0.00014929195458535105,
842
+ 0.00013214684440754354,
843
+ 0.003676820080727339,
844
+ 0.00011442940012784675,
845
+ 0.000258677318925038,
846
+ 0.0004960866062901914,
847
+ 0.0001674027444096282,
848
+ 8.12130092526786e-05,
849
+ 0.0003417516709305346,
850
+ 6.978169403737411e-05,
851
+ 8.650257223052904e-05,
852
+ 0.0002306181559106335,
853
+ 0.00016351799422409385,
854
+ 0.009652246721088886,
855
+ 0.0026225128676742315,
856
+ 0.00016337752458639443,
857
+ 8.328648254973814e-05,
858
+ 7.744420872768387e-05,
859
+ 9.118799061980098e-05,
860
+ 6.463653699029237e-05,
861
+ 7.599593664053828e-05,
862
+ 8.013672049855813e-05,
863
+ 7.97944376245141e-05,
864
+ 0.00010155878408113495,
865
+ 6.792811473133042e-05,
866
+ 6.025990660418756e-05,
867
+ 7.2588307375554e-05,
868
+ 6.611739081563428e-05,
869
+ 0.0003789363254327327,
870
+ 0.0001938583591254428,
871
+ 6.89834268996492e-05,
872
+ 0.00017879356164485216,
873
+ 5.964725278317928e-05,
874
+ 0.00018611035193316638,
875
+ 6.1028571508359164e-05,
876
+ 6.326786387944594e-05,
877
+ 5.722946298192255e-05,
878
+ 6.754681089660153e-05,
879
+ 5.179006984690204e-05,
880
+ 6.45291293039918e-05,
881
+ 6.92441753926687e-05,
882
+ 5.522249921341427e-05,
883
+ 6.423047307180241e-05,
884
+ 5.416608109953813e-05,
885
+ 5.762785440310836e-05,
886
+ 0.0005954610533080995,
887
+ 0.00022168297437019646,
888
+ 7.994456973392516e-05,
889
+ 5.1874576456611976e-05,
890
+ 8.403260289924219e-05,
891
+ 6.857109838165343e-05,
892
+ 5.5658929341007024e-05,
893
+ 4.719209027825855e-05,
894
+ 5.400797090260312e-05,
895
+ 6.980347825447097e-05,
896
+ 6.258647044887766e-05,
897
+ 5.260522084427066e-05,
898
+ 5.064627112005837e-05,
899
+ 4.657195677282289e-05,
900
+ 4.758452996611595e-05,
901
+ 5.012748806620948e-05,
902
+ 4.798350346391089e-05,
903
+ 4.9020447477232665e-05,
904
+ 4.789638114743866e-05,
905
+ 4.8645579227013513e-05,
906
+ 8.214709669118747e-05,
907
+ 6.254074833123013e-05,
908
+ 4.200910188956186e-05,
909
+ 4.811974213225767e-05,
910
+ 4.6095901780063286e-05,
911
+ 4.615750003722496e-05,
912
+ 4.747844286612235e-05,
913
+ 5.03574192407541e-05,
914
+ 4.847695890930481e-05,
915
+ 4.824926145374775e-05,
916
+ 4.547606658888981e-05,
917
+ 6.05795175943058e-05,
918
+ 4.456207170733251e-05,
919
+ 0.00015212806465569884,
920
+ 4.549638106254861e-05,
921
+ 5.563519516726956e-05,
922
+ 4.517229172051884e-05,
923
+ 5.8811048802454025e-05,
924
+ 5.0130507588619366e-05,
925
+ 5.230798706179485e-05,
926
+ 4.3880845623789355e-05,
927
+ 4.618477032636292e-05,
928
+ 4.3583720980677754e-05,
929
+ 5.8016888942802325e-05,
930
+ 4.766209167428315e-05,
931
+ 5.783405140391551e-05,
932
+ 0.002386096864938736,
933
+ 4.821366383112036e-05,
934
+ 4.6168104745447636e-05,
935
+ 4.6418874262599275e-05,
936
+ 4.2866351577686146e-05,
937
+ 4.370551687316038e-05,
938
+ 4.045507375849411e-05,
939
+ 4.6614575694547966e-05,
940
+ 5.645145211019553e-05,
941
+ 4.3131029087817296e-05,
942
+ 4.016207094537094e-05,
943
+ 0.0004487961414270103
944
  ],
945
  "lr": [
946
+ 4.188034188034189e-06,
947
+ 8.461538461538462e-06,
948
+ 1.2735042735042738e-05,
949
+ 1.700854700854701e-05,
950
+ 2.1282051282051282e-05,
951
+ 2.5555555555555557e-05,
952
+ 2.9829059829059833e-05,
953
+ 3.4102564102564105e-05,
954
+ 3.837606837606838e-05,
955
+ 3.999958796323024e-05,
956
+ 3.999718697278298e-05,
957
+ 3.9992642503923525e-05,
958
+ 3.998595504376894e-05,
959
+ 3.997712530914136e-05,
960
+ 3.996615424649119e-05,
961
+ 3.995304303179564e-05,
962
+ 3.993779307043264e-05,
963
+ 3.992040599703026e-05,
964
+ 3.990088367529147e-05,
965
+ 3.9879228197794335e-05,
966
+ 3.9855441885767774e-05,
967
+ 3.982952728884272e-05,
968
+ 3.98014871847788e-05,
969
+ 3.977132457916666e-05,
970
+ 3.9739042705105735e-05,
971
+ 3.970464502285772e-05,
972
+ 3.9668135219475686e-05,
973
+ 3.962951720840881e-05,
974
+ 3.9588795129082965e-05,
975
+ 3.9545973346457e-05,
976
+ 3.950105645055483e-05,
977
+ 3.9454049255973464e-05,
978
+ 3.9404956801366954e-05,
979
+ 3.9353784348906246e-05,
980
+ 3.930053738371519e-05,
981
+ 3.924522161328258e-05,
982
+ 3.9187842966850365e-05,
983
+ 3.912840759477808e-05,
984
+ 3.9066921867883654e-05,
985
+ 3.900339237676047e-05,
986
+ 3.893782593107095e-05,
987
+ 3.8870229558816636e-05,
988
+ 3.880061050558488e-05,
989
+ 3.8728976233772144e-05,
990
+ 3.865533442178418e-05,
991
+ 3.857969296321293e-05,
992
+ 3.8502059965990464e-05,
993
+ 3.842244375151989e-05,
994
+ 3.8340852853783366e-05,
995
+ 3.825729601842738e-05,
996
+ 3.817178220182529e-05,
997
+ 3.8084320570117316e-05,
998
+ 3.799492049822804e-05,
999
+ 3.7903591568861476e-05,
1000
+ 3.7810343571473957e-05,
1001
+ 3.771518650122478e-05,
1002
+ 3.7618130557904865e-05,
1003
+ 3.75191861448434e-05,
1004
+ 3.7418363867792776e-05,
1005
+ 3.7315674533791735e-05,
1006
+ 3.7211129150006987e-05,
1007
+ 3.7104738922553335e-05,
1008
+ 3.6996515255292544e-05,
1009
+ 3.688646974861095e-05,
1010
+ 3.677461419817603e-05,
1011
+ 3.666096059367202e-05,
1012
+ 3.654552111751479e-05,
1013
+ 3.642830814354598e-05,
1014
+ 3.6309334235706705e-05,
1015
+ 3.618861214669079e-05,
1016
+ 3.606615481657787e-05,
1017
+ 3.594197537144631e-05,
1018
+ 3.5816087121966275e-05,
1019
+ 3.5688503561972944e-05,
1020
+ 3.5559238367020136e-05,
1021
+ 3.542830539291442e-05,
1022
+ 3.529571867422996e-05,
1023
+ 3.516149242280414e-05,
1024
+ 3.502564102621419e-05,
1025
+ 3.488817904623504e-05,
1026
+ 3.474912121727844e-05,
1027
+ 3.4608482444813575e-05,
1028
+ 3.446627780376941e-05,
1029
+ 3.432252253691874e-05,
1030
+ 3.4177232053244447e-05,
1031
+ 3.403042192628771e-05,
1032
+ 3.388210789247879e-05,
1033
+ 3.373230584945018e-05,
1034
+ 3.358103185433261e-05,
1035
+ 3.342830212203387e-05,
1036
+ 3.3274133023500764e-05,
1037
+ 3.311854108396431e-05,
1038
+ 3.2961542981168435e-05,
1039
+ 3.280315554358229e-05,
1040
+ 3.264339574859642e-05,
1041
+ 3.248228072070302e-05,
1042
+ 3.2319827729660285e-05,
1043
+ 3.2156054188641376e-05,
1044
+ 3.1990977652367865e-05,
1045
+ 3.1824615815228095e-05,
1046
+ 3.165698650938051e-05,
1047
+ 3.148810770284225e-05,
1048
+ 3.131799749756318e-05,
1049
+ 3.114667412748557e-05,
1050
+ 3.0974155956589594e-05,
1051
+ 3.0800461476924934e-05,
1052
+ 3.062560930662865e-05,
1053
+ 3.0449618187929455e-05,
1054
+ 3.027250698513884e-05,
1055
+ 3.0094294682628963e-05,
1056
+ 2.9915000382797757e-05,
1057
+ 2.973464330402138e-05,
1058
+ 2.9553242778594188e-05,
1059
+ 2.9370818250656534e-05,
1060
+ 2.918738927411057e-05,
1061
+ 2.900297551052429e-05,
1062
+ 2.8817596727024034e-05,
1063
+ 2.863127279417565e-05,
1064
+ 2.8444023683854588e-05,
1065
+ 2.8255869467105155e-05,
1066
+ 2.806683031198911e-05,
1067
+ 2.787692648142386e-05,
1068
+ 2.76861783310105e-05,
1069
+ 2.7494606306851945e-05,
1070
+ 2.730223094336128e-05,
1071
+ 2.7109072861060756e-05,
1072
+ 2.6915152764371454e-05,
1073
+ 2.6720491439394013e-05,
1074
+ 2.6525109751680584e-05,
1075
+ 2.6329028643998294e-05,
1076
+ 2.613226913408438e-05,
1077
+ 2.593485231239333e-05,
1078
+ 2.5736799339836247e-05,
1079
+ 2.5538131445512574e-05,
1080
+ 2.5338869924434622e-05,
1081
+ 2.5139036135244954e-05,
1082
+ 2.493865149792698e-05,
1083
+ 2.4737737491508967e-05,
1084
+ 2.4536315651761724e-05,
1085
+ 2.433440756889019e-05,
1086
+ 2.4132034885219254e-05,
1087
+ 2.3929219292873862e-05,
1088
+ 2.3725982531453916e-05,
1089
+ 2.3522346385703997e-05,
1090
+ 2.3318332683178304e-05,
1091
+ 2.3113963291900965e-05,
1092
+ 2.290926011802202e-05,
1093
+ 2.2704245103469335e-05,
1094
+ 2.2498940223596676e-05,
1095
+ 2.229336748482816e-05,
1096
+ 2.2087548922299454e-05,
1097
+ 2.1881506597495808e-05,
1098
+ 2.1675262595887345e-05,
1099
+ 2.1468839024561703e-05,
1100
+ 2.1262258009854425e-05,
1101
+ 2.1055541694977263e-05,
1102
+ 2.0848712237644633e-05,
1103
+ 2.0641791807698616e-05,
1104
+ 2.0434802584732507e-05,
1105
+ 2.022776675571351e-05,
1106
+ 2.0020706512604437e-05,
1107
+ 1.9813644049985047e-05,
1108
+ 1.9606601562673005e-05,
1109
+ 1.939960124334484e-05,
1110
+ 1.919266528015713e-05,
1111
+ 1.8985815854368193e-05,
1112
+ 1.8779075137960494e-05,
1113
+ 1.8572465291264022e-05,
1114
+ 1.8366008460581004e-05,
1115
+ 1.815972677581202e-05,
1116
+ 1.7953642348083943e-05,
1117
+ 1.774777726737984e-05,
1118
+ 1.7542153600171213e-05,
1119
+ 1.7336793387052705e-05,
1120
+ 1.7131718640379524e-05,
1121
+ 1.6926951341908083e-05,
1122
+ 1.672251344043969e-05,
1123
+ 1.651842684946793e-05,
1124
+ 1.6314713444829764e-05,
1125
+ 1.611139506236069e-05,
1126
+ 1.5908493495554186e-05,
1127
+ 1.5706030493225642e-05,
1128
+ 1.5504027757181196e-05,
1129
+ 1.5302506939891503e-05,
1130
+ 1.5101489642170806e-05,
1131
+ 1.4900997410861609e-05,
1132
+ 1.4701051736525065e-05,
1133
+ 1.4501674051137457e-05,
1134
+ 1.4302885725792858e-05,
1135
+ 1.4104708068412472e-05,
1136
+ 1.3907162321460597e-05,
1137
+ 1.371026965966768e-05,
1138
+ 1.3514051187760642e-05,
1139
+ 1.3318527938200655e-05,
1140
+ 1.3123720868928707e-05,
1141
+ 1.292965086111913e-05,
1142
+ 1.2736338716941403e-05,
1143
+ 1.2543805157330346e-05,
1144
+ 1.2352070819765072e-05,
1145
+ 1.2161156256056894e-05,
1146
+ 1.1971081930146396e-05,
1147
+ 1.1781868215909893e-05,
1148
+ 1.1593535394975626e-05,
1149
+ 1.1406103654549742e-05,
1150
+ 1.1219593085252485e-05,
1151
+ 1.103402367896469e-05,
1152
+ 1.0849415326684864e-05,
1153
+ 1.0665787816397109e-05,
1154
+ 1.0483160830950054e-05,
1155
+ 1.030155394594707e-05,
1156
+ 1.0120986627648004e-05,
1157
+ 9.941478230882551e-06,
1158
+ 9.763047996975699e-06,
1159
+ 9.585715051685247e-06,
1160
+ 9.409498403151716e-06,
1161
+ 9.234416939860887e-06,
1162
+ 9.060489428619184e-06,
1163
+ 8.887734512542072e-06,
1164
+ 8.716170709055716e-06,
1165
+ 8.545816407912107e-06,
1166
+ 8.376689869217913e-06,
1167
+ 8.208809221477138e-06,
1168
+ 8.042192459648035e-06,
1169
+ 7.87685744321416e-06,
1170
+ 7.712821894270087e-06,
1171
+ 7.550103395621737e-06,
1172
+ 7.388719388901766e-06,
1173
+ 7.228687172699982e-06,
1174
+ 7.070023900709091e-06,
1175
+ 6.912746579886067e-06,
1176
+ 6.756872068629164e-06,
1177
+ 6.6024170749708814e-06,
1178
+ 6.4493981547870566e-06,
1179
+ 6.297831710022247e-06,
1180
+ 6.147733986931628e-06,
1181
+ 5.999121074339575e-06,
1182
+ 5.8520089019151116e-06,
1183
+ 5.706413238464439e-06,
1184
+ 5.562349690240656e-06,
1185
+ 5.419833699270991e-06,
1186
+ 5.278880541701565e-06,
1187
+ 5.139505326159946e-06,
1188
+ 5.0017229921356696e-06,
1189
+ 4.8655483083789115e-06,
1190
+ 4.730995871317427e-06,
1191
+ 4.598080103491973e-06,
1192
+ 4.4668152520103745e-06,
1193
+ 4.33721538702039e-06,
1194
+ 4.209294400201533e-06,
1195
+ 4.083066003276077e-06,
1196
+ 3.958543726539259e-06,
1197
+ 3.835740917409019e-06,
1198
+ 3.714670738995274e-06,
1199
+ 3.595346168689e-06,
1200
+ 3.477779996771207e-06,
1201
+ 3.361984825041915e-06,
1202
+ 3.2479730654694342e-06,
1203
+ 3.135756938859904e-06,
1204
+ 3.0253484735473714e-06,
1205
+ 2.9167595041044805e-06,
1206
+ 2.8100016700739385e-06,
1207
+ 2.705086414720892e-06,
1208
+ 2.6020249838063037e-06,
1209
+ 2.500828424381587e-06,
1210
+ 2.4015075836044345e-06,
1211
+ 2.3040731075761303e-06,
1212
+ 2.208535440200428e-06,
1213
+ 2.11490482206405e-06,
1214
+ 2.0231912893390504e-06,
1215
+ 1.9334046727069866e-06,
1216
+ 1.8455545963052347e-06,
1217
+ 1.7596504766953605e-06,
1218
+ 1.6757015218537743e-06,
1219
+ 1.5937167301847356e-06,
1220
+ 1.513704889555827e-06,
1221
+ 1.4356745763559742e-06,
1222
+ 1.3596341545761815e-06,
1223
+ 1.2855917749129866e-06,
1224
+ 1.2135553738947903e-06,
1225
+ 1.1435326730311536e-06,
1226
+ 1.075531177985145e-06,
1227
+ 1.0095581777688036e-06,
1228
+ 9.456207439618459e-07,
1229
+ 8.837257299536639e-07,
1230
+ 8.238797702087242e-07,
1231
+ 7.660892795554131e-07,
1232
+ 7.103604524984597e-07,
1233
+ 6.566992625549318e-07,
1234
+ 6.051114616139542e-07,
1235
+ 5.556025793201581e-07,
1236
+ 5.081779224809702e-07,
1237
+ 4.628425744977927e-07,
1238
+ 4.196013948210942e-07,
1239
+ 3.784590184295511e-07,
1240
+ 3.394198553332162e-07,
1241
+ 3.024880901008209e-07,
1242
+ 2.676676814112367e-07,
1243
+ 2.3496236162914076e-07,
1244
+ 2.0437563640495206e-07,
1245
+ 1.7591078429906706e-07,
1246
+ 1.495708564304299e-07,
1247
+ 1.2535867614948739e-07,
1248
+ 1.0327683873555317e-07,
1249
+ 8.332771111863037e-08,
1250
+ 6.551343162569667e-08,
1251
+ 4.983590975150554e-08,
1252
+ 3.629682595389428e-08,
1253
+ 2.4897631473679297e-08,
1254
+ 1.563954817907831e-08,
1255
+ 8.523568434752882e-09,
1256
+ 3.550454995435715e-09,
1257
+ 7.207409241671848e-10
1258
  ],
1259
  "eval_step": [
1260
  781,
 
1301
  20
1302
  ],
1303
  "eval_accuracy": [
1304
+ 0.0011111111111111111,
1305
+ 0.8111111111111111,
1306
+ 0.8677777777777778,
1307
+ 0.8966666666666666,
1308
+ 0.8977777777777778,
1309
+ 0.9522222222222222,
1310
+ 0.9411111111111111,
1311
+ 0.9588888888888889,
1312
+ 0.9788888888888889,
1313
+ 0.9755555555555555,
1314
+ 0.9944444444444445,
1315
+ 0.9888888888888889,
1316
+ 0.9966666666666667,
1317
+ 0.9811111111111112,
1318
+ 0.9955555555555555,
1319
+ 0.9977777777777778,
1320
+ 1.0,
1321
+ 1.0,
1322
+ 1.0,
1323
+ 1.0
1324
  ]
1325
  },
1326
+ "final_accuracy": 1.0,
1327
  "sft_eval": {
1328
  "config": {
1329
  "ops": "add_sub",
1330
  "K": null,
1331
  "mode": "sft",
1332
  "n_digits": 6,
1333
+ "n_per_split": 100
1334
  },
1335
  "splits": {
1336
  "add_S0": {
1337
  "full_accuracy": 1.0,
1338
+ "n_examples": 100,
1339
  "per_subtask": {
1340
  "SA": {
1341
  "accuracy": 1.0,
1342
+ "count": 605
1343
  },
1344
  "SS": {
1345
  "accuracy": 1.0,
1346
+ "count": 95
1347
  }
1348
  }
1349
  },
1350
  "add_S1": {
1351
  "full_accuracy": 1.0,
1352
+ "n_examples": 100,
1353
  "per_subtask": {
1354
  "SA": {
1355
  "accuracy": 1.0,
1356
+ "count": 204
1357
  },
1358
  "SC": {
1359
  "accuracy": 1.0,
1360
+ "count": 169
1361
  },
1362
  "SS": {
1363
  "accuracy": 1.0,
1364
+ "count": 31
1365
  },
1366
  "UC": {
1367
  "accuracy": 1.0,
1368
+ "count": 296
1369
  }
1370
  }
1371
  },
1372
  "add_S2": {
1373
  "full_accuracy": 1.0,
1374
+ "n_examples": 100,
1375
  "per_subtask": {
1376
  "SA": {
1377
  "accuracy": 1.0,
1378
+ "count": 163
1379
  },
1380
  "SC": {
1381
  "accuracy": 1.0,
1382
+ "count": 130
1383
  },
1384
  "SS": {
1385
  "accuracy": 1.0,
1386
+ "count": 87
1387
  },
1388
  "UC": {
1389
  "accuracy": 1.0,
1390
+ "count": 203
1391
  },
1392
  "US": {
1393
  "accuracy": 1.0,
1394
+ "count": 117
1395
  }
1396
  }
1397
  },
1398
  "add_S3": {
1399
  "full_accuracy": 1.0,
1400
+ "n_examples": 100,
1401
  "per_subtask": {
1402
  "SA": {
1403
  "accuracy": 1.0,
1404
+ "count": 121
1405
  },
1406
  "SC": {
1407
  "accuracy": 1.0,
1408
+ "count": 121
1409
  },
1410
  "SS": {
1411
  "accuracy": 1.0,
1412
+ "count": 49
1413
  },
1414
  "UC": {
1415
  "accuracy": 1.0,
1416
+ "count": 186
1417
  },
1418
  "US": {
1419
  "accuracy": 1.0,
1420
+ "count": 223
1421
  }
1422
  }
1423
  },
1424
  "add_S4": {
1425
  "full_accuracy": 1.0,
1426
+ "n_examples": 100,
1427
  "per_subtask": {
1428
  "SA": {
1429
  "accuracy": 1.0,
1430
+ "count": 104
1431
  },
1432
  "SC": {
1433
  "accuracy": 1.0,
1434
+ "count": 106
1435
  },
1436
  "SS": {
1437
  "accuracy": 1.0,
1438
+ "count": 23
1439
  },
1440
  "UC": {
1441
  "accuracy": 1.0,
1442
+ "count": 160
1443
  },
1444
  "US": {
1445
  "accuracy": 1.0,
1446
+ "count": 307
1447
  }
1448
  }
1449
  },
1450
  "add_S5": {
1451
+ "full_accuracy": 1.0,
1452
+ "n_examples": 100,
1453
  "per_subtask": {
1454
  "SA": {
1455
  "accuracy": 1.0,
1456
+ "count": 100
1457
  },
1458
  "SC": {
1459
  "accuracy": 1.0,
1460
+ "count": 100
1461
  },
1462
  "UC": {
1463
+ "accuracy": 1.0,
1464
+ "count": 100
1465
  },
1466
  "US": {
1467
+ "accuracy": 1.0,
1468
+ "count": 400
1469
  }
1470
  }
1471
  },
1472
  "add_S6": {
1473
+ "full_accuracy": 1.0,
1474
+ "n_examples": 100,
1475
  "per_subtask": {
1476
  "SC": {
1477
  "accuracy": 1.0,
1478
+ "count": 100
1479
  },
1480
  "UC": {
1481
+ "accuracy": 1.0,
1482
+ "count": 100
1483
  },
1484
  "US": {
1485
+ "accuracy": 1.0,
1486
+ "count": 500
1487
  }
1488
  }
1489
  },
 
1493
  "per_subtask": {
1494
  "SA": {
1495
  "accuracy": 1.0,
1496
+ "count": 447
1497
  },
1498
  "SC": {
1499
  "accuracy": 1.0,
1500
+ "count": 320
1501
  },
1502
  "SS": {
1503
  "accuracy": 1.0,
1504
+ "count": 56
1505
  },
1506
  "UC": {
1507
  "accuracy": 1.0,
1508
+ "count": 529
1509
  },
1510
  "US": {
1511
  "accuracy": 1.0,
1512
+ "count": 48
1513
  }
1514
  }
1515
  },
1516
  "add_C3": {
1517
  "full_accuracy": 1.0,
1518
+ "n_examples": 100,
1519
  "per_subtask": {
1520
  "SA": {
1521
  "accuracy": 1.0,
1522
+ "count": 300
1523
  },
1524
  "SC": {
1525
  "accuracy": 1.0,
1526
+ "count": 100
1527
  },
1528
  "UC": {
1529
  "accuracy": 1.0,
1530
+ "count": 193
1531
  },
1532
  "US": {
1533
  "accuracy": 1.0,
1534
+ "count": 107
1535
  }
1536
  }
1537
  },
1538
  "add_C4": {
1539
  "full_accuracy": 1.0,
1540
+ "n_examples": 100,
1541
  "per_subtask": {
1542
  "SA": {
1543
  "accuracy": 1.0,
1544
+ "count": 200
1545
  },
1546
  "SC": {
1547
  "accuracy": 1.0,
1548
+ "count": 100
1549
  },
1550
  "UC": {
1551
  "accuracy": 1.0,
1552
+ "count": 256
1553
  },
1554
  "US": {
1555
  "accuracy": 1.0,
1556
+ "count": 144
1557
  }
1558
  }
1559
  },
1560
  "add_C5": {
1561
+ "full_accuracy": 1.0,
1562
+ "n_examples": 100,
1563
  "per_subtask": {
1564
  "SA": {
1565
  "accuracy": 1.0,
1566
+ "count": 100
1567
  },
1568
  "SC": {
1569
  "accuracy": 1.0,
1570
+ "count": 100
1571
  },
1572
  "UC": {
1573
+ "accuracy": 1.0,
1574
+ "count": 306
1575
  },
1576
  "US": {
1577
  "accuracy": 1.0,
1578
+ "count": 194
1579
  }
1580
  }
1581
  },
1582
  "add_C6": {
1583
  "full_accuracy": 1.0,
1584
+ "n_examples": 100,
1585
  "per_subtask": {
1586
  "SC": {
1587
  "accuracy": 1.0,
1588
+ "count": 100
1589
  },
1590
  "UC": {
1591
  "accuracy": 1.0,
1592
+ "count": 366
1593
  },
1594
  "US": {
1595
  "accuracy": 1.0,
1596
+ "count": 234
1597
  }
1598
  }
1599
  },
1600
  "sub_M0": {
1601
  "full_accuracy": 1.0,
1602
+ "n_examples": 100,
1603
  "per_subtask": {
1604
  "MD": {
1605
  "accuracy": 1.0,
1606
+ "count": 601
1607
  },
1608
  "ME": {
1609
  "accuracy": 1.0,
1610
+ "count": 99
1611
  }
1612
  }
1613
  },
1614
  "sub_M1": {
1615
  "full_accuracy": 1.0,
1616
+ "n_examples": 100,
1617
  "per_subtask": {
1618
  "MD": {
1619
  "accuracy": 1.0,
1620
+ "count": 279
1621
  },
1622
  "MB": {
1623
  "accuracy": 1.0,
1624
+ "count": 145
1625
  },
1626
  "ME": {
1627
  "accuracy": 1.0,
1628
+ "count": 24
1629
  },
1630
  "UB": {
1631
  "accuracy": 1.0,
1632
+ "count": 252
1633
  }
1634
  }
1635
  },
1636
  "sub_M2": {
1637
  "full_accuracy": 1.0,
1638
+ "n_examples": 100,
1639
  "per_subtask": {
1640
  "MD": {
1641
  "accuracy": 1.0,
1642
+ "count": 213
1643
  },
1644
  "MB": {
1645
  "accuracy": 1.0,
1646
+ "count": 113
1647
  },
1648
  "ME": {
1649
  "accuracy": 1.0,
1650
+ "count": 85
1651
  },
1652
  "UB": {
1653
  "accuracy": 1.0,
1654
+ "count": 181
1655
  },
1656
  "UD": {
1657
  "accuracy": 1.0,
1658
+ "count": 108
1659
  }
1660
  }
1661
  },
1662
  "sub_M3": {
1663
  "full_accuracy": 1.0,
1664
+ "n_examples": 100,
1665
  "per_subtask": {
1666
  "MD": {
1667
  "accuracy": 1.0,
1668
+ "count": 179
1669
  },
1670
  "MB": {
1671
  "accuracy": 1.0,
1672
+ "count": 103
1673
  },
1674
  "ME": {
1675
  "accuracy": 1.0,
1676
+ "count": 56
1677
  },
1678
  "UB": {
1679
  "accuracy": 1.0,
1680
+ "count": 149
1681
  },
1682
  "UD": {
1683
  "accuracy": 1.0,
1684
+ "count": 213
1685
  }
1686
  }
1687
  },
1688
  "sub_M4": {
1689
+ "full_accuracy": 1.0,
1690
+ "n_examples": 100,
1691
  "per_subtask": {
1692
  "MD": {
1693
  "accuracy": 1.0,
1694
+ "count": 200
1695
  },
1696
  "MB": {
1697
  "accuracy": 1.0,
1698
+ "count": 100
1699
  },
1700
  "UB": {
1701
+ "accuracy": 1.0,
1702
+ "count": 100
1703
  },
1704
  "UD": {
1705
  "accuracy": 1.0,
1706
+ "count": 300
1707
  }
1708
  }
1709
  },
1710
  "sub_M5": {
1711
+ "full_accuracy": 1.0,
1712
+ "n_examples": 100,
1713
  "per_subtask": {
1714
  "MD": {
1715
  "accuracy": 1.0,
1716
+ "count": 100
1717
  },
1718
  "MB": {
1719
  "accuracy": 1.0,
1720
+ "count": 100
1721
  },
1722
  "UB": {
1723
  "accuracy": 1.0,
1724
+ "count": 100
1725
  },
1726
  "UD": {
1727
+ "accuracy": 1.0,
1728
+ "count": 400
1729
  }
1730
  }
1731
  },
 
1735
  "per_subtask": {
1736
  "MD": {
1737
  "accuracy": 1.0,
1738
+ "count": 600
1739
  },
1740
  "MB": {
1741
  "accuracy": 1.0,
1742
+ "count": 267
1743
  },
1744
  "ME": {
1745
  "accuracy": 1.0,
1746
+ "count": 53
1747
  },
1748
  "UB": {
1749
  "accuracy": 1.0,
1750
+ "count": 439
1751
  },
1752
  "UD": {
1753
  "accuracy": 1.0,
1754
+ "count": 41
1755
  }
1756
  }
1757
  },
1758
  "sub_B3": {
1759
  "full_accuracy": 1.0,
1760
+ "n_examples": 100,
1761
  "per_subtask": {
1762
  "MD": {
1763
  "accuracy": 1.0,
1764
+ "count": 300
1765
  },
1766
  "MB": {
1767
  "accuracy": 1.0,
1768
+ "count": 100
1769
  },
1770
  "UB": {
1771
  "accuracy": 1.0,
1772
+ "count": 197
1773
  },
1774
  "UD": {
1775
  "accuracy": 1.0,
1776
+ "count": 103
1777
  }
1778
  }
1779
  },
1780
  "sub_B4": {
1781
  "full_accuracy": 1.0,
1782
+ "n_examples": 100,
1783
  "per_subtask": {
1784
  "MD": {
1785
  "accuracy": 1.0,
1786
+ "count": 200
1787
  },
1788
  "MB": {
1789
  "accuracy": 1.0,
1790
+ "count": 100
1791
  },
1792
  "UB": {
1793
  "accuracy": 1.0,
1794
+ "count": 247
1795
  },
1796
  "UD": {
1797
  "accuracy": 1.0,
1798
+ "count": 153
1799
  }
1800
  }
1801
  },
1802
  "sub_B5": {
1803
  "full_accuracy": 1.0,
1804
+ "n_examples": 100,
1805
  "per_subtask": {
1806
  "MD": {
1807
  "accuracy": 1.0,
1808
+ "count": 100
1809
  },
1810
  "MB": {
1811
  "accuracy": 1.0,
1812
+ "count": 100
1813
  },
1814
  "UB": {
1815
  "accuracy": 1.0,
1816
+ "count": 298
1817
  },
1818
  "UD": {
1819
  "accuracy": 1.0,
1820
+ "count": 202
1821
  }
1822
  }
1823
  }
1824
  },
1825
  "summary": {
1826
+ "overall_accuracy": 1.0,
1827
+ "total_examples": 2400,
1828
  "n_splits": 22
1829
  }
1830
  }
add_sub_baseline_50K/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16ab43d64dbfe964a136e7d0325cf995347d43a5180344dddcc116e4d205eef2
3
  size 650266922
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1d271990035c114502ee453f5a961d61cc2caa9996b45559249726b42c2cb6a
3
  size 650266922
add_sub_baseline_50K/train_config.json CHANGED
@@ -1,35 +1,84 @@
1
  {
2
- "mode": "baseline",
3
- "ops": "add_sub",
4
- "n_digits": 6,
5
- "n_layer": 2,
6
- "n_head": 3,
7
- "n_embd": 510,
8
- "abs_vocab": 0,
9
  "K": 4,
 
 
 
 
 
 
10
  "alpha_info_gain": 10.0,
11
  "alpha_abs": 0.1,
12
  "alpha_soft_zipf": 1.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "batch_size": 64,
 
14
  "num_epochs": 20,
15
- "dataset_size": 50000,
16
- "lr": 8e-05,
 
 
 
17
  "output_dir": "ckpt/sweep/add_sub_baseline_50K",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  "device": "cuda",
19
  "push_to_hub": true,
20
  "no_wandb": false,
21
  "n_params": 162490082,
22
  "run_name": "add_sub_baseline_50K",
23
- "git_commit": "800625019270114adcda289bbd550c4f1109a514",
24
- "timestamp": "2026-04-12T01:58:11.886382+00:00",
25
  "tokenizer": "Qwen/Qwen3-0.6B",
26
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
27
  "dataset_config": "add_sub_6digit",
28
  "model_repo": "thoughtworks/arithmetic-sorl",
29
  "trainer_version": "sft",
30
- "wandb_run_id": "p7yzb1qj",
31
- "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/p7yzb1qj",
32
- "final_accuracy": 0.9914285714285714,
33
- "sft_accuracy": 0.9914285714285714,
34
  "eval_method": "ArithmeticEvaluator"
35
  }
 
1
  {
2
+ "num_rollouts": 4,
 
 
 
 
 
 
3
  "K": 4,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
  "alpha_info_gain": 10.0,
11
  "alpha_abs": 0.1,
12
  "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 4e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 468,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
  "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
  "num_epochs": 20,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 781,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
  "output_dir": "ckpt/sweep/add_sub_baseline_50K",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 0,
65
+ "dataset_size": 50000,
66
+ "mode": "baseline",
67
  "device": "cuda",
68
  "push_to_hub": true,
69
  "no_wandb": false,
70
  "n_params": 162490082,
71
  "run_name": "add_sub_baseline_50K",
72
+ "git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
73
+ "timestamp": "2026-04-12T17:38:15.462759+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
77
  "model_repo": "thoughtworks/arithmetic-sorl",
78
  "trainer_version": "sft",
79
+ "wandb_run_id": "2phtgprv",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/2phtgprv",
81
+ "final_accuracy": 1.0,
82
+ "sft_accuracy": 1.0,
83
  "eval_method": "ArithmeticEvaluator"
84
  }