amirali1985 commited on
Commit
ff911bb
·
verified ·
1 Parent(s): 6c58fe8

Upload add_sub_sorl_v1_abs50_50K

Browse files
add_sub_sorl_v1_abs50_50K/metrics.json CHANGED
@@ -153,1069 +153,1069 @@
153
  7788
154
  ],
155
  "loss": [
156
- 14.632190704345703,
157
- 9.926382064819336,
158
- 6.634365081787109,
159
- 4.12091064453125,
160
- 2.3600664138793945,
161
- 2.5965561866760254,
162
- 2.2668251991271973,
163
- 2.4945991039276123,
164
- 1.87929368019104,
165
- 1.770228624343872,
166
- 1.4810309410095215,
167
- 1.115319013595581,
168
- 1.2603116035461426,
169
- 1.118563175201416,
170
- 0.7639645338058472,
171
- 0.5783902406692505,
172
- -0.2102416753768921,
173
- -1.0637309551239014,
174
- -2.982658863067627,
175
- -5.1272735595703125,
176
- -6.820440769195557,
177
- -8.708982467651367,
178
- -9.34764575958252,
179
- -9.479778289794922,
180
- -9.714715003967285,
181
- -9.847146987915039,
182
- -10.891656875610352,
183
- -10.567233085632324,
184
- -10.487039566040039,
185
- -10.561330795288086,
186
- -11.278342247009277,
187
- -11.53654956817627,
188
- -11.39672565460205,
189
- -12.42699146270752,
190
- -12.283132553100586,
191
- -12.111838340759277,
192
- -12.558882713317871,
193
- -12.664121627807617,
194
- -11.877008438110352,
195
- -13.080121040344238,
196
- -12.89445972442627,
197
- -13.657825469970703,
198
- -13.041352272033691,
199
- -12.739060401916504,
200
- -13.011455535888672,
201
- -13.518850326538086,
202
- -13.68348217010498,
203
- -13.794760704040527,
204
- -12.652961730957031,
205
- -13.282352447509766,
206
- -12.864669799804688,
207
- -12.425497055053711,
208
- -13.485628128051758,
209
- -13.69277286529541,
210
- -13.639493942260742,
211
- -13.366686820983887,
212
- -13.520065307617188,
213
- -13.455315589904785,
214
- -13.581616401672363,
215
- -13.953703880310059,
216
- -13.787495613098145,
217
- -13.657554626464844,
218
- -13.825128555297852,
219
- -13.480307579040527,
220
- -12.544189453125,
221
- -11.929388999938965,
222
- -10.827520370483398,
223
- -8.075313568115234,
224
- -6.391434192657471,
225
- -5.907987594604492,
226
- -4.740344047546387,
227
- -5.025309085845947,
228
- -4.2770867347717285,
229
- -4.229391574859619,
230
- -3.865729570388794,
231
- -4.085846424102783,
232
- -3.6969783306121826,
233
- -3.788874387741089,
234
- -3.3547849655151367,
235
- -3.1135661602020264,
236
- -2.8369789123535156,
237
- -2.6105422973632812,
238
- -3.406431198120117,
239
- -2.858949661254883,
240
- -2.1879334449768066,
241
- -2.340407133102417,
242
- -1.6988248825073242,
243
- -1.7582203149795532,
244
- -2.0069541931152344,
245
- -2.3115696907043457,
246
- -1.6048082113265991,
247
- -3.4057414531707764,
248
- -1.5541704893112183,
249
- -1.587241768836975,
250
- -1.341800570487976,
251
- -1.3259270191192627,
252
- -1.2093465328216553,
253
- -1.1008269786834717,
254
- -0.8763017654418945,
255
- -1.4300707578659058,
256
- -1.191908597946167,
257
- -1.174930453300476,
258
- -1.2244458198547363,
259
- -0.4350616931915283,
260
- -1.2295445203781128,
261
- -0.4361853301525116,
262
- -0.5482154488563538,
263
- -1.4029738903045654,
264
- -0.9040775895118713,
265
- -0.5001223087310791,
266
- -0.3795071244239807,
267
- -0.7955602407455444,
268
- -0.8618952631950378,
269
- -0.6030458807945251,
270
- -0.8650072813034058,
271
- -0.632997989654541,
272
- -0.8306871652603149,
273
- -0.6155402660369873,
274
- -0.47851070761680603,
275
- -0.4381093382835388,
276
- -0.7446098327636719,
277
- -0.604732871055603,
278
- -0.586620569229126,
279
- -0.5812572240829468,
280
- -0.5657588839530945,
281
- -0.40892261266708374,
282
- -0.34598445892333984,
283
- -0.37908679246902466,
284
- -0.6343741416931152,
285
- -0.48928260803222656,
286
- -0.4414480924606323,
287
- -0.5256525874137878,
288
- -0.5549529790878296,
289
- -0.5228503942489624,
290
- -0.43323570489883423,
291
- -0.35606762766838074,
292
- -0.3718273639678955,
293
- -0.3244103789329529,
294
- -0.31916630268096924,
295
- -0.36536484956741333,
296
- -0.5733921527862549,
297
- -0.36300128698349,
298
- -0.16675259172916412,
299
- -0.3644050359725952,
300
- -0.3043482303619385,
301
- -0.24840393662452698,
302
- -0.19103999435901642,
303
- -0.1850191354751587,
304
- -0.3407224416732788,
305
- -0.25857996940612793
306
  ],
307
  "base_loss": [
308
- 9.482434272766113,
309
- 7.117786884307861,
310
- 6.046176433563232,
311
- 4.258971214294434,
312
- 2.312073230743408,
313
- 1.998913049697876,
314
- 1.8657360076904297,
315
- 1.941304087638855,
316
- 1.8812999725341797,
317
- 1.8264588117599487,
318
- 1.827309250831604,
319
- 1.793294906616211,
320
- 1.8274389505386353,
321
- 1.8220479488372803,
322
- 1.8286134004592896,
323
- 1.7788950204849243,
324
- 1.8032491207122803,
325
- 1.7887276411056519,
326
- 2.006676197052002,
327
- 1.9466220140457153,
328
- 1.938077688217163,
329
- 2.0098798274993896,
330
- 1.9562937021255493,
331
- 1.7426707744598389,
332
- 1.8475419282913208,
333
- 1.8222671747207642,
334
- 1.7838033437728882,
335
- 1.7463515996932983,
336
- 1.697568655014038,
337
- 1.7143851518630981,
338
- 1.7419077157974243,
339
- 1.7188345193862915,
340
- 1.658122181892395,
341
- 1.7801390886306763,
342
- 1.6910394430160522,
343
- 1.6635606288909912,
344
- 1.7399853467941284,
345
- 1.6727386713027954,
346
- 1.5781800746917725,
347
- 1.6815193891525269,
348
- 1.6525733470916748,
349
- 1.708761215209961,
350
- 1.6608943939208984,
351
- 1.6103297472000122,
352
- 1.623249888420105,
353
- 1.6843823194503784,
354
- 1.7061597108840942,
355
- 1.6853832006454468,
356
- 1.5944339036941528,
357
- 1.6551153659820557,
358
- 1.5582751035690308,
359
- 1.5246549844741821,
360
- 1.6163057088851929,
361
- 1.616803526878357,
362
- 1.6256102323532104,
363
- 1.5938681364059448,
364
- 1.6049398183822632,
365
- 1.5970059633255005,
366
- 1.624014973640442,
367
- 1.625469446182251,
368
- 1.5937610864639282,
369
- 1.5731550455093384,
370
- 1.5809533596038818,
371
- 1.5393472909927368,
372
- 1.435717225074768,
373
- 1.369104266166687,
374
- 1.2582679986953735,
375
- 0.9665323495864868,
376
- 0.7821284532546997,
377
- 0.7074981331825256,
378
- 0.5917485356330872,
379
- 0.6081733107566833,
380
- 0.5498444437980652,
381
- 0.5120163559913635,
382
- 0.4534907341003418,
383
- 0.4987371265888214,
384
- 0.4344933032989502,
385
- 0.4616098999977112,
386
- 0.3905290961265564,
387
- 0.36903122067451477,
388
- 0.3468772768974304,
389
- 0.3053329885005951,
390
- 0.39532873034477234,
391
- 0.33305686712265015,
392
- 0.2736288011074066,
393
- 0.27657410502433777,
394
- 0.21591803431510925,
395
- 0.20601941645145416,
396
- 0.23737052083015442,
397
- 0.27516016364097595,
398
- 0.20490236580371857,
399
- 0.4041613042354584,
400
- 0.1861131340265274,
401
- 0.19129757583141327,
402
- 0.1886499673128128,
403
- 0.18159404397010803,
404
- 0.14922840893268585,
405
- 0.13664014637470245,
406
- 0.10598627477884293,
407
- 0.1828942596912384,
408
- 0.1439986377954483,
409
- 0.14368221163749695,
410
- 0.14707587659358978,
411
- 0.07975024729967117,
412
- 0.14613859355449677,
413
- 0.06020959094166756,
414
- 0.07016318291425705,
415
- 0.1634124517440796,
416
- 0.1087348610162735,
417
- 0.082696333527565,
418
- 0.07368756830692291,
419
- 0.10071809589862823,
420
- 0.10751090198755264,
421
- 0.07716800272464752,
422
- 0.10648422688245773,
423
- 0.07751994580030441,
424
- 0.09940018504858017,
425
- 0.07651485502719879,
426
- 0.05863216146826744,
427
- 0.05505487695336342,
428
- 0.09117519855499268,
429
- 0.07521039247512817,
430
- 0.07184553146362305,
431
- 0.07176653295755386,
432
- 0.0699726790189743,
433
- 0.052560459822416306,
434
- 0.044058240950107574,
435
- 0.048487093299627304,
436
- 0.08156915754079819,
437
- 0.061087582260370255,
438
- 0.06119650602340698,
439
- 0.06394444406032562,
440
- 0.07004927843809128,
441
- 0.0659731775522232,
442
- 0.05378962680697441,
443
- 0.0477081723511219,
444
- 0.04793006181716919,
445
- 0.04166543111205101,
446
- 0.04095941409468651,
447
- 0.04708099737763405,
448
- 0.07109672576189041,
449
- 0.045931022614240646,
450
- 0.024872872978448868,
451
- 0.04558694362640381,
452
- 0.03894834965467453,
453
- 0.03414854779839516,
454
- 0.027124684303998947,
455
- 0.027090419083833694,
456
- 0.044619057327508926,
457
- 0.033715397119522095
458
  ],
459
  "info_loss": [
460
- -0.12326622009277344,
461
- -0.18317556381225586,
462
- -0.21192121505737305,
463
- -0.1614665985107422,
464
- -0.09911632537841797,
465
- -0.03676164150238037,
466
- -0.05518639087677002,
467
- -0.03937363624572754,
468
- -0.09468269348144531,
469
- -0.09933149814605713,
470
- -0.127937912940979,
471
- -0.1626497507095337,
472
- -0.15077292919158936,
473
- -0.16379308700561523,
474
- -0.19910144805908203,
475
- -0.20818078517913818,
476
- -0.2778512239456177,
477
- -0.351151704788208,
478
- -0.5528711080551147,
479
- -0.7527784109115601,
480
- -0.9174032211303711,
481
- -1.1097712516784668,
482
- -1.1687355041503906,
483
- -1.156583309173584,
484
- -1.1916966438293457,
485
- -1.1981340646743774,
486
- -1.2985475063323975,
487
- -1.2606419324874878,
488
- -1.246760606765747,
489
- -1.2560533285140991,
490
- -1.331277847290039,
491
- -1.3520653247833252,
492
- -1.3316826820373535,
493
- -1.4475840330123901,
494
- -1.423694372177124,
495
- -1.401245355606079,
496
- -1.4543802738189697,
497
- -1.456540822982788,
498
- -1.3692126274108887,
499
- -1.4987409114837646,
500
- -1.4760082960128784,
501
- -1.5583420991897583,
502
- -1.4900978803634644,
503
- -1.4563288688659668,
504
- -1.4812694787979126,
505
- -1.5390629768371582,
506
- -1.5584094524383545,
507
- -1.56727933883667,
508
- -1.4423127174377441,
509
- -1.5100535154342651,
510
- -1.4591412544250488,
511
- -1.4114735126495361,
512
- -1.5255085229873657,
513
- -1.547141432762146,
514
- -1.54146146774292,
515
- -1.510321855545044,
516
- -1.5257158279418945,
517
- -1.5199824571609497,
518
- -1.5334408283233643,
519
- -1.5722233057022095,
520
- -1.5500802993774414,
521
- -1.5357627868652344,
522
- -1.552409052848816,
523
- -1.5149730443954468,
524
- -1.40906822681427,
525
- -1.341101884841919,
526
- -1.2200331687927246,
527
- -0.9157827496528625,
528
- -0.7277940511703491,
529
- -0.6731217503547668,
530
- -0.5456058979034424,
531
- -0.5737265944480896,
532
- -0.49359437823295593,
533
- -0.48544979095458984,
534
- -0.4402371644973755,
535
- -0.467752605676651,
536
- -0.42211639881134033,
537
- -0.434103399515152,
538
- -0.38376203179359436,
539
- -0.3583239018917084,
540
- -0.3261827230453491,
541
- -0.29966747760772705,
542
- -0.38840124011039734,
543
- -0.32828328013420105,
544
- -0.25409969687461853,
545
- -0.2704025208950043,
546
- -0.1985175758600235,
547
- -0.20338541269302368,
548
- -0.23216019570827484,
549
- -0.26735031604766846,
550
- -0.18867464363574982,
551
- -0.38847246766090393,
552
- -0.182339146733284,
553
- -0.18704256415367126,
554
- -0.1616264432668686,
555
- -0.15969634056091309,
556
- -0.14530818164348602,
557
- -0.13175378739833832,
558
- -0.10473348200321198,
559
- -0.171867236495018,
560
- -0.1426738202571869,
561
- -0.13931553065776825,
562
- -0.14417681097984314,
563
- -0.05767478048801422,
564
- -0.14531880617141724,
565
- -0.05537816509604454,
566
- -0.06925474852323532,
567
- -0.162682443857193,
568
- -0.10662886500358582,
569
- -0.06461586803197861,
570
- -0.05278106778860092,
571
- -0.09546349942684174,
572
- -0.10324031114578247,
573
- -0.07626307010650635,
574
- -0.10292515903711319,
575
- -0.07728055119514465,
576
- -0.09876345098018646,
577
- -0.07488290220499039,
578
- -0.05830881744623184,
579
- -0.05468850955367088,
580
- -0.09090279042720795,
581
- -0.07504025101661682,
582
- -0.07148993760347366,
583
- -0.07156261801719666,
584
- -0.0697956308722496,
585
- -0.05241704359650612,
586
- -0.04384676739573479,
587
- -0.048265501856803894,
588
- -0.07867752760648727,
589
- -0.06092023849487305,
590
- -0.0558161623775959,
591
- -0.06381774693727493,
592
- -0.06975483149290085,
593
- -0.06581415981054306,
594
- -0.053555652499198914,
595
- -0.0474986732006073,
596
- -0.04777195677161217,
597
- -0.04158833995461464,
598
- -0.04086068645119667,
599
- -0.046969663351774216,
600
- -0.07098674029111862,
601
- -0.04583430290222168,
602
- -0.02477315440773964,
603
- -0.045496866106987,
604
- -0.03886625915765762,
605
- -0.033758021891117096,
606
- -0.026911893859505653,
607
- -0.026989782229065895,
608
- -0.0445425882935524,
609
- -0.03364324942231178
610
  ],
611
  "abs_loss": [
612
- 3.8987743854522705,
613
- 3.835925340652466,
614
- 3.731152057647705,
615
- 3.3448033332824707,
616
- 2.9310805797576904,
617
- 2.8018741607666016,
618
- 2.832192897796631,
619
- 2.8128607273101807,
620
- 2.843754291534424,
621
- 2.762322187423706,
622
- 2.6791672706604004,
623
- 2.8312129974365234,
624
- 2.7969703674316406,
625
- 2.793354034423828,
626
- 2.753342628479004,
627
- 2.7262606620788574,
628
- 2.620635986328125,
629
- 2.5968563556671143,
630
- 2.31925106048584,
631
- 2.0016016960144043,
632
- 1.9219343662261963,
633
- 1.7101284265518188,
634
- 1.7175776958465576,
635
- 1.6339759826660156,
636
- 1.8100486993789673,
637
- 1.4601774215698242,
638
- 1.461230993270874,
639
- 1.3801701068878174,
640
- 1.3301787376403809,
641
- 1.3739302158355713,
642
- 1.587360143661499,
643
- 1.3912031650543213,
644
- 1.3633630275726318,
645
- 1.4589648246765137,
646
- 1.4376378059387207,
647
- 1.2283248901367188,
648
- 1.3686951398849487,
649
- 1.261014461517334,
650
- 1.1936320066452026,
651
- 1.2106618881225586,
652
- 1.1354954242706299,
653
- 1.202142357826233,
654
- 1.1697142124176025,
655
- 1.216151237487793,
656
- 0.9472143054008484,
657
- 1.0352338552474976,
658
- 1.2005023956298828,
659
- 0.9383926391601562,
660
- 0.9692818522453308,
661
- 0.8716940879821777,
662
- 0.9582070112228394,
663
- 1.061899185180664,
664
- 0.9574461579322815,
665
- 1.0999226570129395,
666
- 0.8257886171340942,
667
- 0.9105151891708374,
668
- 0.7686405181884766,
669
- 0.7620456218719482,
670
- 0.8083024621009827,
671
- 0.7366126775741577,
672
- 0.64865642786026,
673
- 0.6970773935317993,
674
- 0.5407489538192749,
675
- 0.9302119016647339,
676
- 0.6019563674926758,
677
- 0.6286913156509399,
678
- 0.6697531938552856,
679
- 0.85337895154953,
680
- 0.605627715587616,
681
- 0.6994856595993042,
682
- 0.6913561820983887,
683
- 0.7227469086647034,
684
- 0.8089389204978943,
685
- 0.679562509059906,
686
- 0.5462037324905396,
687
- 0.5866653919219971,
688
- 0.48199528455734253,
689
- 0.49292898178100586,
690
- 0.5071792602539062,
691
- 0.6359560489654541,
692
- 0.4345041811466217,
693
- 0.5750279426574707,
694
- 0.5141332149505615,
695
- 0.48265522718429565,
696
- 0.38371920585632324,
697
- 0.6727322936058044,
698
- 0.524191677570343,
699
- 0.46154582500457764,
700
- 0.38243192434310913,
701
- 0.5157103538513184,
702
- 0.48300936818122864,
703
- 0.5282471179962158,
704
- 0.47535738348960876,
705
- 0.4935920834541321,
706
- 0.4585495889186859,
707
- 0.4682696759700775,
708
- 0.3801548182964325,
709
- 0.3888419568538666,
710
- 0.314349889755249,
711
- 0.40311598777770996,
712
- 0.393927663564682,
713
- 0.3618808388710022,
714
- 0.31192538142204285,
715
- 0.29459425806999207,
716
- 0.38845521211624146,
717
- 0.3338332772254944,
718
- 0.45690685510635376,
719
- 0.2870320677757263,
720
- 0.30454182624816895,
721
- 0.39305591583251953,
722
- 0.4107697606086731,
723
- 0.2857040762901306,
724
- 0.40057629346847534,
725
- 0.2520090937614441,
726
- 0.3365999460220337,
727
- 0.3793593645095825,
728
- 0.2781757414340973,
729
- 0.23415732383728027,
730
- 0.17023888230323792,
731
- 0.236487478017807,
732
- 0.29216936230659485,
733
- 0.18099826574325562,
734
- 0.28724172711372375,
735
- 0.15562580525875092,
736
- 0.24116644263267517,
737
- 0.29714399576187134,
738
- 0.23464536666870117,
739
- 0.25069287419319153,
740
- 0.24226970970630646,
741
- 0.27992311120033264,
742
- 0.286587655544281,
743
- 0.17398861050605774,
744
- 0.28588318824768066,
745
- 0.2069043666124344,
746
- 0.18253867328166962,
747
- 0.24283462762832642,
748
- 0.26914891600608826,
749
- 0.22705870866775513,
750
- 0.1970517933368683,
751
- 0.17965485155582428,
752
- 0.2492925226688385,
753
- 0.1700528860092163,
754
- 0.16555199027061462,
755
- 0.22087526321411133,
756
- 0.22362147271633148,
757
- 0.23984941840171814,
758
- 0.23964354395866394,
759
- 0.17572689056396484,
760
- 0.18303079903125763,
761
- 0.20619693398475647
762
  ],
763
  "zipf_loss": [
764
- 5.992541313171387,
765
- 4.256757736206055,
766
- 2.3342857360839844,
767
- 1.142124891281128,
768
- 0.7460483908653259,
769
- 0.6850723028182983,
770
- 0.6697337031364441,
771
- 0.6657452583312988,
772
- 0.6604452133178711,
773
- 0.6608524918556213,
774
- 0.6651840209960938,
775
- 0.6654003262519836,
776
- 0.6609048247337341,
777
- 0.6551106572151184,
778
- 0.6510313749313354,
779
- 0.6086770296096802,
780
- 0.502957820892334,
781
- 0.39937275648117065,
782
- 0.3074512779712677,
783
- 0.25372886657714844,
784
- 0.22332003712654114,
785
- 0.20783749222755432,
786
- 0.2116575390100479,
787
- 0.17998555302619934,
788
- 0.1737050563097,
789
- 0.1659075915813446,
790
- 0.16389304399490356,
791
- 0.15481875836849213,
792
- 0.14997941255569458,
793
- 0.14742501080036163,
794
- 0.13379204273223877,
795
- 0.1261483132839203,
796
- 0.1256428062915802,
797
- 0.12281343340873718,
798
- 0.1190091073513031,
799
- 0.11422161012887955,
800
- 0.10806499421596527,
801
- 0.1024467945098877,
802
- 0.1175742894411087,
803
- 0.10470254719257355,
804
- 0.09950097650289536,
805
- 0.09661951661109924,
806
- 0.08176100254058838,
807
- 0.09228333085775375,
808
- 0.0832676887512207,
809
- 0.08387405425310135,
810
- 0.07440327852964401,
811
- 0.09880977869033813,
812
- 0.07880298793315887,
813
- 0.07589824497699738,
814
- 0.07264722883701324,
815
- 0.05839239060878754,
816
- 0.057406291365623474,
817
- 0.051846496760845184,
818
- 0.06693202257156372,
819
- 0.051612261682748795,
820
- 0.055289462208747864,
821
- 0.07129812985658646,
822
- 0.04794581979513168,
823
- 0.06939932703971863,
824
- 0.05468002334237099,
825
- 0.057209789752960205,
826
- 0.0639333575963974,
827
- 0.03705426678061485,
828
- 0.05057953670620918,
829
- 0.04965557903051376,
830
- 0.04756723344326019,
831
- 0.030643729493021965,
832
- 0.043815724551677704,
833
- 0.04578302800655365,
834
- 0.05483042076230049,
835
- 0.03150896355509758,
836
- 0.028118720278143883,
837
- 0.04513361677527428,
838
- 0.028530914336442947,
839
- 0.034275688230991364,
840
- 0.04149279743432999,
841
- 0.04125683009624481,
842
- 0.04158829152584076,
843
- 0.03704618662595749,
844
- 0.034520722925662994,
845
- 0.023296881467103958,
846
- 0.030839255079627037,
847
- 0.042560875415802,
848
- 0.04106279835104942,
849
- 0.019770771265029907,
850
- 0.01801365055143833,
851
- 0.023459654301404953,
852
- 0.03903378173708916,
853
- 0.03520215302705765,
854
- 0.02873474918305874,
855
- 0.02199714072048664,
856
- 0.03557206690311432,
857
- 0.042527176439762115,
858
- 0.03995896130800247,
859
- 0.042615462094545364,
860
- 0.05649139732122421,
861
- 0.04118660092353821,
862
- 0.0336117222905159,
863
- 0.06539583206176758,
864
- 0.05143829807639122,
865
- 0.03835456073284149,
866
- 0.03905396908521652,
867
- 0.03247641399502754,
868
- 0.038659460842609406,
869
- 0.02400336042046547,
870
- 0.028478197753429413,
871
- 0.03173477202653885,
872
- 0.023022141307592392,
873
- 0.024034500122070312,
874
- 0.03353904187679291,
875
- 0.029786277562379837,
876
- 0.02293926477432251,
877
- 0.05721593275666237,
878
- 0.02410007268190384,
879
- 0.02435165084898472,
880
- 0.02972968854010105,
881
- 0.03335817903280258,
882
- 0.0289214625954628,
883
- 0.030072137713432312,
884
- 0.044025976210832596,
885
- 0.05235940217971802,
886
- 0.027709070593118668,
887
- 0.04703980311751366,
888
- 0.03810810297727585,
889
- 0.03297300264239311,
890
- 0.024960467591881752,
891
- 0.03001183271408081,
892
- 0.04660506919026375,
893
- 0.030839864164590836,
894
- 0.02685823291540146,
895
- 0.03118157386779785,
896
- 0.043957799673080444,
897
- 0.04862763732671738,
898
- 0.03027738630771637,
899
- 0.04692744463682175,
900
- 0.031047239899635315,
901
- 0.02710168994963169,
902
- 0.028775956481695175,
903
- 0.03928530216217041,
904
- 0.04044933617115021,
905
- 0.03240543231368065,
906
- 0.039550866931676865,
907
- 0.022889159619808197,
908
- 0.023003853857517242,
909
- 0.031042777001857758,
910
- 0.02698989398777485,
911
- 0.04021558538079262,
912
- 0.04178132116794586,
913
- 0.023517422378063202
914
  ],
915
  "denoise_loss": [],
916
  "ortho_loss": [
917
- 0.6933014988899231,
918
- 0.37083250284194946,
919
- 0.16746486723423004,
920
- 0.0885070413351059,
921
- 0.06592030823230743,
922
- 0.053648799657821655,
923
- 0.04771490767598152,
924
- 0.04534199461340904,
925
- 0.0437336266040802,
926
- 0.045895759016275406,
927
- 0.05053376406431198,
928
- 0.0523027703166008,
929
- 0.05737421289086342,
930
- 0.061693500727415085,
931
- 0.06343631446361542,
932
- 0.06562791019678116,
933
- 0.06717279553413391,
934
- 0.07206566631793976,
935
- 0.07831596583127975,
936
- 0.07906866073608398,
937
- 0.08471731096506119,
938
- 0.09428071230649948,
939
- 0.10492909699678421,
940
- 0.11721401661634445,
941
- 0.12516844272613525,
942
- 0.1334349811077118,
943
- 0.14114034175872803,
944
- 0.1503930687904358,
945
- 0.15645001828670502,
946
- 0.16175302863121033,
947
- 0.1684732288122177,
948
- 0.16980968415737152,
949
- 0.1736874133348465,
950
- 0.17783427238464355,
951
- 0.18060865998268127,
952
- 0.18240122497081757,
953
- 0.1839177906513214,
954
- 0.18623745441436768,
955
- 0.1907666027545929,
956
- 0.19191065430641174,
957
- 0.19565843045711517,
958
- 0.19815172255039215,
959
- 0.20228537917137146,
960
- 0.20597098767757416,
961
- 0.2075631469488144,
962
- 0.20883804559707642,
963
- 0.2080991268157959,
964
- 0.21085670590400696,
965
- 0.21062801778316498,
966
- 0.2116369903087616,
967
- 0.2120591253042221,
968
- 0.21061359345912933,
969
- 0.20848363637924194,
970
- 0.20663155615329742,
971
- 0.2056088000535965,
972
- 0.20723474025726318,
973
- 0.2063807100057602,
974
- 0.20915858447551727,
975
- 0.20919714868068695,
976
- 0.20870265364646912,
977
- 0.2069307267665863,
978
- 0.20666652917861938,
979
- 0.2061752825975418,
980
- 0.2027379870414734,
981
- 0.20436672866344452,
982
- 0.20579716563224792,
983
- 0.20884613692760468,
984
- 0.21337348222732544,
985
- 0.21834859251976013,
986
- 0.21869564056396484,
987
- 0.2199230194091797,
988
- 0.22768796980381012,
989
- 0.23089125752449036,
990
- 0.23446932435035706,
991
- 0.23750188946723938,
992
- 0.2428743839263916,
993
- 0.2425122857093811,
994
- 0.24684441089630127,
995
- 0.24865762889385223,
996
- 0.24414007365703583,
997
- 0.2449251115322113,
998
- 0.24844960868358612,
999
- 0.2532559931278229,
1000
- 0.2549189329147339,
1001
- 0.25830599665641785,
1002
- 0.256623238325119,
1003
- 0.25953999161720276,
1004
- 0.2646258771419525,
1005
- 0.26612550020217896,
1006
- 0.26787811517715454,
1007
- 0.2701391279697418,
1008
- 0.27274689078330994,
1009
- 0.274588942527771,
1010
- 0.27538907527923584,
1011
- 0.27801740169525146,
1012
- 0.2798006534576416,
1013
- 0.27908483147621155,
1014
- 0.2781679928302765,
1015
- 0.28107044100761414,
1016
- 0.2800515592098236,
1017
- 0.28154537081718445,
1018
- 0.2851882874965668,
1019
- 0.29015690088272095,
1020
- 0.2916761636734009,
1021
- 0.29237350821495056,
1022
- 0.29165220260620117,
1023
- 0.295254647731781,
1024
- 0.29890671372413635,
1025
- 0.30145129561424255,
1026
- 0.3019912540912628,
1027
- 0.30194753408432007,
1028
- 0.3042812943458557,
1029
- 0.30524149537086487,
1030
- 0.3049549162387848,
1031
- 0.3069272041320801,
1032
- 0.30974724888801575,
1033
- 0.31134963035583496,
1034
- 0.31236183643341064,
1035
- 0.3140103220939636,
1036
- 0.3156379163265228,
1037
- 0.3186745047569275,
1038
- 0.31933704018592834,
1039
- 0.3208130896091461,
1040
- 0.3216458857059479,
1041
- 0.32485339045524597,
1042
- 0.3259000778198242,
1043
- 0.3260437250137329,
1044
- 0.32846739888191223,
1045
- 0.3293222486972809,
1046
- 0.33021095395088196,
1047
- 0.33130118250846863,
1048
- 0.33234986662864685,
1049
- 0.3325884938240051,
1050
- 0.33358755707740784,
1051
- 0.3331451416015625,
1052
- 0.33463388681411743,
1053
- 0.33540332317352295,
1054
- 0.3352293372154236,
1055
- 0.3365360498428345,
1056
- 0.33647823333740234,
1057
- 0.3373063802719116,
1058
- 0.3373261094093323,
1059
- 0.3375466763973236,
1060
- 0.33824118971824646,
1061
- 0.3386029601097107,
1062
- 0.33914104104042053,
1063
- 0.3400459289550781,
1064
- 0.3409873843193054,
1065
- 0.3414129614830017,
1066
- 0.3418258726596832
1067
  ],
1068
  "lr": [
1069
- 8.376068376068378e-06,
1070
- 1.6923076923076924e-05,
1071
- 2.5470085470085475e-05,
1072
- 3.401709401709402e-05,
1073
- 4e-05,
1074
- 4e-05,
1075
- 4e-05,
1076
- 4e-05,
1077
- 4e-05,
1078
- 4e-05,
1079
- 4e-05,
1080
- 4e-05,
1081
- 4e-05,
1082
- 4e-05,
1083
- 4e-05,
1084
- 4e-05,
1085
- 4e-05,
1086
- 4e-05,
1087
- 4e-05,
1088
- 4e-05,
1089
- 4e-05,
1090
- 4e-05,
1091
- 4e-05,
1092
- 4e-05,
1093
- 4e-05,
1094
- 4e-05,
1095
- 4e-05,
1096
- 4e-05,
1097
- 4e-05,
1098
- 4e-05,
1099
- 4e-05,
1100
- 4e-05,
1101
- 4e-05,
1102
- 4e-05,
1103
- 4e-05,
1104
- 4e-05,
1105
- 4e-05,
1106
- 4e-05,
1107
- 4e-05,
1108
- 4e-05,
1109
- 4e-05,
1110
- 4e-05,
1111
- 4e-05,
1112
- 4e-05,
1113
- 4e-05,
1114
- 4e-05,
1115
- 4e-05,
1116
- 4e-05,
1117
- 4e-05,
1118
- 4e-05,
1119
- 4e-05,
1120
- 4e-05,
1121
- 4e-05,
1122
- 4e-05,
1123
- 4e-05,
1124
- 4e-05,
1125
- 4e-05,
1126
- 4e-05,
1127
- 4e-05,
1128
- 4e-05,
1129
- 4e-05,
1130
- 4e-05,
1131
- 4e-05,
1132
- 4e-05,
1133
- 4e-05,
1134
- 4e-05,
1135
- 4e-05,
1136
- 4e-05,
1137
- 4e-05,
1138
- 4e-05,
1139
- 4e-05,
1140
- 4e-05,
1141
- 4e-05,
1142
- 4e-05,
1143
- 4e-05,
1144
- 4e-05,
1145
- 4e-05,
1146
- 4e-05,
1147
- 4e-05,
1148
- 4e-05,
1149
- 4e-05,
1150
- 4e-05,
1151
- 4e-05,
1152
- 4e-05,
1153
- 4e-05,
1154
- 4e-05,
1155
- 4e-05,
1156
- 4e-05,
1157
- 4e-05,
1158
- 4e-05,
1159
- 4e-05,
1160
- 3.993593461639863e-05,
1161
- 3.934273662008964e-05,
1162
- 3.874953862378065e-05,
1163
- 3.815634062747166e-05,
1164
- 3.7563142631162665e-05,
1165
- 3.696994463485368e-05,
1166
- 3.637674663854469e-05,
1167
- 3.5783548642235693e-05,
1168
- 3.519035064592671e-05,
1169
- 3.459715264961771e-05,
1170
- 3.400395465330873e-05,
1171
- 3.341075665699973e-05,
1172
- 3.281755866069075e-05,
1173
- 3.2224360664381764e-05,
1174
- 3.1251515950435014e-05,
1175
- 3.065831795412602e-05,
1176
- 3.0065119957817037e-05,
1177
- 2.947192196150804e-05,
1178
- 2.887872396519905e-05,
1179
- 2.8285525968890065e-05,
1180
- 2.769232797258107e-05,
1181
- 2.709912997627208e-05,
1182
- 2.6505931979963087e-05,
1183
- 2.5912733983654104e-05,
1184
- 2.5319535987345117e-05,
1185
- 2.472633799103612e-05,
1186
- 2.4133139994727132e-05,
1187
- 2.3539941998418135e-05,
1188
- 2.2946744002109148e-05,
1189
- 2.1973899288162408e-05,
1190
- 2.138070129185341e-05,
1191
- 2.0787503295544424e-05,
1192
- 2.019430529923544e-05,
1193
- 1.9601107302926443e-05,
1194
- 1.900790930661746e-05,
1195
- 1.8414711310308462e-05,
1196
- 1.7821513313999475e-05,
1197
- 1.7228315317690488e-05,
1198
- 1.663511732138149e-05,
1199
- 1.6041919325072507e-05,
1200
- 1.544872132876351e-05,
1201
- 1.4855523332454524e-05,
1202
- 1.4262325336145537e-05,
1203
- 1.366912733983654e-05,
1204
- 1.2696282625889797e-05,
1205
- 1.2103084629580812e-05,
1206
- 1.1509886633271816e-05,
1207
- 1.0916688636962829e-05,
1208
- 1.0323490640653833e-05,
1209
- 9.730292644344846e-06,
1210
- 9.137094648035861e-06,
1211
- 8.543896651726864e-06,
1212
- 7.950698655417878e-06,
1213
- 7.357500659108881e-06,
1214
- 6.764302662799895e-06,
1215
- 6.171104666490909e-06,
1216
- 5.577906670181913e-06,
1217
- 4.984708673872927e-06,
1218
- 4.391510677563931e-06
1219
  ],
1220
  "emb_lr": [],
1221
  "eval_step": [
@@ -1232,18 +1232,18 @@
1232
  ],
1233
  "eval_accuracy": [
1234
  0.0,
1235
- 0.18,
1236
- 0.74,
1237
- 0.94,
1238
- 0.99,
1239
  0.95,
1240
  0.99,
 
 
 
1241
  1.0,
1242
- 0.99,
1243
  1.0
1244
  ]
1245
  },
1246
- "final_accuracy": 0.9983333333333333,
1247
  "sft_eval": {
1248
  "config": {
1249
  "ops": "add_sub",
@@ -1254,21 +1254,21 @@
1254
  },
1255
  "splits": {
1256
  "add_S0": {
1257
- "full_accuracy": 0.98,
1258
  "n_examples": 100,
1259
  "per_subtask": {
1260
  "SA": {
1261
- "accuracy": 0.9983471074380166,
1262
  "count": 605
1263
  },
1264
  "SS": {
1265
- "accuracy": 0.9894736842105263,
1266
  "count": 95
1267
  }
1268
  }
1269
  },
1270
  "add_S1": {
1271
- "full_accuracy": 0.98,
1272
  "n_examples": 100,
1273
  "per_subtask": {
1274
  "SA": {
@@ -1284,13 +1284,13 @@
1284
  "count": 31
1285
  },
1286
  "UC": {
1287
- "accuracy": 1.0,
1288
  "count": 296
1289
  }
1290
  }
1291
  },
1292
  "add_S2": {
1293
- "full_accuracy": 0.96,
1294
  "n_examples": 100,
1295
  "per_subtask": {
1296
  "SA": {
@@ -1298,11 +1298,11 @@
1298
  "count": 163
1299
  },
1300
  "SC": {
1301
- "accuracy": 0.9769230769230769,
1302
  "count": 130
1303
  },
1304
  "SS": {
1305
- "accuracy": 0.9770114942528736,
1306
  "count": 87
1307
  },
1308
  "UC": {
@@ -1316,7 +1316,7 @@
1316
  }
1317
  },
1318
  "add_S3": {
1319
- "full_accuracy": 0.76,
1320
  "n_examples": 100,
1321
  "per_subtask": {
1322
  "SA": {
@@ -1332,7 +1332,7 @@
1332
  "count": 49
1333
  },
1334
  "UC": {
1335
- "accuracy": 0.8709677419354839,
1336
  "count": 186
1337
  },
1338
  "US": {
@@ -1342,7 +1342,7 @@
1342
  }
1343
  },
1344
  "add_S4": {
1345
- "full_accuracy": 0.55,
1346
  "n_examples": 100,
1347
  "per_subtask": {
1348
  "SA": {
@@ -1358,17 +1358,17 @@
1358
  "count": 23
1359
  },
1360
  "UC": {
1361
- "accuracy": 0.8,
1362
  "count": 160
1363
  },
1364
  "US": {
1365
- "accuracy": 0.9022801302931596,
1366
  "count": 307
1367
  }
1368
  }
1369
  },
1370
  "add_S5": {
1371
- "full_accuracy": 0.32,
1372
  "n_examples": 100,
1373
  "per_subtask": {
1374
  "SA": {
@@ -1380,17 +1380,17 @@
1380
  "count": 100
1381
  },
1382
  "UC": {
1383
- "accuracy": 0.49,
1384
  "count": 100
1385
  },
1386
  "US": {
1387
- "accuracy": 0.6975,
1388
  "count": 400
1389
  }
1390
  }
1391
  },
1392
  "add_S6": {
1393
- "full_accuracy": 0.61,
1394
  "n_examples": 100,
1395
  "per_subtask": {
1396
  "SC": {
@@ -1398,17 +1398,17 @@
1398
  "count": 100
1399
  },
1400
  "UC": {
1401
- "accuracy": 0.67,
1402
  "count": 100
1403
  },
1404
  "US": {
1405
- "accuracy": 0.782,
1406
  "count": 500
1407
  }
1408
  }
1409
  },
1410
  "add_random": {
1411
- "full_accuracy": 0.975,
1412
  "n_examples": 200,
1413
  "per_subtask": {
1414
  "SA": {
@@ -1424,7 +1424,7 @@
1424
  "count": 56
1425
  },
1426
  "UC": {
1427
- "accuracy": 0.9924385633270322,
1428
  "count": 529
1429
  },
1430
  "US": {
@@ -1434,7 +1434,7 @@
1434
  }
1435
  },
1436
  "add_C3": {
1437
- "full_accuracy": 0.83,
1438
  "n_examples": 100,
1439
  "per_subtask": {
1440
  "SA": {
@@ -1446,17 +1446,17 @@
1446
  "count": 100
1447
  },
1448
  "UC": {
1449
- "accuracy": 0.9119170984455959,
1450
  "count": 193
1451
  },
1452
  "US": {
1453
- "accuracy": 0.9626168224299065,
1454
  "count": 107
1455
  }
1456
  }
1457
  },
1458
  "add_C4": {
1459
- "full_accuracy": 0.8,
1460
  "n_examples": 100,
1461
  "per_subtask": {
1462
  "SA": {
@@ -1468,17 +1468,17 @@
1468
  "count": 100
1469
  },
1470
  "UC": {
1471
- "accuracy": 0.92578125,
1472
  "count": 256
1473
  },
1474
  "US": {
1475
- "accuracy": 0.9513888888888888,
1476
  "count": 144
1477
  }
1478
  }
1479
  },
1480
  "add_C5": {
1481
- "full_accuracy": 0.84,
1482
  "n_examples": 100,
1483
  "per_subtask": {
1484
  "SA": {
@@ -1490,17 +1490,17 @@
1490
  "count": 100
1491
  },
1492
  "UC": {
1493
- "accuracy": 0.9575163398692811,
1494
  "count": 306
1495
  },
1496
  "US": {
1497
- "accuracy": 0.9639175257731959,
1498
  "count": 194
1499
  }
1500
  }
1501
  },
1502
  "add_C6": {
1503
- "full_accuracy": 0.8,
1504
  "n_examples": 100,
1505
  "per_subtask": {
1506
  "SC": {
@@ -1508,21 +1508,21 @@
1508
  "count": 100
1509
  },
1510
  "UC": {
1511
- "accuracy": 0.9480874316939891,
1512
  "count": 366
1513
  },
1514
  "US": {
1515
- "accuracy": 0.9914529914529915,
1516
  "count": 234
1517
  }
1518
  }
1519
  },
1520
  "sub_M0": {
1521
- "full_accuracy": 0.97,
1522
  "n_examples": 100,
1523
  "per_subtask": {
1524
  "MD": {
1525
- "accuracy": 0.9950083194675541,
1526
  "count": 601
1527
  },
1528
  "ME": {
@@ -1532,7 +1532,7 @@
1532
  }
1533
  },
1534
  "sub_M1": {
1535
- "full_accuracy": 0.98,
1536
  "n_examples": 100,
1537
  "per_subtask": {
1538
  "MD": {
@@ -1540,7 +1540,7 @@
1540
  "count": 279
1541
  },
1542
  "MB": {
1543
- "accuracy": 0.993103448275862,
1544
  "count": 145
1545
  },
1546
  "ME": {
@@ -1580,7 +1580,7 @@
1580
  }
1581
  },
1582
  "sub_M3": {
1583
- "full_accuracy": 0.54,
1584
  "n_examples": 100,
1585
  "per_subtask": {
1586
  "MD": {
@@ -1596,7 +1596,7 @@
1596
  "count": 56
1597
  },
1598
  "UB": {
1599
- "accuracy": 0.6912751677852349,
1600
  "count": 149
1601
  },
1602
  "UD": {
@@ -1606,7 +1606,7 @@
1606
  }
1607
  },
1608
  "sub_M4": {
1609
- "full_accuracy": 0.16,
1610
  "n_examples": 100,
1611
  "per_subtask": {
1612
  "MD": {
@@ -1618,17 +1618,17 @@
1618
  "count": 100
1619
  },
1620
  "UB": {
1621
- "accuracy": 0.38,
1622
  "count": 100
1623
  },
1624
  "UD": {
1625
- "accuracy": 0.7733333333333333,
1626
  "count": 300
1627
  }
1628
  }
1629
  },
1630
  "sub_M5": {
1631
- "full_accuracy": 0.02,
1632
  "n_examples": 100,
1633
  "per_subtask": {
1634
  "MD": {
@@ -1640,25 +1640,25 @@
1640
  "count": 100
1641
  },
1642
  "UB": {
1643
- "accuracy": 0.28,
1644
  "count": 100
1645
  },
1646
  "UD": {
1647
- "accuracy": 0.5325,
1648
  "count": 400
1649
  }
1650
  }
1651
  },
1652
  "sub_random": {
1653
- "full_accuracy": 0.985,
1654
  "n_examples": 200,
1655
  "per_subtask": {
1656
  "MD": {
1657
- "accuracy": 0.9983333333333333,
1658
  "count": 600
1659
  },
1660
  "MB": {
1661
- "accuracy": 0.9962546816479401,
1662
  "count": 267
1663
  },
1664
  "ME": {
@@ -1676,11 +1676,11 @@
1676
  }
1677
  },
1678
  "sub_B3": {
1679
- "full_accuracy": 0.94,
1680
  "n_examples": 100,
1681
  "per_subtask": {
1682
  "MD": {
1683
- "accuracy": 1.0,
1684
  "count": 300
1685
  },
1686
  "MB": {
@@ -1688,7 +1688,7 @@
1688
  "count": 100
1689
  },
1690
  "UB": {
1691
- "accuracy": 0.9695431472081218,
1692
  "count": 197
1693
  },
1694
  "UD": {
@@ -1698,11 +1698,11 @@
1698
  }
1699
  },
1700
  "sub_B4": {
1701
- "full_accuracy": 0.73,
1702
  "n_examples": 100,
1703
  "per_subtask": {
1704
  "MD": {
1705
- "accuracy": 1.0,
1706
  "count": 200
1707
  },
1708
  "MB": {
@@ -1710,17 +1710,17 @@
1710
  "count": 100
1711
  },
1712
  "UB": {
1713
- "accuracy": 0.8906882591093117,
1714
  "count": 247
1715
  },
1716
  "UD": {
1717
- "accuracy": 0.9477124183006536,
1718
  "count": 153
1719
  }
1720
  }
1721
  },
1722
  "sub_B5": {
1723
- "full_accuracy": 0.69,
1724
  "n_examples": 100,
1725
  "per_subtask": {
1726
  "MD": {
@@ -1732,18 +1732,18 @@
1732
  "count": 100
1733
  },
1734
  "UB": {
1735
- "accuracy": 0.9194630872483222,
1736
  "count": 298
1737
  },
1738
  "UD": {
1739
- "accuracy": 0.8960396039603961,
1740
  "count": 202
1741
  }
1742
  }
1743
  }
1744
  },
1745
  "summary": {
1746
- "overall_accuracy": 0.7658333333333334,
1747
  "total_examples": 2400,
1748
  "n_splits": 22
1749
  }
@@ -1872,7 +1872,7 @@
1872
  }
1873
  },
1874
  "add_S5": {
1875
- "full_accuracy": 0.99,
1876
  "n_examples": 100,
1877
  "per_subtask": {
1878
  "SA": {
@@ -1884,7 +1884,7 @@
1884
  "count": 100
1885
  },
1886
  "UC": {
1887
- "accuracy": 0.99,
1888
  "count": 100
1889
  },
1890
  "US": {
@@ -2110,7 +2110,7 @@
2110
  }
2111
  },
2112
  "sub_M4": {
2113
- "full_accuracy": 0.99,
2114
  "n_examples": 100,
2115
  "per_subtask": {
2116
  "MD": {
@@ -2122,7 +2122,7 @@
2122
  "count": 100
2123
  },
2124
  "UB": {
2125
- "accuracy": 0.99,
2126
  "count": 100
2127
  },
2128
  "UD": {
@@ -2132,7 +2132,7 @@
2132
  }
2133
  },
2134
  "sub_M5": {
2135
- "full_accuracy": 0.99,
2136
  "n_examples": 100,
2137
  "per_subtask": {
2138
  "MD": {
@@ -2144,7 +2144,7 @@
2144
  "count": 100
2145
  },
2146
  "UB": {
2147
- "accuracy": 0.99,
2148
  "count": 100
2149
  },
2150
  "UD": {
@@ -2247,11 +2247,11 @@
2247
  }
2248
  },
2249
  "summary": {
2250
- "overall_accuracy": 0.9983333333333333,
2251
  "total_examples": 2400,
2252
  "n_splits": 22
2253
  }
2254
  },
2255
- "sorl_overall_accuracy": 0.9983333333333333,
2256
- "sft_overall_accuracy": 0.7658333333333334
2257
  }
 
153
  7788
154
  ],
155
  "loss": [
156
+ 12.67892837524414,
157
+ 8.191485404968262,
158
+ 4.352431774139404,
159
+ 2.522091865539551,
160
+ 2.2780187129974365,
161
+ 2.380795955657959,
162
+ 2.0039758682250977,
163
+ 1.9988082647323608,
164
+ 1.7572784423828125,
165
+ 1.3259050846099854,
166
+ 1.6416562795639038,
167
+ 1.219172477722168,
168
+ 1.2386716604232788,
169
+ 0.8806266784667969,
170
+ -0.20963618159294128,
171
+ -9.859777450561523,
172
+ -9.113763809204102,
173
+ -9.5258150100708,
174
+ -10.300243377685547,
175
+ -9.634173393249512,
176
+ -10.759025573730469,
177
+ -10.662871360778809,
178
+ -11.247064590454102,
179
+ -10.378228187561035,
180
+ -11.903186798095703,
181
+ -11.019442558288574,
182
+ -11.83310317993164,
183
+ -10.5678129196167,
184
+ -11.108699798583984,
185
+ -10.90021800994873,
186
+ -11.947670936584473,
187
+ -11.865416526794434,
188
+ -10.908041954040527,
189
+ -12.68329906463623,
190
+ -12.241023063659668,
191
+ -11.298798561096191,
192
+ -12.071434020996094,
193
+ -12.021255493164062,
194
+ -11.684292793273926,
195
+ -12.938379287719727,
196
+ -12.888108253479004,
197
+ -13.309358596801758,
198
+ -12.430334091186523,
199
+ -12.229331970214844,
200
+ -12.266661643981934,
201
+ -13.700284004211426,
202
+ -14.133016586303711,
203
+ -13.916891098022461,
204
+ -12.834698677062988,
205
+ -12.934271812438965,
206
+ -12.887510299682617,
207
+ -11.524942398071289,
208
+ -12.328432083129883,
209
+ -10.240029335021973,
210
+ -10.574095726013184,
211
+ -9.970398902893066,
212
+ -8.172858238220215,
213
+ -6.089545726776123,
214
+ -5.253147602081299,
215
+ -5.1576738357543945,
216
+ -4.68107795715332,
217
+ -3.675668954849243,
218
+ -3.848482608795166,
219
+ -3.109677314758301,
220
+ -2.806471586227417,
221
+ -3.232398509979248,
222
+ -3.178936004638672,
223
+ -2.6559717655181885,
224
+ -3.095219373703003,
225
+ -2.731898546218872,
226
+ -1.9003783464431763,
227
+ -2.4217422008514404,
228
+ -1.971528172492981,
229
+ -2.045112371444702,
230
+ -2.311786413192749,
231
+ -1.8736507892608643,
232
+ -1.5437003374099731,
233
+ -2.029801368713379,
234
+ -1.5818710327148438,
235
+ -1.4641690254211426,
236
+ -1.4351928234100342,
237
+ -1.4494510889053345,
238
+ -1.6452739238739014,
239
+ -1.4723519086837769,
240
+ -1.5316369533538818,
241
+ -1.464418649673462,
242
+ -0.7855635285377502,
243
+ -1.0341215133666992,
244
+ -1.2656478881835938,
245
+ -1.4580594301223755,
246
+ -0.8084070682525635,
247
+ -0.99681556224823,
248
+ -1.617496132850647,
249
+ -1.0002158880233765,
250
+ -1.0948678255081177,
251
+ -1.0256470441818237,
252
+ -0.6143571138381958,
253
+ -0.9969443082809448,
254
+ -0.5588001608848572,
255
+ -1.7324066162109375,
256
+ -1.3033020496368408,
257
+ -0.5088945627212524,
258
+ -0.9286077618598938,
259
+ -0.32349681854248047,
260
+ -0.5033729672431946,
261
+ -0.47450047731399536,
262
+ -0.4403214752674103,
263
+ -0.5367404818534851,
264
+ -0.3997519612312317,
265
+ -0.7537015080451965,
266
+ -0.39303842186927795,
267
+ -0.40187329053878784,
268
+ -0.5620273351669312,
269
+ -0.4098934233188629,
270
+ -0.3252432942390442,
271
+ -0.44802966713905334,
272
+ -0.5599377155303955,
273
+ -0.33734801411628723,
274
+ -0.37426117062568665,
275
+ -0.5350562334060669,
276
+ -0.36393025517463684,
277
+ -0.3666161298751831,
278
+ -0.38534849882125854,
279
+ -0.305103063583374,
280
+ -0.5940544009208679,
281
+ -0.19180399179458618,
282
+ -0.4214673638343811,
283
+ -0.12443143129348755,
284
+ -0.2960967719554901,
285
+ -0.3407119810581207,
286
+ -0.24766972661018372,
287
+ -0.157465860247612,
288
+ -0.012223627418279648,
289
+ -0.2642403244972229,
290
+ -0.2627325654029846,
291
+ -0.10439814627170563,
292
+ -0.09931731224060059,
293
+ -0.23622454702854156,
294
+ -0.2288854718208313,
295
+ -0.07412329316139221,
296
+ -0.2846939265727997,
297
+ -0.1377047598361969,
298
+ 0.013486653566360474,
299
+ -0.14698845148086548,
300
+ -0.11050604283809662,
301
+ -0.073063924908638,
302
+ -0.08298930525779724,
303
+ -0.02777700126171112,
304
+ -0.09498019516468048,
305
+ -0.011802252382040024
306
  ],
307
  "base_loss": [
308
+ 8.175139427185059,
309
+ 6.075762748718262,
310
+ 3.783433198928833,
311
+ 2.0279407501220703,
312
+ 1.9594535827636719,
313
+ 1.8934462070465088,
314
+ 1.8501828908920288,
315
+ 1.903078317642212,
316
+ 1.8371549844741821,
317
+ 1.7587207555770874,
318
+ 1.7393006086349487,
319
+ 1.6968368291854858,
320
+ 1.743001937866211,
321
+ 1.7118394374847412,
322
+ 1.7587076425552368,
323
+ 2.1217286586761475,
324
+ 1.7817907333374023,
325
+ 1.7153218984603882,
326
+ 1.7984426021575928,
327
+ 1.675990343093872,
328
+ 1.6814289093017578,
329
+ 1.6889286041259766,
330
+ 1.7132903337478638,
331
+ 1.5579944849014282,
332
+ 1.7281142473220825,
333
+ 1.638057827949524,
334
+ 1.6829408407211304,
335
+ 1.6178058385849,
336
+ 1.5718839168548584,
337
+ 1.5950709581375122,
338
+ 1.6310195922851562,
339
+ 1.622833490371704,
340
+ 1.525478720664978,
341
+ 1.6557903289794922,
342
+ 1.5996358394622803,
343
+ 1.524327278137207,
344
+ 1.6040916442871094,
345
+ 1.53284752368927,
346
+ 1.466318130493164,
347
+ 1.5987164974212646,
348
+ 1.6175482273101807,
349
+ 1.628091812133789,
350
+ 1.5662003755569458,
351
+ 1.4833447933197021,
352
+ 1.5381290912628174,
353
+ 1.600269079208374,
354
+ 1.673256754875183,
355
+ 1.6239449977874756,
356
+ 1.4986480474472046,
357
+ 1.5241094827651978,
358
+ 1.4789596796035767,
359
+ 1.3508294820785522,
360
+ 1.420906901359558,
361
+ 1.217974305152893,
362
+ 1.2073873281478882,
363
+ 1.1402941942214966,
364
+ 0.9508804082870483,
365
+ 0.724185585975647,
366
+ 0.6423880457878113,
367
+ 0.610244870185852,
368
+ 0.5481444597244263,
369
+ 0.4364292323589325,
370
+ 0.44845670461654663,
371
+ 0.3712286353111267,
372
+ 0.3391755521297455,
373
+ 0.38700053095817566,
374
+ 0.37053346633911133,
375
+ 0.32595524191856384,
376
+ 0.3669354021549225,
377
+ 0.3271975517272949,
378
+ 0.2374112904071808,
379
+ 0.2933013439178467,
380
+ 0.2643923759460449,
381
+ 0.27279627323150635,
382
+ 0.27454128861427307,
383
+ 0.23557938635349274,
384
+ 0.18664655089378357,
385
+ 0.23771674931049347,
386
+ 0.19177906215190887,
387
+ 0.18533799052238464,
388
+ 0.16908080875873566,
389
+ 0.1830829232931137,
390
+ 0.1944981813430786,
391
+ 0.178822860121727,
392
+ 0.17905712127685547,
393
+ 0.1718132644891739,
394
+ 0.09683103859424591,
395
+ 0.12245485931634903,
396
+ 0.14752057194709778,
397
+ 0.17010627686977386,
398
+ 0.09792697429656982,
399
+ 0.12507063150405884,
400
+ 0.2109687775373459,
401
+ 0.11979462206363678,
402
+ 0.12996187806129456,
403
+ 0.1257127821445465,
404
+ 0.10164304822683334,
405
+ 0.11947854608297348,
406
+ 0.06947648525238037,
407
+ 0.20376794040203094,
408
+ 0.15444506704807281,
409
+ 0.06554622203111649,
410
+ 0.11583036929368973,
411
+ 0.04441644623875618,
412
+ 0.08033192157745361,
413
+ 0.05915657430887222,
414
+ 0.057279665023088455,
415
+ 0.0654909536242485,
416
+ 0.05282890051603317,
417
+ 0.09097499400377274,
418
+ 0.050892170518636703,
419
+ 0.054239530116319656,
420
+ 0.06922085583209991,
421
+ 0.0526835136115551,
422
+ 0.04487794265151024,
423
+ 0.05662527307868004,
424
+ 0.06981002539396286,
425
+ 0.04629513993859291,
426
+ 0.048202354460954666,
427
+ 0.06608594208955765,
428
+ 0.04808187484741211,
429
+ 0.05681002140045166,
430
+ 0.04994560033082962,
431
+ 0.04187649488449097,
432
+ 0.07288409769535065,
433
+ 0.027385542169213295,
434
+ 0.052197426557540894,
435
+ 0.029422065243124962,
436
+ 0.041962604969739914,
437
+ 0.04330725595355034,
438
+ 0.03315514698624611,
439
+ 0.023204902186989784,
440
+ 0.02981250360608101,
441
+ 0.03681867569684982,
442
+ 0.03531672805547714,
443
+ 0.020786035805940628,
444
+ 0.018126586452126503,
445
+ 0.03248458728194237,
446
+ 0.03152026981115341,
447
+ 0.013861392624676228,
448
+ 0.03954928368330002,
449
+ 0.022461380809545517,
450
+ 0.00311906635761261,
451
+ 0.020407771691679955,
452
+ 0.017763862386345863,
453
+ 0.013093484565615654,
454
+ 0.014994284138083458,
455
+ 0.009757052175700665,
456
+ 0.02090434916317463,
457
+ 0.007689598947763443
458
  ],
459
  "info_loss": [
460
+ -0.1406078338623047,
461
+ -0.09821081161499023,
462
+ -0.07674527168273926,
463
+ -0.04948115348815918,
464
+ -0.06285381317138672,
465
+ -0.0457533597946167,
466
+ -0.07926464080810547,
467
+ -0.08498406410217285,
468
+ -0.10213601589202881,
469
+ -0.13344013690948486,
470
+ -0.08259034156799316,
471
+ -0.09507668018341064,
472
+ -0.08322620391845703,
473
+ -0.11139428615570068,
474
+ -0.2232828140258789,
475
+ -1.2267482280731201,
476
+ -1.113345742225647,
477
+ -1.1485538482666016,
478
+ -1.2319607734680176,
479
+ -1.1535847187042236,
480
+ -1.264868140220642,
481
+ -1.2549021244049072,
482
+ -1.3155022859573364,
483
+ -1.2114543914794922,
484
+ -1.3819411993026733,
485
+ -1.2823961973190308,
486
+ -1.3693645000457764,
487
+ -1.2348055839538574,
488
+ -1.2836614847183228,
489
+ -1.2640974521636963,
490
+ -1.372936487197876,
491
+ -1.363095760345459,
492
+ -1.25725519657135,
493
+ -1.4489041566848755,
494
+ -1.3969898223876953,
495
+ -1.2950832843780518,
496
+ -1.38006591796875,
497
+ -1.3681342601776123,
498
+ -1.3281453847885132,
499
+ -1.4650731086730957,
500
+ -1.4617775678634644,
501
+ -1.503858208656311,
502
+ -1.4098341464996338,
503
+ -1.381147861480713,
504
+ -1.3913785219192505,
505
+ -1.5395087003707886,
506
+ -1.589634656906128,
507
+ -1.5628442764282227,
508
+ -1.4419020414352417,
509
+ -1.4551329612731934,
510
+ -1.4442074298858643,
511
+ -1.2961931228637695,
512
+ -1.382449746131897,
513
+ -1.1550946235656738,
514
+ -1.1871305704116821,
515
+ -1.11911940574646,
516
+ -0.9211147427558899,
517
+ -0.6894612908363342,
518
+ -0.5988896489143372,
519
+ -0.5864489078521729,
520
+ -0.5306638479232788,
521
+ -0.41922348737716675,
522
+ -0.43873268365859985,
523
+ -0.35574495792388916,
524
+ -0.3218325078487396,
525
+ -0.3698967695236206,
526
+ -0.3617374002933502,
527
+ -0.30592790246009827,
528
+ -0.35313671827316284,
529
+ -0.3120341897010803,
530
+ -0.22083844244480133,
531
+ -0.2789214253425598,
532
+ -0.23039886355400085,
533
+ -0.2383309006690979,
534
+ -0.2649160921573639,
535
+ -0.21758969128131866,
536
+ -0.17926090955734253,
537
+ -0.23309476673603058,
538
+ -0.1844792366027832,
539
+ -0.1725887954235077,
540
+ -0.1671394556760788,
541
+ -0.1700686663389206,
542
+ -0.1907423436641693,
543
+ -0.17181335389614105,
544
+ -0.17715275287628174,
545
+ -0.16998039186000824,
546
+ -0.09564366936683655,
547
+ -0.12137357145547867,
548
+ -0.14595358073711395,
549
+ -0.169093519449234,
550
+ -0.0965929850935936,
551
+ -0.11820736527442932,
552
+ -0.18986539542675018,
553
+ -0.11808352172374725,
554
+ -0.1273777186870575,
555
+ -0.12252979725599289,
556
+ -0.07934901863336563,
557
+ -0.11888580024242401,
558
+ -0.06833749264478683,
559
+ -0.19960084557533264,
560
+ -0.15267471969127655,
561
+ -0.06464723497629166,
562
+ -0.11036423593759537,
563
+ -0.041933849453926086,
564
+ -0.06360318511724472,
565
+ -0.0584120973944664,
566
+ -0.056712184101343155,
567
+ -0.06440874189138412,
568
+ -0.052568674087524414,
569
+ -0.08996984362602234,
570
+ -0.048663992434740067,
571
+ -0.05403685197234154,
572
+ -0.06870617717504501,
573
+ -0.05249454826116562,
574
+ -0.04473206400871277,
575
+ -0.056433867663145065,
576
+ -0.0695694237947464,
577
+ -0.045398302376270294,
578
+ -0.04746449738740921,
579
+ -0.06530661135911942,
580
+ -0.047672662883996964,
581
+ -0.04788694903254509,
582
+ -0.04977806285023689,
583
+ -0.040316712111234665,
584
+ -0.07268588244915009,
585
+ -0.027013925835490227,
586
+ -0.05200999230146408,
587
+ -0.021213047206401825,
588
+ -0.041462358087301254,
589
+ -0.04304017499089241,
590
+ -0.032975614070892334,
591
+ -0.02306353859603405,
592
+ -0.011100322008132935,
593
+ -0.03651665151119232,
594
+ -0.03525616601109505,
595
+ -0.0206893440335989,
596
+ -0.017962217330932617,
597
+ -0.0323893241584301,
598
+ -0.03146279975771904,
599
+ -0.013802221976220608,
600
+ -0.03790447488427162,
601
+ -0.02238229103386402,
602
+ -0.003057879628613591,
603
+ -0.0203342754393816,
604
+ -0.017628828063607216,
605
+ -0.013046156615018845,
606
+ -0.014944502152502537,
607
+ -0.009699908085167408,
608
+ -0.016671348363161087,
609
+ -0.007629582192748785
610
  ],
611
  "abs_loss": [
612
+ 3.8940813541412354,
613
+ 3.7336649894714355,
614
+ 3.1092171669006348,
615
+ 2.842017889022827,
616
+ 2.721194267272949,
617
+ 2.7295258045196533,
618
+ 2.8091413974761963,
619
+ 2.8224925994873047,
620
+ 2.7906839847564697,
621
+ 2.74942684173584,
622
+ 2.5859627723693848,
623
+ 2.0573041439056396,
624
+ 1.4365499019622803,
625
+ 1.277630090713501,
626
+ 1.2900981903076172,
627
+ 1.6139904260635376,
628
+ 1.0711069107055664,
629
+ 1.2892038822174072,
630
+ 1.235854148864746,
631
+ 1.1754491329193115,
632
+ 1.2126238346099854,
633
+ 1.0512967109680176,
634
+ 1.0807137489318848,
635
+ 1.055290937423706,
636
+ 1.098618745803833,
637
+ 0.9691460132598877,
638
+ 1.195742130279541,
639
+ 0.9021354913711548,
640
+ 0.815509557723999,
641
+ 0.901701807975769,
642
+ 1.0237162113189697,
643
+ 0.9478892683982849,
644
+ 0.9123111963272095,
645
+ 1.037805438041687,
646
+ 0.8250418901443481,
647
+ 0.7932180166244507,
648
+ 0.87883460521698,
649
+ 0.7833858132362366,
650
+ 0.7834078073501587,
651
+ 0.7488945722579956,
652
+ 0.6982825398445129,
653
+ 0.6155017018318176,
654
+ 0.7811354994773865,
655
+ 0.7143194675445557,
656
+ 0.5621134042739868,
657
+ 0.5227675437927246,
658
+ 0.5837538242340088,
659
+ 0.5188642740249634,
660
+ 0.545297384262085,
661
+ 0.4773949682712555,
662
+ 0.49671515822410583,
663
+ 0.4398581385612488,
664
+ 0.48808902502059937,
665
+ 0.517977774143219,
666
+ 0.5309848189353943,
667
+ 0.45294398069381714,
668
+ 0.4019848704338074,
669
+ 0.519283652305603,
670
+ 0.3544163703918457,
671
+ 0.33730629086494446,
672
+ 0.2914344072341919,
673
+ 0.426680326461792,
674
+ 0.3723861575126648,
675
+ 0.37098586559295654,
676
+ 0.3665745258331299,
677
+ 0.3464042544364929,
678
+ 0.3030303120613098,
679
+ 0.31845754384994507,
680
+ 0.2541038393974304,
681
+ 0.2829054594039917,
682
+ 0.34755414724349976,
683
+ 0.41225311160087585,
684
+ 0.3649803400039673,
685
+ 0.2859313189983368,
686
+ 0.25069183111190796,
687
+ 0.2457413375377655,
688
+ 0.3258231282234192,
689
+ 0.26403719186782837,
690
+ 0.23960518836975098,
691
+ 0.25264933705329895,
692
+ 0.3009341359138489,
693
+ 0.23419302701950073,
694
+ 0.2310016006231308,
695
+ 0.25005489587783813,
696
+ 0.2157013714313507,
697
+ 0.24478872120380402,
698
+ 0.18399158120155334,
699
+ 0.25713664293289185,
700
+ 0.20990487933158875,
701
+ 0.1861613243818283,
702
+ 0.18391621112823486,
703
+ 0.21477115154266357,
704
+ 0.1756722778081894,
705
+ 0.21409966051578522,
706
+ 0.23994490504264832,
707
+ 0.22935672104358673,
708
+ 0.2253214418888092,
709
+ 0.1604989767074585,
710
+ 0.19127511978149414,
711
+ 0.22364018857479095,
712
+ 0.19713473320007324,
713
+ 0.16757990419864655,
714
+ 0.1519395262002945,
715
+ 0.1956753432750702,
716
+ 0.1744147539138794,
717
+ 0.2321409285068512,
718
+ 0.13768763840198517,
719
+ 0.23297205567359924,
720
+ 0.14285501837730408,
721
+ 0.1547240912914276,
722
+ 0.18175172805786133,
723
+ 0.16273121535778046,
724
+ 0.1803196370601654,
725
+ 0.18007409572601318,
726
+ 0.15982943773269653,
727
+ 0.23455564677715302,
728
+ 0.17059823870658875,
729
+ 0.13607117533683777,
730
+ 0.1691424548625946,
731
+ 0.1645020842552185,
732
+ 0.1407187283039093,
733
+ 0.15654246509075165,
734
+ 0.16038723289966583,
735
+ 0.16506516933441162,
736
+ 0.131463885307312,
737
+ 0.1274046152830124,
738
+ 0.1848076581954956,
739
+ 0.1635836660861969,
740
+ 0.1434631198644638,
741
+ 0.16331462562084198,
742
+ 0.12345422804355621,
743
+ 0.1436048448085785,
744
+ 0.19684480130672455,
745
+ 0.16364730894565582,
746
+ 0.16607016324996948,
747
+ 0.20140975713729858,
748
+ 0.08408752083778381,
749
+ 0.1640971601009369,
750
+ 0.15419058501720428,
751
+ 0.12306832522153854,
752
+ 0.0888054147362709,
753
+ 0.10666976869106293,
754
+ 0.19911688566207886,
755
+ 0.143904447555542,
756
+ 0.12154616415500641,
757
+ 0.11547724157571793,
758
+ 0.13714294135570526,
759
+ 0.12812788784503937,
760
+ 0.16408056020736694,
761
+ 0.1393263339996338
762
  ],
763
  "zipf_loss": [
764
+ 5.520458698272705,
765
+ 2.724464178085327,
766
+ 1.0255296230316162,
767
+ 0.7047606706619263,
768
+ 0.6749839186668396,
769
+ 0.6719306707382202,
770
+ 0.6655250787734985,
771
+ 0.6633213758468628,
772
+ 0.6624152064323425,
773
+ 0.6266430616378784,
774
+ 0.4696628153324127,
775
+ 0.2673720121383667,
776
+ 0.18427684903144836,
777
+ 0.1549670696258545,
778
+ 0.13547447323799133,
779
+ 0.12457747757434845,
780
+ 0.13079211115837097,
781
+ 0.11548108607530594,
782
+ 0.09733611345291138,
783
+ 0.1081380546092987,
784
+ 0.08696448802947998,
785
+ 0.0920911505818367,
786
+ 0.0865960344672203,
787
+ 0.0727919340133667,
788
+ 0.07824882119894028,
789
+ 0.06954751908779144,
790
+ 0.05802743881940842,
791
+ 0.07222349941730499,
792
+ 0.07448042929172516,
793
+ 0.05551524832844734,
794
+ 0.0483030267059803,
795
+ 0.04791921377182007,
796
+ 0.04779967665672302,
797
+ 0.04617128521203995,
798
+ 0.04673522710800171,
799
+ 0.04838429391384125,
800
+ 0.037250399589538574,
801
+ 0.04890108108520508,
802
+ 0.05250226706266403,
803
+ 0.03874621540307999,
804
+ 0.0422917976975441,
805
+ 0.039581410586833954,
806
+ 0.023693282157182693,
807
+ 0.027369800955057144,
808
+ 0.05278322100639343,
809
+ 0.04225717484951019,
810
+ 0.03169860690832138,
811
+ 0.03571942821145058,
812
+ 0.031144462525844574,
813
+ 0.04520869255065918,
814
+ 0.025932321324944496,
815
+ 0.04217460751533508,
816
+ 0.026349296793341637,
817
+ 0.041144728660583496,
818
+ 0.03672437369823456,
819
+ 0.03520722687244415,
820
+ 0.04721111059188843,
821
+ 0.028953010216355324,
822
+ 0.057918816804885864,
823
+ 0.06283990293741226,
824
+ 0.04827304184436798,
825
+ 0.03746875375509262,
826
+ 0.0531487762928009,
827
+ 0.03944481164216995,
828
+ 0.036020535975694656,
829
+ 0.04492820054292679,
830
+ 0.0376015342772007,
831
+ 0.04550633579492569,
832
+ 0.04380195587873459,
833
+ 0.03295513242483139,
834
+ 0.035839494317770004,
835
+ 0.032945387065410614,
836
+ 0.03157005086541176,
837
+ 0.03680720180273056,
838
+ 0.03776375204324722,
839
+ 0.04209259897470474,
840
+ 0.02967984601855278,
841
+ 0.03702588379383087,
842
+ 0.047181881964206696,
843
+ 0.0511159673333168,
844
+ 0.03702745959162712,
845
+ 0.04473336040973663,
846
+ 0.04455123096704483,
847
+ 0.04195331037044525,
848
+ 0.039263349026441574,
849
+ 0.03909315913915634,
850
+ 0.05564294755458832,
851
+ 0.03144568204879761,
852
+ 0.025376860052347183,
853
+ 0.04415329545736313,
854
+ 0.04120419919490814,
855
+ 0.03871038556098938,
856
+ 0.05262188985943794,
857
+ 0.03941471129655838,
858
+ 0.024953138083219528,
859
+ 0.05100245401263237,
860
+ 0.054957855492830276,
861
+ 0.056385260075330734,
862
+ 0.035970814526081085,
863
+ 0.037469856441020966,
864
+ 0.049286577850580215,
865
+ 0.05527356639504433,
866
+ 0.04401029646396637,
867
+ 0.031857676804065704,
868
+ 0.034885551780462265,
869
+ 0.02724980190396309,
870
+ 0.0557519905269146,
871
+ 0.01855882629752159,
872
+ 0.05882035940885544,
873
+ 0.03954955190420151,
874
+ 0.02453412488102913,
875
+ 0.06798255443572998,
876
+ 0.03778166323900223,
877
+ 0.0443611666560173,
878
+ 0.06121644005179405,
879
+ 0.03622821718454361,
880
+ 0.04888666421175003,
881
+ 0.056732747703790665,
882
+ 0.0352671816945076,
883
+ 0.03547373414039612,
884
+ 0.05064262077212334,
885
+ 0.0397891066968441,
886
+ 0.04644780606031418,
887
+ 0.039681047201156616,
888
+ 0.04677397012710571,
889
+ 0.03820924460887909,
890
+ 0.027954362332820892,
891
+ 0.041918620467185974,
892
+ 0.062217921018600464,
893
+ 0.030051065608859062,
894
+ 0.03658584505319595,
895
+ 0.03560413420200348,
896
+ 0.04928261414170265,
897
+ 0.047742798924446106,
898
+ 0.037905339151620865,
899
+ 0.06156828626990318,
900
+ 0.053769513964653015,
901
+ 0.03877438232302666,
902
+ 0.038803182542324066,
903
+ 0.03773069381713867,
904
+ 0.0459209568798542,
905
+ 0.05298977717757225,
906
+ 0.021034693345427513,
907
+ 0.02155609056353569,
908
+ 0.03586374223232269,
909
+ 0.032756440341472626,
910
+ 0.03774715214967728,
911
+ 0.04665224254131317,
912
+ 0.03442087769508362,
913
+ 0.04287134110927582
914
  ],
915
  "denoise_loss": [],
916
  "ortho_loss": [
917
+ 0.47937828302383423,
918
+ 0.1507955640554428,
919
+ 0.059393465518951416,
920
+ 0.04442813619971275,
921
+ 0.0324716791510582,
922
+ 0.026790229603648186,
923
+ 0.02599812112748623,
924
+ 0.026838652789592743,
925
+ 0.03308980539441109,
926
+ 0.03936305269598961,
927
+ 0.046892400830984116,
928
+ 0.054657381027936935,
929
+ 0.06615152955055237,
930
+ 0.07190698385238647,
931
+ 0.08240365236997604,
932
+ 0.09521664679050446,
933
+ 0.10861148685216904,
934
+ 0.1214362159371376,
935
+ 0.1258803755044937,
936
+ 0.1299659013748169,
937
+ 0.1402484029531479,
938
+ 0.14295075833797455,
939
+ 0.149370938539505,
940
+ 0.15252968668937683,
941
+ 0.1605893224477768,
942
+ 0.16262884438037872,
943
+ 0.1668718159198761,
944
+ 0.1677456796169281,
945
+ 0.17001178860664368,
946
+ 0.16833247244358063,
947
+ 0.17318005859851837,
948
+ 0.17326097190380096,
949
+ 0.17671826481819153,
950
+ 0.17965927720069885,
951
+ 0.17709869146347046,
952
+ 0.17812584340572357,
953
+ 0.18094243109226227,
954
+ 0.17931658029556274,
955
+ 0.1780029535293579,
956
+ 0.1778276562690735,
957
+ 0.17606784403324127,
958
+ 0.17577897012233734,
959
+ 0.17276717722415924,
960
+ 0.17193765938282013,
961
+ 0.17166247963905334,
962
+ 0.16964799165725708,
963
+ 0.16706812381744385,
964
+ 0.16757197678089142,
965
+ 0.16796070337295532,
966
+ 0.17077603936195374,
967
+ 0.16790732741355896,
968
+ 0.16808928549289703,
969
+ 0.17049027979373932,
970
+ 0.16959191858768463,
971
+ 0.16786660254001617,
972
+ 0.1698780059814453,
973
+ 0.17389650642871857,
974
+ 0.1759837120771408,
975
+ 0.1825551986694336,
976
+ 0.1941106617450714,
977
+ 0.20087893307209015,
978
+ 0.20311099290847778,
979
+ 0.20108184218406677,
980
+ 0.2060651034116745,
981
+ 0.2042357623577118,
982
+ 0.20703236758708954,
983
+ 0.21205003559589386,
984
+ 0.2158161699771881,
985
+ 0.2168900966644287,
986
+ 0.21410465240478516,
987
+ 0.21417385339736938,
988
+ 0.2129133939743042,
989
+ 0.21731425821781158,
990
+ 0.21567592024803162,
991
+ 0.21867820620536804,
992
+ 0.22142289578914642,
993
+ 0.2225620299577713,
994
+ 0.22172413766384125,
995
+ 0.2218010574579239,
996
+ 0.22009830176830292,
997
+ 0.21785126626491547,
998
+ 0.21678262948989868,
999
+ 0.21671107411384583,
1000
+ 0.2209888994693756,
1001
+ 0.22106041014194489,
1002
+ 0.22590550780296326,
1003
+ 0.22786492109298706,
1004
+ 0.23093664646148682,
1005
+ 0.22673366963863373,
1006
+ 0.22699661552906036,
1007
+ 0.23137319087982178,
1008
+ 0.23347441852092743,
1009
+ 0.23230887949466705,
1010
+ 0.23691625893115997,
1011
+ 0.23728600144386292,
1012
+ 0.23905602097511292,
1013
+ 0.24103665351867676,
1014
+ 0.23820726573467255,
1015
+ 0.23968391120433807,
1016
+ 0.240958109498024,
1017
+ 0.24235181510448456,
1018
+ 0.2462717443704605,
1019
+ 0.2495051473379135,
1020
+ 0.2500581443309784,
1021
+ 0.25138387084007263,
1022
+ 0.2525486350059509,
1023
+ 0.25259435176849365,
1024
+ 0.25351014733314514,
1025
+ 0.25713658332824707,
1026
+ 0.26032426953315735,
1027
+ 0.2599243223667145,
1028
+ 0.26285797357559204,
1029
+ 0.2655181884765625,
1030
+ 0.27248892188072205,
1031
+ 0.2755123972892761,
1032
+ 0.27839818596839905,
1033
+ 0.280465692281723,
1034
+ 0.28088274598121643,
1035
+ 0.28307604789733887,
1036
+ 0.28578928112983704,
1037
+ 0.28797510266304016,
1038
+ 0.28915101289749146,
1039
+ 0.2902125418186188,
1040
+ 0.28890639543533325,
1041
+ 0.28958800435066223,
1042
+ 0.28951144218444824,
1043
+ 0.28970345854759216,
1044
+ 0.28964078426361084,
1045
+ 0.28985023498535156,
1046
+ 0.2914946377277374,
1047
+ 0.2916889190673828,
1048
+ 0.29132041335105896,
1049
+ 0.2919837236404419,
1050
+ 0.29286372661590576,
1051
+ 0.29286906123161316,
1052
+ 0.29427701234817505,
1053
+ 0.2967914044857025,
1054
+ 0.2974070906639099,
1055
+ 0.29844793677330017,
1056
+ 0.2980978190898895,
1057
+ 0.29862624406814575,
1058
+ 0.29875603318214417,
1059
+ 0.300121009349823,
1060
+ 0.30075713992118835,
1061
+ 0.3011188507080078,
1062
+ 0.3021273612976074,
1063
+ 0.3024706542491913,
1064
+ 0.30297189950942993,
1065
+ 0.30314701795578003,
1066
+ 0.303840696811676
1067
  ],
1068
  "lr": [
1069
+ 1.6752136752136756e-05,
1070
+ 3.384615384615385e-05,
1071
+ 5.094017094017095e-05,
1072
+ 6.803418803418804e-05,
1073
+ 8e-05,
1074
+ 8e-05,
1075
+ 8e-05,
1076
+ 8e-05,
1077
+ 8e-05,
1078
+ 8e-05,
1079
+ 8e-05,
1080
+ 8e-05,
1081
+ 8e-05,
1082
+ 8e-05,
1083
+ 8e-05,
1084
+ 8e-05,
1085
+ 8e-05,
1086
+ 8e-05,
1087
+ 8e-05,
1088
+ 8e-05,
1089
+ 8e-05,
1090
+ 8e-05,
1091
+ 8e-05,
1092
+ 8e-05,
1093
+ 8e-05,
1094
+ 8e-05,
1095
+ 8e-05,
1096
+ 8e-05,
1097
+ 8e-05,
1098
+ 8e-05,
1099
+ 8e-05,
1100
+ 8e-05,
1101
+ 8e-05,
1102
+ 8e-05,
1103
+ 8e-05,
1104
+ 8e-05,
1105
+ 8e-05,
1106
+ 8e-05,
1107
+ 8e-05,
1108
+ 8e-05,
1109
+ 8e-05,
1110
+ 8e-05,
1111
+ 8e-05,
1112
+ 8e-05,
1113
+ 8e-05,
1114
+ 8e-05,
1115
+ 8e-05,
1116
+ 8e-05,
1117
+ 8e-05,
1118
+ 8e-05,
1119
+ 8e-05,
1120
+ 8e-05,
1121
+ 8e-05,
1122
+ 8e-05,
1123
+ 8e-05,
1124
+ 8e-05,
1125
+ 8e-05,
1126
+ 8e-05,
1127
+ 8e-05,
1128
+ 8e-05,
1129
+ 8e-05,
1130
+ 8e-05,
1131
+ 8e-05,
1132
+ 8e-05,
1133
+ 8e-05,
1134
+ 8e-05,
1135
+ 8e-05,
1136
+ 8e-05,
1137
+ 8e-05,
1138
+ 8e-05,
1139
+ 8e-05,
1140
+ 8e-05,
1141
+ 8e-05,
1142
+ 8e-05,
1143
+ 8e-05,
1144
+ 8e-05,
1145
+ 8e-05,
1146
+ 8e-05,
1147
+ 8e-05,
1148
+ 8e-05,
1149
+ 8e-05,
1150
+ 8e-05,
1151
+ 8e-05,
1152
+ 8e-05,
1153
+ 8e-05,
1154
+ 8e-05,
1155
+ 8e-05,
1156
+ 8e-05,
1157
+ 8e-05,
1158
+ 8e-05,
1159
+ 8e-05,
1160
+ 7.987186923279727e-05,
1161
+ 7.868547324017929e-05,
1162
+ 7.74990772475613e-05,
1163
+ 7.631268125494332e-05,
1164
+ 7.512628526232533e-05,
1165
+ 7.393988926970736e-05,
1166
+ 7.275349327708938e-05,
1167
+ 7.156709728447139e-05,
1168
+ 7.038070129185342e-05,
1169
+ 6.919430529923543e-05,
1170
+ 6.800790930661746e-05,
1171
+ 6.682151331399946e-05,
1172
+ 6.56351173213815e-05,
1173
+ 6.444872132876353e-05,
1174
+ 6.250303190087003e-05,
1175
+ 6.131663590825203e-05,
1176
+ 6.0130239915634074e-05,
1177
+ 5.894384392301608e-05,
1178
+ 5.77574479303981e-05,
1179
+ 5.657105193778013e-05,
1180
+ 5.538465594516214e-05,
1181
+ 5.419825995254416e-05,
1182
+ 5.3011863959926175e-05,
1183
+ 5.182546796730821e-05,
1184
+ 5.063907197469023e-05,
1185
+ 4.945267598207224e-05,
1186
+ 4.8266279989454265e-05,
1187
+ 4.707988399683627e-05,
1188
+ 4.5893488004218296e-05,
1189
+ 4.3947798576324816e-05,
1190
+ 4.276140258370682e-05,
1191
+ 4.157500659108885e-05,
1192
+ 4.038861059847088e-05,
1193
+ 3.9202214605852886e-05,
1194
+ 3.801581861323492e-05,
1195
+ 3.6829422620616924e-05,
1196
+ 3.564302662799895e-05,
1197
+ 3.4456630635380976e-05,
1198
+ 3.327023464276298e-05,
1199
+ 3.2083838650145014e-05,
1200
+ 3.089744265752702e-05,
1201
+ 2.971104666490905e-05,
1202
+ 2.8524650672291075e-05,
1203
+ 2.733825467967308e-05,
1204
+ 2.5392565251779594e-05,
1205
+ 2.4206169259161623e-05,
1206
+ 2.3019773266543632e-05,
1207
+ 2.1833377273925658e-05,
1208
+ 2.0646981281307667e-05,
1209
+ 1.9460585288689693e-05,
1210
+ 1.8274189296071722e-05,
1211
+ 1.7087793303453728e-05,
1212
+ 1.5901397310835757e-05,
1213
+ 1.4715001318217762e-05,
1214
+ 1.352860532559979e-05,
1215
+ 1.2342209332981818e-05,
1216
+ 1.1155813340363827e-05,
1217
+ 9.969417347745854e-06,
1218
+ 8.783021355127861e-06
1219
  ],
1220
  "emb_lr": [],
1221
  "eval_step": [
 
1232
  ],
1233
  "eval_accuracy": [
1234
  0.0,
1235
+ 0.44,
1236
+ 0.8,
 
 
1237
  0.95,
1238
  0.99,
1239
+ 0.95,
1240
+ 0.98,
1241
+ 0.95,
1242
  1.0,
 
1243
  1.0
1244
  ]
1245
  },
1246
+ "final_accuracy": 0.9995833333333334,
1247
  "sft_eval": {
1248
  "config": {
1249
  "ops": "add_sub",
 
1254
  },
1255
  "splits": {
1256
  "add_S0": {
1257
+ "full_accuracy": 1.0,
1258
  "n_examples": 100,
1259
  "per_subtask": {
1260
  "SA": {
1261
+ "accuracy": 1.0,
1262
  "count": 605
1263
  },
1264
  "SS": {
1265
+ "accuracy": 1.0,
1266
  "count": 95
1267
  }
1268
  }
1269
  },
1270
  "add_S1": {
1271
+ "full_accuracy": 0.97,
1272
  "n_examples": 100,
1273
  "per_subtask": {
1274
  "SA": {
 
1284
  "count": 31
1285
  },
1286
  "UC": {
1287
+ "accuracy": 0.9966216216216216,
1288
  "count": 296
1289
  }
1290
  }
1291
  },
1292
  "add_S2": {
1293
+ "full_accuracy": 1.0,
1294
  "n_examples": 100,
1295
  "per_subtask": {
1296
  "SA": {
 
1298
  "count": 163
1299
  },
1300
  "SC": {
1301
+ "accuracy": 1.0,
1302
  "count": 130
1303
  },
1304
  "SS": {
1305
+ "accuracy": 1.0,
1306
  "count": 87
1307
  },
1308
  "UC": {
 
1316
  }
1317
  },
1318
  "add_S3": {
1319
+ "full_accuracy": 0.99,
1320
  "n_examples": 100,
1321
  "per_subtask": {
1322
  "SA": {
 
1332
  "count": 49
1333
  },
1334
  "UC": {
1335
+ "accuracy": 0.9946236559139785,
1336
  "count": 186
1337
  },
1338
  "US": {
 
1342
  }
1343
  },
1344
  "add_S4": {
1345
+ "full_accuracy": 0.89,
1346
  "n_examples": 100,
1347
  "per_subtask": {
1348
  "SA": {
 
1358
  "count": 23
1359
  },
1360
  "UC": {
1361
+ "accuracy": 0.93125,
1362
  "count": 160
1363
  },
1364
  "US": {
1365
+ "accuracy": 1.0,
1366
  "count": 307
1367
  }
1368
  }
1369
  },
1370
  "add_S5": {
1371
+ "full_accuracy": 0.87,
1372
  "n_examples": 100,
1373
  "per_subtask": {
1374
  "SA": {
 
1380
  "count": 100
1381
  },
1382
  "UC": {
1383
+ "accuracy": 0.88,
1384
  "count": 100
1385
  },
1386
  "US": {
1387
+ "accuracy": 0.97,
1388
  "count": 400
1389
  }
1390
  }
1391
  },
1392
  "add_S6": {
1393
+ "full_accuracy": 0.75,
1394
  "n_examples": 100,
1395
  "per_subtask": {
1396
  "SC": {
 
1398
  "count": 100
1399
  },
1400
  "UC": {
1401
+ "accuracy": 0.75,
1402
  "count": 100
1403
  },
1404
  "US": {
1405
+ "accuracy": 0.934,
1406
  "count": 500
1407
  }
1408
  }
1409
  },
1410
  "add_random": {
1411
+ "full_accuracy": 0.995,
1412
  "n_examples": 200,
1413
  "per_subtask": {
1414
  "SA": {
 
1424
  "count": 56
1425
  },
1426
  "UC": {
1427
+ "accuracy": 1.0,
1428
  "count": 529
1429
  },
1430
  "US": {
 
1434
  }
1435
  },
1436
  "add_C3": {
1437
+ "full_accuracy": 0.99,
1438
  "n_examples": 100,
1439
  "per_subtask": {
1440
  "SA": {
 
1446
  "count": 100
1447
  },
1448
  "UC": {
1449
+ "accuracy": 0.9948186528497409,
1450
  "count": 193
1451
  },
1452
  "US": {
1453
+ "accuracy": 1.0,
1454
  "count": 107
1455
  }
1456
  }
1457
  },
1458
  "add_C4": {
1459
+ "full_accuracy": 0.97,
1460
  "n_examples": 100,
1461
  "per_subtask": {
1462
  "SA": {
 
1468
  "count": 100
1469
  },
1470
  "UC": {
1471
+ "accuracy": 0.98828125,
1472
  "count": 256
1473
  },
1474
  "US": {
1475
+ "accuracy": 1.0,
1476
  "count": 144
1477
  }
1478
  }
1479
  },
1480
  "add_C5": {
1481
+ "full_accuracy": 0.95,
1482
  "n_examples": 100,
1483
  "per_subtask": {
1484
  "SA": {
 
1490
  "count": 100
1491
  },
1492
  "UC": {
1493
+ "accuracy": 0.9836601307189542,
1494
  "count": 306
1495
  },
1496
  "US": {
1497
+ "accuracy": 1.0,
1498
  "count": 194
1499
  }
1500
  }
1501
  },
1502
  "add_C6": {
1503
+ "full_accuracy": 0.99,
1504
  "n_examples": 100,
1505
  "per_subtask": {
1506
  "SC": {
 
1508
  "count": 100
1509
  },
1510
  "UC": {
1511
+ "accuracy": 0.9972677595628415,
1512
  "count": 366
1513
  },
1514
  "US": {
1515
+ "accuracy": 1.0,
1516
  "count": 234
1517
  }
1518
  }
1519
  },
1520
  "sub_M0": {
1521
+ "full_accuracy": 1.0,
1522
  "n_examples": 100,
1523
  "per_subtask": {
1524
  "MD": {
1525
+ "accuracy": 1.0,
1526
  "count": 601
1527
  },
1528
  "ME": {
 
1532
  }
1533
  },
1534
  "sub_M1": {
1535
+ "full_accuracy": 0.99,
1536
  "n_examples": 100,
1537
  "per_subtask": {
1538
  "MD": {
 
1540
  "count": 279
1541
  },
1542
  "MB": {
1543
+ "accuracy": 1.0,
1544
  "count": 145
1545
  },
1546
  "ME": {
 
1580
  }
1581
  },
1582
  "sub_M3": {
1583
+ "full_accuracy": 0.89,
1584
  "n_examples": 100,
1585
  "per_subtask": {
1586
  "MD": {
 
1596
  "count": 56
1597
  },
1598
  "UB": {
1599
+ "accuracy": 0.9261744966442953,
1600
  "count": 149
1601
  },
1602
  "UD": {
 
1606
  }
1607
  },
1608
  "sub_M4": {
1609
+ "full_accuracy": 0.41,
1610
  "n_examples": 100,
1611
  "per_subtask": {
1612
  "MD": {
 
1618
  "count": 100
1619
  },
1620
  "UB": {
1621
+ "accuracy": 0.48,
1622
  "count": 100
1623
  },
1624
  "UD": {
1625
+ "accuracy": 0.9133333333333333,
1626
  "count": 300
1627
  }
1628
  }
1629
  },
1630
  "sub_M5": {
1631
+ "full_accuracy": 0.2,
1632
  "n_examples": 100,
1633
  "per_subtask": {
1634
  "MD": {
 
1640
  "count": 100
1641
  },
1642
  "UB": {
1643
+ "accuracy": 0.51,
1644
  "count": 100
1645
  },
1646
  "UD": {
1647
+ "accuracy": 0.735,
1648
  "count": 400
1649
  }
1650
  }
1651
  },
1652
  "sub_random": {
1653
+ "full_accuracy": 0.995,
1654
  "n_examples": 200,
1655
  "per_subtask": {
1656
  "MD": {
1657
+ "accuracy": 1.0,
1658
  "count": 600
1659
  },
1660
  "MB": {
1661
+ "accuracy": 1.0,
1662
  "count": 267
1663
  },
1664
  "ME": {
 
1676
  }
1677
  },
1678
  "sub_B3": {
1679
+ "full_accuracy": 0.89,
1680
  "n_examples": 100,
1681
  "per_subtask": {
1682
  "MD": {
1683
+ "accuracy": 0.9966666666666667,
1684
  "count": 300
1685
  },
1686
  "MB": {
 
1688
  "count": 100
1689
  },
1690
  "UB": {
1691
+ "accuracy": 0.949238578680203,
1692
  "count": 197
1693
  },
1694
  "UD": {
 
1698
  }
1699
  },
1700
  "sub_B4": {
1701
+ "full_accuracy": 0.85,
1702
  "n_examples": 100,
1703
  "per_subtask": {
1704
  "MD": {
1705
+ "accuracy": 0.995,
1706
  "count": 200
1707
  },
1708
  "MB": {
 
1710
  "count": 100
1711
  },
1712
  "UB": {
1713
+ "accuracy": 0.9473684210526315,
1714
  "count": 247
1715
  },
1716
  "UD": {
1717
+ "accuracy": 0.9738562091503268,
1718
  "count": 153
1719
  }
1720
  }
1721
  },
1722
  "sub_B5": {
1723
+ "full_accuracy": 0.91,
1724
  "n_examples": 100,
1725
  "per_subtask": {
1726
  "MD": {
 
1732
  "count": 100
1733
  },
1734
  "UB": {
1735
+ "accuracy": 0.9697986577181208,
1736
  "count": 298
1737
  },
1738
  "UD": {
1739
+ "accuracy": 0.9801980198019802,
1740
  "count": 202
1741
  }
1742
  }
1743
  }
1744
  },
1745
  "summary": {
1746
+ "overall_accuracy": 0.8954166666666666,
1747
  "total_examples": 2400,
1748
  "n_splits": 22
1749
  }
 
1872
  }
1873
  },
1874
  "add_S5": {
1875
+ "full_accuracy": 1.0,
1876
  "n_examples": 100,
1877
  "per_subtask": {
1878
  "SA": {
 
1884
  "count": 100
1885
  },
1886
  "UC": {
1887
+ "accuracy": 1.0,
1888
  "count": 100
1889
  },
1890
  "US": {
 
2110
  }
2111
  },
2112
  "sub_M4": {
2113
+ "full_accuracy": 1.0,
2114
  "n_examples": 100,
2115
  "per_subtask": {
2116
  "MD": {
 
2122
  "count": 100
2123
  },
2124
  "UB": {
2125
+ "accuracy": 1.0,
2126
  "count": 100
2127
  },
2128
  "UD": {
 
2132
  }
2133
  },
2134
  "sub_M5": {
2135
+ "full_accuracy": 1.0,
2136
  "n_examples": 100,
2137
  "per_subtask": {
2138
  "MD": {
 
2144
  "count": 100
2145
  },
2146
  "UB": {
2147
+ "accuracy": 1.0,
2148
  "count": 100
2149
  },
2150
  "UD": {
 
2247
  }
2248
  },
2249
  "summary": {
2250
+ "overall_accuracy": 0.9995833333333334,
2251
  "total_examples": 2400,
2252
  "n_splits": 22
2253
  }
2254
  },
2255
+ "sorl_overall_accuracy": 0.9995833333333334,
2256
+ "sft_overall_accuracy": 0.8954166666666666
2257
  }
add_sub_sorl_v1_abs50_50K/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6342cf5867d670c96b8fb7972956a9553f7a1f0ec1e2656c505d55780ccbe665
3
  size 650466940
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f20c93728acb4d46e808ec7e294845cd9e4f4f5e60f7f61517d13f7c28391a31
3
  size 650466940
add_sub_sorl_v1_abs50_50K/train_config.json CHANGED
@@ -17,7 +17,7 @@
17
  "target_vocab_util": 0.8,
18
  "min_abs_ppl": 0.0,
19
  "zipf_alpha": 1.0,
20
- "lr": 4e-05,
21
  "emb_lr_mult": 1.0,
22
  "weight_decay": 0.01,
23
  "warmup_steps": 234,
@@ -36,7 +36,7 @@
36
  "eval_every": 781,
37
  "save_every": 999999,
38
  "eval_samples": 100,
39
- "output_dir": "ckpt/sweep/as_sorl_abs50_K4_50K",
40
  "eval_K": 4,
41
  "alpha_traj": 0.0,
42
  "corrupt_method": "shuffle",
@@ -69,16 +69,16 @@
69
  "no_wandb": false,
70
  "n_params": 162540062,
71
  "run_name": "add_sub_sorl_v1_abs50_50K",
72
- "git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
73
- "timestamp": "2026-04-12T12:20:29.793903+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
77
  "model_repo": "thoughtworks/arithmetic-sorl",
78
  "trainer_version": "v1",
79
- "wandb_run_id": "wtwip5r3",
80
- "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/wtwip5r3",
81
- "final_accuracy": 0.9983333333333333,
82
- "sft_accuracy": 0.7658333333333334,
83
  "eval_method": "ArithmeticEvaluator"
84
  }
 
17
  "target_vocab_util": 0.8,
18
  "min_abs_ppl": 0.0,
19
  "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
  "emb_lr_mult": 1.0,
22
  "weight_decay": 0.01,
23
  "warmup_steps": 234,
 
36
  "eval_every": 781,
37
  "save_every": 999999,
38
  "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs50_50K",
40
  "eval_K": 4,
41
  "alpha_traj": 0.0,
42
  "corrupt_method": "shuffle",
 
69
  "no_wandb": false,
70
  "n_params": 162540062,
71
  "run_name": "add_sub_sorl_v1_abs50_50K",
72
+ "git_commit": "dc8dd776fb0c30a4c9073052dcc5e943e0fd80c6",
73
+ "timestamp": "2026-04-13T07:43:04.556438+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
77
  "model_repo": "thoughtworks/arithmetic-sorl",
78
  "trainer_version": "v1",
79
+ "wandb_run_id": "bck2cmmy",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/bck2cmmy",
81
+ "final_accuracy": 0.9995833333333334,
82
+ "sft_accuracy": 0.8954166666666666,
83
  "eval_method": "ArithmeticEvaluator"
84
  }