amirali1985 commited on
Commit
d657071
·
verified ·
1 Parent(s): 087cbaa

Upload add_sub_sorl_v1_abs10_K1_25K_1L3H510d

Browse files
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/metrics.json CHANGED
@@ -70,512 +70,1072 @@
70
  3719,
71
  3769,
72
  3819,
73
- 3869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  ],
75
  "loss": [
76
  -2.59329891204834,
77
  7.806329250335693,
78
- 4.891926288604736,
79
- 3.912823438644409,
80
- 3.36497163772583,
81
- 3.413069248199463,
82
- 3.1604514122009277,
83
- 3.0855469703674316,
84
- 2.895873546600342,
85
- 2.8154397010803223,
86
- 2.8238906860351562,
87
- 2.5073657035827637,
88
- 1.2554247379302979,
89
- -0.46714138984680176,
90
- -5.484283447265625,
91
- -6.567154884338379,
92
- -6.758439064025879,
93
- -7.20958137512207,
94
- -8.60386848449707,
95
- -9.11019515991211,
96
- -9.290766716003418,
97
- -9.791807174682617,
98
- -10.01004695892334,
99
- -10.105368614196777,
100
- -9.49570083618164,
101
- -10.431818008422852,
102
- -10.326833724975586,
103
- -10.126893997192383,
104
- -10.160858154296875,
105
- -10.941141128540039,
106
- -10.631235122680664,
107
- -10.644250869750977,
108
- -10.591297149658203,
109
- -11.348092079162598,
110
- -11.023357391357422,
111
- -11.110491752624512,
112
- -11.359291076660156,
113
- -11.153099060058594,
114
- -10.987231254577637,
115
- -11.629621505737305,
116
- -10.848411560058594,
117
- -11.752678871154785,
118
- -11.179244995117188,
119
- -11.764345169067383,
120
- -11.807055473327637,
121
- -11.234673500061035,
122
- -12.306229591369629,
123
- -11.79053783416748,
124
- -12.127859115600586,
125
- -11.52390193939209,
126
- -11.549543380737305,
127
- -13.031827926635742,
128
- -12.10395336151123,
129
- -11.681001663208008,
130
- -12.33035945892334,
131
- -12.12756633758545,
132
- -12.05741024017334,
133
- -12.84146785736084,
134
- -12.221896171569824,
135
- -12.554245948791504,
136
- -12.93739128112793,
137
- -12.630762100219727,
138
- -12.096063613891602,
139
- -12.651780128479004,
140
- -13.059272766113281,
141
- -12.436356544494629,
142
- -12.79887580871582,
143
- -12.472254753112793,
144
- -11.984771728515625,
145
- -13.45839786529541
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  ],
147
  "base_loss": [
148
  10.196748733520508,
149
  6.539699077606201,
150
- 4.092434406280518,
151
- 2.346903085708618,
152
- 2.0746912956237793,
153
- 1.9594690799713135,
154
- 1.8837043046951294,
155
- 1.8864774703979492,
156
- 1.8490782976150513,
157
- 1.7932301759719849,
158
- 1.7640596628189087,
159
- 1.8651959896087646,
160
- 1.8547264337539673,
161
- 1.886150598526001,
162
- 1.9799963235855103,
163
- 1.80038583278656,
164
- 1.8168869018554688,
165
- 1.7473572492599487,
166
- 1.8770685195922852,
167
- 1.8492628335952759,
168
- 1.8485273122787476,
169
- 1.834829330444336,
170
- 1.843698263168335,
171
- 1.8508756160736084,
172
- 1.804511308670044,
173
- 1.85122549533844,
174
- 1.8213146924972534,
175
- 1.8022531270980835,
176
- 1.8216105699539185,
177
- 1.8896723985671997,
178
- 1.7682313919067383,
179
- 1.851951003074646,
180
- 1.8244448900222778,
181
- 1.860870122909546,
182
- 1.844719648361206,
183
- 1.8215335607528687,
184
- 1.8681211471557617,
185
- 1.7685410976409912,
186
- 1.77374267578125,
187
- 1.83017098903656,
188
- 1.780069351196289,
189
- 1.8695632219314575,
190
- 1.803731918334961,
191
- 1.794657588005066,
192
- 1.8132097721099854,
193
- 1.744333028793335,
194
- 1.850972056388855,
195
- 1.79466712474823,
196
- 1.865286111831665,
197
- 1.775697112083435,
198
- 1.831557273864746,
199
- 1.8576253652572632,
200
- 1.7820489406585693,
201
- 1.7218695878982544,
202
- 1.7814966440200806,
203
- 1.771523356437683,
204
- 1.748187780380249,
205
- 1.839253544807434,
206
- 1.7425659894943237,
207
- 1.7759649753570557,
208
- 1.8560504913330078,
209
- 1.794585108757019,
210
- 1.7335294485092163,
211
- 1.806968092918396,
212
- 1.8292748928070068,
213
- 1.7885210514068604,
214
- 1.8298585414886475,
215
- 1.7961997985839844,
216
- 1.723584532737732,
217
- 1.8568757772445679
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  ],
219
  "info_loss": [
220
  -2.1446304321289062,
221
  -0.32575225830078125,
222
- -0.1383652687072754,
223
- -0.03788328170776367,
224
- -0.06099653244018555,
225
- -0.043364644050598145,
226
- -0.060524702072143555,
227
- -0.06769096851348877,
228
- -0.08283662796020508,
229
- -0.08506667613983154,
230
- -0.08107435703277588,
231
- -0.12192976474761963,
232
- -0.23974978923797607,
233
- -0.40476346015930176,
234
- -0.9116840362548828,
235
- -1.002124309539795,
236
- -1.0228478908538818,
237
- -1.060563087463379,
238
- -1.2133022546768188,
239
- -1.2610199451446533,
240
- -1.2785308361053467,
241
- -1.326535940170288,
242
- -1.3484688997268677,
243
- -1.3588534593582153,
244
- -1.2928407192230225,
245
- -1.3912382125854492,
246
- -1.3773154020309448,
247
- -1.3551386594772339,
248
- -1.3596680164337158,
249
- -1.44418203830719,
250
- -1.3997809886932373,
251
- -1.4097068309783936,
252
- -1.400420069694519,
253
- -1.4767446517944336,
254
- -1.4412634372711182,
255
- -1.4422601461410522,
256
- -1.468010663986206,
257
- -1.437137484550476,
258
- -1.421332597732544,
259
- -1.4890285730361938,
260
- -1.4050776958465576,
261
- -1.5023729801177979,
262
- -1.4319555759429932,
263
- -1.4917011260986328,
264
- -1.4962141513824463,
265
- -1.4313055276870728,
266
- -1.5444698333740234,
267
- -1.487035870552063,
268
- -1.5295355319976807,
269
- -1.4528287649154663,
270
- -1.4570941925048828,
271
- -1.6042754650115967,
272
- -1.5032556056976318,
273
- -1.4498255252838135,
274
- -1.5202281475067139,
275
- -1.4980217218399048,
276
- -1.488149881362915,
277
- -1.574017882347107,
278
- -1.498759150505066,
279
- -1.5380897521972656,
280
- -1.5861152410507202,
281
- -1.5452888011932373,
282
- -1.4880478382110596,
283
- -1.5506173372268677,
284
- -1.5903542041778564,
285
- -1.5227900743484497,
286
- -1.5617271661758423,
287
- -1.5277729034423828,
288
- -1.4715402126312256,
289
- -1.62960946559906
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  ],
291
  "abs_loss": [
292
  2.2833845615386963,
293
  2.1349236965179443,
294
- 1.895667552947998,
295
- 1.8790363073349,
296
- 1.8763422966003418,
297
- 1.863279938697815,
298
- 1.8707689046859741,
299
- 1.8497987985610962,
300
- 1.8467472791671753,
301
- 1.8501476049423218,
302
- 1.8510236740112305,
303
- 1.7979803085327148,
304
- 1.6018939018249512,
305
- 1.4351223707199097,
306
- 1.3627341985702515,
307
- 1.3827515840530396,
308
- 1.378796935081482,
309
- 1.3818906545639038,
310
- 1.3893027305603027,
311
- 1.376089096069336,
312
- 1.3647276163101196,
313
- 1.341958999633789,
314
- 1.3429460525512695,
315
- 1.2983181476593018,
316
- 1.3058513402938843,
317
- 1.3161550760269165,
318
- 1.269581913948059,
319
- 1.2819956541061401,
320
- 1.225730061531067,
321
- 1.2145700454711914,
322
- 1.2128896713256836,
323
- 1.181650161743164,
324
- 1.1704578399658203,
325
- 1.100685477256775,
326
- 1.1276469230651855,
327
- 1.0256637334823608,
328
- 0.9807112812995911,
329
- 0.9855987429618835,
330
- 1.0340598821640015,
331
- 0.962256133556366,
332
- 0.9177753925323486,
333
- 0.923234760761261,
334
- 0.9172429442405701,
335
- 0.9422297477722168,
336
- 0.9557555317878723,
337
- 0.959925651550293,
338
- 0.8680426478385925,
339
- 0.8737877011299133,
340
- 0.9473252296447754,
341
- 0.8711683750152588,
342
- 0.8707416653633118,
343
- 0.7836803793907166,
344
- 0.847979724407196,
345
- 0.7654786109924316,
346
- 0.793886661529541,
347
- 0.7679941058158875,
348
- 0.7534409165382385,
349
- 0.7690866589546204,
350
- 0.7134169936180115,
351
- 0.7436110973358154,
352
- 0.743655264377594,
353
- 0.6757664084434509,
354
- 0.6782504916191101,
355
- 0.684099018573761,
356
- 0.6790602207183838,
357
- 0.6778237223625183,
358
- 0.6263924241065979,
359
- 0.6698463559150696,
360
- 0.650520384311676,
361
- 0.6476595401763916
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  ],
363
  "zipf_loss": [
364
  8.427918434143066,
365
  4.310660362243652,
366
- 1.9935778379440308,
367
- 1.7568495273590088,
368
- 1.7126115560531616,
369
- 1.7009185552597046,
370
- 1.6949173212051392,
371
- 1.6909992694854736,
372
- 1.6904866695404053,
373
- 1.687861680984497,
374
- 1.6854722499847412,
375
- 1.6816692352294922,
376
- 1.6380068063735962,
377
- 1.550830364227295,
378
- 1.5162874460220337,
379
- 1.5154277086257935,
380
- 1.5152742862701416,
381
- 1.510502576828003,
382
- 1.513155460357666,
383
- 1.513131856918335,
384
- 1.5095428228378296,
385
- 1.5045267343521118,
386
- 1.4966490268707275,
387
- 1.5024579763412476,
388
- 1.4976110458374023,
389
- 1.4977227449417114,
390
- 1.4980478286743164,
391
- 1.4940409660339355,
392
- 1.49163818359375,
393
- 1.4895493984222412,
394
- 1.4770550727844238,
395
- 1.4827024936676025,
396
- 1.4714136123657227,
397
- 1.4484158754348755,
398
- 1.4317924976348877,
399
- 1.3880096673965454,
400
- 1.354623794555664,
401
- 1.3511745929718018,
402
- 1.3489454984664917,
403
- 1.3342679738998413,
404
- 1.3305190801620483,
405
- 1.309164047241211,
406
- 1.2448537349700928,
407
- 1.2637850046157837,
408
- 1.2463005781173706,
409
- 1.2380549907684326,
410
- 1.2006921768188477,
411
- 1.1977745294570923,
412
- 1.2074775695800781,
413
- 1.141572117805481,
414
- 1.1027672290802002,
415
- 1.0749335289001465,
416
- 1.061755895614624,
417
- 1.0188356637954712,
418
- 1.0110366344451904,
419
- 1.0043277740478516,
420
- 1.0005559921264648,
421
- 0.9825484752655029,
422
- 0.9517877101898193,
423
- 0.9763262271881104,
424
- 0.9933456182479858,
425
- 0.959964394569397,
426
- 0.9830599427223206,
427
- 0.9790149927139282,
428
- 0.9470869898796082,
429
- 0.935240626335144,
430
- 0.9258978962898254,
431
- 0.9422906637191772,
432
- 0.9419942498207092,
433
- 0.9160555005073547
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  ],
435
  "denoise_loss": [],
436
  "ortho_loss": [
437
  0.7092215418815613,
438
  0.46743276715278625,
439
- 0.372333288192749,
440
- 0.3163504898548126,
441
- 0.29026153683662415,
442
- 0.2792986333370209,
443
- 0.27686527371406555,
444
- 0.262327641248703,
445
- 0.24570715427398682,
446
- 0.2475493848323822,
447
- 0.24840138852596283,
448
- 0.25525224208831787,
449
- 0.27400314807891846,
450
- 0.2834039628505707,
451
- 0.28120777010917664,
452
- 0.2767947316169739,
453
- 0.2838645875453949,
454
- 0.26357901096343994,
455
- 0.24860280752182007,
456
- 0.2444341778755188,
457
- 0.2471916675567627,
458
- 0.2405933439731598,
459
- 0.24468615651130676,
460
- 0.24482561647891998,
461
- 0.240675151348114,
462
- 0.24134141206741333,
463
- 0.24261823296546936,
464
- 0.24529020488262177,
465
- 0.24550136923789978,
466
- 0.24657639861106873,
467
- 0.24326443672180176,
468
- 0.24152472615242004,
469
- 0.24578434228897095,
470
- 0.24196375906467438,
471
- 0.24180641770362854,
472
- 0.24166396260261536,
473
- 0.23602785170078278,
474
- 0.2411782592535019,
475
- 0.23615865409374237,
476
- 0.23971572518348694,
477
- 0.23798082768917084,
478
- 0.2350902259349823,
479
- 0.23152871429920197,
480
- 0.23074424266815186,
481
- 0.22700950503349304,
482
- 0.2275175005197525,
483
- 0.23137938976287842,
484
- 0.22892071306705475,
485
- 0.22994598746299744,
486
- 0.2339673489332199,
487
- 0.23359638452529907,
488
- 0.23373164236545563,
489
- 0.23107723891735077,
490
- 0.2339753359556198,
491
- 0.23272015154361725,
492
- 0.2336493581533432,
493
- 0.233825221657753,
494
- 0.2339244931936264,
495
- 0.235467329621315,
496
- 0.2342061847448349,
497
- 0.23358942568302155,
498
- 0.23396462202072144,
499
- 0.23547381162643433,
500
- 0.23512768745422363,
501
- 0.23589524626731873,
502
- 0.2354237139225006,
503
- 0.2354665845632553,
504
- 0.23491325974464417,
505
- 0.23564963042736053,
506
- 0.2355274111032486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
  ],
508
  "lr": [
509
  1.6752136752136756e-05,
510
  3.384615384615385e-05,
511
- 4e-05,
512
- 4e-05,
513
- 4e-05,
514
- 4e-05,
515
- 4e-05,
516
- 4e-05,
517
- 4e-05,
518
- 4e-05,
519
- 4e-05,
520
- 4e-05,
521
- 4e-05,
522
- 4e-05,
523
- 4e-05,
524
- 4e-05,
525
- 4e-05,
526
- 4e-05,
527
- 4e-05,
528
- 4e-05,
529
- 4e-05,
530
- 4e-05,
531
- 4e-05,
532
- 4e-05,
533
- 4e-05,
534
- 4e-05,
535
- 4e-05,
536
- 4e-05,
537
- 4e-05,
538
- 4e-05,
539
- 4e-05,
540
- 4e-05,
541
- 4e-05,
542
- 4e-05,
543
- 4e-05,
544
- 4e-05,
545
- 4e-05,
546
- 4e-05,
547
- 4e-05,
548
- 4e-05,
549
- 4e-05,
550
- 4e-05,
551
- 3.9947798576324814e-05,
552
- 3.8761402583706826e-05,
553
- 3.757500659108885e-05,
554
- 3.6388610598470864e-05,
555
- 3.5202214605852884e-05,
556
- 3.401581861323491e-05,
557
- 3.282942262061693e-05,
558
- 3.0670181914052204e-05,
559
- 2.948378592143422e-05,
560
- 2.8297389928816243e-05,
561
- 2.711099393619826e-05,
562
- 2.5924597943580284e-05,
563
- 2.4738201950962303e-05,
564
- 2.3551805958344316e-05,
565
- 2.1392565251779595e-05,
566
- 2.020616925916161e-05,
567
- 1.901977326654364e-05,
568
- 1.783337727392566e-05,
569
- 1.6646981281307675e-05,
570
- 1.546058528868969e-05,
571
- 1.427418929607171e-05,
572
- 1.2114948589506984e-05,
573
- 1.0928552596889013e-05,
574
- 9.742156604271029e-06,
575
- 8.555760611653046e-06,
576
- 7.369364619035064e-06,
577
- 6.182968626417082e-06,
578
- 4.996572633799099e-06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
  ],
580
  "emb_lr": [],
581
  "eval_step": [
@@ -588,7 +1148,17 @@
588
  2696,
589
  3087,
590
  3478,
591
- 3869
 
 
 
 
 
 
 
 
 
 
592
  ],
593
  "eval_accuracy": [
594
  0.01,
@@ -600,10 +1170,20 @@
600
  0.0,
601
  0.0,
602
  0.0,
 
 
 
 
 
 
 
 
 
 
603
  0.0
604
  ]
605
  },
606
- "final_accuracy": 0.28291666666666665,
607
  "sft_eval": {
608
  "config": {
609
  "ops": "add_sub",
@@ -614,11 +1194,11 @@
614
  },
615
  "splits": {
616
  "add_S0": {
617
- "full_accuracy": 0.0,
618
  "n_examples": 100,
619
  "per_subtask": {
620
  "SA": {
621
- "accuracy": 0.2066115702479339,
622
  "count": 605
623
  },
624
  "SS": {
@@ -628,327 +1208,327 @@
628
  }
629
  },
630
  "add_S1": {
631
- "full_accuracy": 0.0,
632
  "n_examples": 100,
633
  "per_subtask": {
634
  "SA": {
635
- "accuracy": 0.2647058823529412,
636
  "count": 204
637
  },
638
  "SC": {
639
- "accuracy": 0.11242603550295859,
640
  "count": 169
641
  },
642
  "SS": {
643
- "accuracy": 0.6451612903225806,
644
  "count": 31
645
  },
646
  "UC": {
647
- "accuracy": 0.21621621621621623,
648
  "count": 296
649
  }
650
  }
651
  },
652
  "add_S2": {
653
- "full_accuracy": 0.0,
654
  "n_examples": 100,
655
  "per_subtask": {
656
  "SA": {
657
- "accuracy": 0.3987730061349693,
658
  "count": 163
659
  },
660
  "SC": {
661
- "accuracy": 0.13846153846153847,
662
  "count": 130
663
  },
664
  "SS": {
665
- "accuracy": 0.3448275862068966,
666
  "count": 87
667
  },
668
  "UC": {
669
- "accuracy": 0.35960591133004927,
670
  "count": 203
671
  },
672
  "US": {
673
- "accuracy": 0.6324786324786325,
674
  "count": 117
675
  }
676
  }
677
  },
678
  "add_S3": {
679
- "full_accuracy": 0.0,
680
  "n_examples": 100,
681
  "per_subtask": {
682
  "SA": {
683
- "accuracy": 0.48760330578512395,
684
  "count": 121
685
  },
686
  "SC": {
687
- "accuracy": 0.04132231404958678,
688
  "count": 121
689
  },
690
  "SS": {
691
- "accuracy": 0.6530612244897959,
692
  "count": 49
693
  },
694
  "UC": {
695
- "accuracy": 0.3548387096774194,
696
  "count": 186
697
  },
698
  "US": {
699
- "accuracy": 0.57847533632287,
700
  "count": 223
701
  }
702
  }
703
  },
704
  "add_S4": {
705
- "full_accuracy": 0.0,
706
  "n_examples": 100,
707
  "per_subtask": {
708
  "SA": {
709
- "accuracy": 0.47115384615384615,
710
  "count": 104
711
  },
712
  "SC": {
713
- "accuracy": 0.10377358490566038,
714
  "count": 106
715
  },
716
  "SS": {
717
- "accuracy": 0.5652173913043478,
718
  "count": 23
719
  },
720
  "UC": {
721
- "accuracy": 0.3875,
722
  "count": 160
723
  },
724
  "US": {
725
- "accuracy": 0.5700325732899023,
726
  "count": 307
727
  }
728
  }
729
  },
730
  "add_S5": {
731
- "full_accuracy": 0.0,
732
  "n_examples": 100,
733
  "per_subtask": {
734
  "SA": {
735
- "accuracy": 0.46,
736
  "count": 100
737
  },
738
  "SC": {
739
- "accuracy": 0.01,
740
  "count": 100
741
  },
742
  "UC": {
743
- "accuracy": 0.18,
744
  "count": 100
745
  },
746
  "US": {
747
- "accuracy": 0.09,
748
  "count": 400
749
  }
750
  }
751
  },
752
  "add_S6": {
753
- "full_accuracy": 0.02,
754
  "n_examples": 100,
755
  "per_subtask": {
756
  "SC": {
757
- "accuracy": 0.02,
758
  "count": 100
759
  },
760
  "UC": {
761
- "accuracy": 0.65,
762
  "count": 100
763
  },
764
  "US": {
765
- "accuracy": 0.648,
766
  "count": 500
767
  }
768
  }
769
  },
770
  "add_random": {
771
- "full_accuracy": 0.0,
772
  "n_examples": 200,
773
  "per_subtask": {
774
  "SA": {
775
- "accuracy": 0.2639821029082774,
776
  "count": 447
777
  },
778
  "SC": {
779
- "accuracy": 0.1125,
780
  "count": 320
781
  },
782
  "SS": {
783
- "accuracy": 0.5178571428571429,
784
  "count": 56
785
  },
786
  "UC": {
787
- "accuracy": 0.23062381852551986,
788
  "count": 529
789
  },
790
  "US": {
791
- "accuracy": 0.5625,
792
  "count": 48
793
  }
794
  }
795
  },
796
  "add_C3": {
797
- "full_accuracy": 0.0,
798
  "n_examples": 100,
799
  "per_subtask": {
800
  "SA": {
801
- "accuracy": 0.25,
802
  "count": 300
803
  },
804
  "SC": {
805
- "accuracy": 0.08,
806
  "count": 100
807
  },
808
  "UC": {
809
- "accuracy": 0.11398963730569948,
810
  "count": 193
811
  },
812
  "US": {
813
- "accuracy": 0.14953271028037382,
814
  "count": 107
815
  }
816
  }
817
  },
818
  "add_C4": {
819
- "full_accuracy": 0.0,
820
  "n_examples": 100,
821
  "per_subtask": {
822
  "SA": {
823
- "accuracy": 0.355,
824
  "count": 200
825
  },
826
  "SC": {
827
- "accuracy": 0.03,
828
  "count": 100
829
  },
830
  "UC": {
831
- "accuracy": 0.10546875,
832
  "count": 256
833
  },
834
  "US": {
835
- "accuracy": 0.24305555555555555,
836
  "count": 144
837
  }
838
  }
839
  },
840
  "add_C5": {
841
- "full_accuracy": 0.0,
842
  "n_examples": 100,
843
  "per_subtask": {
844
  "SA": {
845
- "accuracy": 0.5,
846
  "count": 100
847
  },
848
  "SC": {
849
- "accuracy": 0.09,
850
  "count": 100
851
  },
852
  "UC": {
853
- "accuracy": 0.13725490196078433,
854
  "count": 306
855
  },
856
  "US": {
857
- "accuracy": 0.3556701030927835,
858
  "count": 194
859
  }
860
  }
861
  },
862
  "add_C6": {
863
- "full_accuracy": 0.0,
864
  "n_examples": 100,
865
  "per_subtask": {
866
  "SC": {
867
- "accuracy": 0.12,
868
  "count": 100
869
  },
870
  "UC": {
871
- "accuracy": 0.2540983606557377,
872
  "count": 366
873
  },
874
  "US": {
875
- "accuracy": 0.8632478632478633,
876
  "count": 234
877
  }
878
  }
879
  },
880
  "sub_M0": {
881
- "full_accuracy": 0.0,
882
  "n_examples": 100,
883
  "per_subtask": {
884
  "MD": {
885
- "accuracy": 0.20465890183028287,
886
  "count": 601
887
  },
888
  "ME": {
889
- "accuracy": 1.0,
890
  "count": 99
891
  }
892
  }
893
  },
894
  "sub_M1": {
895
- "full_accuracy": 0.0,
896
  "n_examples": 100,
897
  "per_subtask": {
898
  "MD": {
899
- "accuracy": 0.3835125448028674,
900
  "count": 279
901
  },
902
  "MB": {
903
- "accuracy": 0.0,
904
  "count": 145
905
  },
906
  "ME": {
907
- "accuracy": 1.0,
908
  "count": 24
909
  },
910
  "UB": {
911
- "accuracy": 0.09523809523809523,
912
  "count": 252
913
  }
914
  }
915
  },
916
  "sub_M2": {
917
- "full_accuracy": 0.0,
918
  "n_examples": 100,
919
  "per_subtask": {
920
  "MD": {
921
- "accuracy": 0.6150234741784038,
922
  "count": 213
923
  },
924
  "MB": {
925
- "accuracy": 0.0,
926
  "count": 113
927
  },
928
  "ME": {
929
- "accuracy": 1.0,
930
  "count": 85
931
  },
932
  "UB": {
933
- "accuracy": 0.16574585635359115,
934
  "count": 181
935
  },
936
  "UD": {
937
- "accuracy": 0.0,
938
  "count": 108
939
  }
940
  }
941
  },
942
  "sub_M3": {
943
- "full_accuracy": 0.0,
944
  "n_examples": 100,
945
  "per_subtask": {
946
  "MD": {
947
- "accuracy": 0.7597765363128491,
948
  "count": 179
949
  },
950
  "MB": {
951
- "accuracy": 0.0,
952
  "count": 103
953
  },
954
  "ME": {
@@ -956,33 +1536,33 @@
956
  "count": 56
957
  },
958
  "UB": {
959
- "accuracy": 0.12080536912751678,
960
  "count": 149
961
  },
962
  "UD": {
963
- "accuracy": 0.0,
964
  "count": 213
965
  }
966
  }
967
  },
968
  "sub_M4": {
969
- "full_accuracy": 0.0,
970
  "n_examples": 100,
971
  "per_subtask": {
972
  "MD": {
973
- "accuracy": 0.5,
974
  "count": 200
975
  },
976
  "MB": {
977
- "accuracy": 0.0,
978
  "count": 100
979
  },
980
  "UB": {
981
- "accuracy": 0.3,
982
  "count": 100
983
  },
984
  "UD": {
985
- "accuracy": 0.0,
986
  "count": 300
987
  }
988
  }
@@ -996,29 +1576,29 @@
996
  "count": 100
997
  },
998
  "MB": {
999
- "accuracy": 0.0,
1000
  "count": 100
1001
  },
1002
  "UB": {
1003
- "accuracy": 0.31,
1004
  "count": 100
1005
  },
1006
  "UD": {
1007
- "accuracy": 0.0,
1008
  "count": 400
1009
  }
1010
  }
1011
  },
1012
  "sub_random": {
1013
- "full_accuracy": 0.0,
1014
  "n_examples": 200,
1015
  "per_subtask": {
1016
  "MD": {
1017
- "accuracy": 0.3616666666666667,
1018
  "count": 600
1019
  },
1020
  "MB": {
1021
- "accuracy": 0.0,
1022
  "count": 267
1023
  },
1024
  "ME": {
@@ -1026,61 +1606,61 @@
1026
  "count": 53
1027
  },
1028
  "UB": {
1029
- "accuracy": 0.12072892938496584,
1030
  "count": 439
1031
  },
1032
  "UD": {
1033
- "accuracy": 0.0,
1034
  "count": 41
1035
  }
1036
  }
1037
  },
1038
  "sub_B3": {
1039
- "full_accuracy": 0.0,
1040
  "n_examples": 100,
1041
  "per_subtask": {
1042
  "MD": {
1043
- "accuracy": 0.3333333333333333,
1044
  "count": 300
1045
  },
1046
  "MB": {
1047
- "accuracy": 0.0,
1048
  "count": 100
1049
  },
1050
  "UB": {
1051
- "accuracy": 0.17766497461928935,
1052
  "count": 197
1053
  },
1054
  "UD": {
1055
- "accuracy": 0.0,
1056
  "count": 103
1057
  }
1058
  }
1059
  },
1060
  "sub_B4": {
1061
- "full_accuracy": 0.0,
1062
  "n_examples": 100,
1063
  "per_subtask": {
1064
  "MD": {
1065
- "accuracy": 0.5,
1066
  "count": 200
1067
  },
1068
  "MB": {
1069
- "accuracy": 0.0,
1070
  "count": 100
1071
  },
1072
  "UB": {
1073
- "accuracy": 0.145748987854251,
1074
  "count": 247
1075
  },
1076
  "UD": {
1077
- "accuracy": 0.0,
1078
  "count": 153
1079
  }
1080
  }
1081
  },
1082
  "sub_B5": {
1083
- "full_accuracy": 0.0,
1084
  "n_examples": 100,
1085
  "per_subtask": {
1086
  "MD": {
@@ -1088,22 +1668,22 @@
1088
  "count": 100
1089
  },
1090
  "MB": {
1091
- "accuracy": 0.0,
1092
  "count": 100
1093
  },
1094
  "UB": {
1095
- "accuracy": 0.11073825503355705,
1096
  "count": 298
1097
  },
1098
  "UD": {
1099
- "accuracy": 0.0,
1100
  "count": 202
1101
  }
1102
  }
1103
  }
1104
  },
1105
  "summary": {
1106
- "overall_accuracy": 0.0008333333333333334,
1107
  "total_examples": 2400,
1108
  "n_splits": 22
1109
  }
@@ -1118,29 +1698,29 @@
1118
  },
1119
  "splits": {
1120
  "add_S0": {
1121
- "full_accuracy": 0.74,
1122
  "n_examples": 100,
1123
  "per_subtask": {
1124
  "SA": {
1125
- "accuracy": 0.9603305785123967,
1126
  "count": 605
1127
  },
1128
  "SS": {
1129
- "accuracy": 0.9578947368421052,
1130
  "count": 95
1131
  }
1132
  }
1133
  },
1134
  "add_S1": {
1135
- "full_accuracy": 0.58,
1136
  "n_examples": 100,
1137
  "per_subtask": {
1138
  "SA": {
1139
- "accuracy": 0.9362745098039216,
1140
  "count": 204
1141
  },
1142
  "SC": {
1143
- "accuracy": 0.9585798816568047,
1144
  "count": 169
1145
  },
1146
  "SS": {
@@ -1148,65 +1728,65 @@
1148
  "count": 31
1149
  },
1150
  "UC": {
1151
- "accuracy": 0.875,
1152
  "count": 296
1153
  }
1154
  }
1155
  },
1156
  "add_S2": {
1157
- "full_accuracy": 0.35,
1158
  "n_examples": 100,
1159
  "per_subtask": {
1160
  "SA": {
1161
- "accuracy": 0.9693251533742331,
1162
  "count": 163
1163
  },
1164
  "SC": {
1165
- "accuracy": 0.9076923076923077,
1166
  "count": 130
1167
  },
1168
  "SS": {
1169
- "accuracy": 0.9080459770114943,
1170
  "count": 87
1171
  },
1172
  "UC": {
1173
- "accuracy": 0.7192118226600985,
1174
  "count": 203
1175
  },
1176
  "US": {
1177
- "accuracy": 0.9316239316239316,
1178
  "count": 117
1179
  }
1180
  }
1181
  },
1182
  "add_S3": {
1183
- "full_accuracy": 0.19,
1184
  "n_examples": 100,
1185
  "per_subtask": {
1186
  "SA": {
1187
- "accuracy": 0.9834710743801653,
1188
  "count": 121
1189
  },
1190
  "SC": {
1191
- "accuracy": 0.9504132231404959,
1192
  "count": 121
1193
  },
1194
  "SS": {
1195
- "accuracy": 0.9183673469387755,
1196
  "count": 49
1197
  },
1198
  "UC": {
1199
- "accuracy": 0.6612903225806451,
1200
  "count": 186
1201
  },
1202
  "US": {
1203
- "accuracy": 0.6860986547085202,
1204
  "count": 223
1205
  }
1206
  }
1207
  },
1208
  "add_S4": {
1209
- "full_accuracy": 0.2,
1210
  "n_examples": 100,
1211
  "per_subtask": {
1212
  "SA": {
@@ -1214,7 +1794,7 @@
1214
  "count": 104
1215
  },
1216
  "SC": {
1217
- "accuracy": 0.9433962264150944,
1218
  "count": 106
1219
  },
1220
  "SS": {
@@ -1222,17 +1802,17 @@
1222
  "count": 23
1223
  },
1224
  "UC": {
1225
- "accuracy": 0.64375,
1226
  "count": 160
1227
  },
1228
  "US": {
1229
- "accuracy": 0.5309446254071661,
1230
  "count": 307
1231
  }
1232
  }
1233
  },
1234
  "add_S5": {
1235
- "full_accuracy": 0.05,
1236
  "n_examples": 100,
1237
  "per_subtask": {
1238
  "SA": {
@@ -1240,21 +1820,21 @@
1240
  "count": 100
1241
  },
1242
  "SC": {
1243
- "accuracy": 0.98,
1244
  "count": 100
1245
  },
1246
  "UC": {
1247
- "accuracy": 0.36,
1248
  "count": 100
1249
  },
1250
  "US": {
1251
- "accuracy": 0.3325,
1252
  "count": 400
1253
  }
1254
  }
1255
  },
1256
  "add_S6": {
1257
- "full_accuracy": 0.03,
1258
  "n_examples": 100,
1259
  "per_subtask": {
1260
  "SC": {
@@ -1262,25 +1842,25 @@
1262
  "count": 100
1263
  },
1264
  "UC": {
1265
- "accuracy": 0.28,
1266
  "count": 100
1267
  },
1268
  "US": {
1269
- "accuracy": 0.298,
1270
  "count": 500
1271
  }
1272
  }
1273
  },
1274
  "add_random": {
1275
- "full_accuracy": 0.575,
1276
  "n_examples": 200,
1277
  "per_subtask": {
1278
  "SA": {
1279
- "accuracy": 0.9552572706935123,
1280
  "count": 447
1281
  },
1282
  "SC": {
1283
- "accuracy": 0.9625,
1284
  "count": 320
1285
  },
1286
  "SS": {
@@ -1288,21 +1868,21 @@
1288
  "count": 56
1289
  },
1290
  "UC": {
1291
- "accuracy": 0.8657844990548205,
1292
  "count": 529
1293
  },
1294
  "US": {
1295
- "accuracy": 0.7916666666666666,
1296
  "count": 48
1297
  }
1298
  }
1299
  },
1300
  "add_C3": {
1301
- "full_accuracy": 0.41,
1302
  "n_examples": 100,
1303
  "per_subtask": {
1304
  "SA": {
1305
- "accuracy": 0.9833333333333333,
1306
  "count": 300
1307
  },
1308
  "SC": {
@@ -1310,17 +1890,17 @@
1310
  "count": 100
1311
  },
1312
  "UC": {
1313
- "accuracy": 0.7150259067357513,
1314
  "count": 193
1315
  },
1316
  "US": {
1317
- "accuracy": 0.7102803738317757,
1318
  "count": 107
1319
  }
1320
  }
1321
  },
1322
  "add_C4": {
1323
- "full_accuracy": 0.32,
1324
  "n_examples": 100,
1325
  "per_subtask": {
1326
  "SA": {
@@ -1332,17 +1912,17 @@
1332
  "count": 100
1333
  },
1334
  "UC": {
1335
- "accuracy": 0.71875,
1336
  "count": 256
1337
  },
1338
  "US": {
1339
- "accuracy": 0.7291666666666666,
1340
  "count": 144
1341
  }
1342
  }
1343
  },
1344
  "add_C5": {
1345
- "full_accuracy": 0.23,
1346
  "n_examples": 100,
1347
  "per_subtask": {
1348
  "SA": {
@@ -1350,21 +1930,21 @@
1350
  "count": 100
1351
  },
1352
  "SC": {
1353
- "accuracy": 0.97,
1354
  "count": 100
1355
  },
1356
  "UC": {
1357
- "accuracy": 0.696078431372549,
1358
  "count": 306
1359
  },
1360
  "US": {
1361
- "accuracy": 0.6752577319587629,
1362
  "count": 194
1363
  }
1364
  }
1365
  },
1366
  "add_C6": {
1367
- "full_accuracy": 0.23,
1368
  "n_examples": 100,
1369
  "per_subtask": {
1370
  "SC": {
@@ -1372,57 +1952,57 @@
1372
  "count": 100
1373
  },
1374
  "UC": {
1375
- "accuracy": 0.7486338797814208,
1376
  "count": 366
1377
  },
1378
  "US": {
1379
- "accuracy": 0.7136752136752137,
1380
  "count": 234
1381
  }
1382
  }
1383
  },
1384
  "sub_M0": {
1385
- "full_accuracy": 0.61,
1386
  "n_examples": 100,
1387
  "per_subtask": {
1388
  "MD": {
1389
- "accuracy": 0.9234608985024958,
1390
  "count": 601
1391
  },
1392
  "ME": {
1393
- "accuracy": 0.9797979797979798,
1394
  "count": 99
1395
  }
1396
  }
1397
  },
1398
  "sub_M1": {
1399
- "full_accuracy": 0.45,
1400
  "n_examples": 100,
1401
  "per_subtask": {
1402
  "MD": {
1403
- "accuracy": 0.9605734767025089,
1404
  "count": 279
1405
  },
1406
  "MB": {
1407
- "accuracy": 0.9241379310344827,
1408
  "count": 145
1409
  },
1410
  "ME": {
1411
- "accuracy": 0.875,
1412
  "count": 24
1413
  },
1414
  "UB": {
1415
- "accuracy": 0.8015873015873016,
1416
  "count": 252
1417
  }
1418
  }
1419
  },
1420
  "sub_M2": {
1421
- "full_accuracy": 0.21,
1422
  "n_examples": 100,
1423
  "per_subtask": {
1424
  "MD": {
1425
- "accuracy": 0.9530516431924883,
1426
  "count": 213
1427
  },
1428
  "MB": {
@@ -1434,25 +2014,25 @@
1434
  "count": 85
1435
  },
1436
  "UB": {
1437
- "accuracy": 0.5524861878453039,
1438
  "count": 181
1439
  },
1440
  "UD": {
1441
- "accuracy": 0.7685185185185185,
1442
  "count": 108
1443
  }
1444
  }
1445
  },
1446
  "sub_M3": {
1447
- "full_accuracy": 0.07,
1448
  "n_examples": 100,
1449
  "per_subtask": {
1450
  "MD": {
1451
- "accuracy": 0.9888268156424581,
1452
  "count": 179
1453
  },
1454
  "MB": {
1455
- "accuracy": 0.941747572815534,
1456
  "count": 103
1457
  },
1458
  "ME": {
@@ -1460,39 +2040,39 @@
1460
  "count": 56
1461
  },
1462
  "UB": {
1463
- "accuracy": 0.4563758389261745,
1464
  "count": 149
1465
  },
1466
  "UD": {
1467
- "accuracy": 0.4788732394366197,
1468
  "count": 213
1469
  }
1470
  }
1471
  },
1472
  "sub_M4": {
1473
- "full_accuracy": 0.03,
1474
  "n_examples": 100,
1475
  "per_subtask": {
1476
  "MD": {
1477
- "accuracy": 0.935,
1478
  "count": 200
1479
  },
1480
  "MB": {
1481
- "accuracy": 0.96,
1482
  "count": 100
1483
  },
1484
  "UB": {
1485
- "accuracy": 0.34,
1486
  "count": 100
1487
  },
1488
  "UD": {
1489
- "accuracy": 0.23333333333333334,
1490
  "count": 300
1491
  }
1492
  }
1493
  },
1494
  "sub_M5": {
1495
- "full_accuracy": 0.06,
1496
  "n_examples": 100,
1497
  "per_subtask": {
1498
  "MD": {
@@ -1504,87 +2084,87 @@
1504
  "count": 100
1505
  },
1506
  "UB": {
1507
- "accuracy": 0.38,
1508
  "count": 100
1509
  },
1510
  "UD": {
1511
- "accuracy": 0.235,
1512
  "count": 400
1513
  }
1514
  }
1515
  },
1516
  "sub_random": {
1517
- "full_accuracy": 0.32,
1518
  "n_examples": 200,
1519
  "per_subtask": {
1520
  "MD": {
1521
- "accuracy": 0.935,
1522
  "count": 600
1523
  },
1524
  "MB": {
1525
- "accuracy": 0.898876404494382,
1526
  "count": 267
1527
  },
1528
  "ME": {
1529
- "accuracy": 0.9433962264150944,
1530
  "count": 53
1531
  },
1532
  "UB": {
1533
- "accuracy": 0.7562642369020501,
1534
  "count": 439
1535
  },
1536
  "UD": {
1537
- "accuracy": 0.7804878048780488,
1538
  "count": 41
1539
  }
1540
  }
1541
  },
1542
  "sub_B3": {
1543
- "full_accuracy": 0.13,
1544
  "n_examples": 100,
1545
  "per_subtask": {
1546
  "MD": {
1547
- "accuracy": 0.8833333333333333,
1548
  "count": 300
1549
  },
1550
  "MB": {
1551
- "accuracy": 0.95,
1552
  "count": 100
1553
  },
1554
  "UB": {
1555
- "accuracy": 0.5482233502538071,
1556
  "count": 197
1557
  },
1558
  "UD": {
1559
- "accuracy": 0.5533980582524272,
1560
  "count": 103
1561
  }
1562
  }
1563
  },
1564
  "sub_B4": {
1565
- "full_accuracy": 0.09,
1566
  "n_examples": 100,
1567
  "per_subtask": {
1568
  "MD": {
1569
- "accuracy": 0.95,
1570
  "count": 200
1571
  },
1572
  "MB": {
1573
- "accuracy": 0.97,
1574
  "count": 100
1575
  },
1576
  "UB": {
1577
- "accuracy": 0.5546558704453441,
1578
  "count": 247
1579
  },
1580
  "UD": {
1581
- "accuracy": 0.47058823529411764,
1582
  "count": 153
1583
  }
1584
  }
1585
  },
1586
  "sub_B5": {
1587
- "full_accuracy": 0.04,
1588
  "n_examples": 100,
1589
  "per_subtask": {
1590
  "MD": {
@@ -1596,22 +2176,22 @@
1596
  "count": 100
1597
  },
1598
  "UB": {
1599
- "accuracy": 0.540268456375839,
1600
  "count": 298
1601
  },
1602
  "UD": {
1603
- "accuracy": 0.4603960396039604,
1604
  "count": 202
1605
  }
1606
  }
1607
  }
1608
  },
1609
  "summary": {
1610
- "overall_accuracy": 0.28291666666666665,
1611
  "total_examples": 2400,
1612
  "n_splits": 22
1613
  }
1614
  },
1615
- "sorl_overall_accuracy": 0.28291666666666665,
1616
- "sft_overall_accuracy": 0.0008333333333333334
1617
  }
 
70
  3719,
71
  3769,
72
  3819,
73
+ 3869,
74
+ 3960,
75
+ 4010,
76
+ 4060,
77
+ 4110,
78
+ 4160,
79
+ 4210,
80
+ 4260,
81
+ 4351,
82
+ 4401,
83
+ 4451,
84
+ 4501,
85
+ 4551,
86
+ 4601,
87
+ 4651,
88
+ 4742,
89
+ 4792,
90
+ 4842,
91
+ 4892,
92
+ 4942,
93
+ 4992,
94
+ 5042,
95
+ 5133,
96
+ 5183,
97
+ 5233,
98
+ 5283,
99
+ 5333,
100
+ 5383,
101
+ 5433,
102
+ 5524,
103
+ 5574,
104
+ 5624,
105
+ 5674,
106
+ 5724,
107
+ 5774,
108
+ 5824,
109
+ 5915,
110
+ 5965,
111
+ 6015,
112
+ 6065,
113
+ 6115,
114
+ 6165,
115
+ 6215,
116
+ 6306,
117
+ 6356,
118
+ 6406,
119
+ 6456,
120
+ 6506,
121
+ 6556,
122
+ 6606,
123
+ 6697,
124
+ 6747,
125
+ 6797,
126
+ 6847,
127
+ 6897,
128
+ 6947,
129
+ 6997,
130
+ 7088,
131
+ 7138,
132
+ 7188,
133
+ 7238,
134
+ 7288,
135
+ 7338,
136
+ 7388,
137
+ 7479,
138
+ 7529,
139
+ 7579,
140
+ 7629,
141
+ 7679,
142
+ 7729,
143
+ 7779
144
  ],
145
  "loss": [
146
  -2.59329891204834,
147
  7.806329250335693,
148
+ 4.664865016937256,
149
+ 3.5807652473449707,
150
+ 3.15908145904541,
151
+ 2.915864944458008,
152
+ 3.148441791534424,
153
+ 0.36000847816467285,
154
+ -1.757477879524231,
155
+ -3.409620523452759,
156
+ -5.0319366455078125,
157
+ -7.095430374145508,
158
+ -8.759235382080078,
159
+ -8.767191886901855,
160
+ -8.945306777954102,
161
+ -5.980461597442627,
162
+ -5.657682418823242,
163
+ -5.555356502532959,
164
+ -6.722297668457031,
165
+ -4.748482704162598,
166
+ -3.466064929962158,
167
+ -0.8357032537460327,
168
+ -0.1377798318862915,
169
+ -0.3907341957092285,
170
+ 0.29442957043647766,
171
+ -0.10398596525192261,
172
+ -0.4516690969467163,
173
+ 0.37417611479759216,
174
+ -0.44442543387413025,
175
+ -0.3337244689464569,
176
+ -0.25515031814575195,
177
+ -0.3856278657913208,
178
+ -0.22291865944862366,
179
+ -0.17850244045257568,
180
+ -0.4978151321411133,
181
+ -0.4693702161312103,
182
+ -0.44592294096946716,
183
+ -0.2621464133262634,
184
+ -0.382561594247818,
185
+ -0.5315982699394226,
186
+ -0.3435952961444855,
187
+ -0.329256534576416,
188
+ -0.7089931964874268,
189
+ -0.6072415113449097,
190
+ -0.5729288458824158,
191
+ -0.4047969579696655,
192
+ -0.4896378219127655,
193
+ -0.2720121741294861,
194
+ -0.518795907497406,
195
+ -0.3424687385559082,
196
+ -0.6721977591514587,
197
+ -0.3405545949935913,
198
+ -0.34829750657081604,
199
+ -0.25543728470802307,
200
+ -0.5398901700973511,
201
+ -0.6598642468452454,
202
+ -0.459503173828125,
203
+ -0.5198010802268982,
204
+ -0.38155192136764526,
205
+ -0.5738978385925293,
206
+ -0.6543811559677124,
207
+ -0.6683095693588257,
208
+ -0.37674030661582947,
209
+ -0.2265310287475586,
210
+ -0.8545238971710205,
211
+ -0.2876709997653961,
212
+ -0.9567681550979614,
213
+ -0.35820356011390686,
214
+ -0.499785840511322,
215
+ -0.33761483430862427,
216
+ -0.40017256140708923,
217
+ -0.417964905500412,
218
+ -0.2706613540649414,
219
+ -0.4005718231201172,
220
+ -0.5140082240104675,
221
+ -0.22472795844078064,
222
+ -0.6303619742393494,
223
+ -0.380489319562912,
224
+ -0.43577492237091064,
225
+ 0.025171242654323578,
226
+ -0.4277156889438629,
227
+ -0.5149685144424438,
228
+ -0.3809179961681366,
229
+ -0.3976118862628937,
230
+ -0.5983619689941406,
231
+ -0.6481726765632629,
232
+ -0.6451705098152161,
233
+ -0.2953732907772064,
234
+ -0.3128582835197449,
235
+ -0.5169304013252258,
236
+ -0.44698846340179443,
237
+ -0.6919875144958496,
238
+ -0.13136500120162964,
239
+ -0.7428823709487915,
240
+ -0.26386335492134094,
241
+ -0.5457929968833923,
242
+ -0.48326900601387024,
243
+ -0.354233980178833,
244
+ -0.3980136811733246,
245
+ -0.4296197295188904,
246
+ -0.43872061371803284,
247
+ -0.5549288988113403,
248
+ -0.5576112270355225,
249
+ -0.4448431432247162,
250
+ -0.6850564479827881,
251
+ -0.08526279032230377,
252
+ -0.49758049845695496,
253
+ -0.528289794921875,
254
+ -0.4233647882938385,
255
+ -0.27047842741012573,
256
+ -0.5958406925201416,
257
+ -0.24503447115421295,
258
+ -0.5884737968444824,
259
+ -0.2966604232788086,
260
+ -0.11186803877353668,
261
+ -0.49717044830322266,
262
+ -0.5715808868408203,
263
+ -0.310215026140213,
264
+ -0.2626461088657379,
265
+ -0.3557012379169464,
266
+ -0.4658515751361847,
267
+ -0.07596707344055176,
268
+ -0.22857414186000824,
269
+ -0.4917992651462555,
270
+ 0.018878456205129623,
271
+ -0.4699482321739197,
272
+ -0.19665612280368805,
273
+ -0.45886844396591187,
274
+ -0.5627478957176208,
275
+ -0.020080439746379852,
276
+ -0.28417420387268066,
277
+ -0.10383808612823486,
278
+ -0.4331938624382019,
279
+ -0.027989462018013,
280
+ -0.572080135345459,
281
+ -0.3479749858379364,
282
+ -0.3199583888053894,
283
+ -0.14209412038326263,
284
+ -0.3254658579826355,
285
+ -0.3599850535392761
286
  ],
287
  "base_loss": [
288
  10.196748733520508,
289
  6.539699077606201,
290
+ 3.8725855350494385,
291
+ 2.0681710243225098,
292
+ 1.9675366878509521,
293
+ 1.9196069240570068,
294
+ 1.8533780574798584,
295
+ 1.951110601425171,
296
+ 2.0042569637298584,
297
+ 1.8366247415542603,
298
+ 1.7919471263885498,
299
+ 1.7820709943771362,
300
+ 1.7885769605636597,
301
+ 1.7231441736221313,
302
+ 1.7234703302383423,
303
+ 1.3017929792404175,
304
+ 1.300842523574829,
305
+ 1.2232086658477783,
306
+ 1.3535493612289429,
307
+ 1.1036676168441772,
308
+ 0.9509031176567078,
309
+ 0.5975162386894226,
310
+ 0.47567373514175415,
311
+ 0.5592564344406128,
312
+ 0.518327534198761,
313
+ 0.4585016071796417,
314
+ 0.4952283203601837,
315
+ 0.3824222981929779,
316
+ 0.42844533920288086,
317
+ 0.45068588852882385,
318
+ 0.2935049831867218,
319
+ 0.39543598890304565,
320
+ 0.3400234878063202,
321
+ 0.3020298182964325,
322
+ 0.41195863485336304,
323
+ 0.3548106253147125,
324
+ 0.33770138025283813,
325
+ 0.30280718207359314,
326
+ 0.325740247964859,
327
+ 0.3177395462989807,
328
+ 0.28193262219429016,
329
+ 0.2978305518627167,
330
+ 0.3271159529685974,
331
+ 0.33548837900161743,
332
+ 0.30633482336997986,
333
+ 0.27675923705101013,
334
+ 0.21956755220890045,
335
+ 0.29375362396240234,
336
+ 0.2891789972782135,
337
+ 0.32527607679367065,
338
+ 0.30730611085891724,
339
+ 0.2632617652416229,
340
+ 0.25507208704948425,
341
+ 0.2266814410686493,
342
+ 0.263988196849823,
343
+ 0.2839556336402893,
344
+ 0.2557508945465088,
345
+ 0.24259057641029358,
346
+ 0.2714501917362213,
347
+ 0.21652953326702118,
348
+ 0.27574509382247925,
349
+ 0.2669479250907898,
350
+ 0.2079036980867386,
351
+ 0.19827929139137268,
352
+ 0.2644518315792084,
353
+ 0.24134400486946106,
354
+ 0.3041193187236786,
355
+ 0.2472688853740692,
356
+ 0.24810060858726501,
357
+ 0.2032974809408188,
358
+ 0.2518690824508667,
359
+ 0.1975613385438919,
360
+ 0.19218994677066803,
361
+ 0.22987064719200134,
362
+ 0.21874548494815826,
363
+ 0.20414327085018158,
364
+ 0.1903039664030075,
365
+ 0.20824790000915527,
366
+ 0.25246912240982056,
367
+ 0.17222297191619873,
368
+ 0.22310911118984222,
369
+ 0.19762837886810303,
370
+ 0.2397158443927765,
371
+ 0.2757701277732849,
372
+ 0.2096136063337326,
373
+ 0.19665583968162537,
374
+ 0.21887147426605225,
375
+ 0.25790169835090637,
376
+ 0.20052920281887054,
377
+ 0.1768275797367096,
378
+ 0.20910362899303436,
379
+ 0.23822860419750214,
380
+ 0.19321534037590027,
381
+ 0.2246156632900238,
382
+ 0.1836516112089157,
383
+ 0.17393173277378082,
384
+ 0.1797213852405548,
385
+ 0.1975935846567154,
386
+ 0.17399263381958008,
387
+ 0.16129735112190247,
388
+ 0.22429142892360687,
389
+ 0.22638773918151855,
390
+ 0.14325550198554993,
391
+ 0.16796715557575226,
392
+ 0.22135309875011444,
393
+ 0.16441737115383148,
394
+ 0.22248207032680511,
395
+ 0.14299893379211426,
396
+ 0.1808304786682129,
397
+ 0.14688751101493835,
398
+ 0.17729976773262024,
399
+ 0.15668703615665436,
400
+ 0.19968543946743011,
401
+ 0.1618441343307495,
402
+ 0.13713142275810242,
403
+ 0.15012170374393463,
404
+ 0.18270301818847656,
405
+ 0.13032755255699158,
406
+ 0.13019099831581116,
407
+ 0.15042230486869812,
408
+ 0.18148109316825867,
409
+ 0.1746334582567215,
410
+ 0.1649618148803711,
411
+ 0.15130789577960968,
412
+ 0.1465824693441391,
413
+ 0.14087851345539093,
414
+ 0.14302660524845123,
415
+ 0.15380451083183289,
416
+ 0.15758132934570312,
417
+ 0.12044531852006912,
418
+ 0.15550915896892548,
419
+ 0.15038736164569855,
420
+ 0.1784907430410385,
421
+ 0.0905509814620018,
422
+ 0.14896859228610992,
423
+ 0.14195513725280762,
424
+ 0.15365047752857208,
425
+ 0.13856948912143707,
426
+ 0.11941786110401154,
427
+ 0.11631196737289429
428
  ],
429
  "info_loss": [
430
  -2.1446304321289062,
431
  -0.32575225830078125,
432
+ -0.13529229164123535,
433
+ -0.0395050048828125,
434
+ -0.06857085227966309,
435
+ -0.0878446102142334,
436
+ -0.05781435966491699,
437
+ -0.3329533338546753,
438
+ -0.5450968742370605,
439
+ -0.6930801868438721,
440
+ -0.8512004017829895,
441
+ -1.0552778244018555,
442
+ -1.2221078872680664,
443
+ -1.2155961990356445,
444
+ -1.2316124439239502,
445
+ -0.8885735273361206,
446
+ -0.8468337059020996,
447
+ -0.8221979141235352,
448
+ -0.9432716965675354,
449
+ -0.7168070077896118,
450
+ -0.5614590644836426,
451
+ -0.24384543299674988,
452
+ -0.15267881751060486,
453
+ -0.17980945110321045,
454
+ -0.10525521636009216,
455
+ -0.13857501745224,
456
+ -0.17203545570373535,
457
+ -0.07036077976226807,
458
+ -0.1279563307762146,
459
+ -0.10729706287384033,
460
+ -0.07537534832954407,
461
+ -0.09179061651229858,
462
+ -0.06815844774246216,
463
+ -0.0584297776222229,
464
+ -0.10185521841049194,
465
+ -0.09001076221466064,
466
+ -0.08385393023490906,
467
+ -0.06273548305034637,
468
+ -0.07663877308368683,
469
+ -0.09079703688621521,
470
+ -0.06756959855556488,
471
+ -0.06724132597446442,
472
+ -0.10811074078083038,
473
+ -0.09930633008480072,
474
+ -0.09256221354007721,
475
+ -0.07206308841705322,
476
+ -0.07471893727779388,
477
+ -0.06005033850669861,
478
+ -0.08448712527751923,
479
+ -0.0711066722869873,
480
+ -0.10168524086475372,
481
+ -0.06479458510875702,
482
+ -0.06599287688732147,
483
+ -0.052838653326034546,
484
+ -0.08438156545162201,
485
+ -0.09715546667575836,
486
+ -0.07497446238994598,
487
+ -0.08055919408798218,
488
+ -0.06902116537094116,
489
+ -0.08227460086345673,
490
+ -0.09579434990882874,
491
+ -0.09616953134536743,
492
+ -0.06199820339679718,
493
+ -0.04617848992347717,
494
+ -0.11456663906574249,
495
+ -0.05601660907268524,
496
+ -0.12871989607810974,
497
+ -0.06328007578849792,
498
+ -0.07753662765026093,
499
+ -0.05645681917667389,
500
+ -0.0682779997587204,
501
+ -0.06401722133159637,
502
+ -0.048893705010414124,
503
+ -0.06537330150604248,
504
+ -0.07560169696807861,
505
+ -0.045035943388938904,
506
+ -0.08440657705068588,
507
+ -0.06127925217151642,
508
+ -0.07112739980220795,
509
+ -0.016663089394569397,
510
+ -0.06749387085437775,
511
+ -0.07337355613708496,
512
+ -0.06437526643276215,
513
+ -0.0701342225074768,
514
+ -0.0825263261795044,
515
+ -0.08599703758955002,
516
+ -0.08863493800163269,
517
+ -0.05710361897945404,
518
+ -0.05359072983264923,
519
+ -0.07135023176670074,
520
+ -0.06739573180675507,
521
+ -0.09494532644748688,
522
+ -0.03425717353820801,
523
+ -0.09863488376140594,
524
+ -0.04627199470996857,
525
+ -0.07348194718360901,
526
+ -0.06861130893230438,
527
+ -0.056984856724739075,
528
+ -0.05861884355545044,
529
+ -0.06040181219577789,
530
+ -0.06799885630607605,
531
+ -0.0795242041349411,
532
+ -0.07134532183408737,
533
+ -0.06287021934986115,
534
+ -0.09188182651996613,
535
+ -0.026334315538406372,
536
+ -0.07318030297756195,
537
+ -0.06862989813089371,
538
+ -0.06157950311899185,
539
+ -0.04337166249752045,
540
+ -0.07880505174398422,
541
+ -0.041538387537002563,
542
+ -0.07995711266994476,
543
+ -0.04752259701490402,
544
+ -0.02607489377260208,
545
+ -0.06598459929227829,
546
+ -0.07677405327558517,
547
+ -0.04549660533666611,
548
+ -0.040807873010635376,
549
+ -0.05197954922914505,
550
+ -0.06574590504169464,
551
+ -0.02596910297870636,
552
+ -0.04056032747030258,
553
+ -0.06558245420455933,
554
+ -0.013828963041305542,
555
+ -0.06204167753458023,
556
+ -0.03477794677019119,
557
+ -0.061917684972286224,
558
+ -0.07310272008180618,
559
+ -0.01492491364479065,
560
+ -0.04469820111989975,
561
+ -0.02633901685476303,
562
+ -0.061958588659763336,
563
+ -0.012616284191608429,
564
+ -0.07271414995193481,
565
+ -0.04950237274169922,
566
+ -0.04833126813173294,
567
+ -0.0290987491607666,
568
+ -0.0452541783452034,
569
+ -0.04826049506664276
570
  ],
571
  "abs_loss": [
572
  2.2833845615386963,
573
  2.1349236965179443,
574
+ 1.878380298614502,
575
+ 1.8563865423202515,
576
+ 1.8354262113571167,
577
+ 1.8503063917160034,
578
+ 1.8424841165542603,
579
+ 1.4707140922546387,
580
+ 1.4383527040481567,
581
+ 1.38104248046875,
582
+ 1.3808784484863281,
583
+ 1.3518728017807007,
584
+ 1.3353508710861206,
585
+ 1.2989592552185059,
586
+ 1.2422951459884644,
587
+ 1.1344904899597168,
588
+ 1.0563679933547974,
589
+ 1.0411640405654907,
590
+ 1.009015679359436,
591
+ 0.9455917477607727,
592
+ 0.853651225566864,
593
+ 0.6120807528495789,
594
+ 0.611920177936554,
595
+ 0.5180513858795166,
596
+ 0.5024454593658447,
597
+ 0.46294403076171875,
598
+ 0.4781595766544342,
599
+ 0.45887699723243713,
600
+ 0.3136500120162964,
601
+ 0.28113308548927307,
602
+ 0.31741151213645935,
603
+ 0.29155611991882324,
604
+ 0.2602435350418091,
605
+ 0.24715155363082886,
606
+ 0.22644563019275665,
607
+ 0.1930122822523117,
608
+ 0.18013298511505127,
609
+ 0.16504378616809845,
610
+ 0.1325383186340332,
611
+ 0.14479666948318481,
612
+ 0.14696170389652252,
613
+ 0.1302548050880432,
614
+ 0.1305399388074875,
615
+ 0.12119248509407043,
616
+ 0.12439487129449844,
617
+ 0.14270903170108795,
618
+ 0.11244238168001175,
619
+ 0.09046735614538193,
620
+ 0.12737874686717987,
621
+ 0.09776115417480469,
622
+ 0.11427365988492966,
623
+ 0.09447143226861954,
624
+ 0.07885978370904922,
625
+ 0.0879351869225502,
626
+ 0.08410171419382095,
627
+ 0.07964954525232315,
628
+ 0.0896737352013588,
629
+ 0.0699366107583046,
630
+ 0.10011555999517441,
631
+ 0.0949270948767662,
632
+ 0.06844495981931686,
633
+ 0.06861484050750732,
634
+ 0.06629729270935059,
635
+ 0.06975804269313812,
636
+ 0.07258649915456772,
637
+ 0.0637349858880043,
638
+ 0.10536119341850281,
639
+ 0.06596854329109192,
640
+ 0.05788467451930046,
641
+ 0.07418698817491531,
642
+ 0.07811196893453598,
643
+ 0.08665874600410461,
644
+ 0.06501919031143188,
645
+ 0.05476010963320732,
646
+ 0.0577680766582489,
647
+ 0.0607517808675766,
648
+ 0.07218018174171448,
649
+ 0.05529147386550903,
650
+ 0.07476102560758591,
651
+ 0.06581460684537888,
652
+ 0.055221568793058395,
653
+ 0.057920634746551514,
654
+ 0.0611116886138916,
655
+ 0.05538506433367729,
656
+ 0.05207012966275215,
657
+ 0.05038874223828316,
658
+ 0.050429850816726685,
659
+ 0.07674344629049301,
660
+ 0.049894899129867554,
661
+ 0.0522899366915226,
662
+ 0.03763778880238533,
663
+ 0.053936153650283813,
664
+ 0.04713640734553337,
665
+ 0.05666078254580498,
666
+ 0.043657347559928894,
667
+ 0.04682837426662445,
668
+ 0.051996469497680664,
669
+ 0.0656459853053093,
670
+ 0.05155832692980766,
671
+ 0.04402616247534752,
672
+ 0.04095534607768059,
673
+ 0.033731523901224136,
674
+ 0.04032720625400543,
675
+ 0.0352306105196476,
676
+ 0.047472912818193436,
677
+ 0.03717735409736633,
678
+ 0.04514874890446663,
679
+ 0.034755889326334,
680
+ 0.05225687846541405,
681
+ 0.06242929399013519,
682
+ 0.06236463785171509,
683
+ 0.03574991598725319,
684
+ 0.03173217549920082,
685
+ 0.04282982647418976,
686
+ 0.047402460128068924,
687
+ 0.04407930746674538,
688
+ 0.050904735922813416,
689
+ 0.03305795416235924,
690
+ 0.049665722995996475,
691
+ 0.05792101100087166,
692
+ 0.048123691231012344,
693
+ 0.0334598682820797,
694
+ 0.02673262171447277,
695
+ 0.03756292536854744,
696
+ 0.04128307104110718,
697
+ 0.03372636064887047,
698
+ 0.04200674965977669,
699
+ 0.03905116394162178,
700
+ 0.041437190026044846,
701
+ 0.04416963458061218,
702
+ 0.03729776665568352,
703
+ 0.04110940173268318,
704
+ 0.03317956626415253,
705
+ 0.03276738524436951,
706
+ 0.03161971643567085,
707
+ 0.026516050100326538,
708
+ 0.04109155386686325,
709
+ 0.03907163813710213,
710
+ 0.021675271913409233,
711
+ 0.04133140295743942
712
  ],
713
  "zipf_loss": [
714
  8.427918434143066,
715
  4.310660362243652,
716
+ 1.9573644399642944,
717
+ 1.7220056056976318,
718
+ 1.6937105655670166,
719
+ 1.6896733045578003,
720
+ 1.6889588832855225,
721
+ 1.5913598537445068,
722
+ 1.5453985929489136,
723
+ 1.546452283859253,
724
+ 1.550032138824463,
725
+ 1.54008948802948,
726
+ 1.5397311449050903,
727
+ 1.5357297658920288,
728
+ 1.5231165885925293,
729
+ 1.4900315999984741,
730
+ 1.4041755199432373,
731
+ 1.3392977714538574,
732
+ 1.2559688091278076,
733
+ 1.2213599681854248,
734
+ 1.112257480621338,
735
+ 0.9440267086029053,
736
+ 0.8521426916122437,
737
+ 0.7962987422943115,
738
+ 0.7784097194671631,
739
+ 0.776968240737915,
740
+ 0.7256412506103516,
741
+ 0.6494739055633545,
742
+ 0.37532755732536316,
743
+ 0.2604469954967499,
744
+ 0.1733570396900177,
745
+ 0.10768669098615646,
746
+ 0.09261798858642578,
747
+ 0.07905035465955734,
748
+ 0.08613384515047073,
749
+ 0.05662556737661362,
750
+ 0.03690169379115105,
751
+ 0.04589689522981644,
752
+ 0.0448320172727108,
753
+ 0.04415290430188179,
754
+ 0.03547190874814987,
755
+ 0.03230065479874611,
756
+ 0.03194422274827957,
757
+ 0.03821423277258873,
758
+ 0.03391891345381737,
759
+ 0.02480378933250904,
760
+ 0.026739798486232758,
761
+ 0.025690853595733643,
762
+ 0.0241585411131382,
763
+ 0.03354579582810402,
764
+ 0.025921151041984558,
765
+ 0.03468235209584236,
766
+ 0.04867320880293846,
767
+ 0.037474267184734344,
768
+ 0.03152717649936676,
769
+ 0.019769782200455666,
770
+ 0.025523221120238304,
771
+ 0.03620664402842522,
772
+ 0.027197975665330887,
773
+ 0.022825945168733597,
774
+ 0.02097274735569954,
775
+ 0.019576314836740494,
776
+ 0.028708282858133316,
777
+ 0.029998784884810448,
778
+ 0.01943192072212696,
779
+ 0.02477763406932354,
780
+ 0.015775376930832863,
781
+ 0.02073144167661667,
782
+ 0.021691380068659782,
783
+ 0.016237128525972366,
784
+ 0.022927194833755493,
785
+ 0.015980161726474762,
786
+ 0.01958383247256279,
787
+ 0.017814546823501587,
788
+ 0.017486432567238808,
789
+ 0.015413029119372368,
790
+ 0.016181830316781998,
791
+ 0.018526144325733185,
792
+ 0.015553897246718407,
793
+ 0.012997702695429325,
794
+ 0.01859170012176037,
795
+ 0.015346556901931763,
796
+ 0.017007671296596527,
797
+ 0.02242172136902809,
798
+ 0.012080667540431023,
799
+ 0.010103044100105762,
800
+ 0.01726445183157921,
801
+ 0.0100869070738554,
802
+ 0.01753026247024536,
803
+ 0.014515362679958344,
804
+ 0.014101480133831501,
805
+ 0.013843490742146969,
806
+ 0.01327776163816452,
807
+ 0.013184715062379837,
808
+ 0.010839240625500679,
809
+ 0.010411908850073814,
810
+ 0.017923086881637573,
811
+ 0.011456401087343693,
812
+ 0.00902628991752863,
813
+ 0.008698401972651482,
814
+ 0.012880988419055939,
815
+ 0.010552301071584225,
816
+ 0.008553767576813698,
817
+ 0.012368827126920223,
818
+ 0.0076613957062363625,
819
+ 0.009945258498191833,
820
+ 0.00722561264410615,
821
+ 0.011534623801708221,
822
+ 0.006374058313667774,
823
+ 0.010107763111591339,
824
+ 0.0086736511439085,
825
+ 0.010087384842336178,
826
+ 0.008238681592047215,
827
+ 0.012438456527888775,
828
+ 0.007009214721620083,
829
+ 0.008145928382873535,
830
+ 0.008366182446479797,
831
+ 0.011117709800601006,
832
+ 0.01027502678334713,
833
+ 0.0078798308968544,
834
+ 0.005313994362950325,
835
+ 0.005744512192904949,
836
+ 0.009394066408276558,
837
+ 0.008961104787886143,
838
+ 0.006457311101257801,
839
+ 0.006217380054295063,
840
+ 0.0038960399106144905,
841
+ 0.002598780207335949,
842
+ 0.006554239895194769,
843
+ 0.0043064141646027565,
844
+ 0.003568857442587614,
845
+ 0.005053797736763954,
846
+ 0.004583287984132767,
847
+ 0.0043456582352519035,
848
+ 0.0029307929798960686,
849
+ 0.0024419911205768585,
850
+ 0.00559467077255249,
851
+ 0.00641672033816576,
852
+ 0.005490543320775032,
853
+ 0.002174779772758484
854
  ],
855
  "denoise_loss": [],
856
  "ortho_loss": [
857
  0.7092215418815613,
858
  0.46743276715278625,
859
+ 0.3644844591617584,
860
+ 0.25443753600120544,
861
+ 0.23383183777332306,
862
+ 0.20093315839767456,
863
+ 0.20074476301670074,
864
+ 0.23113077878952026,
865
+ 0.22607871890068054,
866
+ 0.2465149462223053,
867
+ 0.2687225341796875,
868
+ 0.24249237775802612,
869
+ 0.2577590048313141,
870
+ 0.266035795211792,
871
+ 0.2552751898765564,
872
+ 0.2689358592033386,
873
+ 0.269536554813385,
874
+ 0.2752798795700073,
875
+ 0.28143933415412903,
876
+ 0.2838402986526489,
877
+ 0.27934321761131287,
878
+ 0.27792537212371826,
879
+ 0.278600811958313,
880
+ 0.2771250605583191,
881
+ 0.2715357840061188,
882
+ 0.269247442483902,
883
+ 0.27251896262168884,
884
+ 0.26525673270225525,
885
+ 0.266304612159729,
886
+ 0.2764833867549896,
887
+ 0.26652833819389343,
888
+ 0.2704552710056305,
889
+ 0.2712864279747009,
890
+ 0.2649822235107422,
891
+ 0.27131664752960205,
892
+ 0.26084455847740173,
893
+ 0.2620142698287964,
894
+ 0.258016437292099,
895
+ 0.24880310893058777,
896
+ 0.24396663904190063,
897
+ 0.23598477244377136,
898
+ 0.2386299967765808,
899
+ 0.23005273938179016,
900
+ 0.22665658593177795,
901
+ 0.22420556843280792,
902
+ 0.2224186211824417,
903
+ 0.22427774965763092,
904
+ 0.21841591596603394,
905
+ 0.22097480297088623,
906
+ 0.21527259051799774,
907
+ 0.2151358276605606,
908
+ 0.2146364003419876,
909
+ 0.20944494009017944,
910
+ 0.21144999563694,
911
+ 0.20593683421611786,
912
+ 0.20386382937431335,
913
+ 0.19745765626430511,
914
+ 0.19894321262836456,
915
+ 0.19742432236671448,
916
+ 0.19608557224273682,
917
+ 0.18835635483264923,
918
+ 0.18813860416412354,
919
+ 0.18744516372680664,
920
+ 0.18192793428897858,
921
+ 0.1776714324951172,
922
+ 0.18126605451107025,
923
+ 0.18171174824237823,
924
+ 0.18377967178821564,
925
+ 0.18195678293704987,
926
+ 0.18419532477855682,
927
+ 0.1809682846069336,
928
+ 0.17715398967266083,
929
+ 0.1780245155096054,
930
+ 0.17764145135879517,
931
+ 0.17595191299915314,
932
+ 0.17716969549655914,
933
+ 0.17705252766609192,
934
+ 0.17322954535484314,
935
+ 0.1746148020029068,
936
+ 0.17197285592556,
937
+ 0.17283350229263306,
938
+ 0.17129376530647278,
939
+ 0.1728857010602951,
940
+ 0.17178265750408173,
941
+ 0.16882957518100739,
942
+ 0.16890282928943634,
943
+ 0.17142780125141144,
944
+ 0.17491458356380463,
945
+ 0.1703752726316452,
946
+ 0.16940584778785706,
947
+ 0.1734863966703415,
948
+ 0.1734330803155899,
949
+ 0.17536111176013947,
950
+ 0.17814159393310547,
951
+ 0.18004973232746124,
952
+ 0.1793045997619629,
953
+ 0.18151059746742249,
954
+ 0.17672353982925415,
955
+ 0.17982426285743713,
956
+ 0.18007126450538635,
957
+ 0.17900310456752777,
958
+ 0.18146604299545288,
959
+ 0.1807563602924347,
960
+ 0.17732296884059906,
961
+ 0.18004903197288513,
962
+ 0.18047833442687988,
963
+ 0.18000200390815735,
964
+ 0.1829339563846588,
965
+ 0.18131765723228455,
966
+ 0.18021774291992188,
967
+ 0.17930588126182556,
968
+ 0.1804979145526886,
969
+ 0.18043546378612518,
970
+ 0.18063659965991974,
971
+ 0.18216735124588013,
972
+ 0.1802050769329071,
973
+ 0.1801634132862091,
974
+ 0.18029654026031494,
975
+ 0.18130487203598022,
976
+ 0.18213582038879395,
977
+ 0.1824827641248703,
978
+ 0.18340104818344116,
979
+ 0.18511319160461426,
980
+ 0.185243159532547,
981
+ 0.18528737127780914,
982
+ 0.18479134142398834,
983
+ 0.18610772490501404,
984
+ 0.18649695813655853,
985
+ 0.18489259481430054,
986
+ 0.18530786037445068,
987
+ 0.18532846868038177,
988
+ 0.1857357919216156,
989
+ 0.18599264323711395,
990
+ 0.18614186346530914,
991
+ 0.18604278564453125,
992
+ 0.1854981780052185,
993
+ 0.18638573586940765,
994
+ 0.18637895584106445,
995
+ 0.18691058456897736,
996
+ 0.18711933493614197
997
  ],
998
  "lr": [
999
  1.6752136752136756e-05,
1000
  3.384615384615385e-05,
1001
+ 5.094017094017095e-05,
1002
+ 6.803418803418804e-05,
1003
+ 8e-05,
1004
+ 8e-05,
1005
+ 8e-05,
1006
+ 8e-05,
1007
+ 8e-05,
1008
+ 8e-05,
1009
+ 8e-05,
1010
+ 8e-05,
1011
+ 8e-05,
1012
+ 8e-05,
1013
+ 8e-05,
1014
+ 8e-05,
1015
+ 8e-05,
1016
+ 8e-05,
1017
+ 8e-05,
1018
+ 8e-05,
1019
+ 8e-05,
1020
+ 8e-05,
1021
+ 8e-05,
1022
+ 8e-05,
1023
+ 8e-05,
1024
+ 8e-05,
1025
+ 8e-05,
1026
+ 8e-05,
1027
+ 8e-05,
1028
+ 8e-05,
1029
+ 8e-05,
1030
+ 8e-05,
1031
+ 8e-05,
1032
+ 8e-05,
1033
+ 8e-05,
1034
+ 8e-05,
1035
+ 8e-05,
1036
+ 8e-05,
1037
+ 8e-05,
1038
+ 8e-05,
1039
+ 8e-05,
1040
+ 8e-05,
1041
+ 8e-05,
1042
+ 8e-05,
1043
+ 8e-05,
1044
+ 8e-05,
1045
+ 8e-05,
1046
+ 8e-05,
1047
+ 8e-05,
1048
+ 8e-05,
1049
+ 8e-05,
1050
+ 8e-05,
1051
+ 8e-05,
1052
+ 8e-05,
1053
+ 8e-05,
1054
+ 8e-05,
1055
+ 8e-05,
1056
+ 8e-05,
1057
+ 8e-05,
1058
+ 8e-05,
1059
+ 8e-05,
1060
+ 8e-05,
1061
+ 8e-05,
1062
+ 8e-05,
1063
+ 8e-05,
1064
+ 8e-05,
1065
+ 8e-05,
1066
+ 8e-05,
1067
+ 8e-05,
1068
+ 8e-05,
1069
+ 8e-05,
1070
+ 8e-05,
1071
+ 8e-05,
1072
+ 8e-05,
1073
+ 8e-05,
1074
+ 8e-05,
1075
+ 8e-05,
1076
+ 8e-05,
1077
+ 8e-05,
1078
+ 8e-05,
1079
+ 8e-05,
1080
+ 8e-05,
1081
+ 8e-05,
1082
+ 8e-05,
1083
+ 8e-05,
1084
+ 7.987186923279727e-05,
1085
+ 7.868547324017929e-05,
1086
+ 7.74990772475613e-05,
1087
+ 7.631268125494332e-05,
1088
+ 7.512628526232533e-05,
1089
+ 7.393988926970736e-05,
1090
+ 7.178064856314262e-05,
1091
+ 7.059425257052466e-05,
1092
+ 6.940785657790668e-05,
1093
+ 6.822146058528868e-05,
1094
+ 6.703506459267071e-05,
1095
+ 6.584866860005273e-05,
1096
+ 6.466227260743475e-05,
1097
+ 6.250303190087003e-05,
1098
+ 6.131663590825203e-05,
1099
+ 6.0130239915634074e-05,
1100
+ 5.894384392301608e-05,
1101
+ 5.77574479303981e-05,
1102
+ 5.657105193778013e-05,
1103
+ 5.538465594516214e-05,
1104
+ 5.3225415238597426e-05,
1105
+ 5.203901924597944e-05,
1106
+ 5.0852623253361464e-05,
1107
+ 4.966622726074349e-05,
1108
+ 4.8479831268125495e-05,
1109
+ 4.729343527550752e-05,
1110
+ 4.610703928288953e-05,
1111
+ 4.3947798576324816e-05,
1112
+ 4.276140258370682e-05,
1113
+ 4.157500659108885e-05,
1114
+ 4.038861059847088e-05,
1115
+ 3.9202214605852886e-05,
1116
+ 3.801581861323492e-05,
1117
+ 3.6829422620616924e-05,
1118
+ 3.46701819140522e-05,
1119
+ 3.348378592143423e-05,
1120
+ 3.229738992881624e-05,
1121
+ 3.111099393619827e-05,
1122
+ 2.9924597943580276e-05,
1123
+ 2.8738201950962302e-05,
1124
+ 2.7551805958344328e-05,
1125
+ 2.5392565251779594e-05,
1126
+ 2.4206169259161623e-05,
1127
+ 2.3019773266543632e-05,
1128
+ 2.1833377273925658e-05,
1129
+ 2.0646981281307667e-05,
1130
+ 1.9460585288689693e-05,
1131
+ 1.8274189296071722e-05,
1132
+ 1.6114948589506984e-05,
1133
+ 1.4928552596889012e-05,
1134
+ 1.374215660427102e-05,
1135
+ 1.2555760611653047e-05,
1136
+ 1.1369364619035074e-05,
1137
+ 1.0182968626417082e-05,
1138
+ 8.99657263379911e-06
1139
  ],
1140
  "emb_lr": [],
1141
  "eval_step": [
 
1148
  2696,
1149
  3087,
1150
  3478,
1151
+ 3869,
1152
+ 4260,
1153
+ 4651,
1154
+ 5042,
1155
+ 5433,
1156
+ 5824,
1157
+ 6215,
1158
+ 6606,
1159
+ 6997,
1160
+ 7388,
1161
+ 7779
1162
  ],
1163
  "eval_accuracy": [
1164
  0.01,
 
1170
  0.0,
1171
  0.0,
1172
  0.0,
1173
+ 0.0,
1174
+ 0.0,
1175
+ 0.0,
1176
+ 0.0,
1177
+ 0.0,
1178
+ 0.0,
1179
+ 0.0,
1180
+ 0.0,
1181
+ 0.0,
1182
+ 0.0,
1183
  0.0
1184
  ]
1185
  },
1186
+ "final_accuracy": 0.5895833333333333,
1187
  "sft_eval": {
1188
  "config": {
1189
  "ops": "add_sub",
 
1194
  },
1195
  "splits": {
1196
  "add_S0": {
1197
+ "full_accuracy": 0.91,
1198
  "n_examples": 100,
1199
  "per_subtask": {
1200
  "SA": {
1201
+ "accuracy": 0.9851239669421488,
1202
  "count": 605
1203
  },
1204
  "SS": {
 
1208
  }
1209
  },
1210
  "add_S1": {
1211
+ "full_accuracy": 0.86,
1212
  "n_examples": 100,
1213
  "per_subtask": {
1214
  "SA": {
1215
+ "accuracy": 0.9852941176470589,
1216
  "count": 204
1217
  },
1218
  "SC": {
1219
+ "accuracy": 0.9822485207100592,
1220
  "count": 169
1221
  },
1222
  "SS": {
1223
+ "accuracy": 1.0,
1224
  "count": 31
1225
  },
1226
  "UC": {
1227
+ "accuracy": 0.9695945945945946,
1228
  "count": 296
1229
  }
1230
  }
1231
  },
1232
  "add_S2": {
1233
+ "full_accuracy": 0.77,
1234
  "n_examples": 100,
1235
  "per_subtask": {
1236
  "SA": {
1237
+ "accuracy": 1.0,
1238
  "count": 163
1239
  },
1240
  "SC": {
1241
+ "accuracy": 0.9615384615384616,
1242
  "count": 130
1243
  },
1244
  "SS": {
1245
+ "accuracy": 0.9770114942528736,
1246
  "count": 87
1247
  },
1248
  "UC": {
1249
+ "accuracy": 0.9310344827586207,
1250
  "count": 203
1251
  },
1252
  "US": {
1253
+ "accuracy": 0.9743589743589743,
1254
  "count": 117
1255
  }
1256
  }
1257
  },
1258
  "add_S3": {
1259
+ "full_accuracy": 0.53,
1260
  "n_examples": 100,
1261
  "per_subtask": {
1262
  "SA": {
1263
+ "accuracy": 0.9834710743801653,
1264
  "count": 121
1265
  },
1266
  "SC": {
1267
+ "accuracy": 0.9669421487603306,
1268
  "count": 121
1269
  },
1270
  "SS": {
1271
+ "accuracy": 1.0,
1272
  "count": 49
1273
  },
1274
  "UC": {
1275
+ "accuracy": 0.8064516129032258,
1276
  "count": 186
1277
  },
1278
  "US": {
1279
+ "accuracy": 0.9551569506726457,
1280
  "count": 223
1281
  }
1282
  }
1283
  },
1284
  "add_S4": {
1285
+ "full_accuracy": 0.4,
1286
  "n_examples": 100,
1287
  "per_subtask": {
1288
  "SA": {
1289
+ "accuracy": 1.0,
1290
  "count": 104
1291
  },
1292
  "SC": {
1293
+ "accuracy": 0.9716981132075472,
1294
  "count": 106
1295
  },
1296
  "SS": {
1297
+ "accuracy": 1.0,
1298
  "count": 23
1299
  },
1300
  "UC": {
1301
+ "accuracy": 0.8125,
1302
  "count": 160
1303
  },
1304
  "US": {
1305
+ "accuracy": 0.8045602605863192,
1306
  "count": 307
1307
  }
1308
  }
1309
  },
1310
  "add_S5": {
1311
+ "full_accuracy": 0.27,
1312
  "n_examples": 100,
1313
  "per_subtask": {
1314
  "SA": {
1315
+ "accuracy": 1.0,
1316
  "count": 100
1317
  },
1318
  "SC": {
1319
+ "accuracy": 1.0,
1320
  "count": 100
1321
  },
1322
  "UC": {
1323
+ "accuracy": 0.58,
1324
  "count": 100
1325
  },
1326
  "US": {
1327
+ "accuracy": 0.6425,
1328
  "count": 400
1329
  }
1330
  }
1331
  },
1332
  "add_S6": {
1333
+ "full_accuracy": 0.61,
1334
  "n_examples": 100,
1335
  "per_subtask": {
1336
  "SC": {
1337
+ "accuracy": 1.0,
1338
  "count": 100
1339
  },
1340
  "UC": {
1341
+ "accuracy": 0.74,
1342
  "count": 100
1343
  },
1344
  "US": {
1345
+ "accuracy": 0.766,
1346
  "count": 500
1347
  }
1348
  }
1349
  },
1350
  "add_random": {
1351
+ "full_accuracy": 0.865,
1352
  "n_examples": 200,
1353
  "per_subtask": {
1354
  "SA": {
1355
+ "accuracy": 0.9910514541387024,
1356
  "count": 447
1357
  },
1358
  "SC": {
1359
+ "accuracy": 0.975,
1360
  "count": 320
1361
  },
1362
  "SS": {
1363
+ "accuracy": 0.9821428571428571,
1364
  "count": 56
1365
  },
1366
  "UC": {
1367
+ "accuracy": 0.9754253308128544,
1368
  "count": 529
1369
  },
1370
  "US": {
1371
+ "accuracy": 0.9583333333333334,
1372
  "count": 48
1373
  }
1374
  }
1375
  },
1376
  "add_C3": {
1377
+ "full_accuracy": 0.61,
1378
  "n_examples": 100,
1379
  "per_subtask": {
1380
  "SA": {
1381
+ "accuracy": 1.0,
1382
  "count": 300
1383
  },
1384
  "SC": {
1385
+ "accuracy": 1.0,
1386
  "count": 100
1387
  },
1388
  "UC": {
1389
+ "accuracy": 0.8082901554404145,
1390
  "count": 193
1391
  },
1392
  "US": {
1393
+ "accuracy": 0.9158878504672897,
1394
  "count": 107
1395
  }
1396
  }
1397
  },
1398
  "add_C4": {
1399
+ "full_accuracy": 0.65,
1400
  "n_examples": 100,
1401
  "per_subtask": {
1402
  "SA": {
1403
+ "accuracy": 1.0,
1404
  "count": 200
1405
  },
1406
  "SC": {
1407
+ "accuracy": 1.0,
1408
  "count": 100
1409
  },
1410
  "UC": {
1411
+ "accuracy": 0.88671875,
1412
  "count": 256
1413
  },
1414
  "US": {
1415
+ "accuracy": 0.8888888888888888,
1416
  "count": 144
1417
  }
1418
  }
1419
  },
1420
  "add_C5": {
1421
+ "full_accuracy": 0.51,
1422
  "n_examples": 100,
1423
  "per_subtask": {
1424
  "SA": {
1425
+ "accuracy": 1.0,
1426
  "count": 100
1427
  },
1428
  "SC": {
1429
+ "accuracy": 1.0,
1430
  "count": 100
1431
  },
1432
  "UC": {
1433
+ "accuracy": 0.8562091503267973,
1434
  "count": 306
1435
  },
1436
  "US": {
1437
+ "accuracy": 0.9278350515463918,
1438
  "count": 194
1439
  }
1440
  }
1441
  },
1442
  "add_C6": {
1443
+ "full_accuracy": 0.68,
1444
  "n_examples": 100,
1445
  "per_subtask": {
1446
  "SC": {
1447
+ "accuracy": 1.0,
1448
  "count": 100
1449
  },
1450
  "UC": {
1451
+ "accuracy": 0.9153005464480874,
1452
  "count": 366
1453
  },
1454
  "US": {
1455
+ "accuracy": 0.9700854700854701,
1456
  "count": 234
1457
  }
1458
  }
1459
  },
1460
  "sub_M0": {
1461
+ "full_accuracy": 0.88,
1462
  "n_examples": 100,
1463
  "per_subtask": {
1464
  "MD": {
1465
+ "accuracy": 0.9866888519134775,
1466
  "count": 601
1467
  },
1468
  "ME": {
1469
+ "accuracy": 0.9595959595959596,
1470
  "count": 99
1471
  }
1472
  }
1473
  },
1474
  "sub_M1": {
1475
+ "full_accuracy": 0.78,
1476
  "n_examples": 100,
1477
  "per_subtask": {
1478
  "MD": {
1479
+ "accuracy": 0.985663082437276,
1480
  "count": 279
1481
  },
1482
  "MB": {
1483
+ "accuracy": 0.9586206896551724,
1484
  "count": 145
1485
  },
1486
  "ME": {
1487
+ "accuracy": 0.9583333333333334,
1488
  "count": 24
1489
  },
1490
  "UB": {
1491
+ "accuracy": 0.9484126984126984,
1492
  "count": 252
1493
  }
1494
  }
1495
  },
1496
  "sub_M2": {
1497
+ "full_accuracy": 0.35,
1498
  "n_examples": 100,
1499
  "per_subtask": {
1500
  "MD": {
1501
+ "accuracy": 0.9765258215962441,
1502
  "count": 213
1503
  },
1504
  "MB": {
1505
+ "accuracy": 0.9734513274336283,
1506
  "count": 113
1507
  },
1508
  "ME": {
1509
+ "accuracy": 0.9764705882352941,
1510
  "count": 85
1511
  },
1512
  "UB": {
1513
+ "accuracy": 0.6629834254143646,
1514
  "count": 181
1515
  },
1516
  "UD": {
1517
+ "accuracy": 0.9074074074074074,
1518
  "count": 108
1519
  }
1520
  }
1521
  },
1522
  "sub_M3": {
1523
+ "full_accuracy": 0.11,
1524
  "n_examples": 100,
1525
  "per_subtask": {
1526
  "MD": {
1527
+ "accuracy": 0.994413407821229,
1528
  "count": 179
1529
  },
1530
  "MB": {
1531
+ "accuracy": 0.970873786407767,
1532
  "count": 103
1533
  },
1534
  "ME": {
 
1536
  "count": 56
1537
  },
1538
  "UB": {
1539
+ "accuracy": 0.5167785234899329,
1540
  "count": 149
1541
  },
1542
  "UD": {
1543
+ "accuracy": 0.6384976525821596,
1544
  "count": 213
1545
  }
1546
  }
1547
  },
1548
  "sub_M4": {
1549
+ "full_accuracy": 0.09,
1550
  "n_examples": 100,
1551
  "per_subtask": {
1552
  "MD": {
1553
+ "accuracy": 1.0,
1554
  "count": 200
1555
  },
1556
  "MB": {
1557
+ "accuracy": 0.99,
1558
  "count": 100
1559
  },
1560
  "UB": {
1561
+ "accuracy": 0.34,
1562
  "count": 100
1563
  },
1564
  "UD": {
1565
+ "accuracy": 0.4,
1566
  "count": 300
1567
  }
1568
  }
 
1576
  "count": 100
1577
  },
1578
  "MB": {
1579
+ "accuracy": 1.0,
1580
  "count": 100
1581
  },
1582
  "UB": {
1583
+ "accuracy": 0.24,
1584
  "count": 100
1585
  },
1586
  "UD": {
1587
+ "accuracy": 0.22,
1588
  "count": 400
1589
  }
1590
  }
1591
  },
1592
  "sub_random": {
1593
+ "full_accuracy": 0.705,
1594
  "n_examples": 200,
1595
  "per_subtask": {
1596
  "MD": {
1597
+ "accuracy": 0.985,
1598
  "count": 600
1599
  },
1600
  "MB": {
1601
+ "accuracy": 0.9662921348314607,
1602
  "count": 267
1603
  },
1604
  "ME": {
 
1606
  "count": 53
1607
  },
1608
  "UB": {
1609
+ "accuracy": 0.9020501138952164,
1610
  "count": 439
1611
  },
1612
  "UD": {
1613
+ "accuracy": 0.975609756097561,
1614
  "count": 41
1615
  }
1616
  }
1617
  },
1618
  "sub_B3": {
1619
+ "full_accuracy": 0.38,
1620
  "n_examples": 100,
1621
  "per_subtask": {
1622
  "MD": {
1623
+ "accuracy": 0.99,
1624
  "count": 300
1625
  },
1626
  "MB": {
1627
+ "accuracy": 1.0,
1628
  "count": 100
1629
  },
1630
  "UB": {
1631
+ "accuracy": 0.7055837563451777,
1632
  "count": 197
1633
  },
1634
  "UD": {
1635
+ "accuracy": 0.6893203883495146,
1636
  "count": 103
1637
  }
1638
  }
1639
  },
1640
  "sub_B4": {
1641
+ "full_accuracy": 0.27,
1642
  "n_examples": 100,
1643
  "per_subtask": {
1644
  "MD": {
1645
+ "accuracy": 1.0,
1646
  "count": 200
1647
  },
1648
  "MB": {
1649
+ "accuracy": 0.99,
1650
  "count": 100
1651
  },
1652
  "UB": {
1653
+ "accuracy": 0.6882591093117408,
1654
  "count": 247
1655
  },
1656
  "UD": {
1657
+ "accuracy": 0.6405228758169934,
1658
  "count": 153
1659
  }
1660
  }
1661
  },
1662
  "sub_B5": {
1663
+ "full_accuracy": 0.2,
1664
  "n_examples": 100,
1665
  "per_subtask": {
1666
  "MD": {
 
1668
  "count": 100
1669
  },
1670
  "MB": {
1671
+ "accuracy": 1.0,
1672
  "count": 100
1673
  },
1674
  "UB": {
1675
+ "accuracy": 0.7248322147651006,
1676
  "count": 298
1677
  },
1678
  "UD": {
1679
+ "accuracy": 0.6287128712871287,
1680
  "count": 202
1681
  }
1682
  }
1683
  }
1684
  },
1685
  "summary": {
1686
+ "overall_accuracy": 0.5416666666666666,
1687
  "total_examples": 2400,
1688
  "n_splits": 22
1689
  }
 
1698
  },
1699
  "splits": {
1700
  "add_S0": {
1701
+ "full_accuracy": 0.96,
1702
  "n_examples": 100,
1703
  "per_subtask": {
1704
  "SA": {
1705
+ "accuracy": 0.9933884297520661,
1706
  "count": 605
1707
  },
1708
  "SS": {
1709
+ "accuracy": 1.0,
1710
  "count": 95
1711
  }
1712
  }
1713
  },
1714
  "add_S1": {
1715
+ "full_accuracy": 0.9,
1716
  "n_examples": 100,
1717
  "per_subtask": {
1718
  "SA": {
1719
+ "accuracy": 0.9852941176470589,
1720
  "count": 204
1721
  },
1722
  "SC": {
1723
+ "accuracy": 0.9881656804733728,
1724
  "count": 169
1725
  },
1726
  "SS": {
 
1728
  "count": 31
1729
  },
1730
  "UC": {
1731
+ "accuracy": 0.9831081081081081,
1732
  "count": 296
1733
  }
1734
  }
1735
  },
1736
  "add_S2": {
1737
+ "full_accuracy": 0.69,
1738
  "n_examples": 100,
1739
  "per_subtask": {
1740
  "SA": {
1741
+ "accuracy": 0.9877300613496932,
1742
  "count": 163
1743
  },
1744
  "SC": {
1745
+ "accuracy": 0.9384615384615385,
1746
  "count": 130
1747
  },
1748
  "SS": {
1749
+ "accuracy": 0.9310344827586207,
1750
  "count": 87
1751
  },
1752
  "UC": {
1753
+ "accuracy": 0.9064039408866995,
1754
  "count": 203
1755
  },
1756
  "US": {
1757
+ "accuracy": 0.9914529914529915,
1758
  "count": 117
1759
  }
1760
  }
1761
  },
1762
  "add_S3": {
1763
+ "full_accuracy": 0.57,
1764
  "n_examples": 100,
1765
  "per_subtask": {
1766
  "SA": {
1767
+ "accuracy": 1.0,
1768
  "count": 121
1769
  },
1770
  "SC": {
1771
+ "accuracy": 0.9752066115702479,
1772
  "count": 121
1773
  },
1774
  "SS": {
1775
+ "accuracy": 0.9795918367346939,
1776
  "count": 49
1777
  },
1778
  "UC": {
1779
+ "accuracy": 0.8279569892473119,
1780
  "count": 186
1781
  },
1782
  "US": {
1783
+ "accuracy": 0.8923766816143498,
1784
  "count": 223
1785
  }
1786
  }
1787
  },
1788
  "add_S4": {
1789
+ "full_accuracy": 0.58,
1790
  "n_examples": 100,
1791
  "per_subtask": {
1792
  "SA": {
 
1794
  "count": 104
1795
  },
1796
  "SC": {
1797
+ "accuracy": 0.9905660377358491,
1798
  "count": 106
1799
  },
1800
  "SS": {
 
1802
  "count": 23
1803
  },
1804
  "UC": {
1805
+ "accuracy": 0.825,
1806
  "count": 160
1807
  },
1808
  "US": {
1809
+ "accuracy": 0.8306188925081434,
1810
  "count": 307
1811
  }
1812
  }
1813
  },
1814
  "add_S5": {
1815
+ "full_accuracy": 0.37,
1816
  "n_examples": 100,
1817
  "per_subtask": {
1818
  "SA": {
 
1820
  "count": 100
1821
  },
1822
  "SC": {
1823
+ "accuracy": 1.0,
1824
  "count": 100
1825
  },
1826
  "UC": {
1827
+ "accuracy": 0.54,
1828
  "count": 100
1829
  },
1830
  "US": {
1831
+ "accuracy": 0.6425,
1832
  "count": 400
1833
  }
1834
  }
1835
  },
1836
  "add_S6": {
1837
+ "full_accuracy": 0.61,
1838
  "n_examples": 100,
1839
  "per_subtask": {
1840
  "SC": {
 
1842
  "count": 100
1843
  },
1844
  "UC": {
1845
+ "accuracy": 0.69,
1846
  "count": 100
1847
  },
1848
  "US": {
1849
+ "accuracy": 0.77,
1850
  "count": 500
1851
  }
1852
  }
1853
  },
1854
  "add_random": {
1855
+ "full_accuracy": 0.925,
1856
  "n_examples": 200,
1857
  "per_subtask": {
1858
  "SA": {
1859
+ "accuracy": 0.9932885906040269,
1860
  "count": 447
1861
  },
1862
  "SC": {
1863
+ "accuracy": 0.9875,
1864
  "count": 320
1865
  },
1866
  "SS": {
 
1868
  "count": 56
1869
  },
1870
  "UC": {
1871
+ "accuracy": 0.9848771266540642,
1872
  "count": 529
1873
  },
1874
  "US": {
1875
+ "accuracy": 1.0,
1876
  "count": 48
1877
  }
1878
  }
1879
  },
1880
  "add_C3": {
1881
+ "full_accuracy": 0.62,
1882
  "n_examples": 100,
1883
  "per_subtask": {
1884
  "SA": {
1885
+ "accuracy": 1.0,
1886
  "count": 300
1887
  },
1888
  "SC": {
 
1890
  "count": 100
1891
  },
1892
  "UC": {
1893
+ "accuracy": 0.8186528497409327,
1894
  "count": 193
1895
  },
1896
  "US": {
1897
+ "accuracy": 0.8785046728971962,
1898
  "count": 107
1899
  }
1900
  }
1901
  },
1902
  "add_C4": {
1903
+ "full_accuracy": 0.61,
1904
  "n_examples": 100,
1905
  "per_subtask": {
1906
  "SA": {
 
1912
  "count": 100
1913
  },
1914
  "UC": {
1915
+ "accuracy": 0.87890625,
1916
  "count": 256
1917
  },
1918
  "US": {
1919
+ "accuracy": 0.8611111111111112,
1920
  "count": 144
1921
  }
1922
  }
1923
  },
1924
  "add_C5": {
1925
+ "full_accuracy": 0.59,
1926
  "n_examples": 100,
1927
  "per_subtask": {
1928
  "SA": {
 
1930
  "count": 100
1931
  },
1932
  "SC": {
1933
+ "accuracy": 1.0,
1934
  "count": 100
1935
  },
1936
  "UC": {
1937
+ "accuracy": 0.8823529411764706,
1938
  "count": 306
1939
  },
1940
  "US": {
1941
+ "accuracy": 0.8969072164948454,
1942
  "count": 194
1943
  }
1944
  }
1945
  },
1946
  "add_C6": {
1947
+ "full_accuracy": 0.69,
1948
  "n_examples": 100,
1949
  "per_subtask": {
1950
  "SC": {
 
1952
  "count": 100
1953
  },
1954
  "UC": {
1955
+ "accuracy": 0.912568306010929,
1956
  "count": 366
1957
  },
1958
  "US": {
1959
+ "accuracy": 0.9444444444444444,
1960
  "count": 234
1961
  }
1962
  }
1963
  },
1964
  "sub_M0": {
1965
+ "full_accuracy": 0.89,
1966
  "n_examples": 100,
1967
  "per_subtask": {
1968
  "MD": {
1969
+ "accuracy": 0.9850249584026622,
1970
  "count": 601
1971
  },
1972
  "ME": {
1973
+ "accuracy": 0.9696969696969697,
1974
  "count": 99
1975
  }
1976
  }
1977
  },
1978
  "sub_M1": {
1979
+ "full_accuracy": 0.82,
1980
  "n_examples": 100,
1981
  "per_subtask": {
1982
  "MD": {
1983
+ "accuracy": 0.982078853046595,
1984
  "count": 279
1985
  },
1986
  "MB": {
1987
+ "accuracy": 0.9793103448275862,
1988
  "count": 145
1989
  },
1990
  "ME": {
1991
+ "accuracy": 0.9583333333333334,
1992
  "count": 24
1993
  },
1994
  "UB": {
1995
+ "accuracy": 0.9603174603174603,
1996
  "count": 252
1997
  }
1998
  }
1999
  },
2000
  "sub_M2": {
2001
+ "full_accuracy": 0.33,
2002
  "n_examples": 100,
2003
  "per_subtask": {
2004
  "MD": {
2005
+ "accuracy": 0.971830985915493,
2006
  "count": 213
2007
  },
2008
  "MB": {
 
2014
  "count": 85
2015
  },
2016
  "UB": {
2017
+ "accuracy": 0.6464088397790055,
2018
  "count": 181
2019
  },
2020
  "UD": {
2021
+ "accuracy": 0.9444444444444444,
2022
  "count": 108
2023
  }
2024
  }
2025
  },
2026
  "sub_M3": {
2027
+ "full_accuracy": 0.22,
2028
  "n_examples": 100,
2029
  "per_subtask": {
2030
  "MD": {
2031
+ "accuracy": 0.9832402234636871,
2032
  "count": 179
2033
  },
2034
  "MB": {
2035
+ "accuracy": 0.9805825242718447,
2036
  "count": 103
2037
  },
2038
  "ME": {
 
2040
  "count": 56
2041
  },
2042
  "UB": {
2043
+ "accuracy": 0.5704697986577181,
2044
  "count": 149
2045
  },
2046
  "UD": {
2047
+ "accuracy": 0.7136150234741784,
2048
  "count": 213
2049
  }
2050
  }
2051
  },
2052
  "sub_M4": {
2053
+ "full_accuracy": 0.2,
2054
  "n_examples": 100,
2055
  "per_subtask": {
2056
  "MD": {
2057
+ "accuracy": 0.99,
2058
  "count": 200
2059
  },
2060
  "MB": {
2061
+ "accuracy": 1.0,
2062
  "count": 100
2063
  },
2064
  "UB": {
2065
+ "accuracy": 0.49,
2066
  "count": 100
2067
  },
2068
  "UD": {
2069
+ "accuracy": 0.49,
2070
  "count": 300
2071
  }
2072
  }
2073
  },
2074
  "sub_M5": {
2075
+ "full_accuracy": 0.1,
2076
  "n_examples": 100,
2077
  "per_subtask": {
2078
  "MD": {
 
2084
  "count": 100
2085
  },
2086
  "UB": {
2087
+ "accuracy": 0.4,
2088
  "count": 100
2089
  },
2090
  "UD": {
2091
+ "accuracy": 0.33,
2092
  "count": 400
2093
  }
2094
  }
2095
  },
2096
  "sub_random": {
2097
+ "full_accuracy": 0.735,
2098
  "n_examples": 200,
2099
  "per_subtask": {
2100
  "MD": {
2101
+ "accuracy": 0.9766666666666667,
2102
  "count": 600
2103
  },
2104
  "MB": {
2105
+ "accuracy": 0.9588014981273408,
2106
  "count": 267
2107
  },
2108
  "ME": {
2109
+ "accuracy": 1.0,
2110
  "count": 53
2111
  },
2112
  "UB": {
2113
+ "accuracy": 0.9225512528473804,
2114
  "count": 439
2115
  },
2116
  "UD": {
2117
+ "accuracy": 0.975609756097561,
2118
  "count": 41
2119
  }
2120
  }
2121
  },
2122
  "sub_B3": {
2123
+ "full_accuracy": 0.44,
2124
  "n_examples": 100,
2125
  "per_subtask": {
2126
  "MD": {
2127
+ "accuracy": 0.98,
2128
  "count": 300
2129
  },
2130
  "MB": {
2131
+ "accuracy": 0.99,
2132
  "count": 100
2133
  },
2134
  "UB": {
2135
+ "accuracy": 0.7360406091370558,
2136
  "count": 197
2137
  },
2138
  "UD": {
2139
+ "accuracy": 0.7766990291262136,
2140
  "count": 103
2141
  }
2142
  }
2143
  },
2144
  "sub_B4": {
2145
+ "full_accuracy": 0.39,
2146
  "n_examples": 100,
2147
  "per_subtask": {
2148
  "MD": {
2149
+ "accuracy": 1.0,
2150
  "count": 200
2151
  },
2152
  "MB": {
2153
+ "accuracy": 0.99,
2154
  "count": 100
2155
  },
2156
  "UB": {
2157
+ "accuracy": 0.7530364372469636,
2158
  "count": 247
2159
  },
2160
  "UD": {
2161
+ "accuracy": 0.7058823529411765,
2162
  "count": 153
2163
  }
2164
  }
2165
  },
2166
  "sub_B5": {
2167
+ "full_accuracy": 0.27,
2168
  "n_examples": 100,
2169
  "per_subtask": {
2170
  "MD": {
 
2176
  "count": 100
2177
  },
2178
  "UB": {
2179
+ "accuracy": 0.7583892617449665,
2180
  "count": 298
2181
  },
2182
  "UD": {
2183
+ "accuracy": 0.7128712871287128,
2184
  "count": 202
2185
  }
2186
  }
2187
  }
2188
  },
2189
  "summary": {
2190
+ "overall_accuracy": 0.5895833333333333,
2191
  "total_examples": 2400,
2192
  "n_splits": 22
2193
  }
2194
  },
2195
+ "sorl_overall_accuracy": 0.5895833333333333,
2196
+ "sft_overall_accuracy": 0.5416666666666666
2197
  }
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83dc63f41a380d62231535ea7934fea7ce172db8bc245f6beae0be7d7b4c8b71
3
  size 634679036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76aa98787a44b36dd1e9fd52457c04af3c9ebd3f0bc0c822d63b94dee465388a
3
  size 634679036
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/train_config.json CHANGED
@@ -17,10 +17,10 @@
17
  "target_vocab_util": 0.8,
18
  "min_abs_ppl": 0.0,
19
  "zipf_alpha": 1.0,
20
- "lr": 4e-05,
21
  "emb_lr_mult": 1.0,
22
  "weight_decay": 0.01,
23
- "warmup_steps": 117,
24
  "cooldown_frac": 0.4,
25
  "max_grad_norm": 1.0,
26
  "vq_abs_pretrain_steps": 0,
@@ -30,7 +30,7 @@
30
  "vq_abs_pretrain_target_vectors": 20000,
31
  "batch_size": 64,
32
  "gradient_accumulation_steps": 1,
33
- "num_epochs": 10,
34
  "emb_warmup_steps": 0,
35
  "log_every": 50,
36
  "eval_every": 390,
@@ -69,16 +69,16 @@
69
  "no_wandb": false,
70
  "n_params": 158593426,
71
  "run_name": "add_sub_sorl_v1_abs10_K1_25K_1L3H510d",
72
- "git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
73
- "timestamp": "2026-04-12T16:20:24.628526+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
77
  "model_repo": "thoughtworks/arithmetic-sorl",
78
  "trainer_version": "v1",
79
- "wandb_run_id": "ybv7051n",
80
- "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/ybv7051n",
81
- "final_accuracy": 0.28291666666666665,
82
- "sft_accuracy": 0.0008333333333333334,
83
  "eval_method": "ArithmeticEvaluator"
84
  }
 
17
  "target_vocab_util": 0.8,
18
  "min_abs_ppl": 0.0,
19
  "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
  "emb_lr_mult": 1.0,
22
  "weight_decay": 0.01,
23
+ "warmup_steps": 234,
24
  "cooldown_frac": 0.4,
25
  "max_grad_norm": 1.0,
26
  "vq_abs_pretrain_steps": 0,
 
30
  "vq_abs_pretrain_target_vectors": 20000,
31
  "batch_size": 64,
32
  "gradient_accumulation_steps": 1,
33
+ "num_epochs": 20,
34
  "emb_warmup_steps": 0,
35
  "log_every": 50,
36
  "eval_every": 390,
 
69
  "no_wandb": false,
70
  "n_params": 158593426,
71
  "run_name": "add_sub_sorl_v1_abs10_K1_25K_1L3H510d",
72
+ "git_commit": "57deaa28d9c21e39ddac5ef448d6e1be992fba91",
73
+ "timestamp": "2026-04-13T09:55:13.754039+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
77
  "model_repo": "thoughtworks/arithmetic-sorl",
78
  "trainer_version": "v1",
79
+ "wandb_run_id": "u2cu9qs9",
80
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/u2cu9qs9",
81
+ "final_accuracy": 0.5895833333333333,
82
+ "sft_accuracy": 0.5416666666666666,
83
  "eval_method": "ArithmeticEvaluator"
84
  }