duyvu8373 committed on
Commit
b2cb193
·
verified ·
1 Parent(s): 64b925d

Upload 12 files

Browse files
Files changed (6) hide show
  1. config.json +1 -1
  2. model.safetensors +1 -1
  3. optimizer.pt +1 -1
  4. rng_state.pth +1 -1
  5. trainer_state.json +979 -979
  6. training_args.bin +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "duyvu8373/vit5-base-newformat",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "duyvu8373/multi-task-vit5-base",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b8d03fa856c5d64d31a525a2f83e77eff70c981b437371a32dce28085f04ee1
3
  size 903834408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e57bbab697654055aa509ab3a4fccda8b4866ca3cb3a56d421ceac79bcb8977
3
  size 903834408
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d30f7feaee41a4f9b5c388cc38559f402dd54a209ad878521a81bdf4b2536934
3
  size 1807824186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa3ddc50d497f7baa523f553588741d4aa5c987db9f41ad48c618e2f3f9fae4
3
  size 1807824186
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19d4f0b5cd6717dc7a747c0d2115e48d9f25a9154783d18b643e092c46773f4c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb2293d625f6837f6eefe9f505563f0a094937fc4f424feda9102b4b1beab3b7
3
  size 14244
trainer_state.json CHANGED
@@ -10,1454 +10,1454 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_bp": 0.9523463051958837,
14
  "eval_counts": [
15
- 3593,
16
- 3028,
17
- 2503,
18
- 1978
19
  ],
20
- "eval_loss": 0.020566828548908234,
21
  "eval_precisions": [
22
- 96.92473698408416,
23
- 95.19019176359636,
24
- 94.27495291902072,
25
- 92.90746829497417
26
- ],
27
- "eval_ref_len": 3888,
28
- "eval_runtime": 15.2582,
29
- "eval_samples_per_second": 34.473,
30
- "eval_score": 90.29492610822332,
31
- "eval_steps_per_second": 1.114,
32
- "eval_sys_len": 3707,
33
  "eval_totals": [
34
- 3707,
35
- 3181,
36
- 2655,
37
- 2129
38
  ],
39
  "step": 129
40
  },
41
  {
42
  "epoch": 2.0,
43
- "eval_bp": 0.9404234170896001,
44
  "eval_counts": [
45
- 3612,
46
- 3070,
47
- 2543,
48
- 2017
49
  ],
50
- "eval_loss": 0.008390815928578377,
51
  "eval_precisions": [
52
- 98.60769860769861,
53
- 97.86420146636914,
54
- 97.39563385675986,
55
- 96.73860911270984
56
- ],
57
- "eval_ref_len": 3888,
58
- "eval_runtime": 12.8658,
59
- "eval_samples_per_second": 40.883,
60
- "eval_score": 91.83155461111092,
61
- "eval_steps_per_second": 1.321,
62
- "eval_sys_len": 3663,
63
  "eval_totals": [
64
- 3663,
65
- 3137,
66
- 2611,
67
- 2085
68
  ],
69
  "step": 258
70
  },
71
  {
72
  "epoch": 3.0,
73
- "eval_bp": 0.9398782656901951,
74
  "eval_counts": [
75
- 3630,
76
- 3102,
77
- 2576,
78
- 2050
79
  ],
80
- "eval_loss": 0.0058166454546153545,
81
  "eval_precisions": [
82
- 99.15323682054084,
83
- 98.94736842105263,
84
- 98.73514756611729,
85
- 98.4157465194431
86
- ],
87
- "eval_ref_len": 3888,
88
- "eval_runtime": 12.6316,
89
- "eval_samples_per_second": 41.642,
90
- "eval_score": 92.87171931381575,
91
- "eval_steps_per_second": 1.346,
92
- "eval_sys_len": 3661,
93
  "eval_totals": [
94
- 3661,
95
- 3135,
96
- 2609,
97
- 2083
98
  ],
99
  "step": 387
100
  },
101
  {
102
  "epoch": 3.88,
103
  "learning_rate": 1.8449612403100777e-05,
104
- "loss": 0.0629,
105
  "step": 500
106
  },
107
  {
108
  "epoch": 4.0,
109
- "eval_bp": 0.9330402574316247,
110
  "eval_counts": [
111
- 3610,
112
- 3078,
113
- 2553,
114
- 2028
115
  ],
116
- "eval_loss": 0.005211379379034042,
117
  "eval_precisions": [
118
- 99.28492849284929,
119
- 98.97106109324758,
120
- 98.80030959752322,
121
- 98.54227405247813
122
- ],
123
- "eval_ref_len": 3888,
124
- "eval_runtime": 12.7405,
125
- "eval_samples_per_second": 41.286,
126
- "eval_score": 92.27700542114854,
127
- "eval_steps_per_second": 1.334,
128
- "eval_sys_len": 3636,
129
  "eval_totals": [
130
- 3636,
131
- 3110,
132
- 2584,
133
- 2058
134
  ],
135
  "step": 516
136
  },
137
  {
138
  "epoch": 5.0,
139
- "eval_bp": 0.9338631315468312,
140
  "eval_counts": [
141
- 3627,
142
- 3095,
143
- 2565,
144
- 2036
145
  ],
146
- "eval_loss": 0.0038907737471163273,
147
  "eval_precisions": [
148
- 99.67023907666942,
149
- 99.42177963379376,
150
- 99.14959412446849,
151
- 98.7869966035905
152
- ],
153
- "eval_ref_len": 3888,
154
- "eval_runtime": 12.6798,
155
- "eval_samples_per_second": 41.483,
156
- "eval_score": 92.69208875986402,
157
- "eval_steps_per_second": 1.341,
158
- "eval_sys_len": 3639,
159
  "eval_totals": [
160
- 3639,
161
- 3113,
162
- 2587,
163
- 2061
164
  ],
165
  "step": 645
166
  },
167
  {
168
  "epoch": 6.0,
169
- "eval_bp": 0.9335889103861881,
170
  "eval_counts": [
171
- 3634,
172
- 3104,
173
- 2576,
174
- 2049
175
  ],
176
- "eval_loss": 0.002504762727767229,
177
  "eval_precisions": [
178
- 99.89004947773502,
179
- 99.74293059125964,
180
- 99.61330239752513,
181
- 99.46601941747574
182
- ],
183
- "eval_ref_len": 3888,
184
- "eval_runtime": 12.7893,
185
- "eval_samples_per_second": 41.128,
186
- "eval_score": 93.05823077997785,
187
- "eval_steps_per_second": 1.329,
188
- "eval_sys_len": 3638,
189
  "eval_totals": [
190
- 3638,
191
- 3112,
192
- 2586,
193
- 2060
194
  ],
195
  "step": 774
196
  },
197
  {
198
  "epoch": 7.0,
199
- "eval_bp": 0.9357807151741461,
200
  "eval_counts": [
201
- 3634,
202
- 3107,
203
- 2581,
204
- 2055
205
  ],
206
- "eval_loss": 0.0016112308949232101,
207
  "eval_precisions": [
208
- 99.67087218869995,
209
- 99.58333333333333,
210
- 99.4988434849653,
211
- 99.37137330754352
212
- ],
213
- "eval_ref_len": 3888,
214
- "eval_runtime": 12.7232,
215
- "eval_samples_per_second": 41.342,
216
- "eval_score": 93.13923177834742,
217
- "eval_steps_per_second": 1.336,
218
- "eval_sys_len": 3646,
219
  "eval_totals": [
220
- 3646,
221
- 3120,
222
- 2594,
223
- 2068
224
  ],
225
  "step": 903
226
  },
227
  {
228
  "epoch": 7.75,
229
  "learning_rate": 1.689922480620155e-05,
230
- "loss": 0.0146,
231
  "step": 1000
232
  },
233
  {
234
  "epoch": 8.0,
235
- "eval_bp": 0.9335889103861881,
236
  "eval_counts": [
237
- 3635,
238
- 3109,
239
- 2583,
240
- 2057
241
  ],
242
- "eval_loss": 0.0012645127717405558,
243
  "eval_precisions": [
244
- 99.91753710830126,
245
- 99.90359897172236,
246
- 99.88399071925754,
247
- 99.85436893203884
248
- ],
249
- "eval_ref_len": 3888,
250
- "eval_runtime": 12.6584,
251
- "eval_samples_per_second": 41.553,
252
- "eval_score": 93.25607593613448,
253
- "eval_steps_per_second": 1.343,
254
- "eval_sys_len": 3638,
255
  "eval_totals": [
256
- 3638,
257
- 3112,
258
- 2586,
259
- 2060
260
  ],
261
  "step": 1032
262
  },
263
  {
264
  "epoch": 9.0,
265
- "eval_bp": 0.9341372825165507,
266
  "eval_counts": [
267
- 3633,
268
- 3105,
269
- 2580,
270
- 2055
271
  ],
272
- "eval_loss": 0.001253910711966455,
273
  "eval_precisions": [
274
- 99.8076923076923,
275
- 99.71098265895954,
276
- 99.69088098918084,
277
- 99.66052376333657
278
- ],
279
- "eval_ref_len": 3888,
280
- "eval_runtime": 12.6869,
281
- "eval_samples_per_second": 41.46,
282
- "eval_score": 93.14983888433298,
283
- "eval_steps_per_second": 1.34,
284
- "eval_sys_len": 3640,
285
  "eval_totals": [
286
- 3640,
287
- 3114,
288
- 2588,
289
- 2062
290
  ],
291
  "step": 1161
292
  },
293
  {
294
  "epoch": 10.0,
295
- "eval_bp": 0.933314619019492,
296
  "eval_counts": [
297
- 3634,
298
- 3108,
299
- 2582,
300
- 2056
301
  ],
302
- "eval_loss": 0.002512833336368203,
303
  "eval_precisions": [
304
- 99.91751443497388,
305
- 99.90356798457088,
306
- 99.88394584139265,
307
- 99.85429820301117
308
- ],
309
- "eval_ref_len": 3888,
310
- "eval_runtime": 12.7666,
311
- "eval_samples_per_second": 41.201,
312
- "eval_score": 93.22863750791105,
313
- "eval_steps_per_second": 1.332,
314
- "eval_sys_len": 3637,
315
  "eval_totals": [
316
- 3637,
317
- 3111,
318
- 2585,
319
- 2059
320
  ],
321
  "step": 1290
322
  },
323
  {
324
  "epoch": 11.0,
325
- "eval_bp": 0.9335889103861881,
326
  "eval_counts": [
327
- 3635,
328
- 3109,
329
- 2583,
330
- 2057
331
  ],
332
- "eval_loss": 0.0023178395349532366,
333
  "eval_precisions": [
334
- 99.91753710830126,
335
- 99.90359897172236,
336
- 99.88399071925754,
337
- 99.85436893203884
338
- ],
339
- "eval_ref_len": 3888,
340
- "eval_runtime": 12.9233,
341
- "eval_samples_per_second": 40.702,
342
- "eval_score": 93.25607593613448,
343
- "eval_steps_per_second": 1.315,
344
- "eval_sys_len": 3638,
345
  "eval_totals": [
346
- 3638,
347
- 3112,
348
- 2586,
349
- 2060
350
  ],
351
  "step": 1419
352
  },
353
  {
354
  "epoch": 11.63,
355
  "learning_rate": 1.5348837209302328e-05,
356
- "loss": 0.0083,
357
  "step": 1500
358
  },
359
  {
360
  "epoch": 12.0,
361
- "eval_bp": 0.9335889103861881,
362
  "eval_counts": [
363
- 3634,
364
- 3107,
365
- 2580,
366
- 2053
367
  ],
368
- "eval_loss": 0.0019630142487585545,
369
  "eval_precisions": [
370
- 99.89004947773502,
371
- 99.83933161953728,
372
- 99.76798143851508,
373
- 99.66019417475728
374
- ],
375
- "eval_ref_len": 3888,
376
- "eval_runtime": 12.6747,
377
- "eval_samples_per_second": 41.5,
378
- "eval_score": 93.16223228048803,
379
- "eval_steps_per_second": 1.341,
380
- "eval_sys_len": 3638,
381
  "eval_totals": [
382
- 3638,
383
- 3112,
384
- 2586,
385
- 2060
386
  ],
387
  "step": 1548
388
  },
389
  {
390
  "epoch": 13.0,
391
- "eval_bp": 0.933314619019492,
392
  "eval_counts": [
393
- 3634,
394
- 3108,
395
- 2582,
396
- 2056
397
  ],
398
- "eval_loss": 0.0019749533385038376,
399
  "eval_precisions": [
400
- 99.91751443497388,
401
- 99.90356798457088,
402
- 99.88394584139265,
403
- 99.85429820301117
404
- ],
405
- "eval_ref_len": 3888,
406
- "eval_runtime": 12.9957,
407
- "eval_samples_per_second": 40.475,
408
- "eval_score": 93.22863750791105,
409
- "eval_steps_per_second": 1.308,
410
- "eval_sys_len": 3637,
411
  "eval_totals": [
412
- 3637,
413
- 3111,
414
- 2585,
415
- 2059
416
  ],
417
  "step": 1677
418
  },
419
  {
420
  "epoch": 14.0,
421
- "eval_bp": 0.9316673956463823,
422
  "eval_counts": [
423
- 3624,
424
- 3098,
425
- 2573,
426
- 2048
427
  ],
428
- "eval_loss": 0.003918025176972151,
429
  "eval_precisions": [
430
- 99.80721564307353,
431
- 99.77455716586151,
432
- 99.76735168670027,
433
- 99.75645396980029
434
- ],
435
- "eval_ref_len": 3888,
436
- "eval_runtime": 12.6294,
437
- "eval_samples_per_second": 41.649,
438
- "eval_score": 92.95841204711185,
439
- "eval_steps_per_second": 1.346,
440
- "eval_sys_len": 3631,
441
  "eval_totals": [
442
- 3631,
443
- 3105,
444
- 2579,
445
- 2053
446
  ],
447
  "step": 1806
448
  },
449
  {
450
  "epoch": 15.0,
451
- "eval_bp": 0.936327965220313,
452
  "eval_counts": [
453
- 3633,
454
- 3105,
455
- 2580,
456
- 2055
457
  ],
458
- "eval_loss": 0.0004065596731379628,
459
  "eval_precisions": [
460
- 99.58881578947368,
461
- 99.45547725816785,
462
- 99.38366718027736,
463
- 99.27536231884058
464
- ],
465
- "eval_ref_len": 3888,
466
- "eval_runtime": 12.9462,
467
- "eval_samples_per_second": 40.63,
468
- "eval_score": 93.09512464742835,
469
- "eval_steps_per_second": 1.313,
470
- "eval_sys_len": 3648,
471
  "eval_totals": [
472
- 3648,
473
- 3122,
474
- 2596,
475
- 2070
476
  ],
477
  "step": 1935
478
  },
479
  {
480
  "epoch": 15.5,
481
  "learning_rate": 1.3798449612403102e-05,
482
- "loss": 0.0058,
483
  "step": 2000
484
  },
485
  {
486
  "epoch": 16.0,
487
- "eval_bp": 0.9335889103861881,
488
  "eval_counts": [
489
- 3635,
490
- 3109,
491
- 2583,
492
- 2057
493
  ],
494
- "eval_loss": 0.0023909315932542086,
495
  "eval_precisions": [
496
- 99.91753710830126,
497
- 99.90359897172236,
498
- 99.88399071925754,
499
- 99.85436893203884
500
- ],
501
- "eval_ref_len": 3888,
502
- "eval_runtime": 12.6508,
503
- "eval_samples_per_second": 41.578,
504
- "eval_score": 93.25607593613448,
505
- "eval_steps_per_second": 1.344,
506
- "eval_sys_len": 3638,
507
  "eval_totals": [
508
- 3638,
509
- 3112,
510
- 2586,
511
- 2060
512
  ],
513
  "step": 2064
514
  },
515
  {
516
  "epoch": 17.0,
517
- "eval_bp": 0.9335889103861881,
518
  "eval_counts": [
519
- 3635,
520
- 3109,
521
- 2583,
522
- 2057
523
  ],
524
- "eval_loss": 0.0004882567736785859,
525
  "eval_precisions": [
526
- 99.91753710830126,
527
- 99.90359897172236,
528
- 99.88399071925754,
529
- 99.85436893203884
530
- ],
531
- "eval_ref_len": 3888,
532
- "eval_runtime": 12.9088,
533
- "eval_samples_per_second": 40.747,
534
- "eval_score": 93.25607593613448,
535
- "eval_steps_per_second": 1.317,
536
- "eval_sys_len": 3638,
537
  "eval_totals": [
538
- 3638,
539
- 3112,
540
- 2586,
541
- 2060
542
  ],
543
  "step": 2193
544
  },
545
  {
546
  "epoch": 18.0,
547
- "eval_bp": 0.9335889103861881,
548
  "eval_counts": [
549
- 3635,
550
- 3109,
551
- 2583,
552
- 2057
553
  ],
554
- "eval_loss": 0.000274818652542308,
555
  "eval_precisions": [
556
- 99.91753710830126,
557
- 99.90359897172236,
558
- 99.88399071925754,
559
- 99.85436893203884
560
- ],
561
- "eval_ref_len": 3888,
562
- "eval_runtime": 12.638,
563
- "eval_samples_per_second": 41.621,
564
- "eval_score": 93.25607593613448,
565
- "eval_steps_per_second": 1.345,
566
- "eval_sys_len": 3638,
567
  "eval_totals": [
568
- 3638,
569
- 3112,
570
- 2586,
571
- 2060
572
  ],
573
  "step": 2322
574
  },
575
  {
576
  "epoch": 19.0,
577
- "eval_bp": 0.9335889103861881,
578
  "eval_counts": [
579
- 3635,
580
- 3109,
581
- 2583,
582
- 2057
583
  ],
584
- "eval_loss": 0.0001670583733357489,
585
  "eval_precisions": [
586
- 99.91753710830126,
587
- 99.90359897172236,
588
- 99.88399071925754,
589
- 99.85436893203884
590
- ],
591
- "eval_ref_len": 3888,
592
- "eval_runtime": 12.6977,
593
- "eval_samples_per_second": 41.425,
594
- "eval_score": 93.25607593613448,
595
- "eval_steps_per_second": 1.339,
596
- "eval_sys_len": 3638,
597
  "eval_totals": [
598
- 3638,
599
- 3112,
600
- 2586,
601
- 2060
602
  ],
603
  "step": 2451
604
  },
605
  {
606
  "epoch": 19.38,
607
  "learning_rate": 1.2248062015503876e-05,
608
- "loss": 0.0045,
609
  "step": 2500
610
  },
611
  {
612
  "epoch": 20.0,
613
- "eval_bp": 0.9335889103861881,
614
  "eval_counts": [
615
- 3635,
616
- 3109,
617
- 2583,
618
- 2057
619
  ],
620
- "eval_loss": 2.638001569721382e-05,
621
  "eval_precisions": [
622
- 99.91753710830126,
623
- 99.90359897172236,
624
- 99.88399071925754,
625
- 99.85436893203884
626
- ],
627
- "eval_ref_len": 3888,
628
- "eval_runtime": 12.6589,
629
- "eval_samples_per_second": 41.552,
630
- "eval_score": 93.25607593613448,
631
- "eval_steps_per_second": 1.343,
632
- "eval_sys_len": 3638,
633
  "eval_totals": [
634
- 3638,
635
- 3112,
636
- 2586,
637
- 2060
638
  ],
639
  "step": 2580
640
  },
641
  {
642
  "epoch": 21.0,
643
- "eval_bp": 0.9335889103861881,
644
  "eval_counts": [
645
- 3635,
646
- 3109,
647
- 2583,
648
- 2057
649
  ],
650
- "eval_loss": 2.2430480385082774e-05,
651
  "eval_precisions": [
652
- 99.91753710830126,
653
- 99.90359897172236,
654
- 99.88399071925754,
655
- 99.85436893203884
656
- ],
657
- "eval_ref_len": 3888,
658
- "eval_runtime": 12.8875,
659
- "eval_samples_per_second": 40.815,
660
- "eval_score": 93.25607593613448,
661
- "eval_steps_per_second": 1.319,
662
- "eval_sys_len": 3638,
663
  "eval_totals": [
664
- 3638,
665
- 3112,
666
- 2586,
667
- 2060
668
  ],
669
  "step": 2709
670
  },
671
  {
672
  "epoch": 22.0,
673
- "eval_bp": 0.9338631315468312,
674
  "eval_counts": [
675
- 3635,
676
- 3109,
677
- 2582,
678
- 2055
679
  ],
680
- "eval_loss": 0.002159741008654237,
681
  "eval_precisions": [
682
- 99.89007969222314,
683
- 99.8715065852875,
684
- 99.8067259373792,
685
- 99.70887918486171
686
- ],
687
- "eval_ref_len": 3888,
688
- "eval_runtime": 12.6238,
689
- "eval_samples_per_second": 41.667,
690
- "eval_score": 93.21753858269537,
691
- "eval_steps_per_second": 1.347,
692
- "eval_sys_len": 3639,
693
  "eval_totals": [
694
- 3639,
695
- 3113,
696
- 2587,
697
- 2061
698
  ],
699
  "step": 2838
700
  },
701
  {
702
  "epoch": 23.0,
703
- "eval_bp": 0.9335889103861881,
704
  "eval_counts": [
705
- 3635,
706
- 3109,
707
- 2583,
708
- 2057
709
  ],
710
- "eval_loss": 0.00015405841986648738,
711
  "eval_precisions": [
712
- 99.91753710830126,
713
- 99.90359897172236,
714
- 99.88399071925754,
715
- 99.85436893203884
716
- ],
717
- "eval_ref_len": 3888,
718
- "eval_runtime": 12.6331,
719
- "eval_samples_per_second": 41.637,
720
- "eval_score": 93.25607593613448,
721
- "eval_steps_per_second": 1.346,
722
- "eval_sys_len": 3638,
723
  "eval_totals": [
724
- 3638,
725
- 3112,
726
- 2586,
727
- 2060
728
  ],
729
  "step": 2967
730
  },
731
  {
732
  "epoch": 23.26,
733
  "learning_rate": 1.0697674418604651e-05,
734
- "loss": 0.0028,
735
  "step": 3000
736
  },
737
  {
738
  "epoch": 24.0,
739
- "eval_bp": 0.9335889103861881,
740
  "eval_counts": [
741
- 3635,
742
- 3109,
743
- 2583,
744
- 2057
745
  ],
746
- "eval_loss": 2.6909227017313242e-05,
747
  "eval_precisions": [
748
- 99.91753710830126,
749
- 99.90359897172236,
750
- 99.88399071925754,
751
- 99.85436893203884
752
- ],
753
- "eval_ref_len": 3888,
754
- "eval_runtime": 12.7937,
755
- "eval_samples_per_second": 41.114,
756
- "eval_score": 93.25607593613448,
757
- "eval_steps_per_second": 1.329,
758
- "eval_sys_len": 3638,
759
  "eval_totals": [
760
- 3638,
761
- 3112,
762
- 2586,
763
- 2060
764
  ],
765
  "step": 3096
766
  },
767
  {
768
  "epoch": 25.0,
769
- "eval_bp": 0.9335889103861881,
770
  "eval_counts": [
771
- 3635,
772
- 3109,
773
- 2582,
774
- 2055
775
  ],
776
- "eval_loss": 0.0019257767125964165,
777
  "eval_precisions": [
778
- 99.91753710830126,
779
- 99.90359897172236,
780
- 99.84532095901005,
781
- 99.75728155339806
782
- ],
783
- "eval_ref_len": 3888,
784
- "eval_runtime": 12.5937,
785
- "eval_samples_per_second": 41.767,
786
- "eval_score": 93.2243746232048,
787
- "eval_steps_per_second": 1.35,
788
- "eval_sys_len": 3638,
789
  "eval_totals": [
790
- 3638,
791
- 3112,
792
- 2586,
793
- 2060
794
  ],
795
  "step": 3225
796
  },
797
  {
798
  "epoch": 26.0,
799
- "eval_bp": 0.9335889103861881,
800
  "eval_counts": [
801
- 3635,
802
- 3109,
803
- 2583,
804
- 2057
805
  ],
806
- "eval_loss": 2.2932923457119614e-05,
807
  "eval_precisions": [
808
- 99.91753710830126,
809
- 99.90359897172236,
810
- 99.88399071925754,
811
- 99.85436893203884
812
- ],
813
- "eval_ref_len": 3888,
814
- "eval_runtime": 12.8177,
815
- "eval_samples_per_second": 41.037,
816
- "eval_score": 93.25607593613448,
817
- "eval_steps_per_second": 1.326,
818
- "eval_sys_len": 3638,
819
  "eval_totals": [
820
- 3638,
821
- 3112,
822
- 2586,
823
- 2060
824
  ],
825
  "step": 3354
826
  },
827
  {
828
  "epoch": 27.0,
829
- "eval_bp": 0.9335889103861881,
830
  "eval_counts": [
831
- 3635,
832
- 3109,
833
- 2583,
834
- 2057
835
  ],
836
- "eval_loss": 8.50883407110814e-06,
837
  "eval_precisions": [
838
- 99.91753710830126,
839
- 99.90359897172236,
840
- 99.88399071925754,
841
- 99.85436893203884
842
- ],
843
- "eval_ref_len": 3888,
844
- "eval_runtime": 12.6361,
845
- "eval_samples_per_second": 41.627,
846
- "eval_score": 93.25607593613448,
847
- "eval_steps_per_second": 1.345,
848
- "eval_sys_len": 3638,
849
  "eval_totals": [
850
- 3638,
851
- 3112,
852
- 2586,
853
- 2060
854
  ],
855
  "step": 3483
856
  },
857
  {
858
  "epoch": 27.13,
859
  "learning_rate": 9.147286821705427e-06,
860
- "loss": 0.0024,
861
  "step": 3500
862
  },
863
  {
864
  "epoch": 28.0,
865
- "eval_bp": 0.9335889103861881,
866
  "eval_counts": [
867
- 3635,
868
- 3109,
869
- 2583,
870
- 2057
871
  ],
872
- "eval_loss": 4.2664833017624915e-05,
873
  "eval_precisions": [
874
- 99.91753710830126,
875
- 99.90359897172236,
876
- 99.88399071925754,
877
- 99.85436893203884
878
- ],
879
- "eval_ref_len": 3888,
880
- "eval_runtime": 12.8206,
881
- "eval_samples_per_second": 41.028,
882
- "eval_score": 93.25607593613448,
883
- "eval_steps_per_second": 1.326,
884
- "eval_sys_len": 3638,
885
  "eval_totals": [
886
- 3638,
887
- 3112,
888
- 2586,
889
- 2060
890
  ],
891
  "step": 3612
892
  },
893
  {
894
  "epoch": 29.0,
895
- "eval_bp": 0.9335889103861881,
896
  "eval_counts": [
897
- 3635,
898
- 3109,
899
- 2583,
900
- 2057
901
  ],
902
- "eval_loss": 1.0555435437709093e-05,
903
  "eval_precisions": [
904
- 99.91753710830126,
905
- 99.90359897172236,
906
- 99.88399071925754,
907
- 99.85436893203884
908
- ],
909
- "eval_ref_len": 3888,
910
- "eval_runtime": 12.6065,
911
- "eval_samples_per_second": 41.725,
912
- "eval_score": 93.25607593613448,
913
- "eval_steps_per_second": 1.349,
914
- "eval_sys_len": 3638,
915
  "eval_totals": [
916
- 3638,
917
- 3112,
918
- 2586,
919
- 2060
920
  ],
921
  "step": 3741
922
  },
923
  {
924
  "epoch": 30.0,
925
- "eval_bp": 0.9335889103861881,
926
  "eval_counts": [
927
- 3635,
928
- 3109,
929
- 2583,
930
- 2057
931
  ],
932
- "eval_loss": 4.156934210186591e-06,
933
  "eval_precisions": [
934
- 99.91753710830126,
935
- 99.90359897172236,
936
- 99.88399071925754,
937
- 99.85436893203884
938
- ],
939
- "eval_ref_len": 3888,
940
- "eval_runtime": 12.9263,
941
- "eval_samples_per_second": 40.692,
942
- "eval_score": 93.25607593613448,
943
- "eval_steps_per_second": 1.315,
944
- "eval_sys_len": 3638,
945
  "eval_totals": [
946
- 3638,
947
- 3112,
948
- 2586,
949
- 2060
950
  ],
951
  "step": 3870
952
  },
953
  {
954
  "epoch": 31.0,
955
- "eval_bp": 0.9335889103861881,
956
  "eval_counts": [
957
- 3635,
958
- 3109,
959
- 2583,
960
- 2057
961
  ],
962
- "eval_loss": 3.890494554070756e-06,
963
  "eval_precisions": [
964
- 99.91753710830126,
965
- 99.90359897172236,
966
- 99.88399071925754,
967
- 99.85436893203884
968
- ],
969
- "eval_ref_len": 3888,
970
- "eval_runtime": 12.67,
971
- "eval_samples_per_second": 41.515,
972
- "eval_score": 93.25607593613448,
973
- "eval_steps_per_second": 1.342,
974
- "eval_sys_len": 3638,
975
  "eval_totals": [
976
- 3638,
977
- 3112,
978
- 2586,
979
- 2060
980
  ],
981
  "step": 3999
982
  },
983
  {
984
  "epoch": 31.01,
985
  "learning_rate": 7.596899224806202e-06,
986
- "loss": 0.002,
987
  "step": 4000
988
  },
989
  {
990
  "epoch": 32.0,
991
- "eval_bp": 0.9335889103861881,
992
  "eval_counts": [
993
- 3635,
994
- 3109,
995
- 2583,
996
- 2057
997
  ],
998
- "eval_loss": 4.488087142817676e-06,
999
  "eval_precisions": [
1000
- 99.91753710830126,
1001
- 99.90359897172236,
1002
- 99.88399071925754,
1003
- 99.85436893203884
1004
- ],
1005
- "eval_ref_len": 3888,
1006
- "eval_runtime": 12.6776,
1007
- "eval_samples_per_second": 41.491,
1008
- "eval_score": 93.25607593613448,
1009
- "eval_steps_per_second": 1.341,
1010
- "eval_sys_len": 3638,
1011
  "eval_totals": [
1012
- 3638,
1013
- 3112,
1014
- 2586,
1015
- 2060
1016
  ],
1017
  "step": 4128
1018
  },
1019
  {
1020
  "epoch": 33.0,
1021
- "eval_bp": 0.9335889103861881,
1022
  "eval_counts": [
1023
- 3635,
1024
- 3109,
1025
- 2583,
1026
- 2057
1027
  ],
1028
- "eval_loss": 4.070204340678174e-06,
1029
  "eval_precisions": [
1030
- 99.91753710830126,
1031
- 99.90359897172236,
1032
- 99.88399071925754,
1033
- 99.85436893203884
1034
- ],
1035
- "eval_ref_len": 3888,
1036
- "eval_runtime": 12.6602,
1037
- "eval_samples_per_second": 41.547,
1038
- "eval_score": 93.25607593613448,
1039
- "eval_steps_per_second": 1.343,
1040
- "eval_sys_len": 3638,
1041
  "eval_totals": [
1042
- 3638,
1043
- 3112,
1044
- 2586,
1045
- 2060
1046
  ],
1047
  "step": 4257
1048
  },
1049
  {
1050
  "epoch": 34.0,
1051
- "eval_bp": 0.9335889103861881,
1052
  "eval_counts": [
1053
- 3635,
1054
- 3109,
1055
- 2583,
1056
- 2057
1057
  ],
1058
- "eval_loss": 1.0456860763952136e-06,
1059
  "eval_precisions": [
1060
- 99.91753710830126,
1061
- 99.90359897172236,
1062
- 99.88399071925754,
1063
- 99.85436893203884
1064
- ],
1065
- "eval_ref_len": 3888,
1066
- "eval_runtime": 12.6435,
1067
- "eval_samples_per_second": 41.602,
1068
- "eval_score": 93.25607593613448,
1069
- "eval_steps_per_second": 1.345,
1070
- "eval_sys_len": 3638,
1071
  "eval_totals": [
1072
- 3638,
1073
- 3112,
1074
- 2586,
1075
- 2060
1076
  ],
1077
  "step": 4386
1078
  },
1079
  {
1080
  "epoch": 34.88,
1081
  "learning_rate": 6.046511627906977e-06,
1082
- "loss": 0.0015,
1083
  "step": 4500
1084
  },
1085
  {
1086
  "epoch": 35.0,
1087
- "eval_bp": 0.9335889103861881,
1088
  "eval_counts": [
1089
- 3635,
1090
- 3109,
1091
- 2583,
1092
- 2057
1093
  ],
1094
- "eval_loss": 1.005386025099142e-06,
1095
  "eval_precisions": [
1096
- 99.91753710830126,
1097
- 99.90359897172236,
1098
- 99.88399071925754,
1099
- 99.85436893203884
1100
- ],
1101
- "eval_ref_len": 3888,
1102
- "eval_runtime": 12.9848,
1103
- "eval_samples_per_second": 40.509,
1104
- "eval_score": 93.25607593613448,
1105
- "eval_steps_per_second": 1.309,
1106
- "eval_sys_len": 3638,
1107
  "eval_totals": [
1108
- 3638,
1109
- 3112,
1110
- 2586,
1111
- 2060
1112
  ],
1113
  "step": 4515
1114
  },
1115
  {
1116
  "epoch": 36.0,
1117
- "eval_bp": 0.9335889103861881,
1118
  "eval_counts": [
1119
- 3635,
1120
- 3109,
1121
- 2583,
1122
- 2057
1123
  ],
1124
- "eval_loss": 1.019975456983957e-06,
1125
  "eval_precisions": [
1126
- 99.91753710830126,
1127
- 99.90359897172236,
1128
- 99.88399071925754,
1129
- 99.85436893203884
1130
- ],
1131
- "eval_ref_len": 3888,
1132
- "eval_runtime": 12.6703,
1133
- "eval_samples_per_second": 41.515,
1134
- "eval_score": 93.25607593613448,
1135
- "eval_steps_per_second": 1.342,
1136
- "eval_sys_len": 3638,
1137
  "eval_totals": [
1138
- 3638,
1139
- 3112,
1140
- 2586,
1141
- 2060
1142
  ],
1143
  "step": 4644
1144
  },
1145
  {
1146
  "epoch": 37.0,
1147
- "eval_bp": 0.9335889103861881,
1148
  "eval_counts": [
1149
- 3635,
1150
- 3109,
1151
- 2583,
1152
- 2057
1153
  ],
1154
- "eval_loss": 9.417852879778366e-07,
1155
  "eval_precisions": [
1156
- 99.91753710830126,
1157
- 99.90359897172236,
1158
- 99.88399071925754,
1159
- 99.85436893203884
1160
- ],
1161
- "eval_ref_len": 3888,
1162
- "eval_runtime": 12.9803,
1163
- "eval_samples_per_second": 40.523,
1164
- "eval_score": 93.25607593613448,
1165
- "eval_steps_per_second": 1.31,
1166
- "eval_sys_len": 3638,
1167
  "eval_totals": [
1168
- 3638,
1169
- 3112,
1170
- 2586,
1171
- 2060
1172
  ],
1173
  "step": 4773
1174
  },
1175
  {
1176
  "epoch": 38.0,
1177
- "eval_bp": 0.9335889103861881,
1178
  "eval_counts": [
1179
- 3635,
1180
- 3109,
1181
- 2583,
1182
- 2057
1183
  ],
1184
- "eval_loss": 6.607356226595584e-07,
1185
  "eval_precisions": [
1186
- 99.91753710830126,
1187
- 99.90359897172236,
1188
- 99.88399071925754,
1189
- 99.85436893203884
1190
- ],
1191
- "eval_ref_len": 3888,
1192
- "eval_runtime": 12.6699,
1193
- "eval_samples_per_second": 41.516,
1194
- "eval_score": 93.25607593613448,
1195
- "eval_steps_per_second": 1.342,
1196
- "eval_sys_len": 3638,
1197
  "eval_totals": [
1198
- 3638,
1199
- 3112,
1200
- 2586,
1201
- 2060
1202
  ],
1203
  "step": 4902
1204
  },
1205
  {
1206
  "epoch": 38.76,
1207
  "learning_rate": 4.4961240310077525e-06,
1208
- "loss": 0.0013,
1209
  "step": 5000
1210
  },
1211
  {
1212
  "epoch": 39.0,
1213
- "eval_bp": 0.9335889103861881,
1214
  "eval_counts": [
1215
- 3635,
1216
- 3109,
1217
- 2583,
1218
- 2057
1219
  ],
1220
- "eval_loss": 8.30285159736377e-07,
1221
  "eval_precisions": [
1222
- 99.91753710830126,
1223
- 99.90359897172236,
1224
- 99.88399071925754,
1225
- 99.85436893203884
1226
- ],
1227
- "eval_ref_len": 3888,
1228
- "eval_runtime": 12.8802,
1229
- "eval_samples_per_second": 40.838,
1230
- "eval_score": 93.25607593613448,
1231
- "eval_steps_per_second": 1.32,
1232
- "eval_sys_len": 3638,
1233
  "eval_totals": [
1234
- 3638,
1235
- 3112,
1236
- 2586,
1237
- 2060
1238
  ],
1239
  "step": 5031
1240
  },
1241
  {
1242
  "epoch": 40.0,
1243
- "eval_bp": 0.9335889103861881,
1244
  "eval_counts": [
1245
- 3635,
1246
- 3109,
1247
- 2583,
1248
- 2057
1249
  ],
1250
- "eval_loss": 6.291656404755486e-07,
1251
  "eval_precisions": [
1252
- 99.91753710830126,
1253
- 99.90359897172236,
1254
- 99.88399071925754,
1255
- 99.85436893203884
1256
- ],
1257
- "eval_ref_len": 3888,
1258
- "eval_runtime": 12.7302,
1259
- "eval_samples_per_second": 41.319,
1260
- "eval_score": 93.25607593613448,
1261
- "eval_steps_per_second": 1.335,
1262
- "eval_sys_len": 3638,
1263
  "eval_totals": [
1264
- 3638,
1265
- 3112,
1266
- 2586,
1267
- 2060
1268
  ],
1269
  "step": 5160
1270
  },
1271
  {
1272
  "epoch": 41.0,
1273
- "eval_bp": 0.9335889103861881,
1274
  "eval_counts": [
1275
- 3635,
1276
- 3109,
1277
- 2583,
1278
- 2057
1279
  ],
1280
- "eval_loss": 6.857289918116294e-07,
1281
  "eval_precisions": [
1282
- 99.91753710830126,
1283
- 99.90359897172236,
1284
- 99.88399071925754,
1285
- 99.85436893203884
1286
- ],
1287
- "eval_ref_len": 3888,
1288
- "eval_runtime": 12.6807,
1289
- "eval_samples_per_second": 41.48,
1290
- "eval_score": 93.25607593613448,
1291
- "eval_steps_per_second": 1.341,
1292
- "eval_sys_len": 3638,
1293
  "eval_totals": [
1294
- 3638,
1295
- 3112,
1296
- 2586,
1297
- 2060
1298
  ],
1299
  "step": 5289
1300
  },
1301
  {
1302
  "epoch": 42.0,
1303
- "eval_bp": 0.9335889103861881,
1304
  "eval_counts": [
1305
- 3635,
1306
- 3109,
1307
- 2583,
1308
- 2057
1309
  ],
1310
- "eval_loss": 5.779653520221473e-07,
1311
  "eval_precisions": [
1312
- 99.91753710830126,
1313
- 99.90359897172236,
1314
- 99.88399071925754,
1315
- 99.85436893203884
1316
- ],
1317
- "eval_ref_len": 3888,
1318
- "eval_runtime": 12.935,
1319
- "eval_samples_per_second": 40.665,
1320
- "eval_score": 93.25607593613448,
1321
- "eval_steps_per_second": 1.314,
1322
- "eval_sys_len": 3638,
1323
  "eval_totals": [
1324
- 3638,
1325
- 3112,
1326
- 2586,
1327
- 2060
1328
  ],
1329
  "step": 5418
1330
  },
1331
  {
1332
  "epoch": 42.64,
1333
  "learning_rate": 2.9457364341085276e-06,
1334
- "loss": 0.001,
1335
  "step": 5500
1336
  },
1337
  {
1338
  "epoch": 43.0,
1339
- "eval_bp": 0.9335889103861881,
1340
  "eval_counts": [
1341
- 3635,
1342
- 3109,
1343
- 2583,
1344
- 2057
1345
  ],
1346
- "eval_loss": 4.883366386820853e-07,
1347
  "eval_precisions": [
1348
- 99.91753710830126,
1349
- 99.90359897172236,
1350
- 99.88399071925754,
1351
- 99.85436893203884
1352
- ],
1353
- "eval_ref_len": 3888,
1354
- "eval_runtime": 12.5557,
1355
- "eval_samples_per_second": 41.893,
1356
- "eval_score": 93.25607593613448,
1357
- "eval_steps_per_second": 1.354,
1358
- "eval_sys_len": 3638,
1359
  "eval_totals": [
1360
- 3638,
1361
- 3112,
1362
- 2586,
1363
- 2060
1364
  ],
1365
  "step": 5547
1366
  },
1367
  {
1368
  "epoch": 44.0,
1369
- "eval_bp": 0.9335889103861881,
1370
  "eval_counts": [
1371
- 3635,
1372
- 3109,
1373
- 2583,
1374
- 2057
1375
  ],
1376
- "eval_loss": 4.574254433009628e-07,
1377
  "eval_precisions": [
1378
- 99.91753710830126,
1379
- 99.90359897172236,
1380
- 99.88399071925754,
1381
- 99.85436893203884
1382
- ],
1383
- "eval_ref_len": 3888,
1384
- "eval_runtime": 12.9614,
1385
- "eval_samples_per_second": 40.582,
1386
- "eval_score": 93.25607593613448,
1387
- "eval_steps_per_second": 1.312,
1388
- "eval_sys_len": 3638,
1389
  "eval_totals": [
1390
- 3638,
1391
- 3112,
1392
- 2586,
1393
- 2060
1394
  ],
1395
  "step": 5676
1396
  },
1397
  {
1398
  "epoch": 45.0,
1399
- "eval_bp": 0.9335889103861881,
1400
  "eval_counts": [
1401
- 3635,
1402
- 3109,
1403
- 2583,
1404
- 2057
1405
  ],
1406
- "eval_loss": 5.373989324652939e-07,
1407
  "eval_precisions": [
1408
- 99.91753710830126,
1409
- 99.90359897172236,
1410
- 99.88399071925754,
1411
- 99.85436893203884
1412
- ],
1413
- "eval_ref_len": 3888,
1414
- "eval_runtime": 12.641,
1415
- "eval_samples_per_second": 41.611,
1416
- "eval_score": 93.25607593613448,
1417
- "eval_steps_per_second": 1.345,
1418
- "eval_sys_len": 3638,
1419
  "eval_totals": [
1420
- 3638,
1421
- 3112,
1422
- 2586,
1423
- 2060
1424
  ],
1425
  "step": 5805
1426
  },
1427
  {
1428
  "epoch": 46.0,
1429
- "eval_bp": 0.9335889103861881,
1430
  "eval_counts": [
1431
- 3635,
1432
- 3109,
1433
- 2583,
1434
- 2057
1435
  ],
1436
- "eval_loss": 4.2050155002471e-07,
1437
  "eval_precisions": [
1438
- 99.91753710830126,
1439
- 99.90359897172236,
1440
- 99.88399071925754,
1441
- 99.85436893203884
1442
- ],
1443
- "eval_ref_len": 3888,
1444
- "eval_runtime": 12.9508,
1445
- "eval_samples_per_second": 40.615,
1446
- "eval_score": 93.25607593613448,
1447
- "eval_steps_per_second": 1.313,
1448
- "eval_sys_len": 3638,
1449
  "eval_totals": [
1450
- 3638,
1451
- 3112,
1452
- 2586,
1453
- 2060
1454
  ],
1455
  "step": 5934
1456
  },
1457
  {
1458
  "epoch": 46.51,
1459
  "learning_rate": 1.3953488372093025e-06,
1460
- "loss": 0.0007,
1461
  "step": 6000
1462
  }
1463
  ],
@@ -1466,7 +1466,7 @@
1466
  "num_input_tokens_seen": 0,
1467
  "num_train_epochs": 50,
1468
  "save_steps": 500,
1469
- "total_flos": 2.0486735002368e+16,
1470
  "train_batch_size": 32,
1471
  "trial_name": null,
1472
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_bp": 0.8731878729849682,
14
  "eval_counts": [
15
+ 4461,
16
+ 3931,
17
+ 3406,
18
+ 2881
19
  ],
20
+ "eval_loss": 0.004611688666045666,
21
  "eval_precisions": [
22
+ 99.33199732798931,
23
+ 99.14249684741488,
24
+ 99.04041872637394,
25
+ 98.90147614143494
26
+ ],
27
+ "eval_ref_len": 5100,
28
+ "eval_runtime": 16.8,
29
+ "eval_samples_per_second": 31.31,
30
+ "eval_score": 86.5363874068898,
31
+ "eval_steps_per_second": 1.012,
32
+ "eval_sys_len": 4491,
33
  "eval_totals": [
34
+ 4491,
35
+ 3965,
36
+ 3439,
37
+ 2913
38
  ],
39
  "step": 129
40
  },
41
  {
42
  "epoch": 2.0,
43
+ "eval_bp": 0.869649689238195,
44
  "eval_counts": [
45
+ 4458,
46
+ 3930,
47
+ 3403,
48
+ 2876
49
  ],
50
+ "eval_loss": 0.00696711428463459,
51
  "eval_precisions": [
52
+ 99.62011173184358,
53
+ 99.51886553557863,
54
+ 99.41571720712825,
55
+ 99.27511218501898
56
+ ],
57
+ "eval_ref_len": 5100,
58
+ "eval_runtime": 13.788,
59
+ "eval_samples_per_second": 38.149,
60
+ "eval_score": 86.49307063670169,
61
+ "eval_steps_per_second": 1.233,
62
+ "eval_sys_len": 4475,
63
  "eval_totals": [
64
+ 4475,
65
+ 3949,
66
+ 3423,
67
+ 2897
68
  ],
69
  "step": 258
70
  },
71
  {
72
  "epoch": 3.0,
73
+ "eval_bp": 0.8727461954138965,
74
  "eval_counts": [
75
+ 4463,
76
+ 3936,
77
+ 3409,
78
+ 2882
79
  ],
80
+ "eval_loss": 0.0054754531010985374,
81
  "eval_precisions": [
82
+ 99.42080641568278,
83
+ 99.31869795609387,
84
+ 99.18533604887983,
85
+ 99.00377877018207
86
+ ],
87
+ "eval_ref_len": 5100,
88
+ "eval_runtime": 13.887,
89
+ "eval_samples_per_second": 37.877,
90
+ "eval_score": 86.60437841944254,
91
+ "eval_steps_per_second": 1.224,
92
+ "eval_sys_len": 4489,
93
  "eval_totals": [
94
+ 4489,
95
+ 3963,
96
+ 3437,
97
+ 2911
98
  ],
99
  "step": 387
100
  },
101
  {
102
  "epoch": 3.88,
103
  "learning_rate": 1.8449612403100777e-05,
104
+ "loss": 0.0338,
105
  "step": 500
106
  },
107
  {
108
  "epoch": 4.0,
109
+ "eval_bp": 0.8703139283627056,
110
  "eval_counts": [
111
+ 4464,
112
+ 3938,
113
+ 3409,
114
+ 2881
115
  ],
116
+ "eval_loss": 0.0021970090456306934,
117
  "eval_precisions": [
118
+ 99.68736042876284,
119
+ 99.64574898785425,
120
+ 99.50379451255108,
121
+ 99.34482758620689
122
+ ],
123
+ "eval_ref_len": 5100,
124
+ "eval_runtime": 14.1368,
125
+ "eval_samples_per_second": 37.208,
126
+ "eval_score": 86.63569782205596,
127
+ "eval_steps_per_second": 1.203,
128
+ "eval_sys_len": 4478,
129
  "eval_totals": [
130
+ 4478,
131
+ 3952,
132
+ 3426,
133
+ 2900
134
  ],
135
  "step": 516
136
  },
137
  {
138
  "epoch": 5.0,
139
+ "eval_bp": 0.8707565416150743,
140
  "eval_counts": [
141
+ 4461,
142
+ 3933,
143
+ 3406,
144
+ 2880
145
  ],
146
+ "eval_loss": 0.0019001211039721966,
147
  "eval_precisions": [
148
+ 99.57589285714286,
149
+ 99.46889226100151,
150
+ 99.35822637106185,
151
+ 99.24190213645761
152
+ ],
153
+ "eval_ref_len": 5100,
154
+ "eval_runtime": 13.879,
155
+ "eval_samples_per_second": 37.899,
156
+ "eval_score": 86.56290964643597,
157
+ "eval_steps_per_second": 1.225,
158
+ "eval_sys_len": 4480,
159
  "eval_totals": [
160
+ 4480,
161
+ 3954,
162
+ 3428,
163
+ 2902
164
  ],
165
  "step": 645
166
  },
167
  {
168
  "epoch": 6.0,
169
+ "eval_bp": 0.8700925578924447,
170
  "eval_counts": [
171
+ 4463,
172
+ 3937,
173
+ 3411,
174
+ 2885
175
  ],
176
+ "eval_loss": 0.0021749669685959816,
177
  "eval_precisions": [
178
+ 99.6872905963815,
179
+ 99.64565932675272,
180
+ 99.5912408759124,
181
+ 99.51707485339773
182
+ ],
183
+ "eval_ref_len": 5100,
184
+ "eval_runtime": 14.1959,
185
+ "eval_samples_per_second": 37.053,
186
+ "eval_score": 86.6701772747815,
187
+ "eval_steps_per_second": 1.198,
188
+ "eval_sys_len": 4477,
189
  "eval_totals": [
190
+ 4477,
191
+ 3951,
192
+ 3425,
193
+ 2899
194
  ],
195
  "step": 774
196
  },
197
  {
198
  "epoch": 7.0,
199
+ "eval_bp": 0.8703139283627056,
200
  "eval_counts": [
201
+ 4464,
202
+ 3938,
203
+ 3412,
204
+ 2886
205
  ],
206
+ "eval_loss": 0.0006092642433941364,
207
  "eval_precisions": [
208
+ 99.68736042876284,
209
+ 99.64574898785425,
210
+ 99.59136018680677,
211
+ 99.51724137931035
212
+ ],
213
+ "eval_ref_len": 5100,
214
+ "eval_runtime": 13.778,
215
+ "eval_samples_per_second": 38.177,
216
+ "eval_score": 86.69232496739014,
217
+ "eval_steps_per_second": 1.234,
218
+ "eval_sys_len": 4478,
219
  "eval_totals": [
220
+ 4478,
221
+ 3952,
222
+ 3426,
223
+ 2900
224
  ],
225
  "step": 903
226
  },
227
  {
228
  "epoch": 7.75,
229
  "learning_rate": 1.689922480620155e-05,
230
+ "loss": 0.0059,
231
  "step": 1000
232
  },
233
  {
234
  "epoch": 8.0,
235
+ "eval_bp": 0.8714201423501817,
236
  "eval_counts": [
237
+ 4460,
238
+ 3930,
239
+ 3404,
240
+ 2878
241
  ],
242
+ "eval_loss": 0.0026030810549855232,
243
  "eval_precisions": [
244
+ 99.48695070265447,
245
+ 99.31766489764973,
246
+ 99.21305741766248,
247
+ 99.07056798623064
248
+ ],
249
+ "eval_ref_len": 5100,
250
+ "eval_runtime": 14.1647,
251
+ "eval_samples_per_second": 37.135,
252
+ "eval_score": 86.50757156803533,
253
+ "eval_steps_per_second": 1.2,
254
+ "eval_sys_len": 4483,
255
  "eval_totals": [
256
+ 4483,
257
+ 3957,
258
+ 3431,
259
+ 2905
260
  ],
261
  "step": 1032
262
  },
263
  {
264
  "epoch": 9.0,
265
+ "eval_bp": 0.8711989846507546,
266
  "eval_counts": [
267
+ 4459,
268
+ 3929,
269
+ 3403,
270
+ 2877
271
  ],
272
+ "eval_loss": 0.00256777903996408,
273
  "eval_precisions": [
274
+ 99.48683623382419,
275
+ 99.31749241658241,
276
+ 99.21282798833819,
277
+ 99.0702479338843
278
+ ],
279
+ "eval_ref_len": 5100,
280
+ "eval_runtime": 13.7621,
281
+ "eval_samples_per_second": 38.221,
282
+ "eval_score": 86.4854345384993,
283
+ "eval_steps_per_second": 1.235,
284
+ "eval_sys_len": 4482,
285
  "eval_totals": [
286
+ 4482,
287
+ 3956,
288
+ 3430,
289
+ 2904
290
  ],
291
  "step": 1161
292
  },
293
  {
294
  "epoch": 10.0,
295
+ "eval_bp": 0.8700925578924447,
296
  "eval_counts": [
297
+ 4463,
298
+ 3937,
299
+ 3410,
300
+ 2883
301
  ],
302
+ "eval_loss": 0.0008762977086007595,
303
  "eval_precisions": [
304
+ 99.6872905963815,
305
+ 99.64565932675272,
306
+ 99.56204379562044,
307
+ 99.44808554674026
308
+ ],
309
+ "eval_ref_len": 5100,
310
+ "eval_runtime": 14.033,
311
+ "eval_samples_per_second": 37.483,
312
+ "eval_score": 86.64880068746125,
313
+ "eval_steps_per_second": 1.211,
314
+ "eval_sys_len": 4477,
315
  "eval_totals": [
316
+ 4477,
317
+ 3951,
318
+ 3425,
319
+ 2899
320
  ],
321
  "step": 1290
322
  },
323
  {
324
  "epoch": 11.0,
325
+ "eval_bp": 0.8707565416150743,
326
  "eval_counts": [
327
+ 4462,
328
+ 3935,
329
+ 3408,
330
+ 2881
331
  ],
332
+ "eval_loss": 0.0005689110257662833,
333
  "eval_precisions": [
334
+ 99.59821428571429,
335
+ 99.51947395042994,
336
+ 99.41656942823803,
337
+ 99.276361130255
338
+ ],
339
+ "eval_ref_len": 5100,
340
+ "eval_runtime": 13.8477,
341
+ "eval_samples_per_second": 37.985,
342
+ "eval_score": 86.5989861295871,
343
+ "eval_steps_per_second": 1.228,
344
+ "eval_sys_len": 4480,
345
  "eval_totals": [
346
+ 4480,
347
+ 3954,
348
+ 3428,
349
+ 2902
350
  ],
351
  "step": 1419
352
  },
353
  {
354
  "epoch": 11.63,
355
  "learning_rate": 1.5348837209302328e-05,
356
+ "loss": 0.0042,
357
  "step": 1500
358
  },
359
  {
360
  "epoch": 12.0,
361
+ "eval_bp": 0.8711989846507546,
362
  "eval_counts": [
363
+ 4459,
364
+ 3929,
365
+ 3403,
366
+ 2877
367
  ],
368
+ "eval_loss": 0.0013145459815859795,
369
  "eval_precisions": [
370
+ 99.48683623382419,
371
+ 99.31749241658241,
372
+ 99.21282798833819,
373
+ 99.0702479338843
374
+ ],
375
+ "eval_ref_len": 5100,
376
+ "eval_runtime": 13.7281,
377
+ "eval_samples_per_second": 38.316,
378
+ "eval_score": 86.4854345384993,
379
+ "eval_steps_per_second": 1.238,
380
+ "eval_sys_len": 4482,
381
  "eval_totals": [
382
+ 4482,
383
+ 3956,
384
+ 3430,
385
+ 2904
386
  ],
387
  "step": 1548
388
  },
389
  {
390
  "epoch": 13.0,
391
+ "eval_bp": 0.8700925578924447,
392
  "eval_counts": [
393
+ 4463,
394
+ 3937,
395
+ 3410,
396
+ 2883
397
  ],
398
+ "eval_loss": 0.0008758930489420891,
399
  "eval_precisions": [
400
+ 99.6872905963815,
401
+ 99.64565932675272,
402
+ 99.56204379562044,
403
+ 99.44808554674026
404
+ ],
405
+ "eval_ref_len": 5100,
406
+ "eval_runtime": 13.731,
407
+ "eval_samples_per_second": 38.308,
408
+ "eval_score": 86.64880068746125,
409
+ "eval_steps_per_second": 1.238,
410
+ "eval_sys_len": 4477,
411
  "eval_totals": [
412
+ 4477,
413
+ 3951,
414
+ 3425,
415
+ 2899
416
  ],
417
  "step": 1677
418
  },
419
  {
420
  "epoch": 14.0,
421
+ "eval_bp": 0.8700925578924447,
422
  "eval_counts": [
423
+ 4463,
424
+ 3937,
425
+ 3411,
426
+ 2885
427
  ],
428
+ "eval_loss": 0.0003304094134364277,
429
  "eval_precisions": [
430
+ 99.6872905963815,
431
+ 99.64565932675272,
432
+ 99.5912408759124,
433
+ 99.51707485339773
434
+ ],
435
+ "eval_ref_len": 5100,
436
+ "eval_runtime": 14.021,
437
+ "eval_samples_per_second": 37.515,
438
+ "eval_score": 86.6701772747815,
439
+ "eval_steps_per_second": 1.212,
440
+ "eval_sys_len": 4477,
441
  "eval_totals": [
442
+ 4477,
443
+ 3951,
444
+ 3425,
445
+ 2899
446
  ],
447
  "step": 1806
448
  },
449
  {
450
  "epoch": 15.0,
451
+ "eval_bp": 0.8700925578924447,
452
  "eval_counts": [
453
+ 4463,
454
+ 3937,
455
+ 3411,
456
+ 2885
457
  ],
458
+ "eval_loss": 0.0008369011338800192,
459
  "eval_precisions": [
460
+ 99.6872905963815,
461
+ 99.64565932675272,
462
+ 99.5912408759124,
463
+ 99.51707485339773
464
+ ],
465
+ "eval_ref_len": 5100,
466
+ "eval_runtime": 13.8925,
467
+ "eval_samples_per_second": 37.862,
468
+ "eval_score": 86.6701772747815,
469
+ "eval_steps_per_second": 1.224,
470
+ "eval_sys_len": 4477,
471
  "eval_totals": [
472
+ 4477,
473
+ 3951,
474
+ 3425,
475
+ 2899
476
  ],
477
  "step": 1935
478
  },
479
  {
480
  "epoch": 15.5,
481
  "learning_rate": 1.3798449612403102e-05,
482
+ "loss": 0.0027,
483
  "step": 2000
484
  },
485
  {
486
  "epoch": 16.0,
487
+ "eval_bp": 0.8700925578924447,
488
  "eval_counts": [
489
+ 4463,
490
+ 3937,
491
+ 3411,
492
+ 2885
493
  ],
494
+ "eval_loss": 0.00020534679060801864,
495
  "eval_precisions": [
496
+ 99.6872905963815,
497
+ 99.64565932675272,
498
+ 99.5912408759124,
499
+ 99.51707485339773
500
+ ],
501
+ "eval_ref_len": 5100,
502
+ "eval_runtime": 13.9128,
503
+ "eval_samples_per_second": 37.807,
504
+ "eval_score": 86.6701772747815,
505
+ "eval_steps_per_second": 1.222,
506
+ "eval_sys_len": 4477,
507
  "eval_totals": [
508
+ 4477,
509
+ 3951,
510
+ 3425,
511
+ 2899
512
  ],
513
  "step": 2064
514
  },
515
  {
516
  "epoch": 17.0,
517
+ "eval_bp": 0.8700925578924447,
518
  "eval_counts": [
519
+ 4463,
520
+ 3937,
521
+ 3411,
522
+ 2885
523
  ],
524
+ "eval_loss": 0.001328099868260324,
525
  "eval_precisions": [
526
+ 99.6872905963815,
527
+ 99.64565932675272,
528
+ 99.5912408759124,
529
+ 99.51707485339773
530
+ ],
531
+ "eval_ref_len": 5100,
532
+ "eval_runtime": 13.6909,
533
+ "eval_samples_per_second": 38.42,
534
+ "eval_score": 86.6701772747815,
535
+ "eval_steps_per_second": 1.242,
536
+ "eval_sys_len": 4477,
537
  "eval_totals": [
538
+ 4477,
539
+ 3951,
540
+ 3425,
541
+ 2899
542
  ],
543
  "step": 2193
544
  },
545
  {
546
  "epoch": 18.0,
547
+ "eval_bp": 0.8700925578924447,
548
  "eval_counts": [
549
+ 4463,
550
+ 3937,
551
+ 3411,
552
+ 2885
553
  ],
554
+ "eval_loss": 0.0003365726734045893,
555
  "eval_precisions": [
556
+ 99.6872905963815,
557
+ 99.64565932675272,
558
+ 99.5912408759124,
559
+ 99.51707485339773
560
+ ],
561
+ "eval_ref_len": 5100,
562
+ "eval_runtime": 13.7109,
563
+ "eval_samples_per_second": 38.364,
564
+ "eval_score": 86.6701772747815,
565
+ "eval_steps_per_second": 1.24,
566
+ "eval_sys_len": 4477,
567
  "eval_totals": [
568
+ 4477,
569
+ 3951,
570
+ 3425,
571
+ 2899
572
  ],
573
  "step": 2322
574
  },
575
  {
576
  "epoch": 19.0,
577
+ "eval_bp": 0.8700925578924447,
578
  "eval_counts": [
579
+ 4463,
580
+ 3937,
581
+ 3411,
582
+ 2885
583
  ],
584
+ "eval_loss": 8.775618334766477e-05,
585
  "eval_precisions": [
586
+ 99.6872905963815,
587
+ 99.64565932675272,
588
+ 99.5912408759124,
589
+ 99.51707485339773
590
+ ],
591
+ "eval_ref_len": 5100,
592
+ "eval_runtime": 14.0994,
593
+ "eval_samples_per_second": 37.307,
594
+ "eval_score": 86.6701772747815,
595
+ "eval_steps_per_second": 1.206,
596
+ "eval_sys_len": 4477,
597
  "eval_totals": [
598
+ 4477,
599
+ 3951,
600
+ 3425,
601
+ 2899
602
  ],
603
  "step": 2451
604
  },
605
  {
606
  "epoch": 19.38,
607
  "learning_rate": 1.2248062015503876e-05,
608
+ "loss": 0.002,
609
  "step": 2500
610
  },
611
  {
612
  "epoch": 20.0,
613
+ "eval_bp": 0.8700925578924447,
614
  "eval_counts": [
615
+ 4463,
616
+ 3937,
617
+ 3411,
618
+ 2885
619
  ],
620
+ "eval_loss": 8.96168130566366e-05,
621
  "eval_precisions": [
622
+ 99.6872905963815,
623
+ 99.64565932675272,
624
+ 99.5912408759124,
625
+ 99.51707485339773
626
+ ],
627
+ "eval_ref_len": 5100,
628
+ "eval_runtime": 13.8102,
629
+ "eval_samples_per_second": 38.088,
630
+ "eval_score": 86.6701772747815,
631
+ "eval_steps_per_second": 1.231,
632
+ "eval_sys_len": 4477,
633
  "eval_totals": [
634
+ 4477,
635
+ 3951,
636
+ 3425,
637
+ 2899
638
  ],
639
  "step": 2580
640
  },
641
  {
642
  "epoch": 21.0,
643
+ "eval_bp": 0.8700925578924447,
644
  "eval_counts": [
645
+ 4463,
646
+ 3937,
647
+ 3411,
648
+ 2885
649
  ],
650
+ "eval_loss": 0.0007285438477993011,
651
  "eval_precisions": [
652
+ 99.6872905963815,
653
+ 99.64565932675272,
654
+ 99.5912408759124,
655
+ 99.51707485339773
656
+ ],
657
+ "eval_ref_len": 5100,
658
+ "eval_runtime": 13.7462,
659
+ "eval_samples_per_second": 38.265,
660
+ "eval_score": 86.6701772747815,
661
+ "eval_steps_per_second": 1.237,
662
+ "eval_sys_len": 4477,
663
  "eval_totals": [
664
+ 4477,
665
+ 3951,
666
+ 3425,
667
+ 2899
668
  ],
669
  "step": 2709
670
  },
671
  {
672
  "epoch": 22.0,
673
+ "eval_bp": 0.8703139283627056,
674
  "eval_counts": [
675
+ 4464,
676
+ 3938,
677
+ 3412,
678
+ 2886
679
  ],
680
+ "eval_loss": 0.0018083422910422087,
681
  "eval_precisions": [
682
+ 99.68736042876284,
683
+ 99.64574898785425,
684
+ 99.59136018680677,
685
+ 99.51724137931035
686
+ ],
687
+ "eval_ref_len": 5100,
688
+ "eval_runtime": 13.8084,
689
+ "eval_samples_per_second": 38.093,
690
+ "eval_score": 86.69232496739014,
691
+ "eval_steps_per_second": 1.231,
692
+ "eval_sys_len": 4478,
693
  "eval_totals": [
694
+ 4478,
695
+ 3952,
696
+ 3426,
697
+ 2900
698
  ],
699
  "step": 2838
700
  },
701
  {
702
  "epoch": 23.0,
703
+ "eval_bp": 0.8700925578924447,
704
  "eval_counts": [
705
+ 4463,
706
+ 3937,
707
+ 3411,
708
+ 2885
709
  ],
710
+ "eval_loss": 0.000638504687231034,
711
  "eval_precisions": [
712
+ 99.6872905963815,
713
+ 99.64565932675272,
714
+ 99.5912408759124,
715
+ 99.51707485339773
716
+ ],
717
+ "eval_ref_len": 5100,
718
+ "eval_runtime": 13.995,
719
+ "eval_samples_per_second": 37.585,
720
+ "eval_score": 86.6701772747815,
721
+ "eval_steps_per_second": 1.215,
722
+ "eval_sys_len": 4477,
723
  "eval_totals": [
724
+ 4477,
725
+ 3951,
726
+ 3425,
727
+ 2899
728
  ],
729
  "step": 2967
730
  },
731
  {
732
  "epoch": 23.26,
733
  "learning_rate": 1.0697674418604651e-05,
734
+ "loss": 0.0015,
735
  "step": 3000
736
  },
737
  {
738
  "epoch": 24.0,
739
+ "eval_bp": 0.8700925578924447,
740
  "eval_counts": [
741
+ 4463,
742
+ 3937,
743
+ 3411,
744
+ 2885
745
  ],
746
+ "eval_loss": 0.00022906862432137132,
747
  "eval_precisions": [
748
+ 99.6872905963815,
749
+ 99.64565932675272,
750
+ 99.5912408759124,
751
+ 99.51707485339773
752
+ ],
753
+ "eval_ref_len": 5100,
754
+ "eval_runtime": 13.7342,
755
+ "eval_samples_per_second": 38.299,
756
+ "eval_score": 86.6701772747815,
757
+ "eval_steps_per_second": 1.238,
758
+ "eval_sys_len": 4477,
759
  "eval_totals": [
760
+ 4477,
761
+ 3951,
762
+ 3425,
763
+ 2899
764
  ],
765
  "step": 3096
766
  },
767
  {
768
  "epoch": 25.0,
769
+ "eval_bp": 0.8700925578924447,
770
  "eval_counts": [
771
+ 4463,
772
+ 3937,
773
+ 3411,
774
+ 2885
775
  ],
776
+ "eval_loss": 0.000275774480542168,
777
  "eval_precisions": [
778
+ 99.6872905963815,
779
+ 99.64565932675272,
780
+ 99.5912408759124,
781
+ 99.51707485339773
782
+ ],
783
+ "eval_ref_len": 5100,
784
+ "eval_runtime": 14.1769,
785
+ "eval_samples_per_second": 37.103,
786
+ "eval_score": 86.6701772747815,
787
+ "eval_steps_per_second": 1.199,
788
+ "eval_sys_len": 4477,
789
  "eval_totals": [
790
+ 4477,
791
+ 3951,
792
+ 3425,
793
+ 2899
794
  ],
795
  "step": 3225
796
  },
797
  {
798
  "epoch": 26.0,
799
+ "eval_bp": 0.8700925578924447,
800
  "eval_counts": [
801
+ 4463,
802
+ 3937,
803
+ 3411,
804
+ 2885
805
  ],
806
+ "eval_loss": 0.00025378959253430367,
807
  "eval_precisions": [
808
+ 99.6872905963815,
809
+ 99.64565932675272,
810
+ 99.5912408759124,
811
+ 99.51707485339773
812
+ ],
813
+ "eval_ref_len": 5100,
814
+ "eval_runtime": 13.944,
815
+ "eval_samples_per_second": 37.722,
816
+ "eval_score": 86.6701772747815,
817
+ "eval_steps_per_second": 1.219,
818
+ "eval_sys_len": 4477,
819
  "eval_totals": [
820
+ 4477,
821
+ 3951,
822
+ 3425,
823
+ 2899
824
  ],
825
  "step": 3354
826
  },
827
  {
828
  "epoch": 27.0,
829
+ "eval_bp": 0.8700925578924447,
830
  "eval_counts": [
831
+ 4463,
832
+ 3937,
833
+ 3411,
834
+ 2885
835
  ],
836
+ "eval_loss": 2.7398107704357244e-05,
837
  "eval_precisions": [
838
+ 99.6872905963815,
839
+ 99.64565932675272,
840
+ 99.5912408759124,
841
+ 99.51707485339773
842
+ ],
843
+ "eval_ref_len": 5100,
844
+ "eval_runtime": 13.9791,
845
+ "eval_samples_per_second": 37.628,
846
+ "eval_score": 86.6701772747815,
847
+ "eval_steps_per_second": 1.216,
848
+ "eval_sys_len": 4477,
849
  "eval_totals": [
850
+ 4477,
851
+ 3951,
852
+ 3425,
853
+ 2899
854
  ],
855
  "step": 3483
856
  },
857
  {
858
  "epoch": 27.13,
859
  "learning_rate": 9.147286821705427e-06,
860
+ "loss": 0.0014,
861
  "step": 3500
862
  },
863
  {
864
  "epoch": 28.0,
865
+ "eval_bp": 0.8700925578924447,
866
  "eval_counts": [
867
+ 4463,
868
+ 3937,
869
+ 3411,
870
+ 2885
871
  ],
872
+ "eval_loss": 6.660177314188331e-05,
873
  "eval_precisions": [
874
+ 99.6872905963815,
875
+ 99.64565932675272,
876
+ 99.5912408759124,
877
+ 99.51707485339773
878
+ ],
879
+ "eval_ref_len": 5100,
880
+ "eval_runtime": 14.1404,
881
+ "eval_samples_per_second": 37.198,
882
+ "eval_score": 86.6701772747815,
883
+ "eval_steps_per_second": 1.202,
884
+ "eval_sys_len": 4477,
885
  "eval_totals": [
886
+ 4477,
887
+ 3951,
888
+ 3425,
889
+ 2899
890
  ],
891
  "step": 3612
892
  },
893
  {
894
  "epoch": 29.0,
895
+ "eval_bp": 0.8700925578924447,
896
  "eval_counts": [
897
+ 4463,
898
+ 3937,
899
+ 3411,
900
+ 2885
901
  ],
902
+ "eval_loss": 0.00022675798390991986,
903
  "eval_precisions": [
904
+ 99.6872905963815,
905
+ 99.64565932675272,
906
+ 99.5912408759124,
907
+ 99.51707485339773
908
+ ],
909
+ "eval_ref_len": 5100,
910
+ "eval_runtime": 13.9231,
911
+ "eval_samples_per_second": 37.779,
912
+ "eval_score": 86.6701772747815,
913
+ "eval_steps_per_second": 1.221,
914
+ "eval_sys_len": 4477,
915
  "eval_totals": [
916
+ 4477,
917
+ 3951,
918
+ 3425,
919
+ 2899
920
  ],
921
  "step": 3741
922
  },
923
  {
924
  "epoch": 30.0,
925
+ "eval_bp": 0.8700925578924447,
926
  "eval_counts": [
927
+ 4463,
928
+ 3937,
929
+ 3411,
930
+ 2885
931
  ],
932
+ "eval_loss": 5.096692984807305e-05,
933
  "eval_precisions": [
934
+ 99.6872905963815,
935
+ 99.64565932675272,
936
+ 99.5912408759124,
937
+ 99.51707485339773
938
+ ],
939
+ "eval_ref_len": 5100,
940
+ "eval_runtime": 14.3573,
941
+ "eval_samples_per_second": 36.636,
942
+ "eval_score": 86.6701772747815,
943
+ "eval_steps_per_second": 1.184,
944
+ "eval_sys_len": 4477,
945
  "eval_totals": [
946
+ 4477,
947
+ 3951,
948
+ 3425,
949
+ 2899
950
  ],
951
  "step": 3870
952
  },
953
  {
954
  "epoch": 31.0,
955
+ "eval_bp": 0.8700925578924447,
956
  "eval_counts": [
957
+ 4463,
958
+ 3937,
959
+ 3411,
960
+ 2885
961
  ],
962
+ "eval_loss": 9.942305041477084e-05,
963
  "eval_precisions": [
964
+ 99.6872905963815,
965
+ 99.64565932675272,
966
+ 99.5912408759124,
967
+ 99.51707485339773
968
+ ],
969
+ "eval_ref_len": 5100,
970
+ "eval_runtime": 13.7812,
971
+ "eval_samples_per_second": 38.168,
972
+ "eval_score": 86.6701772747815,
973
+ "eval_steps_per_second": 1.234,
974
+ "eval_sys_len": 4477,
975
  "eval_totals": [
976
+ 4477,
977
+ 3951,
978
+ 3425,
979
+ 2899
980
  ],
981
  "step": 3999
982
  },
983
  {
984
  "epoch": 31.01,
985
  "learning_rate": 7.596899224806202e-06,
986
+ "loss": 0.0009,
987
  "step": 4000
988
  },
989
  {
990
  "epoch": 32.0,
991
+ "eval_bp": 0.8700925578924447,
992
  "eval_counts": [
993
+ 4463,
994
+ 3937,
995
+ 3411,
996
+ 2885
997
  ],
998
+ "eval_loss": 0.00020378571934998035,
999
  "eval_precisions": [
1000
+ 99.6872905963815,
1001
+ 99.64565932675272,
1002
+ 99.5912408759124,
1003
+ 99.51707485339773
1004
+ ],
1005
+ "eval_ref_len": 5100,
1006
+ "eval_runtime": 14.1117,
1007
+ "eval_samples_per_second": 37.274,
1008
+ "eval_score": 86.6701772747815,
1009
+ "eval_steps_per_second": 1.205,
1010
+ "eval_sys_len": 4477,
1011
  "eval_totals": [
1012
+ 4477,
1013
+ 3951,
1014
+ 3425,
1015
+ 2899
1016
  ],
1017
  "step": 4128
1018
  },
1019
  {
1020
  "epoch": 33.0,
1021
+ "eval_bp": 0.8700925578924447,
1022
  "eval_counts": [
1023
+ 4463,
1024
+ 3937,
1025
+ 3411,
1026
+ 2885
1027
  ],
1028
+ "eval_loss": 4.814989733858965e-05,
1029
  "eval_precisions": [
1030
+ 99.6872905963815,
1031
+ 99.64565932675272,
1032
+ 99.5912408759124,
1033
+ 99.51707485339773
1034
+ ],
1035
+ "eval_ref_len": 5100,
1036
+ "eval_runtime": 13.7623,
1037
+ "eval_samples_per_second": 38.22,
1038
+ "eval_score": 86.6701772747815,
1039
+ "eval_steps_per_second": 1.235,
1040
+ "eval_sys_len": 4477,
1041
  "eval_totals": [
1042
+ 4477,
1043
+ 3951,
1044
+ 3425,
1045
+ 2899
1046
  ],
1047
  "step": 4257
1048
  },
1049
  {
1050
  "epoch": 34.0,
1051
+ "eval_bp": 0.8700925578924447,
1052
  "eval_counts": [
1053
+ 4463,
1054
+ 3937,
1055
+ 3411,
1056
+ 2885
1057
  ],
1058
+ "eval_loss": 5.921188130741939e-05,
1059
  "eval_precisions": [
1060
+ 99.6872905963815,
1061
+ 99.64565932675272,
1062
+ 99.5912408759124,
1063
+ 99.51707485339773
1064
+ ],
1065
+ "eval_ref_len": 5100,
1066
+ "eval_runtime": 13.904,
1067
+ "eval_samples_per_second": 37.831,
1068
+ "eval_score": 86.6701772747815,
1069
+ "eval_steps_per_second": 1.223,
1070
+ "eval_sys_len": 4477,
1071
  "eval_totals": [
1072
+ 4477,
1073
+ 3951,
1074
+ 3425,
1075
+ 2899
1076
  ],
1077
  "step": 4386
1078
  },
1079
  {
1080
  "epoch": 34.88,
1081
  "learning_rate": 6.046511627906977e-06,
1082
+ "loss": 0.0008,
1083
  "step": 4500
1084
  },
1085
  {
1086
  "epoch": 35.0,
1087
+ "eval_bp": 0.8700925578924447,
1088
  "eval_counts": [
1089
+ 4463,
1090
+ 3937,
1091
+ 3411,
1092
+ 2885
1093
  ],
1094
+ "eval_loss": 2.948127621493768e-05,
1095
  "eval_precisions": [
1096
+ 99.6872905963815,
1097
+ 99.64565932675272,
1098
+ 99.5912408759124,
1099
+ 99.51707485339773
1100
+ ],
1101
+ "eval_ref_len": 5100,
1102
+ "eval_runtime": 14.005,
1103
+ "eval_samples_per_second": 37.558,
1104
+ "eval_score": 86.6701772747815,
1105
+ "eval_steps_per_second": 1.214,
1106
+ "eval_sys_len": 4477,
1107
  "eval_totals": [
1108
+ 4477,
1109
+ 3951,
1110
+ 3425,
1111
+ 2899
1112
  ],
1113
  "step": 4515
1114
  },
1115
  {
1116
  "epoch": 36.0,
1117
+ "eval_bp": 0.8700925578924447,
1118
  "eval_counts": [
1119
+ 4463,
1120
+ 3937,
1121
+ 3411,
1122
+ 2885
1123
  ],
1124
+ "eval_loss": 1.501874066889286e-05,
1125
  "eval_precisions": [
1126
+ 99.6872905963815,
1127
+ 99.64565932675272,
1128
+ 99.5912408759124,
1129
+ 99.51707485339773
1130
+ ],
1131
+ "eval_ref_len": 5100,
1132
+ "eval_runtime": 13.8563,
1133
+ "eval_samples_per_second": 37.961,
1134
+ "eval_score": 86.6701772747815,
1135
+ "eval_steps_per_second": 1.227,
1136
+ "eval_sys_len": 4477,
1137
  "eval_totals": [
1138
+ 4477,
1139
+ 3951,
1140
+ 3425,
1141
+ 2899
1142
  ],
1143
  "step": 4644
1144
  },
1145
  {
1146
  "epoch": 37.0,
1147
+ "eval_bp": 0.8700925578924447,
1148
  "eval_counts": [
1149
+ 4463,
1150
+ 3937,
1151
+ 3411,
1152
+ 2885
1153
  ],
1154
+ "eval_loss": 3.573419598978944e-05,
1155
  "eval_precisions": [
1156
+ 99.6872905963815,
1157
+ 99.64565932675272,
1158
+ 99.5912408759124,
1159
+ 99.51707485339773
1160
+ ],
1161
+ "eval_ref_len": 5100,
1162
+ "eval_runtime": 14.0819,
1163
+ "eval_samples_per_second": 37.353,
1164
+ "eval_score": 86.6701772747815,
1165
+ "eval_steps_per_second": 1.207,
1166
+ "eval_sys_len": 4477,
1167
  "eval_totals": [
1168
+ 4477,
1169
+ 3951,
1170
+ 3425,
1171
+ 2899
1172
  ],
1173
  "step": 4773
1174
  },
1175
  {
1176
  "epoch": 38.0,
1177
+ "eval_bp": 0.8700925578924447,
1178
  "eval_counts": [
1179
+ 4463,
1180
+ 3937,
1181
+ 3411,
1182
+ 2885
1183
  ],
1184
+ "eval_loss": 1.292789511353476e-05,
1185
  "eval_precisions": [
1186
+ 99.6872905963815,
1187
+ 99.64565932675272,
1188
+ 99.5912408759124,
1189
+ 99.51707485339773
1190
+ ],
1191
+ "eval_ref_len": 5100,
1192
+ "eval_runtime": 13.8393,
1193
+ "eval_samples_per_second": 38.008,
1194
+ "eval_score": 86.6701772747815,
1195
+ "eval_steps_per_second": 1.228,
1196
+ "eval_sys_len": 4477,
1197
  "eval_totals": [
1198
+ 4477,
1199
+ 3951,
1200
+ 3425,
1201
+ 2899
1202
  ],
1203
  "step": 4902
1204
  },
1205
  {
1206
  "epoch": 38.76,
1207
  "learning_rate": 4.4961240310077525e-06,
1208
+ "loss": 0.0006,
1209
  "step": 5000
1210
  },
1211
  {
1212
  "epoch": 39.0,
1213
+ "eval_bp": 0.8700925578924447,
1214
  "eval_counts": [
1215
+ 4463,
1216
+ 3937,
1217
+ 3411,
1218
+ 2885
1219
  ],
1220
+ "eval_loss": 2.826396666932851e-05,
1221
  "eval_precisions": [
1222
+ 99.6872905963815,
1223
+ 99.64565932675272,
1224
+ 99.5912408759124,
1225
+ 99.51707485339773
1226
+ ],
1227
+ "eval_ref_len": 5100,
1228
+ "eval_runtime": 14.2505,
1229
+ "eval_samples_per_second": 36.911,
1230
+ "eval_score": 86.6701772747815,
1231
+ "eval_steps_per_second": 1.193,
1232
+ "eval_sys_len": 4477,
1233
  "eval_totals": [
1234
+ 4477,
1235
+ 3951,
1236
+ 3425,
1237
+ 2899
1238
  ],
1239
  "step": 5031
1240
  },
1241
  {
1242
  "epoch": 40.0,
1243
+ "eval_bp": 0.8700925578924447,
1244
  "eval_counts": [
1245
+ 4463,
1246
+ 3937,
1247
+ 3411,
1248
+ 2885
1249
  ],
1250
+ "eval_loss": 9.735503226693254e-06,
1251
  "eval_precisions": [
1252
+ 99.6872905963815,
1253
+ 99.64565932675272,
1254
+ 99.5912408759124,
1255
+ 99.51707485339773
1256
+ ],
1257
+ "eval_ref_len": 5100,
1258
+ "eval_runtime": 13.8339,
1259
+ "eval_samples_per_second": 38.023,
1260
+ "eval_score": 86.6701772747815,
1261
+ "eval_steps_per_second": 1.229,
1262
+ "eval_sys_len": 4477,
1263
  "eval_totals": [
1264
+ 4477,
1265
+ 3951,
1266
+ 3425,
1267
+ 2899
1268
  ],
1269
  "step": 5160
1270
  },
1271
  {
1272
  "epoch": 41.0,
1273
+ "eval_bp": 0.8700925578924447,
1274
  "eval_counts": [
1275
+ 4463,
1276
+ 3937,
1277
+ 3411,
1278
+ 2885
1279
  ],
1280
+ "eval_loss": 5.421350579126738e-06,
1281
  "eval_precisions": [
1282
+ 99.6872905963815,
1283
+ 99.64565932675272,
1284
+ 99.5912408759124,
1285
+ 99.51707485339773
1286
+ ],
1287
+ "eval_ref_len": 5100,
1288
+ "eval_runtime": 13.9445,
1289
+ "eval_samples_per_second": 37.721,
1290
+ "eval_score": 86.6701772747815,
1291
+ "eval_steps_per_second": 1.219,
1292
+ "eval_sys_len": 4477,
1293
  "eval_totals": [
1294
+ 4477,
1295
+ 3951,
1296
+ 3425,
1297
+ 2899
1298
  ],
1299
  "step": 5289
1300
  },
1301
  {
1302
  "epoch": 42.0,
1303
+ "eval_bp": 0.8700925578924447,
1304
  "eval_counts": [
1305
+ 4463,
1306
+ 3937,
1307
+ 3411,
1308
+ 2885
1309
  ],
1310
+ "eval_loss": 1.1437254215707071e-05,
1311
  "eval_precisions": [
1312
+ 99.6872905963815,
1313
+ 99.64565932675272,
1314
+ 99.5912408759124,
1315
+ 99.51707485339773
1316
+ ],
1317
+ "eval_ref_len": 5100,
1318
+ "eval_runtime": 13.8327,
1319
+ "eval_samples_per_second": 38.026,
1320
+ "eval_score": 86.6701772747815,
1321
+ "eval_steps_per_second": 1.229,
1322
+ "eval_sys_len": 4477,
1323
  "eval_totals": [
1324
+ 4477,
1325
+ 3951,
1326
+ 3425,
1327
+ 2899
1328
  ],
1329
  "step": 5418
1330
  },
1331
  {
1332
  "epoch": 42.64,
1333
  "learning_rate": 2.9457364341085276e-06,
1334
+ "loss": 0.0004,
1335
  "step": 5500
1336
  },
1337
  {
1338
  "epoch": 43.0,
1339
+ "eval_bp": 0.8700925578924447,
1340
  "eval_counts": [
1341
+ 4463,
1342
+ 3937,
1343
+ 3411,
1344
+ 2885
1345
  ],
1346
+ "eval_loss": 1.4262999684433453e-05,
1347
  "eval_precisions": [
1348
+ 99.6872905963815,
1349
+ 99.64565932675272,
1350
+ 99.5912408759124,
1351
+ 99.51707485339773
1352
+ ],
1353
+ "eval_ref_len": 5100,
1354
+ "eval_runtime": 13.8768,
1355
+ "eval_samples_per_second": 37.905,
1356
+ "eval_score": 86.6701772747815,
1357
+ "eval_steps_per_second": 1.225,
1358
+ "eval_sys_len": 4477,
1359
  "eval_totals": [
1360
+ 4477,
1361
+ 3951,
1362
+ 3425,
1363
+ 2899
1364
  ],
1365
  "step": 5547
1366
  },
1367
  {
1368
  "epoch": 44.0,
1369
+ "eval_bp": 0.8700925578924447,
1370
  "eval_counts": [
1371
+ 4463,
1372
+ 3937,
1373
+ 3411,
1374
+ 2885
1375
  ],
1376
+ "eval_loss": 3.849239874398336e-06,
1377
  "eval_precisions": [
1378
+ 99.6872905963815,
1379
+ 99.64565932675272,
1380
+ 99.5912408759124,
1381
+ 99.51707485339773
1382
+ ],
1383
+ "eval_ref_len": 5100,
1384
+ "eval_runtime": 13.9608,
1385
+ "eval_samples_per_second": 37.677,
1386
+ "eval_score": 86.6701772747815,
1387
+ "eval_steps_per_second": 1.218,
1388
+ "eval_sys_len": 4477,
1389
  "eval_totals": [
1390
+ 4477,
1391
+ 3951,
1392
+ 3425,
1393
+ 2899
1394
  ],
1395
  "step": 5676
1396
  },
1397
  {
1398
  "epoch": 45.0,
1399
+ "eval_bp": 0.8700925578924447,
1400
  "eval_counts": [
1401
+ 4463,
1402
+ 3937,
1403
+ 3411,
1404
+ 2885
1405
  ],
1406
+ "eval_loss": 1.1063038982683793e-05,
1407
  "eval_precisions": [
1408
+ 99.6872905963815,
1409
+ 99.64565932675272,
1410
+ 99.5912408759124,
1411
+ 99.51707485339773
1412
+ ],
1413
+ "eval_ref_len": 5100,
1414
+ "eval_runtime": 13.8436,
1415
+ "eval_samples_per_second": 37.996,
1416
+ "eval_score": 86.6701772747815,
1417
+ "eval_steps_per_second": 1.228,
1418
+ "eval_sys_len": 4477,
1419
  "eval_totals": [
1420
+ 4477,
1421
+ 3951,
1422
+ 3425,
1423
+ 2899
1424
  ],
1425
  "step": 5805
1426
  },
1427
  {
1428
  "epoch": 46.0,
1429
+ "eval_bp": 0.8700925578924447,
1430
  "eval_counts": [
1431
+ 4463,
1432
+ 3937,
1433
+ 3411,
1434
+ 2885
1435
  ],
1436
+ "eval_loss": 7.035921953502111e-06,
1437
  "eval_precisions": [
1438
+ 99.6872905963815,
1439
+ 99.64565932675272,
1440
+ 99.5912408759124,
1441
+ 99.51707485339773
1442
+ ],
1443
+ "eval_ref_len": 5100,
1444
+ "eval_runtime": 14.1871,
1445
+ "eval_samples_per_second": 37.076,
1446
+ "eval_score": 86.6701772747815,
1447
+ "eval_steps_per_second": 1.198,
1448
+ "eval_sys_len": 4477,
1449
  "eval_totals": [
1450
+ 4477,
1451
+ 3951,
1452
+ 3425,
1453
+ 2899
1454
  ],
1455
  "step": 5934
1456
  },
1457
  {
1458
  "epoch": 46.51,
1459
  "learning_rate": 1.3953488372093025e-06,
1460
+ "loss": 0.0002,
1461
  "step": 6000
1462
  }
1463
  ],
 
1466
  "num_input_tokens_seen": 0,
1467
  "num_train_epochs": 50,
1468
  "save_steps": 500,
1469
+ "total_flos": 2.06928054010368e+16,
1470
  "train_batch_size": 32,
1471
  "trial_name": null,
1472
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a21ba4c3e1a448be45c882e3ac85afadb6467dd79e956054400475b6a2c7d49
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cabe380e4e6dc1d9abdb4a06b79362daebcc59f39eeff81c6e1fbf03582c7bc2
3
  size 4856