dq158 commited on
Commit
6ab882d
·
1 Parent(s): 36d06ca

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a719ae057e05af32962d3c0b0a042e87e0340cd8be875b7011b5d7c0a11eb6c
3
  size 1980860410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ab5328b63a8ec3b53c2f26a99111049d2792ac322fccebc0739829ebfab0879
3
  size 1980860410
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20dab4a02d1fc1823157099879eca284bd66f0c8febf3dba5cfc87be7c9c9028
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0afe6c8cd50d6bcf5ad6cb45258efb15f89d523168c877fac499ab3891b636e
3
  size 990409330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfda48251ec49dae4ce59144bf0a41ddf1eaebff873a2c756112a5149466e4b8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52a28bc3541f02e8de86363b8d2f634108f83fb8b7a33f774c760cce0869599d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5cbe3af0ede50d3acac3ddaa7d06c821cbab27a0479e23f99d3fb100db2c5e0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5afd8d09736ea35ab4f783803d2aca249bf98e9d0591be216198fe2ebe96a3c3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,684 +1,50 @@
1
  {
2
- "best_metric": 1.8320603370666504,
3
- "best_model_checkpoint": "dq158/morbius/checkpoint-47840",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 47840,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.05,
13
- "learning_rate": 4.947742474916388e-05,
14
- "loss": 2.0308,
15
  "step": 500
16
  },
17
- {
18
- "epoch": 0.1,
19
- "learning_rate": 4.895484949832776e-05,
20
- "loss": 2.08,
21
- "step": 1000
22
- },
23
- {
24
- "epoch": 0.16,
25
- "learning_rate": 4.8432274247491646e-05,
26
- "loss": 2.0242,
27
- "step": 1500
28
- },
29
- {
30
- "epoch": 0.21,
31
- "learning_rate": 4.7909698996655525e-05,
32
- "loss": 2.0865,
33
- "step": 2000
34
- },
35
- {
36
- "epoch": 0.26,
37
- "learning_rate": 4.7387123745819403e-05,
38
- "loss": 2.0723,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 0.31,
43
- "learning_rate": 4.686454849498328e-05,
44
- "loss": 2.0951,
45
- "step": 3000
46
- },
47
- {
48
- "epoch": 0.37,
49
- "learning_rate": 4.6341973244147154e-05,
50
- "loss": 2.1049,
51
- "step": 3500
52
- },
53
- {
54
- "epoch": 0.42,
55
- "learning_rate": 4.581939799331103e-05,
56
- "loss": 2.1124,
57
- "step": 4000
58
- },
59
- {
60
- "epoch": 0.47,
61
- "learning_rate": 4.529682274247492e-05,
62
- "loss": 2.0989,
63
- "step": 4500
64
- },
65
- {
66
- "epoch": 0.52,
67
- "learning_rate": 4.47742474916388e-05,
68
- "loss": 2.1086,
69
- "step": 5000
70
- },
71
- {
72
- "epoch": 0.57,
73
- "learning_rate": 4.425167224080268e-05,
74
- "loss": 2.0899,
75
- "step": 5500
76
- },
77
- {
78
- "epoch": 0.63,
79
- "learning_rate": 4.3729096989966556e-05,
80
- "loss": 2.0607,
81
- "step": 6000
82
- },
83
  {
84
  "epoch": 0.68,
85
- "learning_rate": 4.3206521739130434e-05,
86
- "loss": 2.0947,
87
- "step": 6500
88
- },
89
- {
90
- "epoch": 0.73,
91
- "learning_rate": 4.268394648829432e-05,
92
- "loss": 2.0447,
93
- "step": 7000
94
- },
95
- {
96
- "epoch": 0.78,
97
- "learning_rate": 4.21613712374582e-05,
98
- "loss": 2.0825,
99
- "step": 7500
100
- },
101
- {
102
- "epoch": 0.84,
103
- "learning_rate": 4.163879598662208e-05,
104
- "loss": 2.077,
105
- "step": 8000
106
- },
107
- {
108
- "epoch": 0.89,
109
- "learning_rate": 4.111622073578596e-05,
110
- "loss": 2.0956,
111
- "step": 8500
112
- },
113
- {
114
- "epoch": 0.94,
115
- "learning_rate": 4.0593645484949835e-05,
116
- "loss": 2.0863,
117
- "step": 9000
118
- },
119
- {
120
- "epoch": 0.99,
121
- "learning_rate": 4.0071070234113714e-05,
122
- "loss": 2.0549,
123
- "step": 9500
124
  },
125
  {
126
  "epoch": 1.0,
127
- "eval_bleu": 0.08495831954684596,
128
- "eval_brevity_penalty": 0.7702011355400264,
129
- "eval_length_ratio": 0.7929562751828085,
130
- "eval_loss": 1.835176944732666,
131
- "eval_precisions": [
132
- 0.1860885275519422,
133
- 0.11627011709351441,
134
- 0.08451872485298668,
135
- 0.08095923402255639
136
- ],
137
- "eval_reference_length": 134020,
138
- "eval_runtime": 811.6945,
139
- "eval_samples_per_second": 15.717,
140
- "eval_steps_per_second": 1.311,
141
- "eval_translation_length": 106272,
142
- "step": 9568
143
- },
144
- {
145
- "epoch": 1.05,
146
- "learning_rate": 3.954849498327759e-05,
147
- "loss": 2.0367,
148
- "step": 10000
149
- },
150
- {
151
- "epoch": 1.1,
152
- "learning_rate": 3.902591973244147e-05,
153
- "loss": 2.0379,
154
- "step": 10500
155
- },
156
- {
157
- "epoch": 1.15,
158
- "learning_rate": 3.850334448160535e-05,
159
- "loss": 1.9553,
160
- "step": 11000
161
- },
162
- {
163
- "epoch": 1.2,
164
- "learning_rate": 3.798076923076923e-05,
165
- "loss": 1.9742,
166
- "step": 11500
167
- },
168
- {
169
- "epoch": 1.25,
170
- "learning_rate": 3.745819397993311e-05,
171
- "loss": 1.9989,
172
- "step": 12000
173
- },
174
- {
175
- "epoch": 1.31,
176
- "learning_rate": 3.6935618729096994e-05,
177
- "loss": 2.0074,
178
- "step": 12500
179
- },
180
- {
181
- "epoch": 1.36,
182
- "learning_rate": 3.641304347826087e-05,
183
- "loss": 2.04,
184
- "step": 13000
185
- },
186
- {
187
- "epoch": 1.41,
188
- "learning_rate": 3.589046822742475e-05,
189
- "loss": 1.9847,
190
- "step": 13500
191
- },
192
- {
193
- "epoch": 1.46,
194
- "learning_rate": 3.536789297658863e-05,
195
- "loss": 1.9823,
196
- "step": 14000
197
- },
198
- {
199
- "epoch": 1.52,
200
- "learning_rate": 3.484531772575251e-05,
201
- "loss": 2.034,
202
- "step": 14500
203
- },
204
- {
205
- "epoch": 1.57,
206
- "learning_rate": 3.432274247491639e-05,
207
- "loss": 2.0177,
208
- "step": 15000
209
- },
210
- {
211
- "epoch": 1.62,
212
- "learning_rate": 3.380016722408027e-05,
213
- "loss": 2.0167,
214
- "step": 15500
215
- },
216
- {
217
- "epoch": 1.67,
218
- "learning_rate": 3.3277591973244146e-05,
219
- "loss": 2.0235,
220
- "step": 16000
221
- },
222
- {
223
- "epoch": 1.72,
224
- "learning_rate": 3.2755016722408025e-05,
225
- "loss": 2.0242,
226
- "step": 16500
227
- },
228
- {
229
- "epoch": 1.78,
230
- "learning_rate": 3.2232441471571904e-05,
231
- "loss": 2.0119,
232
- "step": 17000
233
- },
234
- {
235
- "epoch": 1.83,
236
- "learning_rate": 3.170986622073579e-05,
237
- "loss": 1.9696,
238
- "step": 17500
239
- },
240
- {
241
- "epoch": 1.88,
242
- "learning_rate": 3.118729096989967e-05,
243
- "loss": 2.0214,
244
- "step": 18000
245
- },
246
- {
247
- "epoch": 1.93,
248
- "learning_rate": 3.066471571906355e-05,
249
- "loss": 2.0348,
250
- "step": 18500
251
- },
252
- {
253
- "epoch": 1.99,
254
- "learning_rate": 3.0142140468227426e-05,
255
- "loss": 1.9923,
256
- "step": 19000
257
- },
258
- {
259
- "epoch": 2.0,
260
- "eval_bleu": 0.08658597339938905,
261
- "eval_brevity_penalty": 0.7517944633429557,
262
- "eval_length_ratio": 0.7780331293836741,
263
- "eval_loss": 1.83921480178833,
264
- "eval_precisions": [
265
- 0.1897824919441461,
266
- 0.11953231710648528,
267
- 0.0894031328543502,
268
- 0.08675598838616018
269
- ],
270
- "eval_reference_length": 134020,
271
- "eval_runtime": 809.9384,
272
- "eval_samples_per_second": 15.751,
273
- "eval_steps_per_second": 1.314,
274
- "eval_translation_length": 104272,
275
- "step": 19136
276
- },
277
- {
278
- "epoch": 2.04,
279
- "learning_rate": 2.9619565217391305e-05,
280
- "loss": 1.9566,
281
- "step": 19500
282
- },
283
- {
284
- "epoch": 2.09,
285
- "learning_rate": 2.9096989966555184e-05,
286
- "loss": 1.9582,
287
- "step": 20000
288
- },
289
- {
290
- "epoch": 2.14,
291
- "learning_rate": 2.8574414715719066e-05,
292
- "loss": 1.9681,
293
- "step": 20500
294
- },
295
- {
296
- "epoch": 2.19,
297
- "learning_rate": 2.8051839464882945e-05,
298
- "loss": 1.9374,
299
- "step": 21000
300
- },
301
- {
302
- "epoch": 2.25,
303
- "learning_rate": 2.7529264214046824e-05,
304
- "loss": 1.9412,
305
- "step": 21500
306
- },
307
- {
308
- "epoch": 2.3,
309
- "learning_rate": 2.7006688963210703e-05,
310
- "loss": 1.9557,
311
- "step": 22000
312
- },
313
- {
314
- "epoch": 2.35,
315
- "learning_rate": 2.6484113712374582e-05,
316
- "loss": 1.9684,
317
- "step": 22500
318
- },
319
- {
320
- "epoch": 2.4,
321
- "learning_rate": 2.5961538461538464e-05,
322
- "loss": 1.9307,
323
- "step": 23000
324
- },
325
- {
326
- "epoch": 2.46,
327
- "learning_rate": 2.5438963210702343e-05,
328
- "loss": 1.9237,
329
- "step": 23500
330
- },
331
- {
332
- "epoch": 2.51,
333
- "learning_rate": 2.491638795986622e-05,
334
- "loss": 1.9608,
335
- "step": 24000
336
- },
337
- {
338
- "epoch": 2.56,
339
- "learning_rate": 2.43938127090301e-05,
340
- "loss": 1.9549,
341
- "step": 24500
342
- },
343
- {
344
- "epoch": 2.61,
345
- "learning_rate": 2.3871237458193983e-05,
346
- "loss": 1.9211,
347
- "step": 25000
348
- },
349
- {
350
- "epoch": 2.67,
351
- "learning_rate": 2.334866220735786e-05,
352
- "loss": 1.9698,
353
- "step": 25500
354
- },
355
- {
356
- "epoch": 2.72,
357
- "learning_rate": 2.282608695652174e-05,
358
- "loss": 1.9413,
359
- "step": 26000
360
- },
361
- {
362
- "epoch": 2.77,
363
- "learning_rate": 2.230351170568562e-05,
364
- "loss": 1.9943,
365
- "step": 26500
366
- },
367
- {
368
- "epoch": 2.82,
369
- "learning_rate": 2.1780936454849498e-05,
370
- "loss": 1.938,
371
- "step": 27000
372
- },
373
- {
374
- "epoch": 2.87,
375
- "learning_rate": 2.125836120401338e-05,
376
- "loss": 1.987,
377
- "step": 27500
378
- },
379
- {
380
- "epoch": 2.93,
381
- "learning_rate": 2.073578595317726e-05,
382
- "loss": 1.9455,
383
- "step": 28000
384
- },
385
- {
386
- "epoch": 2.98,
387
- "learning_rate": 2.0213210702341138e-05,
388
- "loss": 1.9788,
389
- "step": 28500
390
- },
391
- {
392
- "epoch": 3.0,
393
- "eval_bleu": 0.08712036412034174,
394
- "eval_brevity_penalty": 0.7810596870491452,
395
- "eval_length_ratio": 0.8018579316519923,
396
- "eval_loss": 1.8364616632461548,
397
- "eval_precisions": [
398
- 0.1853626762201647,
399
- 0.1156396502935338,
400
- 0.08657517535834096,
401
- 0.08341024457775727
402
- ],
403
- "eval_reference_length": 134020,
404
- "eval_runtime": 809.3386,
405
- "eval_samples_per_second": 15.762,
406
- "eval_steps_per_second": 1.315,
407
- "eval_translation_length": 107465,
408
- "step": 28704
409
- },
410
- {
411
- "epoch": 3.03,
412
- "learning_rate": 1.9690635451505017e-05,
413
- "loss": 1.9465,
414
- "step": 29000
415
- },
416
- {
417
- "epoch": 3.08,
418
- "learning_rate": 1.9168060200668896e-05,
419
- "loss": 1.9071,
420
- "step": 29500
421
- },
422
- {
423
- "epoch": 3.14,
424
- "learning_rate": 1.8645484949832775e-05,
425
- "loss": 1.893,
426
- "step": 30000
427
- },
428
- {
429
- "epoch": 3.19,
430
- "learning_rate": 1.8122909698996657e-05,
431
- "loss": 1.8895,
432
- "step": 30500
433
- },
434
- {
435
- "epoch": 3.24,
436
- "learning_rate": 1.7600334448160536e-05,
437
- "loss": 1.8914,
438
- "step": 31000
439
- },
440
- {
441
- "epoch": 3.29,
442
- "learning_rate": 1.7077759197324418e-05,
443
- "loss": 1.9214,
444
- "step": 31500
445
- },
446
- {
447
- "epoch": 3.34,
448
- "learning_rate": 1.6555183946488294e-05,
449
- "loss": 1.8911,
450
- "step": 32000
451
- },
452
- {
453
- "epoch": 3.4,
454
- "learning_rate": 1.6032608695652173e-05,
455
- "loss": 1.9149,
456
- "step": 32500
457
- },
458
- {
459
- "epoch": 3.45,
460
- "learning_rate": 1.5510033444816055e-05,
461
- "loss": 1.902,
462
- "step": 33000
463
- },
464
- {
465
- "epoch": 3.5,
466
- "learning_rate": 1.4987458193979934e-05,
467
- "loss": 1.9066,
468
- "step": 33500
469
- },
470
- {
471
- "epoch": 3.55,
472
- "learning_rate": 1.4464882943143812e-05,
473
- "loss": 1.9027,
474
- "step": 34000
475
- },
476
- {
477
- "epoch": 3.61,
478
- "learning_rate": 1.3942307692307693e-05,
479
- "loss": 1.9232,
480
- "step": 34500
481
- },
482
- {
483
- "epoch": 3.66,
484
- "learning_rate": 1.3419732441471572e-05,
485
- "loss": 1.926,
486
- "step": 35000
487
- },
488
- {
489
- "epoch": 3.71,
490
- "learning_rate": 1.2897157190635452e-05,
491
- "loss": 1.87,
492
- "step": 35500
493
- },
494
- {
495
- "epoch": 3.76,
496
- "learning_rate": 1.2374581939799331e-05,
497
- "loss": 1.9055,
498
- "step": 36000
499
- },
500
- {
501
- "epoch": 3.81,
502
- "learning_rate": 1.1852006688963212e-05,
503
- "loss": 1.952,
504
- "step": 36500
505
- },
506
- {
507
- "epoch": 3.87,
508
- "learning_rate": 1.132943143812709e-05,
509
- "loss": 1.8896,
510
- "step": 37000
511
- },
512
- {
513
- "epoch": 3.92,
514
- "learning_rate": 1.080685618729097e-05,
515
- "loss": 1.9144,
516
- "step": 37500
517
- },
518
- {
519
- "epoch": 3.97,
520
- "learning_rate": 1.028428093645485e-05,
521
- "loss": 1.9447,
522
- "step": 38000
523
- },
524
- {
525
- "epoch": 4.0,
526
- "eval_bleu": 0.08669705678202416,
527
- "eval_brevity_penalty": 0.7634478532624474,
528
- "eval_length_ratio": 0.7874570959558275,
529
- "eval_loss": 1.8330533504486084,
530
- "eval_precisions": [
531
- 0.1886293646657507,
532
- 0.11698894134385307,
533
- 0.08828452928243054,
534
- 0.08536133232489508
535
- ],
536
- "eval_reference_length": 134020,
537
- "eval_runtime": 811.3896,
538
- "eval_samples_per_second": 15.722,
539
- "eval_steps_per_second": 1.311,
540
- "eval_translation_length": 105535,
541
- "step": 38272
542
- },
543
- {
544
- "epoch": 4.02,
545
- "learning_rate": 9.76170568561873e-06,
546
- "loss": 1.9148,
547
- "step": 38500
548
- },
549
- {
550
- "epoch": 4.08,
551
- "learning_rate": 9.239130434782608e-06,
552
- "loss": 1.9031,
553
- "step": 39000
554
- },
555
- {
556
- "epoch": 4.13,
557
- "learning_rate": 8.716555183946488e-06,
558
- "loss": 1.8884,
559
- "step": 39500
560
- },
561
- {
562
- "epoch": 4.18,
563
- "learning_rate": 8.193979933110369e-06,
564
- "loss": 1.8267,
565
- "step": 40000
566
- },
567
- {
568
- "epoch": 4.23,
569
- "learning_rate": 7.671404682274248e-06,
570
- "loss": 1.8556,
571
- "step": 40500
572
- },
573
- {
574
- "epoch": 4.29,
575
- "learning_rate": 7.148829431438127e-06,
576
- "loss": 1.9098,
577
- "step": 41000
578
- },
579
- {
580
- "epoch": 4.34,
581
- "learning_rate": 6.6262541806020064e-06,
582
- "loss": 1.8693,
583
- "step": 41500
584
- },
585
- {
586
- "epoch": 4.39,
587
- "learning_rate": 6.103678929765887e-06,
588
- "loss": 1.8622,
589
- "step": 42000
590
- },
591
- {
592
- "epoch": 4.44,
593
- "learning_rate": 5.581103678929766e-06,
594
- "loss": 1.8848,
595
- "step": 42500
596
- },
597
- {
598
- "epoch": 4.49,
599
- "learning_rate": 5.0585284280936456e-06,
600
- "loss": 1.8998,
601
- "step": 43000
602
- },
603
- {
604
- "epoch": 4.55,
605
- "learning_rate": 4.535953177257525e-06,
606
- "loss": 1.9059,
607
- "step": 43500
608
- },
609
- {
610
- "epoch": 4.6,
611
- "learning_rate": 4.013377926421405e-06,
612
- "loss": 1.8852,
613
- "step": 44000
614
- },
615
- {
616
- "epoch": 4.65,
617
- "learning_rate": 3.4908026755852843e-06,
618
- "loss": 1.8984,
619
- "step": 44500
620
- },
621
- {
622
- "epoch": 4.7,
623
- "learning_rate": 2.968227424749164e-06,
624
- "loss": 1.8517,
625
- "step": 45000
626
- },
627
- {
628
- "epoch": 4.76,
629
- "learning_rate": 2.4456521739130437e-06,
630
- "loss": 1.917,
631
- "step": 45500
632
- },
633
- {
634
- "epoch": 4.81,
635
- "learning_rate": 1.9230769230769234e-06,
636
- "loss": 1.9134,
637
- "step": 46000
638
- },
639
- {
640
- "epoch": 4.86,
641
- "learning_rate": 1.4005016722408027e-06,
642
- "loss": 1.8772,
643
- "step": 46500
644
- },
645
- {
646
- "epoch": 4.91,
647
- "learning_rate": 8.779264214046823e-07,
648
- "loss": 1.8938,
649
- "step": 47000
650
- },
651
- {
652
- "epoch": 4.96,
653
- "learning_rate": 3.553511705685619e-07,
654
- "loss": 1.8652,
655
- "step": 47500
656
- },
657
- {
658
- "epoch": 5.0,
659
- "eval_bleu": 0.08741495854193439,
660
- "eval_brevity_penalty": 0.7850055548072304,
661
- "eval_length_ratio": 0.8051111774362035,
662
- "eval_loss": 1.8320603370666504,
663
  "eval_precisions": [
664
- 0.1860501756239516,
665
- 0.11514125956444968,
666
- 0.08657509646419298,
667
- 0.08290830945558739
668
  ],
669
- "eval_reference_length": 134020,
670
- "eval_runtime": 812.8238,
671
- "eval_samples_per_second": 15.695,
672
- "eval_steps_per_second": 1.309,
673
- "eval_translation_length": 107901,
674
- "step": 47840
675
  }
676
  ],
677
  "logging_steps": 500,
678
- "max_steps": 47840,
679
  "num_train_epochs": 5,
680
  "save_steps": 500,
681
- "total_flos": 3.9307126302572544e+17,
682
  "trial_name": null,
683
  "trial_params": null
684
  }
 
1
  {
2
+ "best_metric": 1.4803038835525513,
3
+ "best_model_checkpoint": "dq158/morbius/checkpoint-1475",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 1475,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.34,
13
+ "learning_rate": 4.6610169491525425e-05,
14
+ "loss": 1.8922,
15
  "step": 500
16
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  {
18
  "epoch": 0.68,
19
+ "learning_rate": 4.3220338983050854e-05,
20
+ "loss": 1.7522,
21
+ "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  },
23
  {
24
  "epoch": 1.0,
25
+ "eval_bleu": 1.0,
26
+ "eval_brevity_penalty": 1.0,
27
+ "eval_length_ratio": 1.0,
28
+ "eval_loss": 1.4803038835525513,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "eval_precisions": [
30
+ 1.0,
31
+ 1.0,
32
+ 1.0,
33
+ 1.0
34
  ],
35
+ "eval_reference_length": 35996,
36
+ "eval_runtime": 295.5668,
37
+ "eval_samples_per_second": 6.652,
38
+ "eval_steps_per_second": 0.555,
39
+ "eval_translation_length": 35996,
40
+ "step": 1475
41
  }
42
  ],
43
  "logging_steps": 500,
44
+ "max_steps": 7375,
45
  "num_train_epochs": 5,
46
  "save_steps": 500,
47
+ "total_flos": 1.2116096594214912e+16,
48
  "trial_name": null,
49
  "trial_params": null
50
  }