File size: 12,125 Bytes
4148993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
{
  "best_metric": 6.074151039123535,
  "best_model_checkpoint": "bill_sum_finetune_test_gpt2/checkpoint-528",
  "epoch": 33.0,
  "eval_steps": 500,
  "global_step": 528,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.843741416931152,
      "eval_rouge1": 0.4053,
      "eval_rouge2": 0.1708,
      "eval_rougeL": 0.2228,
      "eval_rougeLsum": 0.35,
      "eval_runtime": 21.0375,
      "eval_samples_per_second": 11.788,
      "eval_steps_per_second": 0.19,
      "step": 16
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.511104583740234,
      "eval_rouge1": 0.3978,
      "eval_rouge2": 0.1673,
      "eval_rougeL": 0.2181,
      "eval_rougeLsum": 0.3434,
      "eval_runtime": 21.5368,
      "eval_samples_per_second": 11.515,
      "eval_steps_per_second": 0.186,
      "step": 32
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.349050998687744,
      "eval_rouge1": 0.3988,
      "eval_rouge2": 0.1679,
      "eval_rougeL": 0.2188,
      "eval_rougeLsum": 0.3443,
      "eval_runtime": 21.2101,
      "eval_samples_per_second": 11.693,
      "eval_steps_per_second": 0.189,
      "step": 48
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.258257865905762,
      "eval_rouge1": 0.3996,
      "eval_rouge2": 0.1681,
      "eval_rougeL": 0.2189,
      "eval_rougeLsum": 0.345,
      "eval_runtime": 21.3129,
      "eval_samples_per_second": 11.636,
      "eval_steps_per_second": 0.188,
      "step": 64
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.200411796569824,
      "eval_rouge1": 0.3986,
      "eval_rouge2": 0.1677,
      "eval_rougeL": 0.2184,
      "eval_rougeLsum": 0.3439,
      "eval_runtime": 21.6445,
      "eval_samples_per_second": 11.458,
      "eval_steps_per_second": 0.185,
      "step": 80
    },
    {
      "epoch": 6.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.170421600341797,
      "eval_rouge1": 0.3981,
      "eval_rouge2": 0.1674,
      "eval_rougeL": 0.2178,
      "eval_rougeLsum": 0.3432,
      "eval_runtime": 21.242,
      "eval_samples_per_second": 11.675,
      "eval_steps_per_second": 0.188,
      "step": 96
    },
    {
      "epoch": 7.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.150304317474365,
      "eval_rouge1": 0.3976,
      "eval_rouge2": 0.1672,
      "eval_rougeL": 0.2176,
      "eval_rougeLsum": 0.3428,
      "eval_runtime": 21.3562,
      "eval_samples_per_second": 11.613,
      "eval_steps_per_second": 0.187,
      "step": 112
    },
    {
      "epoch": 8.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.135751724243164,
      "eval_rouge1": 0.3977,
      "eval_rouge2": 0.1672,
      "eval_rougeL": 0.2175,
      "eval_rougeLsum": 0.3427,
      "eval_runtime": 21.5836,
      "eval_samples_per_second": 11.49,
      "eval_steps_per_second": 0.185,
      "step": 128
    },
    {
      "epoch": 9.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.122563362121582,
      "eval_rouge1": 0.3977,
      "eval_rouge2": 0.1671,
      "eval_rougeL": 0.2171,
      "eval_rougeLsum": 0.3425,
      "eval_runtime": 21.5829,
      "eval_samples_per_second": 11.491,
      "eval_steps_per_second": 0.185,
      "step": 144
    },
    {
      "epoch": 10.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.114274501800537,
      "eval_rouge1": 0.397,
      "eval_rouge2": 0.1669,
      "eval_rougeL": 0.2174,
      "eval_rougeLsum": 0.3427,
      "eval_runtime": 21.508,
      "eval_samples_per_second": 11.531,
      "eval_steps_per_second": 0.186,
      "step": 160
    },
    {
      "epoch": 11.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.108905792236328,
      "eval_rouge1": 0.3973,
      "eval_rouge2": 0.167,
      "eval_rougeL": 0.2173,
      "eval_rougeLsum": 0.3427,
      "eval_runtime": 21.2386,
      "eval_samples_per_second": 11.677,
      "eval_steps_per_second": 0.188,
      "step": 176
    },
    {
      "epoch": 12.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.107725620269775,
      "eval_rouge1": 0.3974,
      "eval_rouge2": 0.167,
      "eval_rougeL": 0.2173,
      "eval_rougeLsum": 0.3426,
      "eval_runtime": 21.6952,
      "eval_samples_per_second": 11.431,
      "eval_steps_per_second": 0.184,
      "step": 192
    },
    {
      "epoch": 13.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.099628448486328,
      "eval_rouge1": 0.3976,
      "eval_rouge2": 0.167,
      "eval_rougeL": 0.2172,
      "eval_rougeLsum": 0.3428,
      "eval_runtime": 21.1438,
      "eval_samples_per_second": 11.729,
      "eval_steps_per_second": 0.189,
      "step": 208
    },
    {
      "epoch": 14.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.096395492553711,
      "eval_rouge1": 0.3975,
      "eval_rouge2": 0.167,
      "eval_rougeL": 0.2171,
      "eval_rougeLsum": 0.3426,
      "eval_runtime": 21.6504,
      "eval_samples_per_second": 11.455,
      "eval_steps_per_second": 0.185,
      "step": 224
    },
    {
      "epoch": 15.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.0916852951049805,
      "eval_rouge1": 0.3979,
      "eval_rouge2": 0.167,
      "eval_rougeL": 0.2168,
      "eval_rougeLsum": 0.3427,
      "eval_runtime": 21.4782,
      "eval_samples_per_second": 11.547,
      "eval_steps_per_second": 0.186,
      "step": 240
    },
    {
      "epoch": 16.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.090492248535156,
      "eval_rouge1": 0.3977,
      "eval_rouge2": 0.1672,
      "eval_rougeL": 0.2173,
      "eval_rougeLsum": 0.3428,
      "eval_runtime": 21.7128,
      "eval_samples_per_second": 11.422,
      "eval_steps_per_second": 0.184,
      "step": 256
    },
    {
      "epoch": 17.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.091054916381836,
      "eval_rouge1": 0.399,
      "eval_rouge2": 0.168,
      "eval_rougeL": 0.2176,
      "eval_rougeLsum": 0.3436,
      "eval_runtime": 21.2583,
      "eval_samples_per_second": 11.666,
      "eval_steps_per_second": 0.188,
      "step": 272
    },
    {
      "epoch": 18.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.0864386558532715,
      "eval_rouge1": 0.3985,
      "eval_rouge2": 0.1675,
      "eval_rougeL": 0.2172,
      "eval_rougeLsum": 0.3431,
      "eval_runtime": 21.4489,
      "eval_samples_per_second": 11.562,
      "eval_steps_per_second": 0.186,
      "step": 288
    },
    {
      "epoch": 19.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.082566261291504,
      "eval_rouge1": 0.4004,
      "eval_rouge2": 0.1686,
      "eval_rougeL": 0.2186,
      "eval_rougeLsum": 0.3451,
      "eval_runtime": 21.4779,
      "eval_samples_per_second": 11.547,
      "eval_steps_per_second": 0.186,
      "step": 304
    },
    {
      "epoch": 20.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.0813798904418945,
      "eval_rouge1": 0.4009,
      "eval_rouge2": 0.1689,
      "eval_rougeL": 0.2189,
      "eval_rougeLsum": 0.3454,
      "eval_runtime": 21.5568,
      "eval_samples_per_second": 11.504,
      "eval_steps_per_second": 0.186,
      "step": 320
    },
    {
      "epoch": 21.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.082016944885254,
      "eval_rouge1": 0.3999,
      "eval_rouge2": 0.1682,
      "eval_rougeL": 0.218,
      "eval_rougeLsum": 0.3444,
      "eval_runtime": 21.5727,
      "eval_samples_per_second": 11.496,
      "eval_steps_per_second": 0.185,
      "step": 336
    },
    {
      "epoch": 22.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.082878589630127,
      "eval_rouge1": 0.4076,
      "eval_rouge2": 0.1718,
      "eval_rougeL": 0.2222,
      "eval_rougeLsum": 0.3508,
      "eval_runtime": 20.8434,
      "eval_samples_per_second": 11.898,
      "eval_steps_per_second": 0.192,
      "step": 352
    },
    {
      "epoch": 23.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.080228805541992,
      "eval_rouge1": 0.405,
      "eval_rouge2": 0.1705,
      "eval_rougeL": 0.221,
      "eval_rougeLsum": 0.3488,
      "eval_runtime": 21.1916,
      "eval_samples_per_second": 11.703,
      "eval_steps_per_second": 0.189,
      "step": 368
    },
    {
      "epoch": 24.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.07808780670166,
      "eval_rouge1": 0.4052,
      "eval_rouge2": 0.1709,
      "eval_rougeL": 0.2212,
      "eval_rougeLsum": 0.3491,
      "eval_runtime": 21.3026,
      "eval_samples_per_second": 11.642,
      "eval_steps_per_second": 0.188,
      "step": 384
    },
    {
      "epoch": 25.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.077059268951416,
      "eval_rouge1": 0.4064,
      "eval_rouge2": 0.1711,
      "eval_rougeL": 0.2216,
      "eval_rougeLsum": 0.3498,
      "eval_runtime": 20.9702,
      "eval_samples_per_second": 11.826,
      "eval_steps_per_second": 0.191,
      "step": 400
    },
    {
      "epoch": 26.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.075596809387207,
      "eval_rouge1": 0.4086,
      "eval_rouge2": 0.1723,
      "eval_rougeL": 0.223,
      "eval_rougeLsum": 0.3517,
      "eval_runtime": 21.1984,
      "eval_samples_per_second": 11.699,
      "eval_steps_per_second": 0.189,
      "step": 416
    },
    {
      "epoch": 27.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.075705528259277,
      "eval_rouge1": 0.4075,
      "eval_rouge2": 0.1719,
      "eval_rougeL": 0.2224,
      "eval_rougeLsum": 0.3509,
      "eval_runtime": 20.7964,
      "eval_samples_per_second": 11.925,
      "eval_steps_per_second": 0.192,
      "step": 432
    },
    {
      "epoch": 28.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.075275421142578,
      "eval_rouge1": 0.4081,
      "eval_rouge2": 0.1722,
      "eval_rougeL": 0.2224,
      "eval_rougeLsum": 0.3509,
      "eval_runtime": 20.9972,
      "eval_samples_per_second": 11.811,
      "eval_steps_per_second": 0.191,
      "step": 448
    },
    {
      "epoch": 29.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.076692581176758,
      "eval_rouge1": 0.4132,
      "eval_rouge2": 0.1751,
      "eval_rougeL": 0.2258,
      "eval_rougeLsum": 0.3553,
      "eval_runtime": 21.0313,
      "eval_samples_per_second": 11.792,
      "eval_steps_per_second": 0.19,
      "step": 464
    },
    {
      "epoch": 30.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.075990676879883,
      "eval_rouge1": 0.4108,
      "eval_rouge2": 0.1737,
      "eval_rougeL": 0.2242,
      "eval_rougeLsum": 0.3533,
      "eval_runtime": 20.714,
      "eval_samples_per_second": 11.973,
      "eval_steps_per_second": 0.193,
      "step": 480
    },
    {
      "epoch": 31.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.074672222137451,
      "eval_rouge1": 0.4126,
      "eval_rouge2": 0.1747,
      "eval_rougeL": 0.2253,
      "eval_rougeLsum": 0.3546,
      "eval_runtime": 21.1511,
      "eval_samples_per_second": 11.725,
      "eval_steps_per_second": 0.189,
      "step": 496
    },
    {
      "epoch": 31.25,
      "grad_norm": 237350.25,
      "learning_rate": 2.1428571428571427e-06,
      "loss": 6.1153,
      "step": 500
    },
    {
      "epoch": 32.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.076193809509277,
      "eval_rouge1": 0.4119,
      "eval_rouge2": 0.1744,
      "eval_rougeL": 0.2248,
      "eval_rougeLsum": 0.3541,
      "eval_runtime": 20.5412,
      "eval_samples_per_second": 12.073,
      "eval_steps_per_second": 0.195,
      "step": 512
    },
    {
      "epoch": 33.0,
      "eval_gen_len": 600.0,
      "eval_loss": 6.074151039123535,
      "eval_rouge1": 0.4123,
      "eval_rouge2": 0.1746,
      "eval_rougeL": 0.2251,
      "eval_rougeLsum": 0.3545,
      "eval_runtime": 21.0056,
      "eval_samples_per_second": 11.806,
      "eval_steps_per_second": 0.19,
      "step": 528
    }
  ],
  "logging_steps": 500,
  "max_steps": 560,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 35,
  "save_steps": 500,
  "total_flos": 8527788048384000.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}