ShengdingHu committed on
Commit
2b1d8be
·
1 Parent(s): 4617467

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 80.0,
3
- "eval_accuracy": 0.8050541516245487,
4
- "eval_loss": 2.2128214836120605,
5
- "eval_runtime": 0.2593,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 1068.383,
8
- "eval_steps_per_second": 11.571,
9
- "train_loss": 0.10918387793577634,
10
- "train_runtime": 662.0132,
11
  "train_samples": 2490,
12
- "train_samples_per_second": 300.9,
13
- "train_steps_per_second": 9.426
14
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.7472924187725631,
4
+ "eval_loss": 0.5988962054252625,
5
+ "eval_runtime": 0.3009,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 920.67,
8
+ "eval_steps_per_second": 9.971,
9
+ "train_loss": 0.49518577380058093,
10
+ "train_runtime": 60.2408,
11
  "train_samples": 2490,
12
+ "train_samples_per_second": 413.341,
13
+ "train_steps_per_second": 12.948
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 80.0,
3
- "eval_accuracy": 0.8050541516245487,
4
- "eval_loss": 2.2128214836120605,
5
- "eval_runtime": 0.2593,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 1068.383,
8
- "eval_steps_per_second": 11.571
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.7472924187725631,
4
+ "eval_loss": 0.5988962054252625,
5
+ "eval_runtime": 0.3009,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 920.67,
8
+ "eval_steps_per_second": 9.971
9
  }
runs/Jan15_14-25-45_node3/events.out.tfevents.1642227962.node3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90d634be7b73075df444dc666c23dfb608d257c252ddae5a3439bfde0e0dde3d
3
- size 3516
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86ec7a2500b6f80fe87d847cac323f7130a3b730e48ada599a2f9910622e4b9c
3
+ size 7251
runs/Jan15_14-25-45_node3/events.out.tfevents.1642228023.node3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15611aaa3f81743fc33924ae0afe13a8d36e79351c1a1dbe127cd16c92b40f95
3
+ size 363
runs/Jan15_14-45-51_node3/1642229168.4472055/events.out.tfevents.1642229168.node3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30d75d791374ffedd3a37ac7babefce0461663bb02452e13316e0c2270b70724
3
+ size 4585
runs/Jan15_14-45-51_node3/events.out.tfevents.1642229168.node3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ccabbb2c70db197b63cbdce1e1ed6de1e4e28d706aaed1028c4ffd55b584048
3
+ size 3516
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 80.0,
3
- "train_loss": 0.10918387793577634,
4
- "train_runtime": 662.0132,
5
  "train_samples": 2490,
6
- "train_samples_per_second": 300.9,
7
- "train_steps_per_second": 9.426
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.49518577380058093,
4
+ "train_runtime": 60.2408,
5
  "train_samples": 2490,
6
+ "train_samples_per_second": 413.341,
7
+ "train_steps_per_second": 12.948
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.8050541516245487,
3
- "best_model_checkpoint": "outputs/lora/roberta-base/v2/rte/checkpoint-3900",
4
- "epoch": 80.0,
5
- "global_step": 6240,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,808 +10,112 @@
10
  {
11
  "epoch": 1.0,
12
  "eval_accuracy": 0.4729241877256318,
13
- "eval_loss": 0.6956132650375366,
14
- "eval_runtime": 0.247,
15
- "eval_samples_per_second": 1121.502,
16
- "eval_steps_per_second": 12.146,
17
  "step": 78
18
  },
19
  {
20
  "epoch": 2.0,
21
- "eval_accuracy": 0.4729241877256318,
22
- "eval_loss": 0.6990882158279419,
23
- "eval_runtime": 0.2312,
24
- "eval_samples_per_second": 1197.857,
25
- "eval_steps_per_second": 12.973,
26
  "step": 156
27
  },
28
  {
29
  "epoch": 3.0,
30
- "eval_accuracy": 0.6353790613718412,
31
- "eval_loss": 0.6490963101387024,
32
- "eval_runtime": 0.23,
33
- "eval_samples_per_second": 1204.444,
34
- "eval_steps_per_second": 13.045,
35
  "step": 234
36
  },
37
  {
38
  "epoch": 4.0,
39
- "eval_accuracy": 0.6823104693140795,
40
- "eval_loss": 0.607087254524231,
41
- "eval_runtime": 0.2281,
42
- "eval_samples_per_second": 1214.339,
43
- "eval_steps_per_second": 13.152,
44
  "step": 312
45
  },
46
  {
47
  "epoch": 5.0,
48
- "eval_accuracy": 0.7364620938628159,
49
- "eval_loss": 0.486316442489624,
50
- "eval_runtime": 0.231,
51
- "eval_samples_per_second": 1199.159,
52
- "eval_steps_per_second": 12.987,
53
  "step": 390
54
  },
55
  {
56
  "epoch": 6.0,
57
- "eval_accuracy": 0.7075812274368231,
58
- "eval_loss": 0.5595567226409912,
59
- "eval_runtime": 0.2322,
60
- "eval_samples_per_second": 1192.954,
61
- "eval_steps_per_second": 12.92,
62
  "step": 468
63
  },
64
  {
65
  "epoch": 6.41,
66
- "learning_rate": 0.0004893435635123615,
67
- "loss": 0.6104,
68
  "step": 500
69
  },
70
  {
71
  "epoch": 7.0,
72
- "eval_accuracy": 0.7220216606498195,
73
- "eval_loss": 0.5245415568351746,
74
- "eval_runtime": 0.2389,
75
- "eval_samples_per_second": 1159.31,
76
- "eval_steps_per_second": 12.556,
77
  "step": 546
78
  },
79
  {
80
  "epoch": 8.0,
81
- "eval_accuracy": 0.7581227436823105,
82
- "eval_loss": 0.54262775182724,
83
- "eval_runtime": 0.3139,
84
- "eval_samples_per_second": 882.519,
85
- "eval_steps_per_second": 9.558,
86
  "step": 624
87
  },
88
  {
89
  "epoch": 9.0,
90
- "eval_accuracy": 0.7220216606498195,
91
- "eval_loss": 0.7142526507377625,
92
- "eval_runtime": 0.3151,
93
- "eval_samples_per_second": 879.035,
94
- "eval_steps_per_second": 9.52,
95
  "step": 702
96
  },
97
  {
98
  "epoch": 10.0,
99
- "eval_accuracy": 0.7545126353790613,
100
- "eval_loss": 0.6892580986022949,
101
- "eval_runtime": 0.2701,
102
- "eval_samples_per_second": 1025.641,
103
- "eval_steps_per_second": 11.108,
104
  "step": 780
105
  },
106
  {
107
- "epoch": 11.0,
108
- "eval_accuracy": 0.7725631768953068,
109
- "eval_loss": 0.6529865860939026,
110
- "eval_runtime": 0.2299,
111
- "eval_samples_per_second": 1204.789,
112
- "eval_steps_per_second": 13.048,
113
- "step": 858
114
- },
115
- {
116
- "epoch": 12.0,
117
- "eval_accuracy": 0.7545126353790613,
118
- "eval_loss": 0.7537366151809692,
119
- "eval_runtime": 0.2349,
120
- "eval_samples_per_second": 1179.15,
121
- "eval_steps_per_second": 12.771,
122
- "step": 936
123
- },
124
- {
125
- "epoch": 12.82,
126
- "learning_rate": 0.00044671781756180733,
127
- "loss": 0.3007,
128
- "step": 1000
129
- },
130
- {
131
- "epoch": 13.0,
132
- "eval_accuracy": 0.7653429602888087,
133
- "eval_loss": 0.8136062622070312,
134
- "eval_runtime": 0.2324,
135
- "eval_samples_per_second": 1191.907,
136
- "eval_steps_per_second": 12.909,
137
- "step": 1014
138
- },
139
- {
140
- "epoch": 14.0,
141
- "eval_accuracy": 0.7003610108303249,
142
- "eval_loss": 1.2378367185592651,
143
- "eval_runtime": 0.2301,
144
- "eval_samples_per_second": 1203.951,
145
- "eval_steps_per_second": 13.039,
146
- "step": 1092
147
- },
148
- {
149
- "epoch": 15.0,
150
- "eval_accuracy": 0.7689530685920578,
151
- "eval_loss": 0.787124514579773,
152
- "eval_runtime": 0.2444,
153
- "eval_samples_per_second": 1133.544,
154
- "eval_steps_per_second": 12.277,
155
- "step": 1170
156
- },
157
- {
158
- "epoch": 16.0,
159
- "eval_accuracy": 0.7545126353790613,
160
- "eval_loss": 1.070081114768982,
161
- "eval_runtime": 0.2359,
162
- "eval_samples_per_second": 1174.133,
163
- "eval_steps_per_second": 12.716,
164
- "step": 1248
165
- },
166
- {
167
- "epoch": 17.0,
168
- "eval_accuracy": 0.776173285198556,
169
- "eval_loss": 0.840284526348114,
170
- "eval_runtime": 0.3239,
171
- "eval_samples_per_second": 855.277,
172
- "eval_steps_per_second": 9.263,
173
- "step": 1326
174
- },
175
- {
176
- "epoch": 18.0,
177
- "eval_accuracy": 0.7364620938628159,
178
- "eval_loss": 1.004836082458496,
179
- "eval_runtime": 0.2349,
180
- "eval_samples_per_second": 1179.426,
181
- "eval_steps_per_second": 12.774,
182
- "step": 1404
183
- },
184
- {
185
- "epoch": 19.0,
186
- "eval_accuracy": 0.7725631768953068,
187
- "eval_loss": 1.151750087738037,
188
- "eval_runtime": 0.2448,
189
- "eval_samples_per_second": 1131.443,
190
- "eval_steps_per_second": 12.254,
191
- "step": 1482
192
- },
193
- {
194
- "epoch": 19.23,
195
- "learning_rate": 0.0004040920716112532,
196
- "loss": 0.1396,
197
- "step": 1500
198
- },
199
- {
200
- "epoch": 20.0,
201
- "eval_accuracy": 0.7509025270758123,
202
- "eval_loss": 1.3428694009780884,
203
- "eval_runtime": 0.2276,
204
- "eval_samples_per_second": 1217.064,
205
- "eval_steps_per_second": 13.181,
206
- "step": 1560
207
- },
208
- {
209
- "epoch": 21.0,
210
- "eval_accuracy": 0.7617328519855595,
211
- "eval_loss": 1.3712104558944702,
212
- "eval_runtime": 0.3329,
213
- "eval_samples_per_second": 832.029,
214
- "eval_steps_per_second": 9.011,
215
- "step": 1638
216
- },
217
- {
218
- "epoch": 22.0,
219
- "eval_accuracy": 0.740072202166065,
220
- "eval_loss": 1.764930009841919,
221
- "eval_runtime": 0.2783,
222
- "eval_samples_per_second": 995.158,
223
- "eval_steps_per_second": 10.778,
224
- "step": 1716
225
- },
226
- {
227
- "epoch": 23.0,
228
- "eval_accuracy": 0.7436823104693141,
229
- "eval_loss": 1.5562723875045776,
230
- "eval_runtime": 0.2305,
231
- "eval_samples_per_second": 1201.943,
232
- "eval_steps_per_second": 13.017,
233
- "step": 1794
234
- },
235
- {
236
- "epoch": 24.0,
237
- "eval_accuracy": 0.7653429602888087,
238
- "eval_loss": 1.4325311183929443,
239
- "eval_runtime": 0.2723,
240
- "eval_samples_per_second": 1017.092,
241
- "eval_steps_per_second": 11.015,
242
- "step": 1872
243
- },
244
- {
245
- "epoch": 25.0,
246
- "eval_accuracy": 0.7509025270758123,
247
- "eval_loss": 1.7198717594146729,
248
- "eval_runtime": 0.3389,
249
- "eval_samples_per_second": 817.452,
250
- "eval_steps_per_second": 8.853,
251
- "step": 1950
252
- },
253
- {
254
- "epoch": 25.64,
255
- "learning_rate": 0.0003614663256606991,
256
- "loss": 0.0875,
257
- "step": 2000
258
- },
259
- {
260
- "epoch": 26.0,
261
- "eval_accuracy": 0.7545126353790613,
262
- "eval_loss": 1.5090142488479614,
263
- "eval_runtime": 0.2361,
264
- "eval_samples_per_second": 1173.253,
265
- "eval_steps_per_second": 12.707,
266
- "step": 2028
267
- },
268
- {
269
- "epoch": 27.0,
270
- "eval_accuracy": 0.7436823104693141,
271
- "eval_loss": 1.8515905141830444,
272
- "eval_runtime": 0.2487,
273
- "eval_samples_per_second": 1113.898,
274
- "eval_steps_per_second": 12.064,
275
- "step": 2106
276
- },
277
- {
278
- "epoch": 28.0,
279
- "eval_accuracy": 0.7509025270758123,
280
- "eval_loss": 1.5827966928482056,
281
- "eval_runtime": 0.3236,
282
- "eval_samples_per_second": 856.029,
283
- "eval_steps_per_second": 9.271,
284
- "step": 2184
285
- },
286
- {
287
- "epoch": 29.0,
288
- "eval_accuracy": 0.7509025270758123,
289
- "eval_loss": 1.9509788751602173,
290
- "eval_runtime": 0.2848,
291
- "eval_samples_per_second": 972.601,
292
- "eval_steps_per_second": 10.534,
293
- "step": 2262
294
- },
295
- {
296
- "epoch": 30.0,
297
- "eval_accuracy": 0.7617328519855595,
298
- "eval_loss": 1.903420329093933,
299
- "eval_runtime": 0.3114,
300
- "eval_samples_per_second": 889.649,
301
- "eval_steps_per_second": 9.635,
302
- "step": 2340
303
- },
304
- {
305
- "epoch": 31.0,
306
- "eval_accuracy": 0.7364620938628159,
307
- "eval_loss": 2.065227508544922,
308
- "eval_runtime": 0.2318,
309
- "eval_samples_per_second": 1195.129,
310
- "eval_steps_per_second": 12.944,
311
- "step": 2418
312
- },
313
- {
314
- "epoch": 32.0,
315
- "eval_accuracy": 0.7617328519855595,
316
- "eval_loss": 1.5601612329483032,
317
- "eval_runtime": 0.2455,
318
- "eval_samples_per_second": 1128.485,
319
- "eval_steps_per_second": 12.222,
320
- "step": 2496
321
- },
322
- {
323
- "epoch": 32.05,
324
- "learning_rate": 0.0003188405797101449,
325
- "loss": 0.0561,
326
- "step": 2500
327
- },
328
- {
329
- "epoch": 33.0,
330
- "eval_accuracy": 0.7653429602888087,
331
- "eval_loss": 1.9638988971710205,
332
- "eval_runtime": 0.3284,
333
- "eval_samples_per_second": 843.409,
334
- "eval_steps_per_second": 9.134,
335
- "step": 2574
336
- },
337
- {
338
- "epoch": 34.0,
339
- "eval_accuracy": 0.7653429602888087,
340
- "eval_loss": 2.137803792953491,
341
- "eval_runtime": 0.2261,
342
- "eval_samples_per_second": 1225.21,
343
- "eval_steps_per_second": 13.269,
344
- "step": 2652
345
- },
346
- {
347
- "epoch": 35.0,
348
- "eval_accuracy": 0.7509025270758123,
349
- "eval_loss": 1.8859294652938843,
350
- "eval_runtime": 0.2755,
351
- "eval_samples_per_second": 1005.403,
352
- "eval_steps_per_second": 10.889,
353
- "step": 2730
354
- },
355
- {
356
- "epoch": 36.0,
357
- "eval_accuracy": 0.7653429602888087,
358
- "eval_loss": 1.9513648748397827,
359
- "eval_runtime": 0.3253,
360
- "eval_samples_per_second": 851.469,
361
- "eval_steps_per_second": 9.222,
362
- "step": 2808
363
- },
364
- {
365
- "epoch": 37.0,
366
- "eval_accuracy": 0.7725631768953068,
367
- "eval_loss": 1.9519377946853638,
368
- "eval_runtime": 0.2278,
369
- "eval_samples_per_second": 1215.717,
370
- "eval_steps_per_second": 13.167,
371
- "step": 2886
372
- },
373
- {
374
- "epoch": 38.0,
375
- "eval_accuracy": 0.7617328519855595,
376
- "eval_loss": 2.181129217147827,
377
- "eval_runtime": 0.2412,
378
- "eval_samples_per_second": 1148.242,
379
- "eval_steps_per_second": 12.436,
380
- "step": 2964
381
- },
382
- {
383
- "epoch": 38.46,
384
- "learning_rate": 0.00027621483375959077,
385
- "loss": 0.0423,
386
- "step": 3000
387
- },
388
- {
389
- "epoch": 39.0,
390
- "eval_accuracy": 0.7581227436823105,
391
- "eval_loss": 2.104398250579834,
392
- "eval_runtime": 0.3032,
393
- "eval_samples_per_second": 913.545,
394
- "eval_steps_per_second": 9.894,
395
- "step": 3042
396
- },
397
- {
398
- "epoch": 40.0,
399
- "eval_accuracy": 0.7689530685920578,
400
- "eval_loss": 2.0705928802490234,
401
- "eval_runtime": 0.2647,
402
- "eval_samples_per_second": 1046.589,
403
- "eval_steps_per_second": 11.335,
404
- "step": 3120
405
- },
406
- {
407
- "epoch": 41.0,
408
- "eval_accuracy": 0.7870036101083032,
409
- "eval_loss": 1.89673912525177,
410
- "eval_runtime": 0.2316,
411
- "eval_samples_per_second": 1195.807,
412
- "eval_steps_per_second": 12.951,
413
- "step": 3198
414
- },
415
- {
416
- "epoch": 42.0,
417
- "eval_accuracy": 0.7906137184115524,
418
- "eval_loss": 2.081404209136963,
419
- "eval_runtime": 0.3113,
420
- "eval_samples_per_second": 889.737,
421
- "eval_steps_per_second": 9.636,
422
- "step": 3276
423
- },
424
- {
425
- "epoch": 43.0,
426
- "eval_accuracy": 0.779783393501805,
427
- "eval_loss": 2.200363874435425,
428
- "eval_runtime": 0.3027,
429
- "eval_samples_per_second": 914.986,
430
- "eval_steps_per_second": 9.91,
431
- "step": 3354
432
- },
433
- {
434
- "epoch": 44.0,
435
- "eval_accuracy": 0.7689530685920578,
436
- "eval_loss": 2.4501726627349854,
437
- "eval_runtime": 0.3032,
438
- "eval_samples_per_second": 913.655,
439
- "eval_steps_per_second": 9.895,
440
- "step": 3432
441
- },
442
- {
443
- "epoch": 44.87,
444
- "learning_rate": 0.00023358908780903668,
445
- "loss": 0.0294,
446
- "step": 3500
447
- },
448
- {
449
- "epoch": 45.0,
450
- "eval_accuracy": 0.7725631768953068,
451
- "eval_loss": 2.2387545108795166,
452
- "eval_runtime": 0.308,
453
- "eval_samples_per_second": 899.428,
454
- "eval_steps_per_second": 9.741,
455
- "step": 3510
456
- },
457
- {
458
- "epoch": 46.0,
459
- "eval_accuracy": 0.7978339350180506,
460
- "eval_loss": 2.1785941123962402,
461
- "eval_runtime": 0.2416,
462
- "eval_samples_per_second": 1146.514,
463
- "eval_steps_per_second": 12.417,
464
- "step": 3588
465
- },
466
- {
467
- "epoch": 47.0,
468
- "eval_accuracy": 0.7653429602888087,
469
- "eval_loss": 2.5040040016174316,
470
- "eval_runtime": 0.2296,
471
- "eval_samples_per_second": 1206.456,
472
- "eval_steps_per_second": 13.066,
473
- "step": 3666
474
- },
475
- {
476
- "epoch": 48.0,
477
- "eval_accuracy": 0.779783393501805,
478
- "eval_loss": 2.4230690002441406,
479
- "eval_runtime": 0.3106,
480
- "eval_samples_per_second": 891.893,
481
- "eval_steps_per_second": 9.659,
482
- "step": 3744
483
- },
484
- {
485
- "epoch": 49.0,
486
- "eval_accuracy": 0.776173285198556,
487
- "eval_loss": 2.3979299068450928,
488
- "eval_runtime": 0.2331,
489
- "eval_samples_per_second": 1188.417,
490
- "eval_steps_per_second": 12.871,
491
- "step": 3822
492
- },
493
- {
494
- "epoch": 50.0,
495
- "eval_accuracy": 0.8050541516245487,
496
- "eval_loss": 2.2128214836120605,
497
- "eval_runtime": 0.2857,
498
- "eval_samples_per_second": 969.393,
499
- "eval_steps_per_second": 10.499,
500
- "step": 3900
501
- },
502
- {
503
- "epoch": 51.0,
504
- "eval_accuracy": 0.7870036101083032,
505
- "eval_loss": 2.4434621334075928,
506
- "eval_runtime": 0.232,
507
- "eval_samples_per_second": 1193.915,
508
- "eval_steps_per_second": 12.93,
509
- "step": 3978
510
- },
511
- {
512
- "epoch": 51.28,
513
- "learning_rate": 0.00019096334185848252,
514
- "loss": 0.0247,
515
- "step": 4000
516
- },
517
- {
518
- "epoch": 52.0,
519
- "eval_accuracy": 0.8050541516245487,
520
- "eval_loss": 2.1702017784118652,
521
- "eval_runtime": 0.2785,
522
- "eval_samples_per_second": 994.698,
523
- "eval_steps_per_second": 10.773,
524
- "step": 4056
525
- },
526
- {
527
- "epoch": 53.0,
528
- "eval_accuracy": 0.7942238267148014,
529
- "eval_loss": 2.052528142929077,
530
- "eval_runtime": 0.2278,
531
- "eval_samples_per_second": 1216.111,
532
- "eval_steps_per_second": 13.171,
533
- "step": 4134
534
- },
535
- {
536
- "epoch": 54.0,
537
- "eval_accuracy": 0.7978339350180506,
538
- "eval_loss": 2.259878396987915,
539
- "eval_runtime": 0.2332,
540
- "eval_samples_per_second": 1187.792,
541
- "eval_steps_per_second": 12.864,
542
- "step": 4212
543
- },
544
- {
545
- "epoch": 55.0,
546
- "eval_accuracy": 0.7870036101083032,
547
- "eval_loss": 2.3375229835510254,
548
- "eval_runtime": 0.2354,
549
- "eval_samples_per_second": 1176.728,
550
- "eval_steps_per_second": 12.744,
551
- "step": 4290
552
- },
553
- {
554
- "epoch": 56.0,
555
- "eval_accuracy": 0.776173285198556,
556
- "eval_loss": 2.3791956901550293,
557
- "eval_runtime": 0.2303,
558
- "eval_samples_per_second": 1202.553,
559
- "eval_steps_per_second": 13.024,
560
- "step": 4368
561
- },
562
- {
563
- "epoch": 57.0,
564
- "eval_accuracy": 0.7617328519855595,
565
- "eval_loss": 2.6323866844177246,
566
- "eval_runtime": 0.2788,
567
- "eval_samples_per_second": 993.528,
568
- "eval_steps_per_second": 10.76,
569
- "step": 4446
570
- },
571
- {
572
- "epoch": 57.69,
573
- "learning_rate": 0.0001483375959079284,
574
- "loss": 0.0262,
575
- "step": 4500
576
- },
577
- {
578
- "epoch": 58.0,
579
- "eval_accuracy": 0.7725631768953068,
580
- "eval_loss": 2.386228561401367,
581
- "eval_runtime": 0.2333,
582
- "eval_samples_per_second": 1187.256,
583
- "eval_steps_per_second": 12.858,
584
- "step": 4524
585
- },
586
- {
587
- "epoch": 59.0,
588
- "eval_accuracy": 0.7653429602888087,
589
- "eval_loss": 2.629952907562256,
590
- "eval_runtime": 0.2289,
591
- "eval_samples_per_second": 1210.168,
592
- "eval_steps_per_second": 13.107,
593
- "step": 4602
594
- },
595
- {
596
- "epoch": 60.0,
597
- "eval_accuracy": 0.7689530685920578,
598
- "eval_loss": 2.5751161575317383,
599
- "eval_runtime": 0.2343,
600
- "eval_samples_per_second": 1182.419,
601
- "eval_steps_per_second": 12.806,
602
- "step": 4680
603
- },
604
- {
605
- "epoch": 61.0,
606
- "eval_accuracy": 0.776173285198556,
607
- "eval_loss": 2.4631881713867188,
608
- "eval_runtime": 0.3299,
609
- "eval_samples_per_second": 839.738,
610
- "eval_steps_per_second": 9.095,
611
- "step": 4758
612
- },
613
- {
614
- "epoch": 62.0,
615
- "eval_accuracy": 0.779783393501805,
616
- "eval_loss": 2.332425117492676,
617
- "eval_runtime": 0.2338,
618
- "eval_samples_per_second": 1184.805,
619
- "eval_steps_per_second": 12.832,
620
- "step": 4836
621
- },
622
- {
623
- "epoch": 63.0,
624
- "eval_accuracy": 0.7942238267148014,
625
- "eval_loss": 2.282438039779663,
626
- "eval_runtime": 0.3154,
627
- "eval_samples_per_second": 878.349,
628
- "eval_steps_per_second": 9.513,
629
- "step": 4914
630
- },
631
- {
632
- "epoch": 64.0,
633
- "eval_accuracy": 0.7725631768953068,
634
- "eval_loss": 2.5226333141326904,
635
- "eval_runtime": 0.2747,
636
- "eval_samples_per_second": 1008.395,
637
- "eval_steps_per_second": 10.921,
638
- "step": 4992
639
- },
640
- {
641
- "epoch": 64.1,
642
- "learning_rate": 0.00010571184995737426,
643
- "loss": 0.0163,
644
- "step": 5000
645
- },
646
- {
647
- "epoch": 65.0,
648
- "eval_accuracy": 0.7833935018050542,
649
- "eval_loss": 2.5482494831085205,
650
- "eval_runtime": 0.2312,
651
- "eval_samples_per_second": 1198.154,
652
- "eval_steps_per_second": 12.976,
653
- "step": 5070
654
- },
655
- {
656
- "epoch": 66.0,
657
- "eval_accuracy": 0.7906137184115524,
658
- "eval_loss": 2.5009658336639404,
659
- "eval_runtime": 0.2315,
660
- "eval_samples_per_second": 1196.726,
661
- "eval_steps_per_second": 12.961,
662
- "step": 5148
663
- },
664
- {
665
- "epoch": 67.0,
666
- "eval_accuracy": 0.7653429602888087,
667
- "eval_loss": 2.863718032836914,
668
- "eval_runtime": 0.2338,
669
- "eval_samples_per_second": 1184.556,
670
- "eval_steps_per_second": 12.829,
671
- "step": 5226
672
- },
673
- {
674
- "epoch": 68.0,
675
- "eval_accuracy": 0.7833935018050542,
676
- "eval_loss": 2.638036012649536,
677
- "eval_runtime": 0.3062,
678
- "eval_samples_per_second": 904.565,
679
- "eval_steps_per_second": 9.797,
680
- "step": 5304
681
- },
682
- {
683
- "epoch": 69.0,
684
- "eval_accuracy": 0.779783393501805,
685
- "eval_loss": 2.676180839538574,
686
- "eval_runtime": 0.2797,
687
- "eval_samples_per_second": 990.196,
688
- "eval_steps_per_second": 10.724,
689
- "step": 5382
690
- },
691
- {
692
- "epoch": 70.0,
693
- "eval_accuracy": 0.779783393501805,
694
- "eval_loss": 2.7992238998413086,
695
- "eval_runtime": 0.2332,
696
- "eval_samples_per_second": 1187.793,
697
- "eval_steps_per_second": 12.864,
698
- "step": 5460
699
- },
700
- {
701
- "epoch": 70.51,
702
- "learning_rate": 6.308610400682012e-05,
703
- "loss": 0.0133,
704
- "step": 5500
705
- },
706
- {
707
- "epoch": 71.0,
708
- "eval_accuracy": 0.7870036101083032,
709
- "eval_loss": 2.691901445388794,
710
- "eval_runtime": 0.2313,
711
- "eval_samples_per_second": 1197.649,
712
- "eval_steps_per_second": 12.971,
713
- "step": 5538
714
- },
715
- {
716
- "epoch": 72.0,
717
- "eval_accuracy": 0.7833935018050542,
718
- "eval_loss": 2.6941630840301514,
719
- "eval_runtime": 0.3108,
720
- "eval_samples_per_second": 891.377,
721
- "eval_steps_per_second": 9.654,
722
- "step": 5616
723
- },
724
- {
725
- "epoch": 73.0,
726
- "eval_accuracy": 0.7942238267148014,
727
- "eval_loss": 2.6317222118377686,
728
- "eval_runtime": 0.2308,
729
- "eval_samples_per_second": 1200.365,
730
- "eval_steps_per_second": 13.0,
731
- "step": 5694
732
- },
733
- {
734
- "epoch": 74.0,
735
- "eval_accuracy": 0.7870036101083032,
736
- "eval_loss": 2.635883331298828,
737
- "eval_runtime": 0.2325,
738
- "eval_samples_per_second": 1191.569,
739
- "eval_steps_per_second": 12.905,
740
- "step": 5772
741
- },
742
- {
743
- "epoch": 75.0,
744
- "eval_accuracy": 0.7870036101083032,
745
- "eval_loss": 2.631652593612671,
746
- "eval_runtime": 0.2307,
747
- "eval_samples_per_second": 1200.479,
748
- "eval_steps_per_second": 13.002,
749
- "step": 5850
750
- },
751
- {
752
- "epoch": 76.0,
753
- "eval_accuracy": 0.7833935018050542,
754
- "eval_loss": 2.68886661529541,
755
- "eval_runtime": 0.2322,
756
- "eval_samples_per_second": 1193.142,
757
- "eval_steps_per_second": 12.922,
758
- "step": 5928
759
- },
760
- {
761
- "epoch": 76.92,
762
- "learning_rate": 2.0460358056265986e-05,
763
- "loss": 0.0085,
764
- "step": 6000
765
- },
766
- {
767
- "epoch": 77.0,
768
- "eval_accuracy": 0.7833935018050542,
769
- "eval_loss": 2.730724811553955,
770
- "eval_runtime": 0.2718,
771
- "eval_samples_per_second": 1019.194,
772
- "eval_steps_per_second": 11.038,
773
- "step": 6006
774
- },
775
- {
776
- "epoch": 78.0,
777
- "eval_accuracy": 0.779783393501805,
778
- "eval_loss": 2.7645108699798584,
779
- "eval_runtime": 0.232,
780
- "eval_samples_per_second": 1193.866,
781
- "eval_steps_per_second": 12.93,
782
- "step": 6084
783
- },
784
- {
785
- "epoch": 79.0,
786
- "eval_accuracy": 0.7833935018050542,
787
- "eval_loss": 2.7476391792297363,
788
- "eval_runtime": 0.3197,
789
- "eval_samples_per_second": 866.393,
790
- "eval_steps_per_second": 9.383,
791
- "step": 6162
792
- },
793
- {
794
- "epoch": 80.0,
795
- "eval_accuracy": 0.7833935018050542,
796
- "eval_loss": 2.7370429039001465,
797
- "eval_runtime": 0.2352,
798
- "eval_samples_per_second": 1177.509,
799
- "eval_steps_per_second": 12.753,
800
- "step": 6240
801
- },
802
- {
803
- "epoch": 80.0,
804
- "step": 6240,
805
- "total_flos": 1.31480478461952e+16,
806
- "train_loss": 0.10918387793577634,
807
- "train_runtime": 662.0132,
808
- "train_samples_per_second": 300.9,
809
- "train_steps_per_second": 9.426
810
  }
811
  ],
812
- "max_steps": 6240,
813
- "num_train_epochs": 80,
814
- "total_flos": 1.31480478461952e+16,
815
  "trial_name": null,
816
  "trial_params": null
817
  }
 
1
  {
2
+ "best_metric": 0.7472924187725631,
3
+ "best_model_checkpoint": "outputs/lora/roberta-base/v2/rte/checkpoint-702",
4
+ "epoch": 10.0,
5
+ "global_step": 780,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 1.0,
12
  "eval_accuracy": 0.4729241877256318,
13
+ "eval_loss": 0.700201690196991,
14
+ "eval_runtime": 0.2542,
15
+ "eval_samples_per_second": 1089.901,
16
+ "eval_steps_per_second": 11.804,
17
  "step": 78
18
  },
19
  {
20
  "epoch": 2.0,
21
+ "eval_accuracy": 0.6389891696750902,
22
+ "eval_loss": 0.6369197964668274,
23
+ "eval_runtime": 0.3571,
24
+ "eval_samples_per_second": 775.771,
25
+ "eval_steps_per_second": 8.402,
26
  "step": 156
27
  },
28
  {
29
  "epoch": 3.0,
30
+ "eval_accuracy": 0.6606498194945848,
31
+ "eval_loss": 0.6243730783462524,
32
+ "eval_runtime": 0.2436,
33
+ "eval_samples_per_second": 1137.183,
34
+ "eval_steps_per_second": 12.316,
35
  "step": 234
36
  },
37
  {
38
  "epoch": 4.0,
39
+ "eval_accuracy": 0.7184115523465704,
40
+ "eval_loss": 0.572300136089325,
41
+ "eval_runtime": 0.228,
42
+ "eval_samples_per_second": 1214.712,
43
+ "eval_steps_per_second": 13.156,
44
  "step": 312
45
  },
46
  {
47
  "epoch": 5.0,
48
+ "eval_accuracy": 0.7220216606498195,
49
+ "eval_loss": 0.5677592754364014,
50
+ "eval_runtime": 0.2299,
51
+ "eval_samples_per_second": 1204.73,
52
+ "eval_steps_per_second": 13.048,
53
  "step": 390
54
  },
55
  {
56
  "epoch": 6.0,
57
+ "eval_accuracy": 0.7220216606498195,
58
+ "eval_loss": 0.6392128467559814,
59
+ "eval_runtime": 0.2306,
60
+ "eval_samples_per_second": 1201.256,
61
+ "eval_steps_per_second": 13.01,
62
  "step": 468
63
  },
64
  {
65
  "epoch": 6.41,
66
+ "learning_rate": 0.00019099590723055935,
67
+ "loss": 0.5649,
68
  "step": 500
69
  },
70
  {
71
  "epoch": 7.0,
72
+ "eval_accuracy": 0.7364620938628159,
73
+ "eval_loss": 0.5661243200302124,
74
+ "eval_runtime": 0.3267,
75
+ "eval_samples_per_second": 847.854,
76
+ "eval_steps_per_second": 9.183,
77
  "step": 546
78
  },
79
  {
80
  "epoch": 8.0,
81
+ "eval_accuracy": 0.7328519855595668,
82
+ "eval_loss": 0.6633248329162598,
83
+ "eval_runtime": 0.228,
84
+ "eval_samples_per_second": 1214.709,
85
+ "eval_steps_per_second": 13.156,
86
  "step": 624
87
  },
88
  {
89
  "epoch": 9.0,
90
+ "eval_accuracy": 0.7472924187725631,
91
+ "eval_loss": 0.5988962054252625,
92
+ "eval_runtime": 0.2338,
93
+ "eval_samples_per_second": 1184.722,
94
+ "eval_steps_per_second": 12.831,
95
  "step": 702
96
  },
97
  {
98
  "epoch": 10.0,
99
+ "eval_accuracy": 0.7292418772563177,
100
+ "eval_loss": 0.6346577405929565,
101
+ "eval_runtime": 0.2362,
102
+ "eval_samples_per_second": 1172.727,
103
+ "eval_steps_per_second": 12.701,
104
  "step": 780
105
  },
106
  {
107
+ "epoch": 10.0,
108
+ "step": 780,
109
+ "total_flos": 1643505980774400.0,
110
+ "train_loss": 0.49518577380058093,
111
+ "train_runtime": 60.2408,
112
+ "train_samples_per_second": 413.341,
113
+ "train_steps_per_second": 12.948
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  }
115
  ],
116
+ "max_steps": 780,
117
+ "num_train_epochs": 10,
118
+ "total_flos": 1643505980774400.0,
119
  "trial_name": null,
120
  "trial_params": null
121
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03da3c7b3ee601a1c60e4b1c7b52908cb0d6e3482e88e3fba532380638e91af7
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db3be2cac0cb77090e11e0d4f57a25a90219062e587aa740922cc72ee94791b6
3
  size 2991