aalonso-developer commited on
Commit
9782a4f
·
1 Parent(s): 2598c17

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.7164788050618907,
4
- "eval_loss": 0.9746271967887878,
5
- "eval_runtime": 155.7191,
6
- "eval_samples_per_second": 185.732,
7
- "eval_steps_per_second": 23.221,
8
- "train_loss": 0.9355616505553886,
9
- "train_runtime": 13547.1303,
10
- "train_samples_per_second": 68.318,
11
- "train_steps_per_second": 2.135
12
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7055528663301293,
4
+ "eval_loss": 1.0334995985031128,
5
+ "eval_runtime": 152.4169,
6
+ "eval_samples_per_second": 189.756,
7
+ "eval_steps_per_second": 23.724,
8
+ "train_loss": 1.1119355370668336,
9
+ "train_runtime": 13795.8304,
10
+ "train_samples_per_second": 67.086,
11
+ "train_steps_per_second": 2.097
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.7164788050618907,
4
- "eval_loss": 0.9746271967887878,
5
- "eval_runtime": 155.7191,
6
- "eval_samples_per_second": 185.732,
7
- "eval_steps_per_second": 23.221
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7055528663301293,
4
+ "eval_loss": 1.0334995985031128,
5
+ "eval_runtime": 152.4169,
6
+ "eval_samples_per_second": 189.756,
7
+ "eval_steps_per_second": 23.724
8
  }
runs/Jun07_08-09-43_adrian-development/events.out.tfevents.1686132156.adrian-development.7920.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb59c16ce322830415205bda7399949db2853bf92a2f3da3f48b2beb5f7819c
3
+ size 369
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 4.0,
3
- "train_loss": 0.9355616505553886,
4
- "train_runtime": 13547.1303,
5
- "train_samples_per_second": 68.318,
6
- "train_steps_per_second": 2.135
7
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "train_loss": 1.1119355370668336,
4
+ "train_runtime": 13795.8304,
5
+ "train_samples_per_second": 67.086,
6
+ "train_steps_per_second": 2.097
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9746271967887878,
3
- "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-21000",
4
  "epoch": 4.0,
5
  "global_step": 28924,
6
  "is_hyper_param_search": false,
@@ -9,432 +9,432 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
- "learning_rate": 2.4135665883003737e-05,
13
- "loss": 2.0439,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
- "eval_accuracy": 0.6367125371689372,
19
- "eval_loss": 1.4945471286773682,
20
- "eval_runtime": 155.4022,
21
- "eval_samples_per_second": 186.111,
22
- "eval_steps_per_second": 23.269,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
- "learning_rate": 2.327133176600747e-05,
28
- "loss": 1.3784,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
- "eval_accuracy": 0.6662748081045571,
34
- "eval_loss": 1.2658541202545166,
35
- "eval_runtime": 152.9449,
36
- "eval_samples_per_second": 189.101,
37
- "eval_steps_per_second": 23.642,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
- "learning_rate": 2.2408726317245195e-05,
43
- "loss": 1.2223,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
- "eval_accuracy": 0.6780305649678445,
49
- "eval_loss": 1.176424503326416,
50
- "eval_runtime": 155.5666,
51
- "eval_samples_per_second": 185.914,
52
- "eval_steps_per_second": 23.244,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
- "learning_rate": 2.1544392200248927e-05,
58
- "loss": 1.153,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
- "eval_accuracy": 0.687227715925593,
64
- "eval_loss": 1.115257740020752,
65
- "eval_runtime": 154.7152,
66
- "eval_samples_per_second": 186.937,
67
- "eval_steps_per_second": 23.372,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
- "learning_rate": 2.0680058083252663e-05,
73
- "loss": 1.1037,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
- "eval_accuracy": 0.6912385035613028,
79
- "eval_loss": 1.0900990962982178,
80
- "eval_runtime": 154.22,
81
- "eval_samples_per_second": 187.537,
82
- "eval_steps_per_second": 23.447,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
- "learning_rate": 1.9816588300373394e-05,
88
- "loss": 1.0827,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
- "eval_accuracy": 0.6933822004010788,
94
- "eval_loss": 1.068312406539917,
95
- "eval_runtime": 150.0826,
96
- "eval_samples_per_second": 192.707,
97
- "eval_steps_per_second": 24.093,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
- "learning_rate": 1.8953118517494124e-05,
103
- "loss": 1.0646,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
- "eval_accuracy": 0.7026485028697877,
109
- "eval_loss": 1.037534475326538,
110
- "eval_runtime": 148.9104,
111
- "eval_samples_per_second": 194.224,
112
- "eval_steps_per_second": 24.283,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
- "learning_rate": 1.8088784400497857e-05,
118
- "loss": 0.9782,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
- "eval_accuracy": 0.6986722909895581,
124
- "eval_loss": 1.0466938018798828,
125
- "eval_runtime": 149.1228,
126
- "eval_samples_per_second": 193.948,
127
- "eval_steps_per_second": 24.248,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
- "learning_rate": 1.7225314617618587e-05,
133
- "loss": 0.9627,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
- "eval_accuracy": 0.7060023511513727,
139
- "eval_loss": 1.0178191661834717,
140
- "eval_runtime": 149.6361,
141
- "eval_samples_per_second": 193.282,
142
- "eval_steps_per_second": 24.165,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
- "learning_rate": 1.6360980500622323e-05,
148
- "loss": 0.9411,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
- "eval_accuracy": 0.7056911693520503,
154
- "eval_loss": 1.0125459432601929,
155
- "eval_runtime": 148.9456,
156
- "eval_samples_per_second": 194.178,
157
- "eval_steps_per_second": 24.277,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
- "learning_rate": 1.5496646383626052e-05,
163
- "loss": 0.9333,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
- "eval_accuracy": 0.7067630177719383,
169
- "eval_loss": 1.0095747709274292,
170
- "eval_runtime": 149.6511,
171
- "eval_samples_per_second": 193.263,
172
- "eval_steps_per_second": 24.163,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
- "learning_rate": 1.4633176600746785e-05,
178
- "loss": 0.9333,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
- "eval_accuracy": 0.7047576239540834,
184
- "eval_loss": 1.0087416172027588,
185
- "eval_runtime": 149.4203,
186
- "eval_samples_per_second": 193.561,
187
- "eval_steps_per_second": 24.2,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
- "learning_rate": 1.3768842483750519e-05,
193
- "loss": 0.9227,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
- "eval_accuracy": 0.7118456538275362,
199
- "eval_loss": 0.9883840084075928,
200
- "eval_runtime": 149.2315,
201
- "eval_samples_per_second": 193.806,
202
- "eval_steps_per_second": 24.231,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
- "learning_rate": 1.290537270087125e-05,
208
- "loss": 0.9131,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
- "eval_accuracy": 0.7127100477145426,
214
- "eval_loss": 0.986507773399353,
215
- "eval_runtime": 149.078,
216
- "eval_samples_per_second": 194.006,
217
- "eval_steps_per_second": 24.256,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
- "learning_rate": 1.2041038583874984e-05,
223
- "loss": 0.8703,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
- "eval_accuracy": 0.7143696839775949,
229
- "eval_loss": 0.9831659197807312,
230
- "eval_runtime": 150.612,
231
- "eval_samples_per_second": 192.03,
232
- "eval_steps_per_second": 24.009,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
- "learning_rate": 1.1177568800995714e-05,
238
- "loss": 0.8064,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
- "eval_accuracy": 0.7136435931125095,
244
- "eval_loss": 0.9858880043029785,
245
- "eval_runtime": 149.8659,
246
- "eval_samples_per_second": 192.986,
247
- "eval_steps_per_second": 24.128,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
- "learning_rate": 1.0313234683999447e-05,
253
- "loss": 0.8131,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
- "eval_accuracy": 0.7127100477145426,
259
- "eval_loss": 0.9879944920539856,
260
- "eval_runtime": 150.5293,
261
- "eval_samples_per_second": 192.135,
262
- "eval_steps_per_second": 24.022,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
- "learning_rate": 9.448900567003181e-06,
268
- "loss": 0.8064,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
- "eval_accuracy": 0.7113270174953322,
274
- "eval_loss": 0.9872538447380066,
275
- "eval_runtime": 149.9196,
276
- "eval_samples_per_second": 192.917,
277
- "eval_steps_per_second": 24.12,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
- "learning_rate": 8.584566450006915e-06,
283
- "loss": 0.812,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
- "eval_accuracy": 0.7153723808865223,
289
- "eval_loss": 0.9793804883956909,
290
- "eval_runtime": 151.3957,
291
- "eval_samples_per_second": 191.036,
292
- "eval_steps_per_second": 23.884,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
- "learning_rate": 7.721096667127644e-06,
298
- "loss": 0.7977,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
- "eval_accuracy": 0.7151649263536408,
304
- "eval_loss": 0.9755117297172546,
305
- "eval_runtime": 144.4577,
306
- "eval_samples_per_second": 200.211,
307
- "eval_steps_per_second": 25.032,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
- "learning_rate": 6.856762550131379e-06,
313
- "loss": 0.8118,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
- "eval_accuracy": 0.7164788050618907,
319
- "eval_loss": 0.9746271967887878,
320
- "eval_runtime": 155.3329,
321
- "eval_samples_per_second": 186.194,
322
- "eval_steps_per_second": 23.279,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
- "learning_rate": 5.992428433135113e-06,
328
- "loss": 0.7741,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
- "eval_accuracy": 0.7189336837009889,
334
- "eval_loss": 0.9803725481033325,
335
- "eval_runtime": 153.5991,
336
- "eval_samples_per_second": 188.295,
337
- "eval_steps_per_second": 23.542,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
- "learning_rate": 5.128958650255843e-06,
343
- "loss": 0.7202,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
- "eval_accuracy": 0.71371274462347,
349
- "eval_loss": 0.9932627081871033,
350
- "eval_runtime": 153.8948,
351
- "eval_samples_per_second": 187.934,
352
- "eval_steps_per_second": 23.497,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
- "learning_rate": 4.264624533259577e-06,
358
- "loss": 0.7186,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
- "eval_accuracy": 0.7173086231934168,
364
- "eval_loss": 0.9890537261962891,
365
- "eval_runtime": 154.048,
366
- "eval_samples_per_second": 187.747,
367
- "eval_steps_per_second": 23.473,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
- "learning_rate": 3.4011547503803073e-06,
373
- "loss": 0.7164,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
- "eval_accuracy": 0.7176198049927391,
379
- "eval_loss": 0.9812184572219849,
380
- "eval_runtime": 153.9568,
381
- "eval_samples_per_second": 187.858,
382
- "eval_steps_per_second": 23.487,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
- "learning_rate": 2.536820633384041e-06,
388
- "loss": 0.7047,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
- "eval_accuracy": 0.7157527141968052,
394
- "eval_loss": 0.9885823130607605,
395
- "eval_runtime": 155.4293,
396
- "eval_samples_per_second": 186.078,
397
- "eval_steps_per_second": 23.265,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
- "learning_rate": 1.6724865163877752e-06,
403
- "loss": 0.7241,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
- "eval_accuracy": 0.7153032293755619,
409
- "eval_loss": 0.9851787686347961,
410
- "eval_runtime": 155.7881,
411
- "eval_samples_per_second": 185.65,
412
- "eval_steps_per_second": 23.211,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
- "learning_rate": 8.081523993915088e-07,
418
- "loss": 0.699,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
- "eval_accuracy": 0.7164788050618907,
424
- "eval_loss": 0.984923243522644,
425
- "eval_runtime": 156.4441,
426
- "eval_samples_per_second": 184.871,
427
- "eval_steps_per_second": 23.114,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.0,
432
  "step": 28924,
433
  "total_flos": 7.174773947150918e+19,
434
- "train_loss": 0.9355616505553886,
435
- "train_runtime": 13547.1303,
436
- "train_samples_per_second": 68.318,
437
- "train_steps_per_second": 2.135
438
  }
439
  ],
440
  "max_steps": 28924,
 
1
  {
2
+ "best_metric": 1.0334995985031128,
3
+ "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-27000",
4
  "epoch": 4.0,
5
  "global_step": 28924,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
+ "learning_rate": 9.654266353201494e-06,
13
+ "loss": 2.4588,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
+ "eval_accuracy": 0.5605767236014106,
19
+ "eval_loss": 1.9567219018936157,
20
+ "eval_runtime": 155.8077,
21
+ "eval_samples_per_second": 185.626,
22
+ "eval_steps_per_second": 23.208,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
+ "learning_rate": 9.308878440049787e-06,
28
+ "loss": 1.7412,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
+ "eval_accuracy": 0.6285180831201161,
34
+ "eval_loss": 1.5931317806243896,
35
+ "eval_runtime": 148.2867,
36
+ "eval_samples_per_second": 195.041,
37
+ "eval_steps_per_second": 24.385,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
+ "learning_rate": 8.963490526898079e-06,
43
+ "loss": 1.4825,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
+ "eval_accuracy": 0.6542770209529079,
49
+ "eval_loss": 1.4156450033187866,
50
+ "eval_runtime": 148.2362,
51
+ "eval_samples_per_second": 195.108,
52
+ "eval_steps_per_second": 24.394,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
+ "learning_rate": 8.61810261374637e-06,
58
+ "loss": 1.3499,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
+ "eval_accuracy": 0.6701472927183459,
64
+ "eval_loss": 1.3057234287261963,
65
+ "eval_runtime": 148.7022,
66
+ "eval_samples_per_second": 194.496,
67
+ "eval_steps_per_second": 24.317,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "learning_rate": 8.272368966947864e-06,
73
+ "loss": 1.2777,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "eval_accuracy": 0.6749187469746214,
79
+ "eval_loss": 1.2455776929855347,
80
+ "eval_runtime": 148.0641,
81
+ "eval_samples_per_second": 195.334,
82
+ "eval_steps_per_second": 24.422,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
+ "learning_rate": 7.926635320149357e-06,
88
+ "loss": 1.2325,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
+ "eval_accuracy": 0.6833206555563239,
94
+ "eval_loss": 1.1947654485702515,
95
+ "eval_runtime": 150.1766,
96
+ "eval_samples_per_second": 192.587,
97
+ "eval_steps_per_second": 24.078,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
+ "learning_rate": 7.5812474069976495e-06,
103
+ "loss": 1.1932,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "eval_accuracy": 0.6858101099509024,
109
+ "eval_loss": 1.1682132482528687,
110
+ "eval_runtime": 153.9643,
111
+ "eval_samples_per_second": 187.849,
112
+ "eval_steps_per_second": 23.486,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
+ "learning_rate": 7.235513760199143e-06,
118
+ "loss": 1.1182,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
+ "eval_accuracy": 0.6895788672982505,
124
+ "eval_loss": 1.1419979333877563,
125
+ "eval_runtime": 152.1866,
126
+ "eval_samples_per_second": 190.043,
127
+ "eval_steps_per_second": 23.76,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
+ "learning_rate": 6.889780113400637e-06,
133
+ "loss": 1.086,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
+ "eval_accuracy": 0.6893368370098887,
139
+ "eval_loss": 1.1294597387313843,
140
+ "eval_runtime": 153.4946,
141
+ "eval_samples_per_second": 188.424,
142
+ "eval_steps_per_second": 23.558,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
+ "learning_rate": 6.54404646660213e-06,
148
+ "loss": 1.0761,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
+ "eval_accuracy": 0.6903741096742964,
154
+ "eval_loss": 1.1167794466018677,
155
+ "eval_runtime": 152.2341,
156
+ "eval_samples_per_second": 189.984,
157
+ "eval_steps_per_second": 23.753,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
+ "learning_rate": 6.198312819803624e-06,
163
+ "loss": 1.0525,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
+ "eval_accuracy": 0.6951801396860522,
169
+ "eval_loss": 1.0984091758728027,
170
+ "eval_runtime": 151.8562,
171
+ "eval_samples_per_second": 190.456,
172
+ "eval_steps_per_second": 23.812,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
+ "learning_rate": 5.852924906651915e-06,
178
+ "loss": 1.047,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
+ "eval_accuracy": 0.6963211396169007,
184
+ "eval_loss": 1.0861539840698242,
185
+ "eval_runtime": 154.131,
186
+ "eval_samples_per_second": 187.646,
187
+ "eval_steps_per_second": 23.461,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
+ "learning_rate": 5.5071912598534095e-06,
193
+ "loss": 1.044,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
+ "eval_accuracy": 0.6946960791093285,
199
+ "eval_loss": 1.084086537361145,
200
+ "eval_runtime": 153.6622,
201
+ "eval_samples_per_second": 188.218,
202
+ "eval_steps_per_second": 23.532,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
+ "learning_rate": 5.161803346701701e-06,
208
+ "loss": 1.0231,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
+ "eval_accuracy": 0.6982228061683148,
214
+ "eval_loss": 1.0656354427337646,
215
+ "eval_runtime": 154.8778,
216
+ "eval_samples_per_second": 186.741,
217
+ "eval_steps_per_second": 23.347,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
+ "learning_rate": 4.8160696999031944e-06,
223
+ "loss": 1.0118,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
+ "eval_accuracy": 0.698395684945716,
229
+ "eval_loss": 1.065485954284668,
230
+ "eval_runtime": 152.9666,
231
+ "eval_samples_per_second": 189.074,
232
+ "eval_steps_per_second": 23.639,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
+ "learning_rate": 4.470681786751487e-06,
238
+ "loss": 0.9659,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
+ "eval_accuracy": 0.7020261392711431,
244
+ "eval_loss": 1.0567070245742798,
245
+ "eval_runtime": 154.2161,
246
+ "eval_samples_per_second": 187.542,
247
+ "eval_steps_per_second": 23.448,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
+ "learning_rate": 4.124948139952981e-06,
253
+ "loss": 0.9602,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
+ "eval_accuracy": 0.7003665030080908,
259
+ "eval_loss": 1.0593522787094116,
260
+ "eval_runtime": 154.104,
261
+ "eval_samples_per_second": 187.679,
262
+ "eval_steps_per_second": 23.465,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
+ "learning_rate": 3.779214493154474e-06,
268
+ "loss": 0.9757,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
+ "eval_accuracy": 0.7006431090519328,
274
+ "eval_loss": 1.053895115852356,
275
+ "eval_runtime": 153.596,
276
+ "eval_samples_per_second": 188.299,
277
+ "eval_steps_per_second": 23.542,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
+ "learning_rate": 3.433826580002766e-06,
283
+ "loss": 0.9631,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
+ "eval_accuracy": 0.7040661088444783,
289
+ "eval_loss": 1.0456914901733398,
290
+ "eval_runtime": 152.8682,
291
+ "eval_samples_per_second": 189.196,
292
+ "eval_steps_per_second": 23.654,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
+ "learning_rate": 3.0880929332042597e-06,
298
+ "loss": 0.951,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
+ "eval_accuracy": 0.7039278058225572,
304
+ "eval_loss": 1.0407930612564087,
305
+ "eval_runtime": 152.6055,
306
+ "eval_samples_per_second": 189.521,
307
+ "eval_steps_per_second": 23.695,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
+ "learning_rate": 2.742359286405753e-06,
313
+ "loss": 0.942,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
+ "eval_accuracy": 0.7021298665375838,
319
+ "eval_loss": 1.042394757270813,
320
+ "eval_runtime": 154.2947,
321
+ "eval_samples_per_second": 187.446,
322
+ "eval_steps_per_second": 23.436,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
+ "learning_rate": 2.396625639607247e-06,
328
+ "loss": 0.9415,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
+ "eval_accuracy": 0.7035128967567942,
334
+ "eval_loss": 1.0409824848175049,
335
+ "eval_runtime": 151.55,
336
+ "eval_samples_per_second": 190.841,
337
+ "eval_steps_per_second": 23.86,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
+ "learning_rate": 2.0512377264555388e-06,
343
+ "loss": 0.9128,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
+ "eval_accuracy": 0.7044118663992808,
349
+ "eval_loss": 1.0388602018356323,
350
+ "eval_runtime": 153.9577,
351
+ "eval_samples_per_second": 187.857,
352
+ "eval_steps_per_second": 23.487,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
+ "learning_rate": 1.7055040796570323e-06,
358
+ "loss": 0.9071,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
+ "eval_accuracy": 0.7033745937348731,
364
+ "eval_loss": 1.0366332530975342,
365
+ "eval_runtime": 155.1151,
366
+ "eval_samples_per_second": 186.455,
367
+ "eval_steps_per_second": 23.312,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
+ "learning_rate": 1.3601161665053245e-06,
373
+ "loss": 0.91,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
+ "eval_accuracy": 0.7045847451766821,
379
+ "eval_loss": 1.036346435546875,
380
+ "eval_runtime": 152.4669,
381
+ "eval_samples_per_second": 189.694,
382
+ "eval_steps_per_second": 23.717,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
+ "learning_rate": 1.0147282533536165e-06,
388
+ "loss": 0.8962,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
+ "eval_accuracy": 0.7042044118663993,
394
+ "eval_loss": 1.0368608236312866,
395
+ "eval_runtime": 151.6795,
396
+ "eval_samples_per_second": 190.678,
397
+ "eval_steps_per_second": 23.84,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
+ "learning_rate": 6.6899460655511e-07,
403
+ "loss": 0.8979,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
+ "eval_accuracy": 0.7055528663301293,
409
+ "eval_loss": 1.0334995985031128,
410
+ "eval_runtime": 151.884,
411
+ "eval_samples_per_second": 190.422,
412
+ "eval_steps_per_second": 23.808,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
+ "learning_rate": 3.2326095975660353e-07,
418
+ "loss": 0.906,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
+ "eval_accuracy": 0.7039278058225572,
424
+ "eval_loss": 1.03609037399292,
425
+ "eval_runtime": 152.3393,
426
+ "eval_samples_per_second": 189.853,
427
+ "eval_steps_per_second": 23.736,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.0,
432
  "step": 28924,
433
  "total_flos": 7.174773947150918e+19,
434
+ "train_loss": 1.1119355370668336,
435
+ "train_runtime": 13795.8304,
436
+ "train_samples_per_second": 67.086,
437
+ "train_steps_per_second": 2.097
438
  }
439
  ],
440
  "max_steps": 28924,