aalonso-developer committed on
Commit
038ebfc
·
1 Parent(s): e7d4588

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.710220593319964,
4
- "eval_loss": 0.9962999820709229,
5
- "eval_runtime": 152.3809,
6
- "eval_samples_per_second": 189.801,
7
- "eval_steps_per_second": 23.73,
8
- "train_loss": 0.8206716175314105,
9
- "train_runtime": 13827.0136,
10
- "train_samples_per_second": 66.935,
11
- "train_steps_per_second": 2.092
12
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7164788050618907,
4
+ "eval_loss": 0.9746271967887878,
5
+ "eval_runtime": 155.7191,
6
+ "eval_samples_per_second": 185.732,
7
+ "eval_steps_per_second": 23.221,
8
+ "train_loss": 0.9355616505553886,
9
+ "train_runtime": 13547.1303,
10
+ "train_samples_per_second": 68.318,
11
+ "train_steps_per_second": 2.135
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.710220593319964,
4
- "eval_loss": 0.9962999820709229,
5
- "eval_runtime": 152.3809,
6
- "eval_samples_per_second": 189.801,
7
- "eval_steps_per_second": 23.73
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7164788050618907,
4
+ "eval_loss": 0.9746271967887878,
5
+ "eval_runtime": 155.7191,
6
+ "eval_samples_per_second": 185.732,
7
+ "eval_steps_per_second": 23.221
8
  }
runs/Jun06_14-38-53_adrian-development/events.out.tfevents.1686068860.adrian-development.77015.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ece15a64ee14ac17d8203a4835811612d3769d140597e75e275c59e1b1757c9d
3
+ size 369
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 4.0,
3
- "train_loss": 0.8206716175314105,
4
- "train_runtime": 13827.0136,
5
- "train_samples_per_second": 66.935,
6
- "train_steps_per_second": 2.092
7
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "train_loss": 0.9355616505553886,
4
+ "train_runtime": 13547.1303,
5
+ "train_samples_per_second": 68.318,
6
+ "train_steps_per_second": 2.135
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9962999820709229,
3
- "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-14000",
4
  "epoch": 4.0,
5
  "global_step": 28924,
6
  "is_hyper_param_search": false,
@@ -9,432 +9,432 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
- "learning_rate": 7.240959065136219e-05,
13
- "loss": 1.6085,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
- "eval_accuracy": 0.6414494156697323,
19
- "eval_loss": 1.2965176105499268,
20
- "eval_runtime": 155.6021,
21
- "eval_samples_per_second": 185.872,
22
- "eval_steps_per_second": 23.239,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
- "learning_rate": 6.981918130272437e-05,
28
- "loss": 1.2071,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
- "eval_accuracy": 0.6689717170320172,
34
- "eval_loss": 1.163772702217102,
35
- "eval_runtime": 147.9377,
36
- "eval_samples_per_second": 195.501,
37
- "eval_steps_per_second": 24.443,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
- "learning_rate": 6.722617895173557e-05,
43
- "loss": 1.1467,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
- "eval_accuracy": 0.672187262291681,
49
- "eval_loss": 1.1356933116912842,
50
- "eval_runtime": 148.7126,
51
- "eval_samples_per_second": 194.482,
52
- "eval_steps_per_second": 24.315,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
- "learning_rate": 6.463836260544876e-05,
58
- "loss": 1.1073,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
- "eval_accuracy": 0.6832515040453634,
64
- "eval_loss": 1.094034194946289,
65
- "eval_runtime": 147.3532,
66
- "eval_samples_per_second": 196.277,
67
- "eval_steps_per_second": 24.54,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
- "learning_rate": 6.204536025445996e-05,
73
- "loss": 1.0721,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
- "eval_accuracy": 0.685879261461863,
79
- "eval_loss": 1.0801600217819214,
80
- "eval_runtime": 148.8743,
81
- "eval_samples_per_second": 194.271,
82
- "eval_steps_per_second": 24.289,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
- "learning_rate": 5.9452357903471155e-05,
88
- "loss": 1.0607,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
- "eval_accuracy": 0.6946615033538482,
94
- "eval_loss": 1.0509121417999268,
95
- "eval_runtime": 153.6507,
96
- "eval_samples_per_second": 188.232,
97
- "eval_steps_per_second": 23.534,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
- "learning_rate": 5.685935555248236e-05,
103
- "loss": 1.032,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
- "eval_accuracy": 0.6914805338496646,
109
- "eval_loss": 1.055686593055725,
110
- "eval_runtime": 151.6461,
111
- "eval_samples_per_second": 190.72,
112
- "eval_steps_per_second": 23.845,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
- "learning_rate": 5.4266353201493564e-05,
118
- "loss": 0.9224,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
- "eval_accuracy": 0.696632321416223,
124
- "eval_loss": 1.0505746603012085,
125
- "eval_runtime": 152.4908,
126
- "eval_samples_per_second": 189.664,
127
- "eval_steps_per_second": 23.713,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
- "learning_rate": 5.167335085050477e-05,
133
- "loss": 0.9029,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
- "eval_accuracy": 0.6952492911970126,
139
- "eval_loss": 1.0421319007873535,
140
- "eval_runtime": 153.6492,
141
- "eval_samples_per_second": 188.234,
142
- "eval_steps_per_second": 23.534,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
- "learning_rate": 4.9080348499515966e-05,
148
- "loss": 0.8858,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
- "eval_accuracy": 0.701887836249222,
154
- "eval_loss": 1.0203652381896973,
155
- "eval_runtime": 153.0256,
156
- "eval_samples_per_second": 189.001,
157
- "eval_steps_per_second": 23.63,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
- "learning_rate": 4.6489939150878154e-05,
163
- "loss": 0.8943,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
- "eval_accuracy": 0.7037895028006362,
169
- "eval_loss": 1.0182340145111084,
170
- "eval_runtime": 153.2325,
171
- "eval_samples_per_second": 188.746,
172
- "eval_steps_per_second": 23.598,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
- "learning_rate": 4.389952980224035e-05,
178
- "loss": 0.8756,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
- "eval_accuracy": 0.7108083811631284,
184
- "eval_loss": 1.0010631084442139,
185
- "eval_runtime": 153.3911,
186
- "eval_samples_per_second": 188.551,
187
- "eval_steps_per_second": 23.574,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
- "learning_rate": 4.130652745125155e-05,
193
- "loss": 0.8657,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
- "eval_accuracy": 0.7074199571260632,
199
- "eval_loss": 1.0035113096237183,
200
- "eval_runtime": 150.8806,
201
- "eval_samples_per_second": 191.688,
202
- "eval_steps_per_second": 23.966,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
- "learning_rate": 3.871352510026276e-05,
208
- "loss": 0.8737,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
- "eval_accuracy": 0.710220593319964,
214
- "eval_loss": 0.9962999820709229,
215
- "eval_runtime": 153.1018,
216
- "eval_samples_per_second": 188.907,
217
- "eval_steps_per_second": 23.618,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
- "learning_rate": 3.6120522749273955e-05,
223
- "loss": 0.7893,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
- "eval_accuracy": 0.708872138856234,
229
- "eval_loss": 1.020798921585083,
230
- "eval_runtime": 154.3576,
231
- "eval_samples_per_second": 187.37,
232
- "eval_steps_per_second": 23.426,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
- "learning_rate": 3.352752039828516e-05,
238
- "loss": 0.7067,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
- "eval_accuracy": 0.7075582601479843,
244
- "eval_loss": 1.0219132900238037,
245
- "eval_runtime": 152.5343,
246
- "eval_samples_per_second": 189.61,
247
- "eval_steps_per_second": 23.706,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
- "learning_rate": 3.093451804729636e-05,
253
- "loss": 0.7072,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
- "eval_accuracy": 0.7095982297213194,
259
- "eval_loss": 1.0180853605270386,
260
- "eval_runtime": 154.4198,
261
- "eval_samples_per_second": 187.295,
262
- "eval_steps_per_second": 23.417,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
- "learning_rate": 2.834410869865855e-05,
268
- "loss": 0.6914,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
- "eval_accuracy": 0.7122605628932992,
274
- "eval_loss": 1.0164895057678223,
275
- "eval_runtime": 151.314,
276
- "eval_samples_per_second": 191.139,
277
- "eval_steps_per_second": 23.897,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
- "learning_rate": 2.575369935002074e-05,
283
- "loss": 0.7044,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
- "eval_accuracy": 0.71236429015974,
289
- "eval_loss": 1.01729416847229,
290
- "eval_runtime": 151.1469,
291
- "eval_samples_per_second": 191.35,
292
- "eval_steps_per_second": 23.924,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
- "learning_rate": 2.3160696999031945e-05,
298
- "loss": 0.7014,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
- "eval_accuracy": 0.7145425627549962,
304
- "eval_loss": 1.0055793523788452,
305
- "eval_runtime": 153.0154,
306
- "eval_samples_per_second": 189.014,
307
- "eval_steps_per_second": 23.632,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
- "learning_rate": 2.0567694648043142e-05,
313
- "loss": 0.6997,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
- "eval_accuracy": 0.7116381992946545,
319
- "eval_loss": 1.0049206018447876,
320
- "eval_runtime": 154.2346,
321
- "eval_samples_per_second": 187.52,
322
- "eval_steps_per_second": 23.445,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
- "learning_rate": 1.7974692297054347e-05,
328
- "loss": 0.6378,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
- "eval_accuracy": 0.7104971993638061,
334
- "eval_loss": 1.035260558128357,
335
- "eval_runtime": 154.7462,
336
- "eval_samples_per_second": 186.9,
337
- "eval_steps_per_second": 23.367,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
- "learning_rate": 1.538168994606555e-05,
343
- "loss": 0.5446,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
- "eval_accuracy": 0.7085609570569117,
349
- "eval_loss": 1.0574305057525635,
350
- "eval_runtime": 153.225,
351
- "eval_samples_per_second": 188.755,
352
- "eval_steps_per_second": 23.599,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
- "learning_rate": 1.2788687595076752e-05,
358
- "loss": 0.5307,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
- "eval_accuracy": 0.7079040177027868,
364
- "eval_loss": 1.0585498809814453,
365
- "eval_runtime": 150.418,
366
- "eval_samples_per_second": 192.277,
367
- "eval_steps_per_second": 24.04,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
- "learning_rate": 1.0198278246438942e-05,
373
- "loss": 0.5269,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
- "eval_accuracy": 0.7093907751884378,
379
- "eval_loss": 1.0660898685455322,
380
- "eval_runtime": 153.521,
381
- "eval_samples_per_second": 188.391,
382
- "eval_steps_per_second": 23.554,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
- "learning_rate": 7.607868897801134e-06,
388
- "loss": 0.525,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
- "eval_accuracy": 0.7103934720973654,
394
- "eval_loss": 1.059892177581787,
395
- "eval_runtime": 153.2046,
396
- "eval_samples_per_second": 188.78,
397
- "eval_steps_per_second": 23.602,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
- "learning_rate": 5.0148665468123355e-06,
403
- "loss": 0.516,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
- "eval_accuracy": 0.7111195629624507,
409
- "eval_loss": 1.0657610893249512,
410
- "eval_runtime": 152.7663,
411
- "eval_samples_per_second": 189.322,
412
- "eval_steps_per_second": 23.67,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
- "learning_rate": 2.4244571981745263e-06,
418
- "loss": 0.5224,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
- "eval_accuracy": 0.7099785630316022,
424
- "eval_loss": 1.059259295463562,
425
- "eval_runtime": 152.2364,
426
- "eval_samples_per_second": 189.981,
427
- "eval_steps_per_second": 23.753,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.0,
432
  "step": 28924,
433
  "total_flos": 7.174773947150918e+19,
434
- "train_loss": 0.8206716175314105,
435
- "train_runtime": 13827.0136,
436
- "train_samples_per_second": 66.935,
437
- "train_steps_per_second": 2.092
438
  }
439
  ],
440
  "max_steps": 28924,
 
1
  {
2
+ "best_metric": 0.9746271967887878,
3
+ "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-21000",
4
  "epoch": 4.0,
5
  "global_step": 28924,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
+ "learning_rate": 2.4135665883003737e-05,
13
+ "loss": 2.0439,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
+ "eval_accuracy": 0.6367125371689372,
19
+ "eval_loss": 1.4945471286773682,
20
+ "eval_runtime": 155.4022,
21
+ "eval_samples_per_second": 186.111,
22
+ "eval_steps_per_second": 23.269,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
+ "learning_rate": 2.327133176600747e-05,
28
+ "loss": 1.3784,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
+ "eval_accuracy": 0.6662748081045571,
34
+ "eval_loss": 1.2658541202545166,
35
+ "eval_runtime": 152.9449,
36
+ "eval_samples_per_second": 189.101,
37
+ "eval_steps_per_second": 23.642,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
+ "learning_rate": 2.2408726317245195e-05,
43
+ "loss": 1.2223,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
+ "eval_accuracy": 0.6780305649678445,
49
+ "eval_loss": 1.176424503326416,
50
+ "eval_runtime": 155.5666,
51
+ "eval_samples_per_second": 185.914,
52
+ "eval_steps_per_second": 23.244,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
+ "learning_rate": 2.1544392200248927e-05,
58
+ "loss": 1.153,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
+ "eval_accuracy": 0.687227715925593,
64
+ "eval_loss": 1.115257740020752,
65
+ "eval_runtime": 154.7152,
66
+ "eval_samples_per_second": 186.937,
67
+ "eval_steps_per_second": 23.372,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "learning_rate": 2.0680058083252663e-05,
73
+ "loss": 1.1037,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "eval_accuracy": 0.6912385035613028,
79
+ "eval_loss": 1.0900990962982178,
80
+ "eval_runtime": 154.22,
81
+ "eval_samples_per_second": 187.537,
82
+ "eval_steps_per_second": 23.447,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
+ "learning_rate": 1.9816588300373394e-05,
88
+ "loss": 1.0827,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
+ "eval_accuracy": 0.6933822004010788,
94
+ "eval_loss": 1.068312406539917,
95
+ "eval_runtime": 150.0826,
96
+ "eval_samples_per_second": 192.707,
97
+ "eval_steps_per_second": 24.093,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
+ "learning_rate": 1.8953118517494124e-05,
103
+ "loss": 1.0646,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "eval_accuracy": 0.7026485028697877,
109
+ "eval_loss": 1.037534475326538,
110
+ "eval_runtime": 148.9104,
111
+ "eval_samples_per_second": 194.224,
112
+ "eval_steps_per_second": 24.283,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
+ "learning_rate": 1.8088784400497857e-05,
118
+ "loss": 0.9782,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
+ "eval_accuracy": 0.6986722909895581,
124
+ "eval_loss": 1.0466938018798828,
125
+ "eval_runtime": 149.1228,
126
+ "eval_samples_per_second": 193.948,
127
+ "eval_steps_per_second": 24.248,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
+ "learning_rate": 1.7225314617618587e-05,
133
+ "loss": 0.9627,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
+ "eval_accuracy": 0.7060023511513727,
139
+ "eval_loss": 1.0178191661834717,
140
+ "eval_runtime": 149.6361,
141
+ "eval_samples_per_second": 193.282,
142
+ "eval_steps_per_second": 24.165,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
+ "learning_rate": 1.6360980500622323e-05,
148
+ "loss": 0.9411,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
+ "eval_accuracy": 0.7056911693520503,
154
+ "eval_loss": 1.0125459432601929,
155
+ "eval_runtime": 148.9456,
156
+ "eval_samples_per_second": 194.178,
157
+ "eval_steps_per_second": 24.277,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
+ "learning_rate": 1.5496646383626052e-05,
163
+ "loss": 0.9333,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
+ "eval_accuracy": 0.7067630177719383,
169
+ "eval_loss": 1.0095747709274292,
170
+ "eval_runtime": 149.6511,
171
+ "eval_samples_per_second": 193.263,
172
+ "eval_steps_per_second": 24.163,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
+ "learning_rate": 1.4633176600746785e-05,
178
+ "loss": 0.9333,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
+ "eval_accuracy": 0.7047576239540834,
184
+ "eval_loss": 1.0087416172027588,
185
+ "eval_runtime": 149.4203,
186
+ "eval_samples_per_second": 193.561,
187
+ "eval_steps_per_second": 24.2,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
+ "learning_rate": 1.3768842483750519e-05,
193
+ "loss": 0.9227,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
+ "eval_accuracy": 0.7118456538275362,
199
+ "eval_loss": 0.9883840084075928,
200
+ "eval_runtime": 149.2315,
201
+ "eval_samples_per_second": 193.806,
202
+ "eval_steps_per_second": 24.231,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
+ "learning_rate": 1.290537270087125e-05,
208
+ "loss": 0.9131,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
+ "eval_accuracy": 0.7127100477145426,
214
+ "eval_loss": 0.986507773399353,
215
+ "eval_runtime": 149.078,
216
+ "eval_samples_per_second": 194.006,
217
+ "eval_steps_per_second": 24.256,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
+ "learning_rate": 1.2041038583874984e-05,
223
+ "loss": 0.8703,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
+ "eval_accuracy": 0.7143696839775949,
229
+ "eval_loss": 0.9831659197807312,
230
+ "eval_runtime": 150.612,
231
+ "eval_samples_per_second": 192.03,
232
+ "eval_steps_per_second": 24.009,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
+ "learning_rate": 1.1177568800995714e-05,
238
+ "loss": 0.8064,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
+ "eval_accuracy": 0.7136435931125095,
244
+ "eval_loss": 0.9858880043029785,
245
+ "eval_runtime": 149.8659,
246
+ "eval_samples_per_second": 192.986,
247
+ "eval_steps_per_second": 24.128,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
+ "learning_rate": 1.0313234683999447e-05,
253
+ "loss": 0.8131,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
+ "eval_accuracy": 0.7127100477145426,
259
+ "eval_loss": 0.9879944920539856,
260
+ "eval_runtime": 150.5293,
261
+ "eval_samples_per_second": 192.135,
262
+ "eval_steps_per_second": 24.022,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
+ "learning_rate": 9.448900567003181e-06,
268
+ "loss": 0.8064,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
+ "eval_accuracy": 0.7113270174953322,
274
+ "eval_loss": 0.9872538447380066,
275
+ "eval_runtime": 149.9196,
276
+ "eval_samples_per_second": 192.917,
277
+ "eval_steps_per_second": 24.12,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
+ "learning_rate": 8.584566450006915e-06,
283
+ "loss": 0.812,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
+ "eval_accuracy": 0.7153723808865223,
289
+ "eval_loss": 0.9793804883956909,
290
+ "eval_runtime": 151.3957,
291
+ "eval_samples_per_second": 191.036,
292
+ "eval_steps_per_second": 23.884,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
+ "learning_rate": 7.721096667127644e-06,
298
+ "loss": 0.7977,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
+ "eval_accuracy": 0.7151649263536408,
304
+ "eval_loss": 0.9755117297172546,
305
+ "eval_runtime": 144.4577,
306
+ "eval_samples_per_second": 200.211,
307
+ "eval_steps_per_second": 25.032,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
+ "learning_rate": 6.856762550131379e-06,
313
+ "loss": 0.8118,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
+ "eval_accuracy": 0.7164788050618907,
319
+ "eval_loss": 0.9746271967887878,
320
+ "eval_runtime": 155.3329,
321
+ "eval_samples_per_second": 186.194,
322
+ "eval_steps_per_second": 23.279,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
+ "learning_rate": 5.992428433135113e-06,
328
+ "loss": 0.7741,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
+ "eval_accuracy": 0.7189336837009889,
334
+ "eval_loss": 0.9803725481033325,
335
+ "eval_runtime": 153.5991,
336
+ "eval_samples_per_second": 188.295,
337
+ "eval_steps_per_second": 23.542,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
+ "learning_rate": 5.128958650255843e-06,
343
+ "loss": 0.7202,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
+ "eval_accuracy": 0.71371274462347,
349
+ "eval_loss": 0.9932627081871033,
350
+ "eval_runtime": 153.8948,
351
+ "eval_samples_per_second": 187.934,
352
+ "eval_steps_per_second": 23.497,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
+ "learning_rate": 4.264624533259577e-06,
358
+ "loss": 0.7186,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
+ "eval_accuracy": 0.7173086231934168,
364
+ "eval_loss": 0.9890537261962891,
365
+ "eval_runtime": 154.048,
366
+ "eval_samples_per_second": 187.747,
367
+ "eval_steps_per_second": 23.473,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
+ "learning_rate": 3.4011547503803073e-06,
373
+ "loss": 0.7164,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
+ "eval_accuracy": 0.7176198049927391,
379
+ "eval_loss": 0.9812184572219849,
380
+ "eval_runtime": 153.9568,
381
+ "eval_samples_per_second": 187.858,
382
+ "eval_steps_per_second": 23.487,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
+ "learning_rate": 2.536820633384041e-06,
388
+ "loss": 0.7047,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
+ "eval_accuracy": 0.7157527141968052,
394
+ "eval_loss": 0.9885823130607605,
395
+ "eval_runtime": 155.4293,
396
+ "eval_samples_per_second": 186.078,
397
+ "eval_steps_per_second": 23.265,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
+ "learning_rate": 1.6724865163877752e-06,
403
+ "loss": 0.7241,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
+ "eval_accuracy": 0.7153032293755619,
409
+ "eval_loss": 0.9851787686347961,
410
+ "eval_runtime": 155.7881,
411
+ "eval_samples_per_second": 185.65,
412
+ "eval_steps_per_second": 23.211,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
+ "learning_rate": 8.081523993915088e-07,
418
+ "loss": 0.699,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
+ "eval_accuracy": 0.7164788050618907,
424
+ "eval_loss": 0.984923243522644,
425
+ "eval_runtime": 156.4441,
426
+ "eval_samples_per_second": 184.871,
427
+ "eval_steps_per_second": 23.114,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.0,
432
  "step": 28924,
433
  "total_flos": 7.174773947150918e+19,
434
+ "train_loss": 0.9355616505553886,
435
+ "train_runtime": 13547.1303,
436
+ "train_samples_per_second": 68.318,
437
+ "train_steps_per_second": 2.135
438
  }
439
  ],
440
  "max_steps": 28924,