aalonso-developer committed on
Commit
70dbafb
·
1 Parent(s): 7e065ec

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.7126063204481018,
4
- "eval_loss": 0.9990608096122742,
5
- "eval_runtime": 155.7648,
6
- "eval_samples_per_second": 185.677,
7
- "eval_steps_per_second": 23.214,
8
- "train_loss": 0.9726295792995491,
9
- "train_runtime": 14146.7049,
10
- "train_samples_per_second": 65.422,
11
- "train_steps_per_second": 2.045
12
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7155106839084434,
4
+ "eval_loss": 0.9953664541244507,
5
+ "eval_runtime": 153.0701,
6
+ "eval_samples_per_second": 188.946,
7
+ "eval_steps_per_second": 23.623,
8
+ "train_loss": 0.9035214643197474,
9
+ "train_runtime": 13914.0149,
10
+ "train_samples_per_second": 66.516,
11
+ "train_steps_per_second": 2.079
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.7126063204481018,
4
- "eval_loss": 0.9990608096122742,
5
- "eval_runtime": 155.7648,
6
- "eval_samples_per_second": 185.677,
7
- "eval_steps_per_second": 23.214
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7155106839084434,
4
+ "eval_loss": 0.9953664541244507,
5
+ "eval_runtime": 153.0701,
6
+ "eval_samples_per_second": 188.946,
7
+ "eval_steps_per_second": 23.623
8
  }
runs/Jun09_16-17-56_adrian-development/events.out.tfevents.1686334369.adrian-development.7991.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:592626b8cebeedc1bb9fe80ce040a64910094eb0f5325b096d5c36f8bdfec215
3
+ size 369
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 4.0,
3
- "train_loss": 0.9726295792995491,
4
- "train_runtime": 14146.7049,
5
- "train_samples_per_second": 65.422,
6
- "train_steps_per_second": 2.045
7
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "train_loss": 0.9035214643197474,
4
+ "train_runtime": 13914.0149,
5
+ "train_samples_per_second": 66.516,
6
+ "train_steps_per_second": 2.079
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9990608096122742,
3
- "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-21000",
4
  "epoch": 4.0,
5
  "global_step": 28924,
6
  "is_hyper_param_search": false,
@@ -9,432 +9,432 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
- "learning_rate": 1.930853270640299e-05,
13
- "loss": 2.1442,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
- "eval_accuracy": 0.6229513864877948,
19
- "eval_loss": 1.6088389158248901,
20
- "eval_runtime": 166.6999,
21
- "eval_samples_per_second": 173.497,
22
- "eval_steps_per_second": 21.692,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
- "learning_rate": 1.861844834739317e-05,
28
- "loss": 1.4492,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
- "eval_accuracy": 0.6612267478044396,
34
- "eval_loss": 1.3310657739639282,
35
- "eval_runtime": 159.1133,
36
- "eval_samples_per_second": 181.77,
37
- "eval_steps_per_second": 22.726,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
- "learning_rate": 1.7926981053796158e-05,
43
- "loss": 1.2669,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
- "eval_accuracy": 0.6771315953253578,
49
- "eval_loss": 1.2137295007705688,
50
- "eval_runtime": 158.2416,
51
- "eval_samples_per_second": 182.771,
52
- "eval_steps_per_second": 22.851,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
- "learning_rate": 1.7235513760199145e-05,
58
- "loss": 1.1901,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
- "eval_accuracy": 0.6789641103658115,
64
- "eval_loss": 1.1688439846038818,
65
- "eval_runtime": 159.3121,
66
- "eval_samples_per_second": 181.543,
67
- "eval_steps_per_second": 22.698,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
- "learning_rate": 1.6544737933895728e-05,
73
- "loss": 1.1535,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
- "eval_accuracy": 0.6876080492358758,
79
- "eval_loss": 1.122076153755188,
80
- "eval_runtime": 159.944,
81
- "eval_samples_per_second": 180.826,
82
- "eval_steps_per_second": 22.608,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
- "learning_rate": 1.5853270640298714e-05,
88
- "loss": 1.1027,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
- "eval_accuracy": 0.6927252610469539,
94
- "eval_loss": 1.092621922492981,
95
- "eval_runtime": 160.5515,
96
- "eval_samples_per_second": 180.142,
97
- "eval_steps_per_second": 22.522,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
- "learning_rate": 1.5162494813995299e-05,
103
- "loss": 1.0715,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
- "eval_accuracy": 0.6932438973791577,
109
- "eval_loss": 1.0782523155212402,
110
- "eval_runtime": 160.1635,
111
- "eval_samples_per_second": 180.578,
112
- "eval_steps_per_second": 22.577,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
- "learning_rate": 1.4471027520398286e-05,
118
- "loss": 1.0052,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
- "eval_accuracy": 0.7011963211396169,
124
- "eval_loss": 1.0529263019561768,
125
- "eval_runtime": 159.1353,
126
- "eval_samples_per_second": 181.745,
127
- "eval_steps_per_second": 22.723,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
- "learning_rate": 1.378025169409487e-05,
133
- "loss": 0.9777,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
- "eval_accuracy": 0.7021990180485443,
139
- "eval_loss": 1.044997215270996,
140
- "eval_runtime": 157.3717,
141
- "eval_samples_per_second": 183.782,
142
- "eval_steps_per_second": 22.977,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
- "learning_rate": 1.3088784400497859e-05,
148
- "loss": 0.9798,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
- "eval_accuracy": 0.7012654726505774,
154
- "eval_loss": 1.044914960861206,
155
- "eval_runtime": 157.2779,
156
- "eval_samples_per_second": 183.891,
157
- "eval_steps_per_second": 22.991,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
- "learning_rate": 1.2397317106900844e-05,
163
- "loss": 0.9473,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
- "eval_accuracy": 0.7041006845999586,
169
- "eval_loss": 1.0282418727874756,
170
- "eval_runtime": 159.9465,
171
- "eval_samples_per_second": 180.823,
172
- "eval_steps_per_second": 22.608,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
- "learning_rate": 1.1706541280597427e-05,
178
- "loss": 0.9463,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
- "eval_accuracy": 0.6971855335039071,
184
- "eval_loss": 1.04224693775177,
185
- "eval_runtime": 160.2925,
186
- "eval_samples_per_second": 180.433,
187
- "eval_steps_per_second": 22.559,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
- "learning_rate": 1.1015765454294012e-05,
193
- "loss": 0.9508,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
- "eval_accuracy": 0.7025793513588272,
199
- "eval_loss": 1.0288690328598022,
200
- "eval_runtime": 164.2839,
201
- "eval_samples_per_second": 176.049,
202
- "eval_steps_per_second": 22.011,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
- "learning_rate": 1.0324298160697e-05,
208
- "loss": 0.9447,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
- "eval_accuracy": 0.7070396238157803,
214
- "eval_loss": 1.0159052610397339,
215
- "eval_runtime": 159.9439,
216
- "eval_samples_per_second": 180.826,
217
- "eval_steps_per_second": 22.608,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
- "learning_rate": 9.632830867099987e-06,
223
- "loss": 0.8831,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
- "eval_accuracy": 0.7081806237466288,
229
- "eval_loss": 1.0163402557373047,
230
- "eval_runtime": 159.7911,
231
- "eval_samples_per_second": 180.999,
232
- "eval_steps_per_second": 22.63,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
- "learning_rate": 8.941363573502974e-06,
238
- "loss": 0.8581,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
- "eval_accuracy": 0.7075582601479843,
244
- "eval_loss": 1.0116521120071411,
245
- "eval_runtime": 157.9996,
246
- "eval_samples_per_second": 183.051,
247
- "eval_steps_per_second": 22.886,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
- "learning_rate": 8.249896279905962e-06,
253
- "loss": 0.8453,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
- "eval_accuracy": 0.7117765023165756,
259
- "eval_loss": 1.004479169845581,
260
- "eval_runtime": 163.1784,
261
- "eval_samples_per_second": 177.242,
262
- "eval_steps_per_second": 22.16,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
- "learning_rate": 7.559120453602545e-06,
268
- "loss": 0.8422,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
- "eval_accuracy": 0.7068321692828988,
274
- "eval_loss": 1.0124915838241577,
275
- "eval_runtime": 158.2344,
276
- "eval_samples_per_second": 182.779,
277
- "eval_steps_per_second": 22.852,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
- "learning_rate": 6.86834462729913e-06,
283
- "loss": 0.8451,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
- "eval_accuracy": 0.7045155936657216,
289
- "eval_loss": 1.0194756984710693,
290
- "eval_runtime": 159.3072,
291
- "eval_samples_per_second": 181.549,
292
- "eval_steps_per_second": 22.698,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
- "learning_rate": 6.176877333702116e-06,
298
- "loss": 0.8348,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
- "eval_accuracy": 0.7130903810248254,
304
- "eval_loss": 0.9999533891677856,
305
- "eval_runtime": 158.3242,
306
- "eval_samples_per_second": 182.676,
307
- "eval_steps_per_second": 22.839,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
- "learning_rate": 5.485410040105103e-06,
313
- "loss": 0.8319,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
- "eval_accuracy": 0.7126063204481018,
319
- "eval_loss": 0.9990608096122742,
320
- "eval_runtime": 158.4722,
321
- "eval_samples_per_second": 182.505,
322
- "eval_steps_per_second": 22.818,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
- "learning_rate": 4.793942746508091e-06,
328
- "loss": 0.8165,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
- "eval_accuracy": 0.7117765023165756,
334
- "eval_loss": 1.0050508975982666,
335
- "eval_runtime": 159.8886,
336
- "eval_samples_per_second": 180.888,
337
- "eval_steps_per_second": 22.616,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
- "learning_rate": 4.103166920204675e-06,
343
- "loss": 0.7483,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
- "eval_accuracy": 0.713297835557707,
349
- "eval_loss": 1.007352352142334,
350
- "eval_runtime": 158.6943,
351
- "eval_samples_per_second": 182.25,
352
- "eval_steps_per_second": 22.786,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
- "learning_rate": 3.411699626607662e-06,
358
- "loss": 0.7689,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
- "eval_accuracy": 0.7097711084987207,
364
- "eval_loss": 1.0143715143203735,
365
- "eval_runtime": 159.7158,
366
- "eval_samples_per_second": 181.084,
367
- "eval_steps_per_second": 22.64,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
- "learning_rate": 2.7209238003042458e-06,
373
- "loss": 0.7574,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
- "eval_accuracy": 0.7122605628932992,
379
- "eval_loss": 1.0074282884597778,
380
- "eval_runtime": 155.8079,
381
- "eval_samples_per_second": 185.626,
382
- "eval_steps_per_second": 23.208,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
- "learning_rate": 2.029456506707233e-06,
388
- "loss": 0.7625,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
- "eval_accuracy": 0.7105317751192863,
394
- "eval_loss": 1.0111548900604248,
395
- "eval_runtime": 156.0076,
396
- "eval_samples_per_second": 185.388,
397
- "eval_steps_per_second": 23.178,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
- "learning_rate": 1.33798921311022e-06,
403
- "loss": 0.7667,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
- "eval_accuracy": 0.7106700781412074,
409
- "eval_loss": 1.0088056325912476,
410
- "eval_runtime": 157.0208,
411
- "eval_samples_per_second": 184.192,
412
- "eval_steps_per_second": 23.029,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
- "learning_rate": 6.465219195132071e-07,
418
- "loss": 0.7472,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
- "eval_accuracy": 0.7116727750501348,
424
- "eval_loss": 1.008212924003601,
425
- "eval_runtime": 155.8357,
426
- "eval_samples_per_second": 185.593,
427
- "eval_steps_per_second": 23.204,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.0,
432
  "step": 28924,
433
  "total_flos": 7.174773947150918e+19,
434
- "train_loss": 0.9726295792995491,
435
- "train_runtime": 14146.7049,
436
- "train_samples_per_second": 65.422,
437
- "train_steps_per_second": 2.045
438
  }
439
  ],
440
  "max_steps": 28924,
 
1
  {
2
+ "best_metric": 0.9953664541244507,
3
+ "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-20000",
4
  "epoch": 4.0,
5
  "global_step": 28924,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
+ "learning_rate": 2.8963836260544875e-05,
13
+ "loss": 1.9495,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
+ "eval_accuracy": 0.6307309314708527,
19
+ "eval_loss": 1.4553368091583252,
20
+ "eval_runtime": 164.2927,
21
+ "eval_samples_per_second": 176.039,
22
+ "eval_steps_per_second": 22.009,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
+ "learning_rate": 2.7926635320149357e-05,
28
+ "loss": 1.3079,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
+ "eval_accuracy": 0.6676924140792476,
34
+ "eval_loss": 1.2347172498703003,
35
+ "eval_runtime": 157.0046,
36
+ "eval_samples_per_second": 184.211,
37
+ "eval_steps_per_second": 23.031,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
+ "learning_rate": 2.6890471580694235e-05,
43
+ "loss": 1.178,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
+ "eval_accuracy": 0.6757831408616278,
49
+ "eval_loss": 1.1606581211090088,
50
+ "eval_runtime": 154.1382,
51
+ "eval_samples_per_second": 187.637,
52
+ "eval_steps_per_second": 23.459,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
+ "learning_rate": 2.5853270640298713e-05,
58
+ "loss": 1.1324,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
+ "eval_accuracy": 0.682387110158357,
64
+ "eval_loss": 1.1307393312454224,
65
+ "eval_runtime": 154.2255,
66
+ "eval_samples_per_second": 187.531,
67
+ "eval_steps_per_second": 23.446,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "learning_rate": 2.4816069699903195e-05,
73
+ "loss": 1.0928,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "eval_accuracy": 0.6908927460065002,
79
+ "eval_loss": 1.0955638885498047,
80
+ "eval_runtime": 152.3863,
81
+ "eval_samples_per_second": 189.794,
82
+ "eval_steps_per_second": 23.729,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
+ "learning_rate": 2.3778868759507677e-05,
88
+ "loss": 1.0679,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
+ "eval_accuracy": 0.6912385035613028,
94
+ "eval_loss": 1.0789850950241089,
95
+ "eval_runtime": 153.243,
96
+ "eval_samples_per_second": 188.733,
97
+ "eval_steps_per_second": 23.597,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
+ "learning_rate": 2.274166781911216e-05,
103
+ "loss": 1.0488,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "eval_accuracy": 0.7013691999170182,
109
+ "eval_loss": 1.0485832691192627,
110
+ "eval_runtime": 155.3074,
111
+ "eval_samples_per_second": 186.224,
112
+ "eval_steps_per_second": 23.283,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
+ "learning_rate": 2.1705504079657033e-05,
118
+ "loss": 0.9548,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
+ "eval_accuracy": 0.70161123020538,
124
+ "eval_loss": 1.0449484586715698,
125
+ "eval_runtime": 154.3409,
126
+ "eval_samples_per_second": 187.39,
127
+ "eval_steps_per_second": 23.429,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
+ "learning_rate": 2.0668303139261512e-05,
133
+ "loss": 0.9352,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
+ "eval_accuracy": 0.7042044118663993,
139
+ "eval_loss": 1.034779667854309,
140
+ "eval_runtime": 155.2786,
141
+ "eval_samples_per_second": 186.259,
142
+ "eval_steps_per_second": 23.287,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
+ "learning_rate": 1.963213939980639e-05,
148
+ "loss": 0.9164,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
+ "eval_accuracy": 0.7033745937348731,
154
+ "eval_loss": 1.0340265035629272,
155
+ "eval_runtime": 154.4587,
156
+ "eval_samples_per_second": 187.247,
157
+ "eval_steps_per_second": 23.411,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
+ "learning_rate": 1.8595975660351264e-05,
163
+ "loss": 0.9267,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
+ "eval_accuracy": 0.7089412903671946,
169
+ "eval_loss": 1.0178329944610596,
170
+ "eval_runtime": 153.0672,
171
+ "eval_samples_per_second": 188.95,
172
+ "eval_steps_per_second": 23.624,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
+ "learning_rate": 1.7558774719955746e-05,
178
+ "loss": 0.9058,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
+ "eval_accuracy": 0.7063481087061753,
184
+ "eval_loss": 1.0159752368927002,
185
+ "eval_runtime": 153.6019,
186
+ "eval_samples_per_second": 188.292,
187
+ "eval_steps_per_second": 23.541,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
+ "learning_rate": 1.6521573779560228e-05,
193
+ "loss": 0.9028,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
+ "eval_accuracy": 0.7110504114514902,
199
+ "eval_loss": 1.0084210634231567,
200
+ "eval_runtime": 154.4213,
201
+ "eval_samples_per_second": 187.293,
202
+ "eval_steps_per_second": 23.416,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
+ "learning_rate": 1.548437283916471e-05,
208
+ "loss": 0.9093,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
+ "eval_accuracy": 0.7136090173570292,
214
+ "eval_loss": 1.00092351436615,
215
+ "eval_runtime": 151.8217,
216
+ "eval_samples_per_second": 190.5,
217
+ "eval_steps_per_second": 23.817,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
+ "learning_rate": 1.4447171898769188e-05,
223
+ "loss": 0.8346,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
+ "eval_accuracy": 0.7117073508056151,
229
+ "eval_loss": 1.0151883363723755,
230
+ "eval_runtime": 153.6777,
231
+ "eval_samples_per_second": 188.199,
232
+ "eval_steps_per_second": 23.53,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
+ "learning_rate": 1.3411008159314064e-05,
238
+ "loss": 0.7897,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
+ "eval_accuracy": 0.7141276536892331,
244
+ "eval_loss": 1.0071992874145508,
245
+ "eval_runtime": 153.402,
246
+ "eval_samples_per_second": 188.537,
247
+ "eval_steps_per_second": 23.572,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
+ "learning_rate": 1.237484441985894e-05,
253
+ "loss": 0.7869,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
+ "eval_accuracy": 0.7082843510130696,
259
+ "eval_loss": 1.0088136196136475,
260
+ "eval_runtime": 153.7395,
261
+ "eval_samples_per_second": 188.123,
262
+ "eval_steps_per_second": 23.52,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
+ "learning_rate": 1.133764347946342e-05,
268
+ "loss": 0.7853,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
+ "eval_accuracy": 0.7162021990180485,
274
+ "eval_loss": 0.9981487989425659,
275
+ "eval_runtime": 153.6335,
276
+ "eval_samples_per_second": 188.253,
277
+ "eval_steps_per_second": 23.537,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
+ "learning_rate": 1.0300442539067902e-05,
283
+ "loss": 0.7732,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
+ "eval_accuracy": 0.7148537445543185,
289
+ "eval_loss": 1.003010630607605,
290
+ "eval_runtime": 152.6681,
291
+ "eval_samples_per_second": 189.444,
292
+ "eval_steps_per_second": 23.685,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
+ "learning_rate": 9.264278799612779e-06,
298
+ "loss": 0.779,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
+ "eval_accuracy": 0.7155106839084434,
304
+ "eval_loss": 0.9953664541244507,
305
+ "eval_runtime": 154.6075,
306
+ "eval_samples_per_second": 187.067,
307
+ "eval_steps_per_second": 23.388,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
+ "learning_rate": 8.227077859217259e-06,
313
+ "loss": 0.7655,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
+ "eval_accuracy": 0.7178618352811009,
319
+ "eval_loss": 0.9971597790718079,
320
+ "eval_runtime": 153.1084,
321
+ "eval_samples_per_second": 188.899,
322
+ "eval_steps_per_second": 23.617,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
+ "learning_rate": 7.1898769188217404e-06,
328
+ "loss": 0.74,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
+ "eval_accuracy": 0.7138164718899108,
334
+ "eval_loss": 1.0114420652389526,
335
+ "eval_runtime": 152.2988,
336
+ "eval_samples_per_second": 189.903,
337
+ "eval_steps_per_second": 23.743,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
+ "learning_rate": 6.1526759784262205e-06,
343
+ "loss": 0.6824,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
+ "eval_accuracy": 0.7130212295138649,
349
+ "eval_loss": 1.017103910446167,
350
+ "eval_runtime": 152.678,
351
+ "eval_samples_per_second": 189.431,
352
+ "eval_steps_per_second": 23.684,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
+ "learning_rate": 5.116512238971097e-06,
358
+ "loss": 0.68,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
+ "eval_accuracy": 0.7177581080146601,
364
+ "eval_loss": 1.011143684387207,
365
+ "eval_runtime": 152.6609,
366
+ "eval_samples_per_second": 189.453,
367
+ "eval_steps_per_second": 23.686,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
+ "learning_rate": 4.079311298575578e-06,
373
+ "loss": 0.6787,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
+ "eval_accuracy": 0.7151303505981605,
379
+ "eval_loss": 1.0124437808990479,
380
+ "eval_runtime": 152.3072,
381
+ "eval_samples_per_second": 189.893,
382
+ "eval_steps_per_second": 23.741,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
+ "learning_rate": 3.042110358180058e-06,
388
+ "loss": 0.6808,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
+ "eval_accuracy": 0.7149920475762396,
394
+ "eval_loss": 1.0181164741516113,
395
+ "eval_runtime": 153.0013,
396
+ "eval_samples_per_second": 189.031,
397
+ "eval_steps_per_second": 23.634,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
+ "learning_rate": 2.0059466187249345e-06,
403
+ "loss": 0.6561,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
+ "eval_accuracy": 0.7168245626166931,
409
+ "eval_loss": 1.0144038200378418,
410
+ "eval_runtime": 153.8199,
411
+ "eval_samples_per_second": 188.025,
412
+ "eval_steps_per_second": 23.508,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
+ "learning_rate": 9.697828792698105e-07,
418
+ "loss": 0.6611,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
+ "eval_accuracy": 0.7155106839084434,
424
+ "eval_loss": 1.0154051780700684,
425
+ "eval_runtime": 153.3953,
426
+ "eval_samples_per_second": 188.546,
427
+ "eval_steps_per_second": 23.573,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.0,
432
  "step": 28924,
433
  "total_flos": 7.174773947150918e+19,
434
+ "train_loss": 0.9035214643197474,
435
+ "train_runtime": 13914.0149,
436
+ "train_samples_per_second": 66.516,
437
+ "train_steps_per_second": 2.079
438
  }
439
  ],
440
  "max_steps": 28924,