aalonso-developer committed on
Commit
cc28b46
·
1 Parent(s): 540aea1

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.631180416292096,
4
- "eval_loss": 1.547638177871704,
5
- "eval_runtime": 155.3247,
6
- "eval_samples_per_second": 186.203,
7
- "eval_steps_per_second": 23.28,
8
- "train_loss": 1.8849000129177436,
9
- "train_runtime": 16318.8411,
10
- "train_samples_per_second": 70.893,
11
- "train_steps_per_second": 2.216
12
  }
 
1
  {
2
+ "epoch": 4.7,
3
+ "eval_accuracy": 0.6628518083120116,
4
+ "eval_loss": 1.1537460088729858,
5
+ "eval_runtime": 154.7616,
6
+ "eval_samples_per_second": 186.881,
7
+ "eval_steps_per_second": 23.365,
8
+ "train_loss": 1.1847389993106618,
9
+ "train_runtime": 16307.0712,
10
+ "train_samples_per_second": 70.944,
11
+ "train_steps_per_second": 2.217
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.631180416292096,
4
- "eval_loss": 1.547638177871704,
5
- "eval_runtime": 155.3247,
6
- "eval_samples_per_second": 186.203,
7
- "eval_steps_per_second": 23.28
8
  }
 
1
  {
2
+ "epoch": 4.7,
3
+ "eval_accuracy": 0.6628518083120116,
4
+ "eval_loss": 1.1537460088729858,
5
+ "eval_runtime": 154.7616,
6
+ "eval_samples_per_second": 186.881,
7
+ "eval_steps_per_second": 23.365
8
  }
runs/May31_08-09-02_adrian-development/events.out.tfevents.1685529875.adrian-development.7868.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d62d39ae5eee0fc4d3716b8f1eafe90e41f35e75d0312c80062a93ba28f83104
3
+ size 369
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 1.8849000129177436,
4
- "train_runtime": 16318.8411,
5
- "train_samples_per_second": 70.893,
6
- "train_steps_per_second": 2.216
7
  }
 
1
  {
2
+ "epoch": 4.7,
3
+ "train_loss": 1.1847389993106618,
4
+ "train_runtime": 16307.0712,
5
+ "train_samples_per_second": 70.944,
6
+ "train_steps_per_second": 2.217
7
  }
trainer_state.json CHANGED
@@ -1,565 +1,535 @@
1
  {
2
- "best_metric": 1.547638177871704,
3
- "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-36000",
4
- "epoch": 5.0,
5
- "global_step": 36155,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
- "learning_rate": 9.723413082561194e-07,
13
- "loss": 3.4054,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
- "eval_accuracy": 0.25032846967706246,
19
- "eval_loss": 3.1049513816833496,
20
- "eval_runtime": 156.7842,
21
- "eval_samples_per_second": 184.47,
22
- "eval_steps_per_second": 23.064,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
- "learning_rate": 9.446826165122389e-07,
28
- "loss": 2.95,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
- "eval_accuracy": 0.3431989488970334,
34
- "eval_loss": 2.8256096839904785,
35
- "eval_runtime": 156.9228,
36
- "eval_samples_per_second": 184.307,
37
- "eval_steps_per_second": 23.043,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
- "learning_rate": 9.170239247683584e-07,
43
- "loss": 2.715,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
- "eval_accuracy": 0.3754581287601134,
49
- "eval_loss": 2.6272990703582764,
50
- "eval_runtime": 146.3102,
51
- "eval_samples_per_second": 197.676,
52
- "eval_steps_per_second": 24.715,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
- "learning_rate": 8.893928917162218e-07,
58
- "loss": 2.552,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
- "eval_accuracy": 0.4142867021644423,
64
- "eval_loss": 2.4831433296203613,
65
- "eval_runtime": 142.6553,
66
- "eval_samples_per_second": 202.74,
67
- "eval_steps_per_second": 25.348,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
- "learning_rate": 8.617341999723413e-07,
73
- "loss": 2.4189,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
- "eval_accuracy": 0.4471336698706867,
79
- "eval_loss": 2.364607095718384,
80
- "eval_runtime": 142.2186,
81
- "eval_samples_per_second": 203.363,
82
- "eval_steps_per_second": 25.426,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
- "learning_rate": 8.341031669202047e-07,
88
- "loss": 2.306,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
- "eval_accuracy": 0.4672567595601964,
94
- "eval_loss": 2.266972303390503,
95
- "eval_runtime": 142.0527,
96
- "eval_samples_per_second": 203.6,
97
- "eval_steps_per_second": 25.455,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
- "learning_rate": 8.064444751763241e-07,
103
- "loss": 2.2189,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
- "eval_accuracy": 0.4896272733559228,
109
- "eval_loss": 2.1854753494262695,
110
- "eval_runtime": 144.7118,
111
- "eval_samples_per_second": 199.859,
112
- "eval_steps_per_second": 24.988,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
- "learning_rate": 7.787857834324436e-07,
118
- "loss": 2.1523,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
- "eval_accuracy": 0.5122743931954913,
124
- "eval_loss": 2.113058567047119,
125
- "eval_runtime": 143.2513,
126
- "eval_samples_per_second": 201.897,
127
- "eval_steps_per_second": 25.242,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
- "learning_rate": 7.51154750380307e-07,
133
- "loss": 2.0765,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
- "eval_accuracy": 0.529423967913699,
139
- "eval_loss": 2.0503833293914795,
140
- "eval_runtime": 142.002,
141
- "eval_samples_per_second": 203.673,
142
- "eval_steps_per_second": 25.464,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
- "learning_rate": 7.234960586364265e-07,
148
- "loss": 2.0082,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
- "eval_accuracy": 0.5425973307516769,
154
- "eval_loss": 1.9951448440551758,
155
- "eval_runtime": 144.1539,
156
- "eval_samples_per_second": 200.633,
157
- "eval_steps_per_second": 25.084,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
- "learning_rate": 6.958926842760338e-07,
163
- "loss": 1.9663,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
- "eval_accuracy": 0.5556669663232142,
169
- "eval_loss": 1.9460337162017822,
170
- "eval_runtime": 144.6987,
171
- "eval_samples_per_second": 199.877,
172
- "eval_steps_per_second": 24.99,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
- "learning_rate": 6.682339925321532e-07,
178
- "loss": 1.9155,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
- "eval_accuracy": 0.5670769656316991,
184
- "eval_loss": 1.9018137454986572,
185
- "eval_runtime": 143.4532,
186
- "eval_samples_per_second": 201.613,
187
- "eval_steps_per_second": 25.207,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
- "learning_rate": 6.406029594800165e-07,
193
- "loss": 1.8748,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
- "eval_accuracy": 0.5768619044326119,
199
- "eval_loss": 1.862241506576538,
200
- "eval_runtime": 143.567,
201
- "eval_samples_per_second": 201.453,
202
- "eval_steps_per_second": 25.187,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
- "learning_rate": 6.129442677361361e-07,
208
- "loss": 1.8318,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
- "eval_accuracy": 0.5785215406956642,
214
- "eval_loss": 1.8258068561553955,
215
- "eval_runtime": 144.3395,
216
- "eval_samples_per_second": 200.375,
217
- "eval_steps_per_second": 25.052,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
- "learning_rate": 5.852855759922556e-07,
223
- "loss": 1.7969,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
- "eval_accuracy": 0.5844339948827882,
229
- "eval_loss": 1.7949597835540771,
230
- "eval_runtime": 142.3732,
231
- "eval_samples_per_second": 203.142,
232
- "eval_steps_per_second": 25.398,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
- "learning_rate": 5.57626884248375e-07,
238
- "loss": 1.769,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
- "eval_accuracy": 0.5934928428186156,
244
- "eval_loss": 1.7645893096923828,
245
- "eval_runtime": 143.1571,
246
- "eval_samples_per_second": 202.03,
247
- "eval_steps_per_second": 25.259,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
- "learning_rate": 5.299958511962384e-07,
253
- "loss": 1.7303,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
- "eval_accuracy": 0.597987691031049,
259
- "eval_loss": 1.7381689548492432,
260
- "eval_runtime": 146.7645,
261
- "eval_samples_per_second": 197.064,
262
- "eval_steps_per_second": 24.638,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
- "learning_rate": 5.023648181441018e-07,
268
- "loss": 1.7159,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
- "eval_accuracy": 0.6031394785976073,
274
- "eval_loss": 1.714737892150879,
275
- "eval_runtime": 145.394,
276
- "eval_samples_per_second": 198.922,
277
- "eval_steps_per_second": 24.87,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
- "learning_rate": 4.7470612640022124e-07,
283
- "loss": 1.6935,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
- "eval_accuracy": 0.6080492358758038,
289
- "eval_loss": 1.6934996843338013,
290
- "eval_runtime": 143.1955,
291
- "eval_samples_per_second": 201.976,
292
- "eval_steps_per_second": 25.252,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
- "learning_rate": 4.4704743465634077e-07,
298
- "loss": 1.6843,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
- "eval_accuracy": 0.6099163266717378,
304
- "eval_loss": 1.674344778060913,
305
- "eval_runtime": 143.1944,
306
- "eval_samples_per_second": 201.977,
307
- "eval_steps_per_second": 25.252,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
- "learning_rate": 4.1938874291246025e-07,
313
- "loss": 1.6495,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
- "eval_accuracy": 0.6116105386902704,
319
- "eval_loss": 1.6570900678634644,
320
- "eval_runtime": 142.9785,
321
- "eval_samples_per_second": 202.282,
322
- "eval_steps_per_second": 25.291,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
- "learning_rate": 3.917577098603236e-07,
328
- "loss": 1.6281,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
- "eval_accuracy": 0.6163474171910657,
334
- "eval_loss": 1.6406831741333008,
335
- "eval_runtime": 144.1751,
336
- "eval_samples_per_second": 200.603,
337
- "eval_steps_per_second": 25.081,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
- "learning_rate": 3.6409901811644307e-07,
343
- "loss": 1.6161,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
- "eval_accuracy": 0.6166931747458682,
349
- "eval_loss": 1.628000259399414,
350
- "eval_runtime": 142.9419,
351
- "eval_samples_per_second": 202.334,
352
- "eval_steps_per_second": 25.297,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
- "learning_rate": 3.3646798506430647e-07,
358
- "loss": 1.6003,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
- "eval_accuracy": 0.6204273563377359,
364
- "eval_loss": 1.6148841381072998,
365
- "eval_runtime": 144.0133,
366
- "eval_samples_per_second": 200.829,
367
- "eval_steps_per_second": 25.109,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
- "learning_rate": 3.0880929332042595e-07,
373
- "loss": 1.5973,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
- "eval_accuracy": 0.6222252956227093,
379
- "eval_loss": 1.6035106182098389,
380
- "eval_runtime": 141.8894,
381
- "eval_samples_per_second": 203.835,
382
- "eval_steps_per_second": 25.485,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
- "learning_rate": 2.811782602682893e-07,
388
- "loss": 1.58,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
- "eval_accuracy": 0.6232625682871171,
394
- "eval_loss": 1.593165636062622,
395
- "eval_runtime": 142.737,
396
- "eval_samples_per_second": 202.624,
397
- "eval_steps_per_second": 25.333,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
- "learning_rate": 2.535195685244088e-07,
403
- "loss": 1.5887,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
- "eval_accuracy": 0.6248876287946892,
409
- "eval_loss": 1.5843583345413208,
410
- "eval_runtime": 142.0792,
411
- "eval_samples_per_second": 203.562,
412
- "eval_steps_per_second": 25.451,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
- "learning_rate": 2.2586087678052825e-07,
418
- "loss": 1.5583,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
- "eval_accuracy": 0.626616416568702,
424
- "eval_loss": 1.576346516609192,
425
- "eval_runtime": 142.1019,
426
- "eval_samples_per_second": 203.53,
427
- "eval_steps_per_second": 25.447,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.01,
432
- "learning_rate": 1.9820218503664775e-07,
433
- "loss": 1.5576,
434
  "step": 29000
435
  },
436
  {
437
  "epoch": 4.01,
438
- "eval_accuracy": 0.6278957195214715,
439
- "eval_loss": 1.5693974494934082,
440
- "eval_runtime": 142.1243,
441
- "eval_samples_per_second": 203.498,
442
- "eval_steps_per_second": 25.443,
443
  "step": 29000
444
  },
445
  {
446
  "epoch": 4.15,
447
- "learning_rate": 1.7054349329276723e-07,
448
- "loss": 1.5538,
449
  "step": 30000
450
  },
451
  {
452
  "epoch": 4.15,
453
- "eval_accuracy": 0.6281031740543531,
454
- "eval_loss": 1.564090371131897,
455
- "eval_runtime": 141.9612,
456
- "eval_samples_per_second": 203.732,
457
- "eval_steps_per_second": 25.472,
458
  "step": 30000
459
  },
460
  {
461
  "epoch": 4.29,
462
- "learning_rate": 1.429124602406306e-07,
463
- "loss": 1.5462,
464
  "step": 31000
465
  },
466
  {
467
  "epoch": 4.29,
468
- "eval_accuracy": 0.6282760528317544,
469
- "eval_loss": 1.5589792728424072,
470
- "eval_runtime": 141.9942,
471
- "eval_samples_per_second": 203.684,
472
- "eval_steps_per_second": 25.466,
473
  "step": 31000
474
  },
475
  {
476
  "epoch": 4.43,
477
- "learning_rate": 1.152537684967501e-07,
478
- "loss": 1.5428,
479
  "step": 32000
480
  },
481
  {
482
  "epoch": 4.43,
483
- "eval_accuracy": 0.630246870894129,
484
- "eval_loss": 1.5552754402160645,
485
- "eval_runtime": 153.7134,
486
- "eval_samples_per_second": 188.155,
487
- "eval_steps_per_second": 23.524,
488
  "step": 32000
489
  },
490
  {
491
  "epoch": 4.56,
492
- "learning_rate": 8.762273544461347e-08,
493
- "loss": 1.5294,
494
  "step": 33000
495
  },
496
  {
497
  "epoch": 4.56,
498
- "eval_accuracy": 0.6294862042735634,
499
- "eval_loss": 1.552242398262024,
500
- "eval_runtime": 153.3276,
501
- "eval_samples_per_second": 188.629,
502
- "eval_steps_per_second": 23.583,
503
  "step": 33000
504
  },
505
  {
506
  "epoch": 4.7,
507
- "learning_rate": 5.999170239247684e-08,
508
- "loss": 1.5362,
509
  "step": 34000
510
  },
511
  {
512
  "epoch": 4.7,
513
- "eval_accuracy": 0.6300048406057672,
514
- "eval_loss": 1.5497843027114868,
515
- "eval_runtime": 153.8161,
516
- "eval_samples_per_second": 188.03,
517
- "eval_steps_per_second": 23.509,
518
  "step": 34000
519
  },
520
  {
521
- "epoch": 4.84,
522
- "learning_rate": 3.233301064859632e-08,
523
- "loss": 1.5302,
524
- "step": 35000
525
- },
526
- {
527
- "epoch": 4.84,
528
- "eval_accuracy": 0.6304543254270106,
529
- "eval_loss": 1.5483404397964478,
530
- "eval_runtime": 153.9073,
531
- "eval_samples_per_second": 187.918,
532
- "eval_steps_per_second": 23.495,
533
- "step": 35000
534
- },
535
- {
536
- "epoch": 4.98,
537
- "learning_rate": 4.701977596459688e-09,
538
- "loss": 1.5222,
539
- "step": 36000
540
- },
541
- {
542
- "epoch": 4.98,
543
- "eval_accuracy": 0.631180416292096,
544
- "eval_loss": 1.547638177871704,
545
- "eval_runtime": 153.639,
546
- "eval_samples_per_second": 188.247,
547
- "eval_steps_per_second": 23.536,
548
- "step": 36000
549
- },
550
- {
551
- "epoch": 5.0,
552
- "step": 36155,
553
- "total_flos": 8.968467433938647e+19,
554
- "train_loss": 1.8849000129177436,
555
- "train_runtime": 16318.8411,
556
- "train_samples_per_second": 70.893,
557
- "train_steps_per_second": 2.216
558
  }
559
  ],
560
  "max_steps": 36155,
561
  "num_train_epochs": 5,
562
- "total_flos": 8.968467433938647e+19,
563
  "trial_name": null,
564
  "trial_params": null
565
  }
 
1
  {
2
+ "best_metric": 1.1537460088729858,
3
+ "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-29000",
4
+ "epoch": 4.701977596459687,
5
+ "global_step": 34000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
+ "learning_rate": 0.0004861844834739317,
13
+ "loss": 1.7696,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
+ "eval_accuracy": 0.51210151441809,
19
+ "eval_loss": 1.6692757606506348,
20
+ "eval_runtime": 156.1015,
21
+ "eval_samples_per_second": 185.277,
22
+ "eval_steps_per_second": 23.164,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
+ "learning_rate": 0.0004723551376019915,
28
+ "loss": 1.5848,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
+ "eval_accuracy": 0.5409031187331443,
34
+ "eval_loss": 1.545974850654602,
35
+ "eval_runtime": 152.8778,
36
+ "eval_samples_per_second": 189.184,
37
+ "eval_steps_per_second": 23.653,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
+ "learning_rate": 0.00045852579173005114,
43
+ "loss": 1.5409,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
+ "eval_accuracy": 0.5449830578798147,
49
+ "eval_loss": 1.539915680885315,
50
+ "eval_runtime": 149.8345,
51
+ "eval_samples_per_second": 193.026,
52
+ "eval_steps_per_second": 24.133,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
+ "learning_rate": 0.0004447102752039829,
58
+ "loss": 1.5147,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
+ "eval_accuracy": 0.5655210566350874,
64
+ "eval_loss": 1.4725779294967651,
65
+ "eval_runtime": 149.8923,
66
+ "eval_samples_per_second": 192.952,
67
+ "eval_steps_per_second": 24.124,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "learning_rate": 0.0004308809293320426,
73
+ "loss": 1.4834,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "eval_accuracy": 0.5699121775810801,
79
+ "eval_loss": 1.4570878744125366,
80
+ "eval_runtime": 150.1106,
81
+ "eval_samples_per_second": 192.671,
82
+ "eval_steps_per_second": 24.089,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
+ "learning_rate": 0.0004170515834601024,
88
+ "loss": 1.4598,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
+ "eval_accuracy": 0.5605421478459305,
94
+ "eval_loss": 1.4731388092041016,
95
+ "eval_runtime": 149.8373,
96
+ "eval_samples_per_second": 193.023,
97
+ "eval_steps_per_second": 24.133,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
+ "learning_rate": 0.00040322223758816204,
103
+ "loss": 1.4368,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "eval_accuracy": 0.5819099647327294,
109
+ "eval_loss": 1.405070424079895,
110
+ "eval_runtime": 150.0888,
111
+ "eval_samples_per_second": 192.699,
112
+ "eval_steps_per_second": 24.092,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
+ "learning_rate": 0.0003893928917162218,
118
+ "loss": 1.3777,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
+ "eval_accuracy": 0.59408063066178,
124
+ "eval_loss": 1.3815604448318481,
125
+ "eval_runtime": 149.8631,
126
+ "eval_samples_per_second": 192.989,
127
+ "eval_steps_per_second": 24.129,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
+ "learning_rate": 0.0003755773751901535,
133
+ "loss": 1.3634,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
+ "eval_accuracy": 0.6013761150681143,
139
+ "eval_loss": 1.356782078742981,
140
+ "eval_runtime": 155.0119,
141
+ "eval_samples_per_second": 186.579,
142
+ "eval_steps_per_second": 23.327,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
+ "learning_rate": 0.0003617480293182133,
148
+ "loss": 1.3494,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
+ "eval_accuracy": 0.5920406610884448,
154
+ "eval_loss": 1.3675092458724976,
155
+ "eval_runtime": 158.2083,
156
+ "eval_samples_per_second": 182.81,
157
+ "eval_steps_per_second": 22.856,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
+ "learning_rate": 0.000347918683446273,
163
+ "loss": 1.3231,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
+ "eval_accuracy": 0.5963280547679967,
169
+ "eval_loss": 1.3448883295059204,
170
+ "eval_runtime": 157.2868,
171
+ "eval_samples_per_second": 183.881,
172
+ "eval_steps_per_second": 22.99,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
+ "learning_rate": 0.0003341031669202047,
178
+ "loss": 1.3001,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
+ "eval_accuracy": 0.6059401147915082,
184
+ "eval_loss": 1.334527850151062,
185
+ "eval_runtime": 155.8494,
186
+ "eval_samples_per_second": 185.577,
187
+ "eval_steps_per_second": 23.202,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
+ "learning_rate": 0.0003202738210482644,
193
+ "loss": 1.2931,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
+ "eval_accuracy": 0.6064933268791923,
199
+ "eval_loss": 1.3300402164459229,
200
+ "eval_runtime": 154.1543,
201
+ "eval_samples_per_second": 187.617,
202
+ "eval_steps_per_second": 23.457,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
+ "learning_rate": 0.0003064444751763242,
208
+ "loss": 1.2888,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
+ "eval_accuracy": 0.6109881750916257,
214
+ "eval_loss": 1.3210688829421997,
215
+ "eval_runtime": 153.4104,
216
+ "eval_samples_per_second": 188.527,
217
+ "eval_steps_per_second": 23.571,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
+ "learning_rate": 0.0002926151293043839,
223
+ "loss": 1.2316,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
+ "eval_accuracy": 0.6146186294170528,
229
+ "eval_loss": 1.301633358001709,
230
+ "eval_runtime": 153.9979,
231
+ "eval_samples_per_second": 187.808,
232
+ "eval_steps_per_second": 23.481,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
+ "learning_rate": 0.0002787996127783156,
238
+ "loss": 1.196,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
+ "eval_accuracy": 0.6186985685637231,
244
+ "eval_loss": 1.2850406169891357,
245
+ "eval_runtime": 156.035,
246
+ "eval_samples_per_second": 185.356,
247
+ "eval_steps_per_second": 23.174,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
+ "learning_rate": 0.0002649702669063753,
253
+ "loss": 1.1827,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
+ "eval_accuracy": 0.62499135606113,
259
+ "eval_loss": 1.2636860609054565,
260
+ "eval_runtime": 153.9031,
261
+ "eval_samples_per_second": 187.923,
262
+ "eval_steps_per_second": 23.495,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
+ "learning_rate": 0.000251154750380307,
268
+ "loss": 1.1656,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
+ "eval_accuracy": 0.6329783555770694,
274
+ "eval_loss": 1.2347491979599,
275
+ "eval_runtime": 156.2721,
276
+ "eval_samples_per_second": 185.075,
277
+ "eval_steps_per_second": 23.139,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
+ "learning_rate": 0.00023732540450836675,
283
+ "loss": 1.1589,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
+ "eval_accuracy": 0.6352603554387664,
289
+ "eval_loss": 1.2342965602874756,
290
+ "eval_runtime": 154.9782,
291
+ "eval_samples_per_second": 186.62,
292
+ "eval_steps_per_second": 23.332,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
+ "learning_rate": 0.0002234960586364265,
298
+ "loss": 1.1458,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
+ "eval_accuracy": 0.6324251434893853,
304
+ "eval_loss": 1.2315857410430908,
305
+ "eval_runtime": 153.0744,
306
+ "eval_samples_per_second": 188.941,
307
+ "eval_steps_per_second": 23.622,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
+ "learning_rate": 0.0002096805421103582,
313
+ "loss": 1.1517,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
+ "eval_accuracy": 0.6440425973307516,
319
+ "eval_loss": 1.2039456367492676,
320
+ "eval_runtime": 155.3634,
321
+ "eval_samples_per_second": 186.157,
322
+ "eval_steps_per_second": 23.274,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
+ "learning_rate": 0.00019585119623841794,
328
+ "loss": 1.0928,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
+ "eval_accuracy": 0.6439042943088307,
334
+ "eval_loss": 1.2146044969558716,
335
+ "eval_runtime": 154.9132,
336
+ "eval_samples_per_second": 186.698,
337
+ "eval_steps_per_second": 23.342,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
+ "learning_rate": 0.00018202185036647766,
343
+ "loss": 1.0052,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
+ "eval_accuracy": 0.6462554456814882,
349
+ "eval_loss": 1.200515866279602,
350
+ "eval_runtime": 154.8032,
351
+ "eval_samples_per_second": 186.831,
352
+ "eval_steps_per_second": 23.359,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
+ "learning_rate": 0.0001681925044945374,
358
+ "loss": 1.0028,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
+ "eval_accuracy": 0.6495055666966323,
364
+ "eval_loss": 1.1917015314102173,
365
+ "eval_runtime": 153.9041,
366
+ "eval_samples_per_second": 187.922,
367
+ "eval_steps_per_second": 23.495,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
+ "learning_rate": 0.00015437698796846912,
373
+ "loss": 1.0131,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
+ "eval_accuracy": 0.6534817785768618,
379
+ "eval_loss": 1.1741726398468018,
380
+ "eval_runtime": 153.9239,
381
+ "eval_samples_per_second": 187.898,
382
+ "eval_steps_per_second": 23.492,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
+ "learning_rate": 0.00014054764209652884,
388
+ "loss": 1.0048,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
+ "eval_accuracy": 0.6559020814604799,
394
+ "eval_loss": 1.1676703691482544,
395
+ "eval_runtime": 156.008,
396
+ "eval_samples_per_second": 185.388,
397
+ "eval_steps_per_second": 23.178,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
+ "learning_rate": 0.0001267182962245886,
403
+ "loss": 0.9948,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
+ "eval_accuracy": 0.661814535647604,
409
+ "eval_loss": 1.1556403636932373,
410
+ "eval_runtime": 154.0881,
411
+ "eval_samples_per_second": 187.698,
412
+ "eval_steps_per_second": 23.467,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
+ "learning_rate": 0.00011288895035264832,
418
+ "loss": 0.9767,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
+ "eval_accuracy": 0.6612958993154,
424
+ "eval_loss": 1.1548937559127808,
425
+ "eval_runtime": 153.955,
426
+ "eval_samples_per_second": 187.86,
427
+ "eval_steps_per_second": 23.487,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.01,
432
+ "learning_rate": 9.907343382658e-05,
433
+ "loss": 0.9648,
434
  "step": 29000
435
  },
436
  {
437
  "epoch": 4.01,
438
+ "eval_accuracy": 0.6628518083120116,
439
+ "eval_loss": 1.1537460088729858,
440
+ "eval_runtime": 154.4616,
441
+ "eval_samples_per_second": 187.244,
442
+ "eval_steps_per_second": 23.41,
443
  "step": 29000
444
  },
445
  {
446
  "epoch": 4.15,
447
+ "learning_rate": 8.524408795463976e-05,
448
+ "loss": 0.7937,
449
  "step": 30000
450
  },
451
  {
452
  "epoch": 4.15,
453
+ "eval_accuracy": 0.6646843233524653,
454
+ "eval_loss": 1.1759380102157593,
455
+ "eval_runtime": 154.4382,
456
+ "eval_samples_per_second": 187.272,
457
+ "eval_steps_per_second": 23.414,
458
  "step": 30000
459
  },
460
  {
461
  "epoch": 4.29,
462
+ "learning_rate": 7.142857142857142e-05,
463
+ "loss": 0.7809,
464
  "step": 31000
465
  },
466
  {
467
  "epoch": 4.29,
468
+ "eval_accuracy": 0.6660327778161953,
469
+ "eval_loss": 1.1789319515228271,
470
+ "eval_runtime": 156.6898,
471
+ "eval_samples_per_second": 184.581,
472
+ "eval_steps_per_second": 23.077,
473
  "step": 31000
474
  },
475
  {
476
  "epoch": 4.43,
477
+ "learning_rate": 5.759922555663117e-05,
478
+ "loss": 0.7919,
479
  "step": 32000
480
  },
481
  {
482
  "epoch": 4.43,
483
+ "eval_accuracy": 0.6640273839983404,
484
+ "eval_loss": 1.1803646087646484,
485
+ "eval_runtime": 153.9058,
486
+ "eval_samples_per_second": 187.92,
487
+ "eval_steps_per_second": 23.495,
488
  "step": 32000
489
  },
490
  {
491
  "epoch": 4.56,
492
+ "learning_rate": 4.3783709030562856e-05,
493
+ "loss": 0.7747,
494
  "step": 33000
495
  },
496
  {
497
  "epoch": 4.56,
498
+ "eval_accuracy": 0.6665859899038794,
499
+ "eval_loss": 1.1784451007843018,
500
+ "eval_runtime": 154.9201,
501
+ "eval_samples_per_second": 186.69,
502
+ "eval_steps_per_second": 23.341,
503
  "step": 33000
504
  },
505
  {
506
  "epoch": 4.7,
507
+ "learning_rate": 2.9954363158622597e-05,
508
+ "loss": 0.7669,
509
  "step": 34000
510
  },
511
  {
512
  "epoch": 4.7,
513
+ "eval_accuracy": 0.6704584745176682,
514
+ "eval_loss": 1.1698390245437622,
515
+ "eval_runtime": 153.1559,
516
+ "eval_samples_per_second": 188.84,
517
+ "eval_steps_per_second": 23.61,
518
  "step": 34000
519
  },
520
  {
521
+ "epoch": 4.7,
522
+ "step": 34000,
523
+ "total_flos": 8.43398821843071e+19,
524
+ "train_loss": 1.1847389993106618,
525
+ "train_runtime": 16307.0712,
526
+ "train_samples_per_second": 70.944,
527
+ "train_steps_per_second": 2.217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  }
529
  ],
530
  "max_steps": 36155,
531
  "num_train_epochs": 5,
532
+ "total_flos": 8.43398821843071e+19,
533
  "trial_name": null,
534
  "trial_params": null
535
  }