aalonso-developer commited on
Commit
b0b6e49
·
1 Parent(s): 7c72e8c

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 0.97,
3
- "eval_accuracy": 0.24808104557084573,
4
- "eval_loss": 2.6692957878112793,
5
- "eval_runtime": 153.5066,
6
- "eval_samples_per_second": 188.409,
7
- "eval_steps_per_second": 23.556,
8
- "train_loss": 1.2184505571637836,
9
- "train_runtime": 3232.5785,
10
- "train_samples_per_second": 357.883,
11
- "train_steps_per_second": 11.185
12
  }
 
1
  {
2
+ "epoch": 3.6,
3
+ "eval_accuracy": 0.7120531083604177,
4
+ "eval_loss": 1.0266574621200562,
5
+ "eval_runtime": 155.4593,
6
+ "eval_samples_per_second": 186.042,
7
+ "eval_steps_per_second": 23.26,
8
+ "train_loss": 1.0817440608097957,
9
+ "train_runtime": 12426.2916,
10
+ "train_samples_per_second": 93.1,
11
+ "train_steps_per_second": 2.91
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.97,
3
- "eval_accuracy": 0.24808104557084573,
4
- "eval_loss": 2.6692957878112793,
5
- "eval_runtime": 153.5066,
6
- "eval_samples_per_second": 188.409,
7
- "eval_steps_per_second": 23.556
8
  }
 
1
  {
2
+ "epoch": 3.6,
3
+ "eval_accuracy": 0.7120531083604177,
4
+ "eval_loss": 1.0266574621200562,
5
+ "eval_runtime": 155.4593,
6
+ "eval_samples_per_second": 186.042,
7
+ "eval_steps_per_second": 23.26
8
  }
runs/May30_14-50-58_adrian-development/events.out.tfevents.1685463710.adrian-development.97399.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fd99b8df63d4946d5622301444a8e7c90db53de0ef0b3d1586916ef048e3d08
3
+ size 369
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 0.97,
3
- "train_loss": 1.2184505571637836,
4
- "train_runtime": 3232.5785,
5
- "train_samples_per_second": 357.883,
6
- "train_steps_per_second": 11.185
7
  }
 
1
  {
2
+ "epoch": 3.6,
3
+ "train_loss": 1.0817440608097957,
4
+ "train_runtime": 12426.2916,
5
+ "train_samples_per_second": 93.1,
6
+ "train_steps_per_second": 2.91
7
  }
trainer_state.json CHANGED
@@ -1,130 +1,415 @@
1
  {
2
- "best_metric": 2.6692957878112793,
3
- "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-2000",
4
- "epoch": 0.968054211035818,
5
- "global_step": 7000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
- "learning_rate": 0.0019446826165122391,
13
- "loss": 2.7222,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
- "eval_accuracy": 0.25281792407164094,
19
- "eval_loss": 2.676572322845459,
20
- "eval_runtime": 156.0521,
21
- "eval_samples_per_second": 185.336,
22
- "eval_steps_per_second": 23.172,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
- "learning_rate": 0.001889586502558429,
28
- "loss": 2.6768,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
- "eval_accuracy": 0.24808104557084573,
34
- "eval_loss": 2.6692957878112793,
35
- "eval_runtime": 154.1979,
36
- "eval_samples_per_second": 187.564,
37
- "eval_steps_per_second": 23.45,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
- "learning_rate": 0.0018360392753422764,
43
- "loss": 2.6689,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
- "eval_accuracy": 0.22304819860313949,
49
- "eval_loss": NaN,
50
- "eval_runtime": 151.9095,
51
- "eval_samples_per_second": 190.39,
52
- "eval_steps_per_second": 23.804,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
- "learning_rate": 0.0017879131517079244,
58
- "loss": 0.4613,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
- "eval_accuracy": 0.051241269621741234,
64
- "eval_loss": NaN,
65
- "eval_runtime": 152.778,
66
- "eval_samples_per_second": 189.307,
67
- "eval_steps_per_second": 23.668,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
- "learning_rate": 0.0017325957682201632,
73
- "loss": 0.0,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
- "eval_accuracy": 0.051241269621741234,
79
- "eval_loss": NaN,
80
- "eval_runtime": 152.584,
81
- "eval_samples_per_second": 189.548,
82
- "eval_steps_per_second": 23.698,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
- "learning_rate": 0.0016772783847324023,
88
- "loss": 0.0,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
- "eval_accuracy": 0.051241269621741234,
94
- "eval_loss": NaN,
95
- "eval_runtime": 152.8491,
96
- "eval_samples_per_second": 189.219,
97
- "eval_steps_per_second": 23.657,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
- "learning_rate": 0.001621961001244641,
103
- "loss": 0.0,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
- "eval_accuracy": 0.051241269621741234,
109
- "eval_loss": NaN,
110
- "eval_runtime": 151.1266,
111
- "eval_samples_per_second": 191.376,
112
- "eval_steps_per_second": 23.927,
113
  "step": 7000
114
  },
115
  {
116
- "epoch": 0.97,
117
- "step": 7000,
118
- "total_flos": 1.736505102237696e+19,
119
- "train_loss": 1.2184505571637836,
120
- "train_runtime": 3232.5785,
121
- "train_samples_per_second": 357.883,
122
- "train_steps_per_second": 11.185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  }
124
  ],
125
  "max_steps": 36155,
126
  "num_train_epochs": 5,
127
- "total_flos": 1.736505102237696e+19,
128
  "trial_name": null,
129
  "trial_params": null
130
  }
 
1
  {
2
+ "best_metric": 1.0266574621200562,
3
+ "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-21000",
4
+ "epoch": 3.5956299267044667,
5
+ "global_step": 26000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
+ "learning_rate": 9.723413082561196e-06,
13
+ "loss": 2.4732,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
+ "eval_accuracy": 0.5465735426319065,
19
+ "eval_loss": 1.9312118291854858,
20
+ "eval_runtime": 155.4998,
21
+ "eval_samples_per_second": 185.994,
22
+ "eval_steps_per_second": 23.254,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
+ "learning_rate": 9.44682616512239e-06,
28
+ "loss": 1.7297,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
+ "eval_accuracy": 0.6410345066039693,
34
+ "eval_loss": 1.5575428009033203,
35
+ "eval_runtime": 154.3161,
36
+ "eval_samples_per_second": 187.42,
37
+ "eval_steps_per_second": 23.432,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
+ "learning_rate": 9.170515834601023e-06,
43
+ "loss": 1.4684,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
+ "eval_accuracy": 0.6578383237673743,
49
+ "eval_loss": 1.3884613513946533,
50
+ "eval_runtime": 156.4025,
51
+ "eval_samples_per_second": 184.92,
52
+ "eval_steps_per_second": 23.12,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
+ "learning_rate": 8.89392891716222e-06,
58
+ "loss": 1.3161,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
+ "eval_accuracy": 0.6701472927183459,
64
+ "eval_loss": 1.2846978902816772,
65
+ "eval_runtime": 154.8377,
66
+ "eval_samples_per_second": 186.789,
67
+ "eval_steps_per_second": 23.353,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "learning_rate": 8.617618586640853e-06,
73
+ "loss": 1.2652,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "eval_accuracy": 0.680381716340502,
79
+ "eval_loss": 1.2144489288330078,
80
+ "eval_runtime": 153.0458,
81
+ "eval_samples_per_second": 188.976,
82
+ "eval_steps_per_second": 23.627,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
+ "learning_rate": 8.341031669202048e-06,
88
+ "loss": 1.1962,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
+ "eval_accuracy": 0.6856718069289814,
94
+ "eval_loss": 1.1739206314086914,
95
+ "eval_runtime": 153.7562,
96
+ "eval_samples_per_second": 188.103,
97
+ "eval_steps_per_second": 23.518,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
+ "learning_rate": 8.064444751763242e-06,
103
+ "loss": 1.1536,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "eval_accuracy": 0.6906852914736187,
109
+ "eval_loss": 1.1354219913482666,
110
+ "eval_runtime": 154.4082,
111
+ "eval_samples_per_second": 187.309,
112
+ "eval_steps_per_second": 23.418,
113
  "step": 7000
114
  },
115
  {
116
+ "epoch": 1.11,
117
+ "learning_rate": 7.788134421241876e-06,
118
+ "loss": 1.0931,
119
+ "step": 8000
120
+ },
121
+ {
122
+ "epoch": 1.11,
123
+ "eval_accuracy": 0.6934859276675195,
124
+ "eval_loss": 1.1159623861312866,
125
+ "eval_runtime": 153.8754,
126
+ "eval_samples_per_second": 187.957,
127
+ "eval_steps_per_second": 23.5,
128
+ "step": 8000
129
+ },
130
+ {
131
+ "epoch": 1.24,
132
+ "learning_rate": 7.51182409072051e-06,
133
+ "loss": 1.0486,
134
+ "step": 9000
135
+ },
136
+ {
137
+ "epoch": 1.24,
138
+ "eval_accuracy": 0.6964940183943019,
139
+ "eval_loss": 1.1065187454223633,
140
+ "eval_runtime": 153.5864,
141
+ "eval_samples_per_second": 188.311,
142
+ "eval_steps_per_second": 23.544,
143
+ "step": 9000
144
+ },
145
+ {
146
+ "epoch": 1.38,
147
+ "learning_rate": 7.235513760199143e-06,
148
+ "loss": 1.0242,
149
+ "step": 10000
150
+ },
151
+ {
152
+ "epoch": 1.38,
153
+ "eval_accuracy": 0.6990180485443607,
154
+ "eval_loss": 1.080113410949707,
155
+ "eval_runtime": 154.1408,
156
+ "eval_samples_per_second": 187.634,
157
+ "eval_steps_per_second": 23.459,
158
+ "step": 10000
159
+ },
160
+ {
161
+ "epoch": 1.52,
162
+ "learning_rate": 6.9589268427603384e-06,
163
+ "loss": 1.0203,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 1.52,
168
+ "eval_accuracy": 0.7030634119355508,
169
+ "eval_loss": 1.0677887201309204,
170
+ "eval_runtime": 153.95,
171
+ "eval_samples_per_second": 187.866,
172
+ "eval_steps_per_second": 23.488,
173
+ "step": 11000
174
+ },
175
+ {
176
+ "epoch": 1.66,
177
+ "learning_rate": 6.682339925321532e-06,
178
+ "loss": 1.0079,
179
+ "step": 12000
180
+ },
181
+ {
182
+ "epoch": 1.66,
183
+ "eval_accuracy": 0.703754927045156,
184
+ "eval_loss": 1.0623527765274048,
185
+ "eval_runtime": 155.0978,
186
+ "eval_samples_per_second": 186.476,
187
+ "eval_steps_per_second": 23.314,
188
+ "step": 12000
189
+ },
190
+ {
191
+ "epoch": 1.8,
192
+ "learning_rate": 6.405753007882728e-06,
193
+ "loss": 0.9962,
194
+ "step": 13000
195
+ },
196
+ {
197
+ "epoch": 1.8,
198
+ "eval_accuracy": 0.7038932300670769,
199
+ "eval_loss": 1.0550236701965332,
200
+ "eval_runtime": 154.4093,
201
+ "eval_samples_per_second": 187.307,
202
+ "eval_steps_per_second": 23.418,
203
+ "step": 13000
204
+ },
205
+ {
206
+ "epoch": 1.94,
207
+ "learning_rate": 6.1291660904439225e-06,
208
+ "loss": 0.9975,
209
+ "step": 14000
210
+ },
211
+ {
212
+ "epoch": 1.94,
213
+ "eval_accuracy": 0.7048613512205242,
214
+ "eval_loss": 1.0496357679367065,
215
+ "eval_runtime": 153.3268,
216
+ "eval_samples_per_second": 188.63,
217
+ "eval_steps_per_second": 23.584,
218
+ "step": 14000
219
+ },
220
+ {
221
+ "epoch": 2.07,
222
+ "learning_rate": 5.852579173005117e-06,
223
+ "loss": 0.9572,
224
+ "step": 15000
225
+ },
226
+ {
227
+ "epoch": 2.07,
228
+ "eval_accuracy": 0.7075928359034644,
229
+ "eval_loss": 1.0450878143310547,
230
+ "eval_runtime": 153.3954,
231
+ "eval_samples_per_second": 188.545,
232
+ "eval_steps_per_second": 23.573,
233
+ "step": 15000
234
+ },
235
+ {
236
+ "epoch": 2.21,
237
+ "learning_rate": 5.575992255566313e-06,
238
+ "loss": 0.8944,
239
+ "step": 16000
240
+ },
241
+ {
242
+ "epoch": 2.21,
243
+ "eval_accuracy": 0.7076274116589447,
244
+ "eval_loss": 1.0498133897781372,
245
+ "eval_runtime": 155.6828,
246
+ "eval_samples_per_second": 185.775,
247
+ "eval_steps_per_second": 23.227,
248
+ "step": 16000
249
+ },
250
+ {
251
+ "epoch": 2.35,
252
+ "learning_rate": 5.2999585119623845e-06,
253
+ "loss": 0.9027,
254
+ "step": 17000
255
+ },
256
+ {
257
+ "epoch": 2.35,
258
+ "eval_accuracy": 0.707938593458267,
259
+ "eval_loss": 1.0396690368652344,
260
+ "eval_runtime": 162.118,
261
+ "eval_samples_per_second": 178.401,
262
+ "eval_steps_per_second": 22.305,
263
+ "step": 17000
264
+ },
265
+ {
266
+ "epoch": 2.49,
267
+ "learning_rate": 5.023371594523579e-06,
268
+ "loss": 0.8806,
269
+ "step": 18000
270
+ },
271
+ {
272
+ "epoch": 2.49,
273
+ "eval_accuracy": 0.7092870479219971,
274
+ "eval_loss": 1.0353764295578003,
275
+ "eval_runtime": 157.661,
276
+ "eval_samples_per_second": 183.444,
277
+ "eval_steps_per_second": 22.935,
278
+ "step": 18000
279
+ },
280
+ {
281
+ "epoch": 2.63,
282
+ "learning_rate": 4.747061264002213e-06,
283
+ "loss": 0.8968,
284
+ "step": 19000
285
+ },
286
+ {
287
+ "epoch": 2.63,
288
+ "eval_accuracy": 0.709010441878155,
289
+ "eval_loss": 1.0362025499343872,
290
+ "eval_runtime": 159.5334,
291
+ "eval_samples_per_second": 181.291,
292
+ "eval_steps_per_second": 22.666,
293
+ "step": 19000
294
+ },
295
+ {
296
+ "epoch": 2.77,
297
+ "learning_rate": 4.470474346563408e-06,
298
+ "loss": 0.8895,
299
+ "step": 20000
300
+ },
301
+ {
302
+ "epoch": 2.77,
303
+ "eval_accuracy": 0.7106009266302469,
304
+ "eval_loss": 1.0285927057266235,
305
+ "eval_runtime": 158.0807,
306
+ "eval_samples_per_second": 182.957,
307
+ "eval_steps_per_second": 22.874,
308
+ "step": 20000
309
+ },
310
+ {
311
+ "epoch": 2.9,
312
+ "learning_rate": 4.193887429124603e-06,
313
+ "loss": 0.8764,
314
+ "step": 21000
315
+ },
316
+ {
317
+ "epoch": 2.9,
318
+ "eval_accuracy": 0.7120531083604177,
319
+ "eval_loss": 1.0266574621200562,
320
+ "eval_runtime": 157.7263,
321
+ "eval_samples_per_second": 183.368,
322
+ "eval_steps_per_second": 22.926,
323
+ "step": 21000
324
+ },
325
+ {
326
+ "epoch": 3.04,
327
+ "learning_rate": 3.917300511685797e-06,
328
+ "loss": 0.8495,
329
+ "step": 22000
330
+ },
331
+ {
332
+ "epoch": 3.04,
333
+ "eval_accuracy": 0.7089412903671946,
334
+ "eval_loss": 1.0354034900665283,
335
+ "eval_runtime": 157.704,
336
+ "eval_samples_per_second": 183.394,
337
+ "eval_steps_per_second": 22.929,
338
+ "step": 22000
339
+ },
340
+ {
341
+ "epoch": 3.18,
342
+ "learning_rate": 3.6412667680818703e-06,
343
+ "loss": 0.7935,
344
+ "step": 23000
345
+ },
346
+ {
347
+ "epoch": 3.18,
348
+ "eval_accuracy": 0.7067630177719383,
349
+ "eval_loss": 1.0451409816741943,
350
+ "eval_runtime": 158.4061,
351
+ "eval_samples_per_second": 182.581,
352
+ "eval_steps_per_second": 22.827,
353
+ "step": 23000
354
+ },
355
+ {
356
+ "epoch": 3.32,
357
+ "learning_rate": 3.364679850643065e-06,
358
+ "loss": 0.7876,
359
+ "step": 24000
360
+ },
361
+ {
362
+ "epoch": 3.32,
363
+ "eval_accuracy": 0.7097019569877602,
364
+ "eval_loss": 1.0455540418624878,
365
+ "eval_runtime": 158.0269,
366
+ "eval_samples_per_second": 183.02,
367
+ "eval_steps_per_second": 22.882,
368
+ "step": 24000
369
+ },
370
+ {
371
+ "epoch": 3.46,
372
+ "learning_rate": 3.0880929332042597e-06,
373
+ "loss": 0.8093,
374
+ "step": 25000
375
+ },
376
+ {
377
+ "epoch": 3.46,
378
+ "eval_accuracy": 0.7088029873452735,
379
+ "eval_loss": 1.04353928565979,
380
+ "eval_runtime": 159.3368,
381
+ "eval_samples_per_second": 181.515,
382
+ "eval_steps_per_second": 22.694,
383
+ "step": 25000
384
+ },
385
+ {
386
+ "epoch": 3.6,
387
+ "learning_rate": 2.8117826026828933e-06,
388
+ "loss": 0.7977,
389
+ "step": 26000
390
+ },
391
+ {
392
+ "epoch": 3.6,
393
+ "eval_accuracy": 0.7094599266993984,
394
+ "eval_loss": 1.0454202890396118,
395
+ "eval_runtime": 156.4417,
396
+ "eval_samples_per_second": 184.874,
397
+ "eval_steps_per_second": 23.114,
398
+ "step": 26000
399
+ },
400
+ {
401
+ "epoch": 3.6,
402
+ "step": 26000,
403
+ "total_flos": 6.449527242554296e+19,
404
+ "train_loss": 1.0817440608097957,
405
+ "train_runtime": 12426.2916,
406
+ "train_samples_per_second": 93.1,
407
+ "train_steps_per_second": 2.91
408
  }
409
  ],
410
  "max_steps": 36155,
411
  "num_train_epochs": 5,
412
+ "total_flos": 6.449527242554296e+19,
413
  "trial_name": null,
414
  "trial_params": null
415
  }