MichalGas committed on
Commit
c2a272b
·
1 Parent(s): f4dcf11

End of training

Browse files
Files changed (5) hide show
  1. README.md +4 -2
  2. all_results.json +12 -0
  3. eval_results.json +8 -0
  4. train_results.json +7 -0
  5. trainer_state.json +501 -0
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - imagefolder
@@ -14,7 +16,7 @@ model-index:
14
  name: Image Classification
15
  type: image-classification
16
  dataset:
17
- name: imagefolder
18
  type: imagefolder
19
  config: default
20
  split: train
@@ -30,7 +32,7 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # vit-base-mgas
32
 
33
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
  - Loss: 0.8530
36
  - Accuracy: 0.7323
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
+ - vision
7
  - generated_from_trainer
8
  datasets:
9
  - imagefolder
 
16
  name: Image Classification
17
  type: image-classification
18
  dataset:
19
+ name: ./mgr/dataset/HF_DS
20
  type: imagefolder
21
  config: default
22
  split: train
 
32
 
33
  # vit-base-mgas
34
 
35
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the ./mgr/dataset/HF_DS dataset.
36
  It achieves the following results on the evaluation set:
37
  - Loss: 0.8530
38
  - Accuracy: 0.7323
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.7322834645669292,
4
+ "eval_loss": 0.8530173897743225,
5
+ "eval_runtime": 10.1757,
6
+ "eval_samples_per_second": 12.481,
7
+ "eval_steps_per_second": 1.572,
8
+ "train_loss": 0.18311181501908735,
9
+ "train_runtime": 299.5589,
10
+ "train_samples_per_second": 19.061,
11
+ "train_steps_per_second": 2.387
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.7322834645669292,
4
+ "eval_loss": 0.8530173897743225,
5
+ "eval_runtime": 10.1757,
6
+ "eval_samples_per_second": 12.481,
7
+ "eval_steps_per_second": 1.572
8
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.18311181501908735,
4
+ "train_runtime": 299.5589,
5
+ "train_samples_per_second": 19.061,
6
+ "train_steps_per_second": 2.387
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8530173897743225,
3
+ "best_model_checkpoint": "./beans_outputs/checkpoint-715",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 715,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07,
13
+ "learning_rate": 1.9720279720279722e-05,
14
+ "loss": 1.7594,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.14,
19
+ "learning_rate": 1.944055944055944e-05,
20
+ "loss": 1.7247,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.21,
25
+ "learning_rate": 1.916083916083916e-05,
26
+ "loss": 1.6869,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.28,
31
+ "learning_rate": 1.888111888111888e-05,
32
+ "loss": 1.6615,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.35,
37
+ "learning_rate": 1.8601398601398602e-05,
38
+ "loss": 1.6413,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.42,
43
+ "learning_rate": 1.8321678321678323e-05,
44
+ "loss": 1.6369,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.49,
49
+ "learning_rate": 1.8041958041958044e-05,
50
+ "loss": 1.5629,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "learning_rate": 1.7762237762237765e-05,
56
+ "loss": 1.4909,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.63,
61
+ "learning_rate": 1.7482517482517486e-05,
62
+ "loss": 1.585,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.7,
67
+ "learning_rate": 1.7202797202797203e-05,
68
+ "loss": 1.4428,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.77,
73
+ "learning_rate": 1.6923076923076924e-05,
74
+ "loss": 1.4252,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 0.84,
79
+ "learning_rate": 1.6643356643356645e-05,
80
+ "loss": 1.3894,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 0.91,
85
+ "learning_rate": 1.6363636363636366e-05,
86
+ "loss": 1.4099,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 0.98,
91
+ "learning_rate": 1.6083916083916083e-05,
92
+ "loss": 1.4331,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 1.0,
97
+ "eval_accuracy": 0.48031496062992124,
98
+ "eval_loss": 1.380394458770752,
99
+ "eval_runtime": 13.5035,
100
+ "eval_samples_per_second": 9.405,
101
+ "eval_steps_per_second": 1.185,
102
+ "step": 143
103
+ },
104
+ {
105
+ "epoch": 1.05,
106
+ "learning_rate": 1.5804195804195804e-05,
107
+ "loss": 1.4025,
108
+ "step": 150
109
+ },
110
+ {
111
+ "epoch": 1.12,
112
+ "learning_rate": 1.5524475524475525e-05,
113
+ "loss": 1.341,
114
+ "step": 160
115
+ },
116
+ {
117
+ "epoch": 1.19,
118
+ "learning_rate": 1.5244755244755244e-05,
119
+ "loss": 1.2961,
120
+ "step": 170
121
+ },
122
+ {
123
+ "epoch": 1.26,
124
+ "learning_rate": 1.4965034965034965e-05,
125
+ "loss": 1.3105,
126
+ "step": 180
127
+ },
128
+ {
129
+ "epoch": 1.33,
130
+ "learning_rate": 1.4685314685314686e-05,
131
+ "loss": 1.2829,
132
+ "step": 190
133
+ },
134
+ {
135
+ "epoch": 1.4,
136
+ "learning_rate": 1.4405594405594407e-05,
137
+ "loss": 1.291,
138
+ "step": 200
139
+ },
140
+ {
141
+ "epoch": 1.47,
142
+ "learning_rate": 1.4125874125874126e-05,
143
+ "loss": 1.2477,
144
+ "step": 210
145
+ },
146
+ {
147
+ "epoch": 1.54,
148
+ "learning_rate": 1.3846153846153847e-05,
149
+ "loss": 1.1525,
150
+ "step": 220
151
+ },
152
+ {
153
+ "epoch": 1.61,
154
+ "learning_rate": 1.3566433566433568e-05,
155
+ "loss": 1.2458,
156
+ "step": 230
157
+ },
158
+ {
159
+ "epoch": 1.68,
160
+ "learning_rate": 1.3286713286713288e-05,
161
+ "loss": 1.1054,
162
+ "step": 240
163
+ },
164
+ {
165
+ "epoch": 1.75,
166
+ "learning_rate": 1.3006993006993008e-05,
167
+ "loss": 1.2257,
168
+ "step": 250
169
+ },
170
+ {
171
+ "epoch": 1.82,
172
+ "learning_rate": 1.2727272727272728e-05,
173
+ "loss": 1.1471,
174
+ "step": 260
175
+ },
176
+ {
177
+ "epoch": 1.89,
178
+ "learning_rate": 1.244755244755245e-05,
179
+ "loss": 1.1859,
180
+ "step": 270
181
+ },
182
+ {
183
+ "epoch": 1.96,
184
+ "learning_rate": 1.216783216783217e-05,
185
+ "loss": 1.1653,
186
+ "step": 280
187
+ },
188
+ {
189
+ "epoch": 2.0,
190
+ "eval_accuracy": 0.6850393700787402,
191
+ "eval_loss": 1.0843431949615479,
192
+ "eval_runtime": 10.8218,
193
+ "eval_samples_per_second": 11.736,
194
+ "eval_steps_per_second": 1.479,
195
+ "step": 286
196
+ },
197
+ {
198
+ "epoch": 2.03,
199
+ "learning_rate": 1.1888111888111888e-05,
200
+ "loss": 1.2361,
201
+ "step": 290
202
+ },
203
+ {
204
+ "epoch": 2.1,
205
+ "learning_rate": 1.1608391608391608e-05,
206
+ "loss": 1.1012,
207
+ "step": 300
208
+ },
209
+ {
210
+ "epoch": 2.17,
211
+ "learning_rate": 1.132867132867133e-05,
212
+ "loss": 1.0819,
213
+ "step": 310
214
+ },
215
+ {
216
+ "epoch": 2.24,
217
+ "learning_rate": 1.1048951048951048e-05,
218
+ "loss": 1.0283,
219
+ "step": 320
220
+ },
221
+ {
222
+ "epoch": 2.31,
223
+ "learning_rate": 1.076923076923077e-05,
224
+ "loss": 1.0479,
225
+ "step": 330
226
+ },
227
+ {
228
+ "epoch": 2.38,
229
+ "learning_rate": 1.048951048951049e-05,
230
+ "loss": 1.0286,
231
+ "step": 340
232
+ },
233
+ {
234
+ "epoch": 2.45,
235
+ "learning_rate": 1.0209790209790211e-05,
236
+ "loss": 1.0439,
237
+ "step": 350
238
+ },
239
+ {
240
+ "epoch": 2.52,
241
+ "learning_rate": 9.93006993006993e-06,
242
+ "loss": 1.0562,
243
+ "step": 360
244
+ },
245
+ {
246
+ "epoch": 2.59,
247
+ "learning_rate": 9.650349650349651e-06,
248
+ "loss": 1.0392,
249
+ "step": 370
250
+ },
251
+ {
252
+ "epoch": 2.66,
253
+ "learning_rate": 9.370629370629372e-06,
254
+ "loss": 1.111,
255
+ "step": 380
256
+ },
257
+ {
258
+ "epoch": 2.73,
259
+ "learning_rate": 9.090909090909091e-06,
260
+ "loss": 1.1265,
261
+ "step": 390
262
+ },
263
+ {
264
+ "epoch": 2.8,
265
+ "learning_rate": 8.811188811188812e-06,
266
+ "loss": 1.0098,
267
+ "step": 400
268
+ },
269
+ {
270
+ "epoch": 2.87,
271
+ "learning_rate": 8.531468531468533e-06,
272
+ "loss": 1.1216,
273
+ "step": 410
274
+ },
275
+ {
276
+ "epoch": 2.94,
277
+ "learning_rate": 8.251748251748254e-06,
278
+ "loss": 1.0919,
279
+ "step": 420
280
+ },
281
+ {
282
+ "epoch": 3.0,
283
+ "eval_accuracy": 0.7165354330708661,
284
+ "eval_loss": 0.9539378881454468,
285
+ "eval_runtime": 9.6357,
286
+ "eval_samples_per_second": 13.18,
287
+ "eval_steps_per_second": 1.66,
288
+ "step": 429
289
+ },
290
+ {
291
+ "epoch": 3.01,
292
+ "learning_rate": 7.972027972027973e-06,
293
+ "loss": 1.0267,
294
+ "step": 430
295
+ },
296
+ {
297
+ "epoch": 3.08,
298
+ "learning_rate": 7.692307692307694e-06,
299
+ "loss": 1.042,
300
+ "step": 440
301
+ },
302
+ {
303
+ "epoch": 3.15,
304
+ "learning_rate": 7.412587412587413e-06,
305
+ "loss": 0.8939,
306
+ "step": 450
307
+ },
308
+ {
309
+ "epoch": 3.22,
310
+ "learning_rate": 7.132867132867134e-06,
311
+ "loss": 1.0723,
312
+ "step": 460
313
+ },
314
+ {
315
+ "epoch": 3.29,
316
+ "learning_rate": 6.853146853146854e-06,
317
+ "loss": 1.0155,
318
+ "step": 470
319
+ },
320
+ {
321
+ "epoch": 3.36,
322
+ "learning_rate": 6.573426573426574e-06,
323
+ "loss": 1.0215,
324
+ "step": 480
325
+ },
326
+ {
327
+ "epoch": 3.43,
328
+ "learning_rate": 6.2937062937062944e-06,
329
+ "loss": 0.9736,
330
+ "step": 490
331
+ },
332
+ {
333
+ "epoch": 3.5,
334
+ "learning_rate": 6.013986013986014e-06,
335
+ "loss": 0.968,
336
+ "step": 500
337
+ },
338
+ {
339
+ "epoch": 3.57,
340
+ "learning_rate": 5.7342657342657345e-06,
341
+ "loss": 0.9836,
342
+ "step": 510
343
+ },
344
+ {
345
+ "epoch": 3.64,
346
+ "learning_rate": 5.4545454545454545e-06,
347
+ "loss": 0.9847,
348
+ "step": 520
349
+ },
350
+ {
351
+ "epoch": 3.71,
352
+ "learning_rate": 5.174825174825175e-06,
353
+ "loss": 0.9006,
354
+ "step": 530
355
+ },
356
+ {
357
+ "epoch": 3.78,
358
+ "learning_rate": 4.895104895104895e-06,
359
+ "loss": 0.9513,
360
+ "step": 540
361
+ },
362
+ {
363
+ "epoch": 3.85,
364
+ "learning_rate": 4.615384615384616e-06,
365
+ "loss": 0.7749,
366
+ "step": 550
367
+ },
368
+ {
369
+ "epoch": 3.92,
370
+ "learning_rate": 4.335664335664336e-06,
371
+ "loss": 0.8459,
372
+ "step": 560
373
+ },
374
+ {
375
+ "epoch": 3.99,
376
+ "learning_rate": 4.055944055944056e-06,
377
+ "loss": 0.9689,
378
+ "step": 570
379
+ },
380
+ {
381
+ "epoch": 4.0,
382
+ "eval_accuracy": 0.7322834645669292,
383
+ "eval_loss": 0.8723652958869934,
384
+ "eval_runtime": 9.8772,
385
+ "eval_samples_per_second": 12.858,
386
+ "eval_steps_per_second": 1.62,
387
+ "step": 572
388
+ },
389
+ {
390
+ "epoch": 4.06,
391
+ "learning_rate": 3.776223776223776e-06,
392
+ "loss": 0.9308,
393
+ "step": 580
394
+ },
395
+ {
396
+ "epoch": 4.13,
397
+ "learning_rate": 3.4965034965034966e-06,
398
+ "loss": 0.9629,
399
+ "step": 590
400
+ },
401
+ {
402
+ "epoch": 4.2,
403
+ "learning_rate": 3.216783216783217e-06,
404
+ "loss": 0.9307,
405
+ "step": 600
406
+ },
407
+ {
408
+ "epoch": 4.27,
409
+ "learning_rate": 2.937062937062937e-06,
410
+ "loss": 0.8897,
411
+ "step": 610
412
+ },
413
+ {
414
+ "epoch": 4.34,
415
+ "learning_rate": 2.6573426573426574e-06,
416
+ "loss": 0.921,
417
+ "step": 620
418
+ },
419
+ {
420
+ "epoch": 4.41,
421
+ "learning_rate": 2.377622377622378e-06,
422
+ "loss": 0.9482,
423
+ "step": 630
424
+ },
425
+ {
426
+ "epoch": 4.48,
427
+ "learning_rate": 2.0979020979020983e-06,
428
+ "loss": 0.9577,
429
+ "step": 640
430
+ },
431
+ {
432
+ "epoch": 4.55,
433
+ "learning_rate": 1.8181818181818183e-06,
434
+ "loss": 0.8544,
435
+ "step": 650
436
+ },
437
+ {
438
+ "epoch": 4.62,
439
+ "learning_rate": 1.5384615384615387e-06,
440
+ "loss": 0.9408,
441
+ "step": 660
442
+ },
443
+ {
444
+ "epoch": 4.69,
445
+ "learning_rate": 1.258741258741259e-06,
446
+ "loss": 0.8936,
447
+ "step": 670
448
+ },
449
+ {
450
+ "epoch": 4.76,
451
+ "learning_rate": 9.790209790209791e-07,
452
+ "loss": 0.9222,
453
+ "step": 680
454
+ },
455
+ {
456
+ "epoch": 4.83,
457
+ "learning_rate": 6.993006993006994e-07,
458
+ "loss": 0.8527,
459
+ "step": 690
460
+ },
461
+ {
462
+ "epoch": 4.9,
463
+ "learning_rate": 4.195804195804196e-07,
464
+ "loss": 0.8538,
465
+ "step": 700
466
+ },
467
+ {
468
+ "epoch": 4.97,
469
+ "learning_rate": 1.3986013986013987e-07,
470
+ "loss": 0.9175,
471
+ "step": 710
472
+ },
473
+ {
474
+ "epoch": 5.0,
475
+ "eval_accuracy": 0.7322834645669292,
476
+ "eval_loss": 0.8530173897743225,
477
+ "eval_runtime": 12.0509,
478
+ "eval_samples_per_second": 10.539,
479
+ "eval_steps_per_second": 1.328,
480
+ "step": 715
481
+ },
482
+ {
483
+ "epoch": 5.0,
484
+ "step": 715,
485
+ "total_flos": 4.424951239059456e+17,
486
+ "train_loss": 0.18311181501908735,
487
+ "train_runtime": 299.5589,
488
+ "train_samples_per_second": 19.061,
489
+ "train_steps_per_second": 2.387
490
+ }
491
+ ],
492
+ "logging_steps": 10,
493
+ "max_steps": 715,
494
+ "num_input_tokens_seen": 0,
495
+ "num_train_epochs": 5,
496
+ "save_steps": 500,
497
+ "total_flos": 4.424951239059456e+17,
498
+ "train_batch_size": 8,
499
+ "trial_name": null,
500
+ "trial_params": null
501
+ }