bombshelll commited on
Commit
1ff51cd
·
verified ·
1 Parent(s): b4d99e1

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -2
  2. all_results.json +11 -11
  3. eval_results.json +6 -6
  4. train_results.json +6 -6
  5. trainer_state.json +309 -71
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.1550
22
- - Accuracy: 0.9110
23
 
24
  ## Model description
25
 
 
18
 
19
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.1451
22
+ - Accuracy: 0.9247
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 6.0,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.03769616782665253,
5
- "eval_runtime": 0.946,
6
- "eval_samples_per_second": 112.046,
7
- "eval_steps_per_second": 4.228,
8
- "total_flos": 1.4153791066361856e+17,
9
- "train_loss": 0.4622622158792284,
10
- "train_runtime": 90.8298,
11
- "train_samples_per_second": 208.962,
12
- "train_steps_per_second": 1.541
13
  }
 
1
  {
2
+ "epoch": 19.51219512195122,
3
+ "eval_accuracy": 0.9246575342465754,
4
+ "eval_loss": 0.14509864151477814,
5
+ "eval_runtime": 1.219,
6
+ "eval_samples_per_second": 119.771,
7
+ "eval_steps_per_second": 4.102,
8
+ "total_flos": 6.330428470091981e+17,
9
+ "train_loss": 0.25806016743183136,
10
+ "train_runtime": 356.8843,
11
+ "train_samples_per_second": 73.133,
12
+ "train_steps_per_second": 0.56
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.0,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.03769616782665253,
5
- "eval_runtime": 0.946,
6
- "eval_samples_per_second": 112.046,
7
- "eval_steps_per_second": 4.228
8
  }
 
1
  {
2
+ "epoch": 19.51219512195122,
3
+ "eval_accuracy": 0.9246575342465754,
4
+ "eval_loss": 0.14509864151477814,
5
+ "eval_runtime": 1.219,
6
+ "eval_samples_per_second": 119.771,
7
+ "eval_steps_per_second": 4.102
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.0,
3
- "total_flos": 1.4153791066361856e+17,
4
- "train_loss": 0.4622622158792284,
5
- "train_runtime": 90.8298,
6
- "train_samples_per_second": 208.962,
7
- "train_steps_per_second": 1.541
8
  }
 
1
  {
2
+ "epoch": 19.51219512195122,
3
+ "total_flos": 6.330428470091981e+17,
4
+ "train_loss": 0.25806016743183136,
5
+ "train_runtime": 356.8843,
6
+ "train_samples_per_second": 73.133,
7
+ "train_steps_per_second": 0.56
8
  }
trainer_state.json CHANGED
@@ -1,107 +1,345 @@
1
  {
2
- "best_metric": 1.0,
3
- "best_model_checkpoint": "/kaggle/working/swin-brain-modality-classification/checkpoint-45",
4
- "epoch": 6.0,
5
  "eval_steps": 500,
6
- "global_step": 45,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.9333333333333333,
13
- "eval_accuracy": 0.6415094339622641,
14
- "eval_loss": 1.0901466608047485,
15
- "eval_runtime": 1.0666,
16
- "eval_samples_per_second": 99.379,
17
- "eval_steps_per_second": 3.75,
18
- "step": 7
19
  },
20
  {
21
- "epoch": 1.3333333333333333,
22
- "grad_norm": 5.982262134552002,
23
- "learning_rate": 3.571428571428572e-05,
24
- "loss": 1.2435,
 
 
25
  "step": 10
26
  },
27
  {
28
- "epoch": 2.0,
29
- "eval_accuracy": 0.8584905660377359,
30
- "eval_loss": 0.42658886313438416,
31
- "eval_runtime": 0.9395,
32
- "eval_samples_per_second": 112.826,
33
- "eval_steps_per_second": 4.258,
34
- "step": 15
35
  },
36
  {
37
- "epoch": 2.6666666666666665,
38
- "grad_norm": 8.782153129577637,
39
- "learning_rate": 4.761904761904762e-05,
40
- "loss": 0.5378,
 
 
41
  "step": 20
42
  },
43
  {
44
- "epoch": 2.9333333333333336,
45
- "eval_accuracy": 0.9528301886792453,
46
- "eval_loss": 0.15236791968345642,
47
- "eval_runtime": 0.9334,
48
- "eval_samples_per_second": 113.563,
49
- "eval_steps_per_second": 4.285,
50
- "step": 22
51
  },
52
  {
53
- "epoch": 4.0,
54
- "grad_norm": 5.220347881317139,
55
- "learning_rate": 4.3650793650793655e-05,
56
- "loss": 0.1747,
 
 
57
  "step": 30
58
  },
 
 
 
 
 
 
 
59
  {
60
  "epoch": 4.0,
61
- "eval_accuracy": 0.9716981132075472,
62
- "eval_loss": 0.07465644180774689,
63
- "eval_runtime": 0.9385,
64
- "eval_samples_per_second": 112.947,
65
- "eval_steps_per_second": 4.262,
66
- "step": 30
67
  },
68
  {
69
- "epoch": 4.933333333333334,
70
- "eval_accuracy": 0.9622641509433962,
71
- "eval_loss": 0.08645126223564148,
72
- "eval_runtime": 0.9952,
73
- "eval_samples_per_second": 106.514,
74
- "eval_steps_per_second": 4.019,
75
- "step": 37
76
  },
77
  {
78
- "epoch": 5.333333333333333,
79
- "grad_norm": 5.531038761138916,
80
- "learning_rate": 3.968253968253968e-05,
81
- "loss": 0.0905,
82
- "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  },
84
  {
85
- "epoch": 6.0,
86
- "eval_accuracy": 1.0,
87
- "eval_loss": 0.03769616782665253,
88
- "eval_runtime": 1.0531,
89
- "eval_samples_per_second": 100.657,
90
- "eval_steps_per_second": 3.798,
91
- "step": 45
92
  },
93
  {
94
- "epoch": 6.0,
95
- "step": 45,
96
- "total_flos": 1.4153791066361856e+17,
97
- "train_loss": 0.4622622158792284,
98
- "train_runtime": 90.8298,
99
- "train_samples_per_second": 208.962,
100
- "train_steps_per_second": 1.541
101
  }
102
  ],
103
  "logging_steps": 10,
104
- "max_steps": 140,
105
  "num_input_tokens_seen": 0,
106
  "num_train_epochs": 20,
107
  "save_steps": 500,
@@ -126,7 +364,7 @@
126
  "attributes": {}
127
  }
128
  },
129
- "total_flos": 1.4153791066361856e+17,
130
  "train_batch_size": 32,
131
  "trial_name": null,
132
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9246575342465754,
3
+ "best_model_checkpoint": "/kaggle/working/swin-brain-modality-classification/checkpoint-82",
4
+ "epoch": 19.51219512195122,
5
  "eval_steps": 500,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.975609756097561,
13
+ "grad_norm": 8.406940460205078,
14
+ "learning_rate": 2.5e-05,
15
+ "loss": 1.2625,
16
+ "step": 10
 
 
17
  },
18
  {
19
+ "epoch": 0.975609756097561,
20
+ "eval_accuracy": 0.589041095890411,
21
+ "eval_loss": 1.0232338905334473,
22
+ "eval_runtime": 1.1501,
23
+ "eval_samples_per_second": 126.951,
24
+ "eval_steps_per_second": 4.348,
25
  "step": 10
26
  },
27
  {
28
+ "epoch": 1.951219512195122,
29
+ "grad_norm": 4.960095405578613,
30
+ "learning_rate": 5e-05,
31
+ "loss": 0.8162,
32
+ "step": 20
 
 
33
  },
34
  {
35
+ "epoch": 1.951219512195122,
36
+ "eval_accuracy": 0.8561643835616438,
37
+ "eval_loss": 0.5372681021690369,
38
+ "eval_runtime": 1.124,
39
+ "eval_samples_per_second": 129.89,
40
+ "eval_steps_per_second": 4.448,
41
  "step": 20
42
  },
43
  {
44
+ "epoch": 2.926829268292683,
45
+ "grad_norm": 5.591886043548584,
46
+ "learning_rate": 4.722222222222222e-05,
47
+ "loss": 0.3924,
48
+ "step": 30
 
 
49
  },
50
  {
51
+ "epoch": 2.926829268292683,
52
+ "eval_accuracy": 0.8904109589041096,
53
+ "eval_loss": 0.24904736876487732,
54
+ "eval_runtime": 1.1996,
55
+ "eval_samples_per_second": 121.707,
56
+ "eval_steps_per_second": 4.168,
57
  "step": 30
58
  },
59
+ {
60
+ "epoch": 3.902439024390244,
61
+ "grad_norm": 8.885392189025879,
62
+ "learning_rate": 4.4444444444444447e-05,
63
+ "loss": 0.2528,
64
+ "step": 40
65
+ },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.8767123287671232,
69
+ "eval_loss": 0.27159520983695984,
70
+ "eval_runtime": 1.1285,
71
+ "eval_samples_per_second": 129.37,
72
+ "eval_steps_per_second": 4.43,
73
+ "step": 41
74
  },
75
  {
76
+ "epoch": 4.878048780487805,
77
+ "grad_norm": 5.296258449554443,
78
+ "learning_rate": 4.166666666666667e-05,
79
+ "loss": 0.2311,
80
+ "step": 50
 
 
81
  },
82
  {
83
+ "epoch": 4.975609756097561,
84
+ "eval_accuracy": 0.8904109589041096,
85
+ "eval_loss": 0.2249891459941864,
86
+ "eval_runtime": 1.1296,
87
+ "eval_samples_per_second": 129.247,
88
+ "eval_steps_per_second": 4.426,
89
+ "step": 51
90
+ },
91
+ {
92
+ "epoch": 5.853658536585366,
93
+ "grad_norm": 4.622660160064697,
94
+ "learning_rate": 3.888888888888889e-05,
95
+ "loss": 0.2013,
96
+ "step": 60
97
+ },
98
+ {
99
+ "epoch": 5.951219512195122,
100
+ "eval_accuracy": 0.910958904109589,
101
+ "eval_loss": 0.21542568504810333,
102
+ "eval_runtime": 1.1318,
103
+ "eval_samples_per_second": 128.996,
104
+ "eval_steps_per_second": 4.418,
105
+ "step": 61
106
+ },
107
+ {
108
+ "epoch": 6.829268292682927,
109
+ "grad_norm": 1.8751587867736816,
110
+ "learning_rate": 3.611111111111111e-05,
111
+ "loss": 0.1955,
112
+ "step": 70
113
+ },
114
+ {
115
+ "epoch": 6.926829268292683,
116
+ "eval_accuracy": 0.9178082191780822,
117
+ "eval_loss": 0.17689338326454163,
118
+ "eval_runtime": 1.1592,
119
+ "eval_samples_per_second": 125.952,
120
+ "eval_steps_per_second": 4.313,
121
+ "step": 71
122
+ },
123
+ {
124
+ "epoch": 7.804878048780488,
125
+ "grad_norm": 2.4484364986419678,
126
+ "learning_rate": 3.3333333333333335e-05,
127
+ "loss": 0.1743,
128
+ "step": 80
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "eval_accuracy": 0.9246575342465754,
133
+ "eval_loss": 0.14509864151477814,
134
+ "eval_runtime": 1.129,
135
+ "eval_samples_per_second": 129.323,
136
+ "eval_steps_per_second": 4.429,
137
+ "step": 82
138
+ },
139
+ {
140
+ "epoch": 8.78048780487805,
141
+ "grad_norm": 4.154516220092773,
142
+ "learning_rate": 3.055555555555556e-05,
143
+ "loss": 0.1664,
144
+ "step": 90
145
+ },
146
+ {
147
+ "epoch": 8.975609756097562,
148
+ "eval_accuracy": 0.9178082191780822,
149
+ "eval_loss": 0.1488887518644333,
150
+ "eval_runtime": 1.1207,
151
+ "eval_samples_per_second": 130.276,
152
+ "eval_steps_per_second": 4.462,
153
+ "step": 92
154
+ },
155
+ {
156
+ "epoch": 9.75609756097561,
157
+ "grad_norm": 2.7535204887390137,
158
+ "learning_rate": 2.777777777777778e-05,
159
+ "loss": 0.1503,
160
+ "step": 100
161
+ },
162
+ {
163
+ "epoch": 9.951219512195122,
164
+ "eval_accuracy": 0.8972602739726028,
165
+ "eval_loss": 0.1964411437511444,
166
+ "eval_runtime": 1.1259,
167
+ "eval_samples_per_second": 129.67,
168
+ "eval_steps_per_second": 4.441,
169
+ "step": 102
170
+ },
171
+ {
172
+ "epoch": 10.731707317073171,
173
+ "grad_norm": 3.03983473777771,
174
+ "learning_rate": 2.5e-05,
175
+ "loss": 0.1518,
176
+ "step": 110
177
+ },
178
+ {
179
+ "epoch": 10.926829268292684,
180
+ "eval_accuracy": 0.910958904109589,
181
+ "eval_loss": 0.17079411447048187,
182
+ "eval_runtime": 1.1948,
183
+ "eval_samples_per_second": 122.195,
184
+ "eval_steps_per_second": 4.185,
185
+ "step": 112
186
+ },
187
+ {
188
+ "epoch": 11.707317073170731,
189
+ "grad_norm": 2.2469286918640137,
190
+ "learning_rate": 2.2222222222222223e-05,
191
+ "loss": 0.1456,
192
+ "step": 120
193
+ },
194
+ {
195
+ "epoch": 12.0,
196
+ "eval_accuracy": 0.9041095890410958,
197
+ "eval_loss": 0.1498766541481018,
198
+ "eval_runtime": 1.1695,
199
+ "eval_samples_per_second": 124.845,
200
+ "eval_steps_per_second": 4.275,
201
+ "step": 123
202
+ },
203
+ {
204
+ "epoch": 12.682926829268293,
205
+ "grad_norm": 7.290715217590332,
206
+ "learning_rate": 1.9444444444444445e-05,
207
+ "loss": 0.1307,
208
+ "step": 130
209
+ },
210
+ {
211
+ "epoch": 12.975609756097562,
212
+ "eval_accuracy": 0.9041095890410958,
213
+ "eval_loss": 0.15838505327701569,
214
+ "eval_runtime": 1.122,
215
+ "eval_samples_per_second": 130.13,
216
+ "eval_steps_per_second": 4.457,
217
+ "step": 133
218
+ },
219
+ {
220
+ "epoch": 13.658536585365853,
221
+ "grad_norm": 4.910462379455566,
222
+ "learning_rate": 1.6666666666666667e-05,
223
+ "loss": 0.1546,
224
+ "step": 140
225
+ },
226
+ {
227
+ "epoch": 13.951219512195122,
228
+ "eval_accuracy": 0.910958904109589,
229
+ "eval_loss": 0.16728605329990387,
230
+ "eval_runtime": 1.1455,
231
+ "eval_samples_per_second": 127.459,
232
+ "eval_steps_per_second": 4.365,
233
+ "step": 143
234
+ },
235
+ {
236
+ "epoch": 14.634146341463415,
237
+ "grad_norm": 3.007720947265625,
238
+ "learning_rate": 1.388888888888889e-05,
239
+ "loss": 0.1282,
240
+ "step": 150
241
+ },
242
+ {
243
+ "epoch": 14.926829268292684,
244
+ "eval_accuracy": 0.9041095890410958,
245
+ "eval_loss": 0.15455295145511627,
246
+ "eval_runtime": 1.1933,
247
+ "eval_samples_per_second": 122.354,
248
+ "eval_steps_per_second": 4.19,
249
+ "step": 153
250
+ },
251
+ {
252
+ "epoch": 15.609756097560975,
253
+ "grad_norm": 3.2155206203460693,
254
+ "learning_rate": 1.1111111111111112e-05,
255
+ "loss": 0.121,
256
+ "step": 160
257
+ },
258
+ {
259
+ "epoch": 16.0,
260
+ "eval_accuracy": 0.9041095890410958,
261
+ "eval_loss": 0.163113534450531,
262
+ "eval_runtime": 1.1449,
263
+ "eval_samples_per_second": 127.52,
264
+ "eval_steps_per_second": 4.367,
265
+ "step": 164
266
+ },
267
+ {
268
+ "epoch": 16.585365853658537,
269
+ "grad_norm": 2.8936450481414795,
270
+ "learning_rate": 8.333333333333334e-06,
271
+ "loss": 0.1311,
272
+ "step": 170
273
+ },
274
+ {
275
+ "epoch": 16.975609756097562,
276
+ "eval_accuracy": 0.9041095890410958,
277
+ "eval_loss": 0.148574560880661,
278
+ "eval_runtime": 1.1408,
279
+ "eval_samples_per_second": 127.98,
280
+ "eval_steps_per_second": 4.383,
281
+ "step": 174
282
+ },
283
+ {
284
+ "epoch": 17.5609756097561,
285
+ "grad_norm": 3.644867420196533,
286
+ "learning_rate": 5.555555555555556e-06,
287
+ "loss": 0.1259,
288
+ "step": 180
289
+ },
290
+ {
291
+ "epoch": 17.951219512195124,
292
+ "eval_accuracy": 0.9178082191780822,
293
+ "eval_loss": 0.1642705798149109,
294
+ "eval_runtime": 1.1481,
295
+ "eval_samples_per_second": 127.169,
296
+ "eval_steps_per_second": 4.355,
297
+ "step": 184
298
+ },
299
+ {
300
+ "epoch": 18.536585365853657,
301
+ "grad_norm": 3.4987175464630127,
302
+ "learning_rate": 2.777777777777778e-06,
303
+ "loss": 0.1254,
304
+ "step": 190
305
+ },
306
+ {
307
+ "epoch": 18.926829268292682,
308
+ "eval_accuracy": 0.910958904109589,
309
+ "eval_loss": 0.15694867074489594,
310
+ "eval_runtime": 1.1825,
311
+ "eval_samples_per_second": 123.462,
312
+ "eval_steps_per_second": 4.228,
313
+ "step": 194
314
+ },
315
+ {
316
+ "epoch": 19.51219512195122,
317
+ "grad_norm": 3.0996928215026855,
318
+ "learning_rate": 0.0,
319
+ "loss": 0.1039,
320
+ "step": 200
321
  },
322
  {
323
+ "epoch": 19.51219512195122,
324
+ "eval_accuracy": 0.910958904109589,
325
+ "eval_loss": 0.1550000011920929,
326
+ "eval_runtime": 1.2474,
327
+ "eval_samples_per_second": 117.047,
328
+ "eval_steps_per_second": 4.008,
329
+ "step": 200
330
  },
331
  {
332
+ "epoch": 19.51219512195122,
333
+ "step": 200,
334
+ "total_flos": 6.330428470091981e+17,
335
+ "train_loss": 0.25806016743183136,
336
+ "train_runtime": 356.8843,
337
+ "train_samples_per_second": 73.133,
338
+ "train_steps_per_second": 0.56
339
  }
340
  ],
341
  "logging_steps": 10,
342
+ "max_steps": 200,
343
  "num_input_tokens_seen": 0,
344
  "num_train_epochs": 20,
345
  "save_steps": 500,
 
364
  "attributes": {}
365
  }
366
  },
367
+ "total_flos": 6.330428470091981e+17,
368
  "train_batch_size": 32,
369
  "trial_name": null,
370
  "trial_params": null