RobertoSonic committed on
Commit
7176992
·
verified ·
1 Parent(s): 0616281

End of training

Browse files
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.2223
22
- - Accuracy: 0.9315
23
 
24
  ## Model description
25
 
 
18
 
19
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.2067
22
+ - Accuracy: 0.9589
23
 
24
  ## Model description
25
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 28.585365853658537,
3
+ "eval_accuracy": 0.958904109589041,
4
+ "eval_loss": 0.20672719180583954,
5
+ "eval_runtime": 0.711,
6
+ "eval_samples_per_second": 102.672,
7
+ "eval_steps_per_second": 7.032,
8
+ "total_flos": 6.102198151010058e+17,
9
+ "train_loss": 0.7502146526177724,
10
+ "train_runtime": 483.2384,
11
+ "train_samples_per_second": 40.725,
12
+ "train_steps_per_second": 1.242
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 28.585365853658537,
3
+ "eval_accuracy": 0.958904109589041,
4
+ "eval_loss": 0.20672719180583954,
5
+ "eval_runtime": 0.711,
6
+ "eval_samples_per_second": 102.672,
7
+ "eval_steps_per_second": 7.032
8
+ }
runs/Jan18_08-35-09_850581c2dd71/events.out.tfevents.1737189952.850581c2dd71.12583.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1251f051479cc656c850dd2a01cd47db369a7ab203eebf5de1e4899ffaa00b52
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 28.585365853658537,
3
+ "total_flos": 6.102198151010058e+17,
4
+ "train_loss": 0.7502146526177724,
5
+ "train_runtime": 483.2384,
6
+ "train_samples_per_second": 40.725,
7
+ "train_steps_per_second": 1.242
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,723 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.958904109589041,
3
+ "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV3/checkpoint-525",
4
+ "epoch": 28.585365853658537,
5
+ "eval_steps": 500,
6
+ "global_step": 600,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.4878048780487805,
13
+ "grad_norm": 12.429728507995605,
14
+ "learning_rate": 9.999999999999999e-06,
15
+ "loss": 4.7086,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.975609756097561,
20
+ "grad_norm": 15.98269271850586,
21
+ "learning_rate": 1.9999999999999998e-05,
22
+ "loss": 3.9845,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.3424657534246575,
28
+ "eval_loss": 1.6832486391067505,
29
+ "eval_runtime": 0.6873,
30
+ "eval_samples_per_second": 106.209,
31
+ "eval_steps_per_second": 7.275,
32
+ "step": 21
33
+ },
34
+ {
35
+ "epoch": 1.4390243902439024,
36
+ "grad_norm": 19.85782814025879,
37
+ "learning_rate": 3e-05,
38
+ "loss": 2.9173,
39
+ "step": 30
40
+ },
41
+ {
42
+ "epoch": 1.9268292682926829,
43
+ "grad_norm": 23.9186954498291,
44
+ "learning_rate": 2.9473684210526314e-05,
45
+ "loss": 2.4369,
46
+ "step": 40
47
+ },
48
+ {
49
+ "epoch": 2.0,
50
+ "eval_accuracy": 0.4383561643835616,
51
+ "eval_loss": 1.1981052160263062,
52
+ "eval_runtime": 0.7433,
53
+ "eval_samples_per_second": 98.212,
54
+ "eval_steps_per_second": 6.727,
55
+ "step": 42
56
+ },
57
+ {
58
+ "epoch": 2.3902439024390243,
59
+ "grad_norm": 39.31980895996094,
60
+ "learning_rate": 2.8947368421052634e-05,
61
+ "loss": 1.9976,
62
+ "step": 50
63
+ },
64
+ {
65
+ "epoch": 2.8780487804878048,
66
+ "grad_norm": 34.83562469482422,
67
+ "learning_rate": 2.8421052631578946e-05,
68
+ "loss": 1.7752,
69
+ "step": 60
70
+ },
71
+ {
72
+ "epoch": 3.0,
73
+ "eval_accuracy": 0.6301369863013698,
74
+ "eval_loss": 0.8411616086959839,
75
+ "eval_runtime": 0.7016,
76
+ "eval_samples_per_second": 104.055,
77
+ "eval_steps_per_second": 7.127,
78
+ "step": 63
79
+ },
80
+ {
81
+ "epoch": 3.341463414634146,
82
+ "grad_norm": 24.540483474731445,
83
+ "learning_rate": 2.7894736842105263e-05,
84
+ "loss": 1.5326,
85
+ "step": 70
86
+ },
87
+ {
88
+ "epoch": 3.8292682926829267,
89
+ "grad_norm": 33.162715911865234,
90
+ "learning_rate": 2.736842105263158e-05,
91
+ "loss": 1.3772,
92
+ "step": 80
93
+ },
94
+ {
95
+ "epoch": 4.0,
96
+ "eval_accuracy": 0.7123287671232876,
97
+ "eval_loss": 0.7895165681838989,
98
+ "eval_runtime": 0.6266,
99
+ "eval_samples_per_second": 116.511,
100
+ "eval_steps_per_second": 7.98,
101
+ "step": 84
102
+ },
103
+ {
104
+ "epoch": 4.2926829268292686,
105
+ "grad_norm": 21.98711585998535,
106
+ "learning_rate": 2.6842105263157896e-05,
107
+ "loss": 1.414,
108
+ "step": 90
109
+ },
110
+ {
111
+ "epoch": 4.780487804878049,
112
+ "grad_norm": 27.87204360961914,
113
+ "learning_rate": 2.631578947368421e-05,
114
+ "loss": 1.1556,
115
+ "step": 100
116
+ },
117
+ {
118
+ "epoch": 5.0,
119
+ "eval_accuracy": 0.7808219178082192,
120
+ "eval_loss": 0.7384896874427795,
121
+ "eval_runtime": 0.6327,
122
+ "eval_samples_per_second": 115.385,
123
+ "eval_steps_per_second": 7.903,
124
+ "step": 105
125
+ },
126
+ {
127
+ "epoch": 5.2439024390243905,
128
+ "grad_norm": 24.45848274230957,
129
+ "learning_rate": 2.578947368421053e-05,
130
+ "loss": 1.0818,
131
+ "step": 110
132
+ },
133
+ {
134
+ "epoch": 5.7317073170731705,
135
+ "grad_norm": 16.963436126708984,
136
+ "learning_rate": 2.526315789473684e-05,
137
+ "loss": 1.0059,
138
+ "step": 120
139
+ },
140
+ {
141
+ "epoch": 6.0,
142
+ "eval_accuracy": 0.8082191780821918,
143
+ "eval_loss": 0.6626368165016174,
144
+ "eval_runtime": 0.6356,
145
+ "eval_samples_per_second": 114.844,
146
+ "eval_steps_per_second": 7.866,
147
+ "step": 126
148
+ },
149
+ {
150
+ "epoch": 6.195121951219512,
151
+ "grad_norm": 45.89384460449219,
152
+ "learning_rate": 2.4736842105263158e-05,
153
+ "loss": 0.9054,
154
+ "step": 130
155
+ },
156
+ {
157
+ "epoch": 6.682926829268292,
158
+ "grad_norm": 27.633718490600586,
159
+ "learning_rate": 2.4210526315789474e-05,
160
+ "loss": 0.8598,
161
+ "step": 140
162
+ },
163
+ {
164
+ "epoch": 7.0,
165
+ "eval_accuracy": 0.7808219178082192,
166
+ "eval_loss": 0.5402742624282837,
167
+ "eval_runtime": 0.6334,
168
+ "eval_samples_per_second": 115.247,
169
+ "eval_steps_per_second": 7.894,
170
+ "step": 147
171
+ },
172
+ {
173
+ "epoch": 7.146341463414634,
174
+ "grad_norm": 25.943758010864258,
175
+ "learning_rate": 2.368421052631579e-05,
176
+ "loss": 0.8268,
177
+ "step": 150
178
+ },
179
+ {
180
+ "epoch": 7.634146341463414,
181
+ "grad_norm": 37.037078857421875,
182
+ "learning_rate": 2.3157894736842103e-05,
183
+ "loss": 0.8724,
184
+ "step": 160
185
+ },
186
+ {
187
+ "epoch": 8.0,
188
+ "eval_accuracy": 0.821917808219178,
189
+ "eval_loss": 0.5519894361495972,
190
+ "eval_runtime": 0.6379,
191
+ "eval_samples_per_second": 114.432,
192
+ "eval_steps_per_second": 7.838,
193
+ "step": 168
194
+ },
195
+ {
196
+ "epoch": 8.097560975609756,
197
+ "grad_norm": 22.22051429748535,
198
+ "learning_rate": 2.2631578947368423e-05,
199
+ "loss": 0.7427,
200
+ "step": 170
201
+ },
202
+ {
203
+ "epoch": 8.585365853658537,
204
+ "grad_norm": 33.7209587097168,
205
+ "learning_rate": 2.2105263157894736e-05,
206
+ "loss": 0.7096,
207
+ "step": 180
208
+ },
209
+ {
210
+ "epoch": 9.0,
211
+ "eval_accuracy": 0.8356164383561644,
212
+ "eval_loss": 0.5182141661643982,
213
+ "eval_runtime": 0.7436,
214
+ "eval_samples_per_second": 98.169,
215
+ "eval_steps_per_second": 6.724,
216
+ "step": 189
217
+ },
218
+ {
219
+ "epoch": 9.048780487804878,
220
+ "grad_norm": 17.677778244018555,
221
+ "learning_rate": 2.1578947368421053e-05,
222
+ "loss": 0.5608,
223
+ "step": 190
224
+ },
225
+ {
226
+ "epoch": 9.536585365853659,
227
+ "grad_norm": 34.34571075439453,
228
+ "learning_rate": 2.105263157894737e-05,
229
+ "loss": 0.5748,
230
+ "step": 200
231
+ },
232
+ {
233
+ "epoch": 10.0,
234
+ "grad_norm": 9.038191795349121,
235
+ "learning_rate": 2.0526315789473685e-05,
236
+ "loss": 0.5038,
237
+ "step": 210
238
+ },
239
+ {
240
+ "epoch": 10.0,
241
+ "eval_accuracy": 0.8493150684931506,
242
+ "eval_loss": 0.4132954180240631,
243
+ "eval_runtime": 0.6251,
244
+ "eval_samples_per_second": 116.788,
245
+ "eval_steps_per_second": 7.999,
246
+ "step": 210
247
+ },
248
+ {
249
+ "epoch": 10.487804878048781,
250
+ "grad_norm": 31.543596267700195,
251
+ "learning_rate": 1.9999999999999998e-05,
252
+ "loss": 0.5488,
253
+ "step": 220
254
+ },
255
+ {
256
+ "epoch": 10.975609756097562,
257
+ "grad_norm": 23.798019409179688,
258
+ "learning_rate": 1.9473684210526318e-05,
259
+ "loss": 0.4951,
260
+ "step": 230
261
+ },
262
+ {
263
+ "epoch": 11.0,
264
+ "eval_accuracy": 0.8767123287671232,
265
+ "eval_loss": 0.3548040986061096,
266
+ "eval_runtime": 0.6389,
267
+ "eval_samples_per_second": 114.257,
268
+ "eval_steps_per_second": 7.826,
269
+ "step": 231
270
+ },
271
+ {
272
+ "epoch": 11.439024390243903,
273
+ "grad_norm": 16.509632110595703,
274
+ "learning_rate": 1.894736842105263e-05,
275
+ "loss": 0.5018,
276
+ "step": 240
277
+ },
278
+ {
279
+ "epoch": 11.926829268292684,
280
+ "grad_norm": 17.9029598236084,
281
+ "learning_rate": 1.8421052631578947e-05,
282
+ "loss": 0.4692,
283
+ "step": 250
284
+ },
285
+ {
286
+ "epoch": 12.0,
287
+ "eval_accuracy": 0.8493150684931506,
288
+ "eval_loss": 0.38450247049331665,
289
+ "eval_runtime": 0.6355,
290
+ "eval_samples_per_second": 114.878,
291
+ "eval_steps_per_second": 7.868,
292
+ "step": 252
293
+ },
294
+ {
295
+ "epoch": 12.390243902439025,
296
+ "grad_norm": 29.795612335205078,
297
+ "learning_rate": 1.7894736842105264e-05,
298
+ "loss": 0.5316,
299
+ "step": 260
300
+ },
301
+ {
302
+ "epoch": 12.878048780487806,
303
+ "grad_norm": 14.258842468261719,
304
+ "learning_rate": 1.736842105263158e-05,
305
+ "loss": 0.5339,
306
+ "step": 270
307
+ },
308
+ {
309
+ "epoch": 13.0,
310
+ "eval_accuracy": 0.8904109589041096,
311
+ "eval_loss": 0.3178386390209198,
312
+ "eval_runtime": 0.6456,
313
+ "eval_samples_per_second": 113.071,
314
+ "eval_steps_per_second": 7.745,
315
+ "step": 273
316
+ },
317
+ {
318
+ "epoch": 13.341463414634147,
319
+ "grad_norm": 28.944801330566406,
320
+ "learning_rate": 1.6842105263157893e-05,
321
+ "loss": 0.5021,
322
+ "step": 280
323
+ },
324
+ {
325
+ "epoch": 13.829268292682928,
326
+ "grad_norm": 19.169776916503906,
327
+ "learning_rate": 1.6315789473684213e-05,
328
+ "loss": 0.4536,
329
+ "step": 290
330
+ },
331
+ {
332
+ "epoch": 14.0,
333
+ "eval_accuracy": 0.8904109589041096,
334
+ "eval_loss": 0.3252336084842682,
335
+ "eval_runtime": 0.6543,
336
+ "eval_samples_per_second": 111.574,
337
+ "eval_steps_per_second": 7.642,
338
+ "step": 294
339
+ },
340
+ {
341
+ "epoch": 14.292682926829269,
342
+ "grad_norm": 19.354726791381836,
343
+ "learning_rate": 1.5789473684210526e-05,
344
+ "loss": 0.3055,
345
+ "step": 300
346
+ },
347
+ {
348
+ "epoch": 14.78048780487805,
349
+ "grad_norm": 18.675071716308594,
350
+ "learning_rate": 1.5263157894736842e-05,
351
+ "loss": 0.4369,
352
+ "step": 310
353
+ },
354
+ {
355
+ "epoch": 15.0,
356
+ "eval_accuracy": 0.8904109589041096,
357
+ "eval_loss": 0.27849265933036804,
358
+ "eval_runtime": 0.6484,
359
+ "eval_samples_per_second": 112.585,
360
+ "eval_steps_per_second": 7.711,
361
+ "step": 315
362
+ },
363
+ {
364
+ "epoch": 15.24390243902439,
365
+ "grad_norm": 29.19399642944336,
366
+ "learning_rate": 1.4736842105263157e-05,
367
+ "loss": 0.447,
368
+ "step": 320
369
+ },
370
+ {
371
+ "epoch": 15.731707317073171,
372
+ "grad_norm": 5.669158935546875,
373
+ "learning_rate": 1.4210526315789473e-05,
374
+ "loss": 0.3941,
375
+ "step": 330
376
+ },
377
+ {
378
+ "epoch": 16.0,
379
+ "eval_accuracy": 0.9041095890410958,
380
+ "eval_loss": 0.28995245695114136,
381
+ "eval_runtime": 0.6348,
382
+ "eval_samples_per_second": 114.996,
383
+ "eval_steps_per_second": 7.876,
384
+ "step": 336
385
+ },
386
+ {
387
+ "epoch": 16.195121951219512,
388
+ "grad_norm": 21.81118392944336,
389
+ "learning_rate": 1.368421052631579e-05,
390
+ "loss": 0.3228,
391
+ "step": 340
392
+ },
393
+ {
394
+ "epoch": 16.682926829268293,
395
+ "grad_norm": 34.80079650878906,
396
+ "learning_rate": 1.3157894736842104e-05,
397
+ "loss": 0.4363,
398
+ "step": 350
399
+ },
400
+ {
401
+ "epoch": 17.0,
402
+ "eval_accuracy": 0.863013698630137,
403
+ "eval_loss": 0.3426441252231598,
404
+ "eval_runtime": 0.6573,
405
+ "eval_samples_per_second": 111.053,
406
+ "eval_steps_per_second": 7.606,
407
+ "step": 357
408
+ },
409
+ {
410
+ "epoch": 17.146341463414632,
411
+ "grad_norm": 25.49156379699707,
412
+ "learning_rate": 1.263157894736842e-05,
413
+ "loss": 0.3729,
414
+ "step": 360
415
+ },
416
+ {
417
+ "epoch": 17.634146341463413,
418
+ "grad_norm": 25.156068801879883,
419
+ "learning_rate": 1.2105263157894737e-05,
420
+ "loss": 0.2819,
421
+ "step": 370
422
+ },
423
+ {
424
+ "epoch": 18.0,
425
+ "eval_accuracy": 0.9041095890410958,
426
+ "eval_loss": 0.283920019865036,
427
+ "eval_runtime": 0.7572,
428
+ "eval_samples_per_second": 96.41,
429
+ "eval_steps_per_second": 6.603,
430
+ "step": 378
431
+ },
432
+ {
433
+ "epoch": 18.097560975609756,
434
+ "grad_norm": 36.39301300048828,
435
+ "learning_rate": 1.1578947368421052e-05,
436
+ "loss": 0.3253,
437
+ "step": 380
438
+ },
439
+ {
440
+ "epoch": 18.585365853658537,
441
+ "grad_norm": 24.839868545532227,
442
+ "learning_rate": 1.1052631578947368e-05,
443
+ "loss": 0.361,
444
+ "step": 390
445
+ },
446
+ {
447
+ "epoch": 19.0,
448
+ "eval_accuracy": 0.9041095890410958,
449
+ "eval_loss": 0.22234712541103363,
450
+ "eval_runtime": 0.7512,
451
+ "eval_samples_per_second": 97.175,
452
+ "eval_steps_per_second": 6.656,
453
+ "step": 399
454
+ },
455
+ {
456
+ "epoch": 19.048780487804876,
457
+ "grad_norm": 22.352935791015625,
458
+ "learning_rate": 1.0526315789473684e-05,
459
+ "loss": 0.3204,
460
+ "step": 400
461
+ },
462
+ {
463
+ "epoch": 19.536585365853657,
464
+ "grad_norm": 12.527485847473145,
465
+ "learning_rate": 9.999999999999999e-06,
466
+ "loss": 0.2965,
467
+ "step": 410
468
+ },
469
+ {
470
+ "epoch": 20.0,
471
+ "grad_norm": 16.140165328979492,
472
+ "learning_rate": 9.473684210526315e-06,
473
+ "loss": 0.1857,
474
+ "step": 420
475
+ },
476
+ {
477
+ "epoch": 20.0,
478
+ "eval_accuracy": 0.9178082191780822,
479
+ "eval_loss": 0.25217577815055847,
480
+ "eval_runtime": 0.6292,
481
+ "eval_samples_per_second": 116.026,
482
+ "eval_steps_per_second": 7.947,
483
+ "step": 420
484
+ },
485
+ {
486
+ "epoch": 20.48780487804878,
487
+ "grad_norm": 12.794975280761719,
488
+ "learning_rate": 8.947368421052632e-06,
489
+ "loss": 0.1904,
490
+ "step": 430
491
+ },
492
+ {
493
+ "epoch": 20.975609756097562,
494
+ "grad_norm": 32.452125549316406,
495
+ "learning_rate": 8.421052631578947e-06,
496
+ "loss": 0.3161,
497
+ "step": 440
498
+ },
499
+ {
500
+ "epoch": 21.0,
501
+ "eval_accuracy": 0.9178082191780822,
502
+ "eval_loss": 0.21637919545173645,
503
+ "eval_runtime": 0.6487,
504
+ "eval_samples_per_second": 112.528,
505
+ "eval_steps_per_second": 7.707,
506
+ "step": 441
507
+ },
508
+ {
509
+ "epoch": 21.4390243902439,
510
+ "grad_norm": 17.289514541625977,
511
+ "learning_rate": 7.894736842105263e-06,
512
+ "loss": 0.2523,
513
+ "step": 450
514
+ },
515
+ {
516
+ "epoch": 21.926829268292682,
517
+ "grad_norm": 42.501861572265625,
518
+ "learning_rate": 7.3684210526315784e-06,
519
+ "loss": 0.3273,
520
+ "step": 460
521
+ },
522
+ {
523
+ "epoch": 22.0,
524
+ "eval_accuracy": 0.9315068493150684,
525
+ "eval_loss": 0.22238127887248993,
526
+ "eval_runtime": 0.6384,
527
+ "eval_samples_per_second": 114.34,
528
+ "eval_steps_per_second": 7.831,
529
+ "step": 462
530
+ },
531
+ {
532
+ "epoch": 22.390243902439025,
533
+ "grad_norm": 32.69398498535156,
534
+ "learning_rate": 6.842105263157895e-06,
535
+ "loss": 0.1996,
536
+ "step": 470
537
+ },
538
+ {
539
+ "epoch": 22.878048780487806,
540
+ "grad_norm": 21.498504638671875,
541
+ "learning_rate": 6.31578947368421e-06,
542
+ "loss": 0.3458,
543
+ "step": 480
544
+ },
545
+ {
546
+ "epoch": 23.0,
547
+ "eval_accuracy": 0.9452054794520548,
548
+ "eval_loss": 0.21988777816295624,
549
+ "eval_runtime": 0.7497,
550
+ "eval_samples_per_second": 97.368,
551
+ "eval_steps_per_second": 6.669,
552
+ "step": 483
553
+ },
554
+ {
555
+ "epoch": 23.341463414634145,
556
+ "grad_norm": 8.661595344543457,
557
+ "learning_rate": 5.789473684210526e-06,
558
+ "loss": 0.2248,
559
+ "step": 490
560
+ },
561
+ {
562
+ "epoch": 23.829268292682926,
563
+ "grad_norm": 23.505203247070312,
564
+ "learning_rate": 5.263157894736842e-06,
565
+ "loss": 0.337,
566
+ "step": 500
567
+ },
568
+ {
569
+ "epoch": 24.0,
570
+ "eval_accuracy": 0.9315068493150684,
571
+ "eval_loss": 0.23766528069972992,
572
+ "eval_runtime": 0.768,
573
+ "eval_samples_per_second": 95.058,
574
+ "eval_steps_per_second": 6.511,
575
+ "step": 504
576
+ },
577
+ {
578
+ "epoch": 24.29268292682927,
579
+ "grad_norm": 13.873770713806152,
580
+ "learning_rate": 4.736842105263158e-06,
581
+ "loss": 0.201,
582
+ "step": 510
583
+ },
584
+ {
585
+ "epoch": 24.78048780487805,
586
+ "grad_norm": 16.176956176757812,
587
+ "learning_rate": 4.210526315789473e-06,
588
+ "loss": 0.1801,
589
+ "step": 520
590
+ },
591
+ {
592
+ "epoch": 25.0,
593
+ "eval_accuracy": 0.958904109589041,
594
+ "eval_loss": 0.20672719180583954,
595
+ "eval_runtime": 0.6363,
596
+ "eval_samples_per_second": 114.732,
597
+ "eval_steps_per_second": 7.858,
598
+ "step": 525
599
+ },
600
+ {
601
+ "epoch": 25.24390243902439,
602
+ "grad_norm": 39.25809097290039,
603
+ "learning_rate": 3.6842105263157892e-06,
604
+ "loss": 0.2077,
605
+ "step": 530
606
+ },
607
+ {
608
+ "epoch": 25.73170731707317,
609
+ "grad_norm": 46.6181755065918,
610
+ "learning_rate": 3.157894736842105e-06,
611
+ "loss": 0.3283,
612
+ "step": 540
613
+ },
614
+ {
615
+ "epoch": 26.0,
616
+ "eval_accuracy": 0.9315068493150684,
617
+ "eval_loss": 0.24006159603595734,
618
+ "eval_runtime": 0.6339,
619
+ "eval_samples_per_second": 115.158,
620
+ "eval_steps_per_second": 7.888,
621
+ "step": 546
622
+ },
623
+ {
624
+ "epoch": 26.195121951219512,
625
+ "grad_norm": 10.685463905334473,
626
+ "learning_rate": 2.631578947368421e-06,
627
+ "loss": 0.3054,
628
+ "step": 550
629
+ },
630
+ {
631
+ "epoch": 26.682926829268293,
632
+ "grad_norm": 28.955364227294922,
633
+ "learning_rate": 2.1052631578947366e-06,
634
+ "loss": 0.2211,
635
+ "step": 560
636
+ },
637
+ {
638
+ "epoch": 27.0,
639
+ "eval_accuracy": 0.9315068493150684,
640
+ "eval_loss": 0.21667610108852386,
641
+ "eval_runtime": 0.6327,
642
+ "eval_samples_per_second": 115.37,
643
+ "eval_steps_per_second": 7.902,
644
+ "step": 567
645
+ },
646
+ {
647
+ "epoch": 27.146341463414632,
648
+ "grad_norm": 20.726945877075195,
649
+ "learning_rate": 1.5789473684210526e-06,
650
+ "loss": 0.2556,
651
+ "step": 570
652
+ },
653
+ {
654
+ "epoch": 27.634146341463413,
655
+ "grad_norm": 6.582526206970215,
656
+ "learning_rate": 1.0526315789473683e-06,
657
+ "loss": 0.1783,
658
+ "step": 580
659
+ },
660
+ {
661
+ "epoch": 28.0,
662
+ "eval_accuracy": 0.9315068493150684,
663
+ "eval_loss": 0.21801576018333435,
664
+ "eval_runtime": 0.6499,
665
+ "eval_samples_per_second": 112.334,
666
+ "eval_steps_per_second": 7.694,
667
+ "step": 588
668
+ },
669
+ {
670
+ "epoch": 28.097560975609756,
671
+ "grad_norm": 16.22028923034668,
672
+ "learning_rate": 5.263157894736842e-07,
673
+ "loss": 0.1982,
674
+ "step": 590
675
+ },
676
+ {
677
+ "epoch": 28.585365853658537,
678
+ "grad_norm": 15.770591735839844,
679
+ "learning_rate": 0.0,
680
+ "loss": 0.2783,
681
+ "step": 600
682
+ },
683
+ {
684
+ "epoch": 28.585365853658537,
685
+ "eval_accuracy": 0.9315068493150684,
686
+ "eval_loss": 0.22231744229793549,
687
+ "eval_runtime": 0.8149,
688
+ "eval_samples_per_second": 89.581,
689
+ "eval_steps_per_second": 6.136,
690
+ "step": 600
691
+ },
692
+ {
693
+ "epoch": 28.585365853658537,
694
+ "step": 600,
695
+ "total_flos": 6.102198151010058e+17,
696
+ "train_loss": 0.7502146526177724,
697
+ "train_runtime": 483.2384,
698
+ "train_samples_per_second": 40.725,
699
+ "train_steps_per_second": 1.242
700
+ }
701
+ ],
702
+ "logging_steps": 10,
703
+ "max_steps": 600,
704
+ "num_input_tokens_seen": 0,
705
+ "num_train_epochs": 30,
706
+ "save_steps": 500,
707
+ "stateful_callbacks": {
708
+ "TrainerControl": {
709
+ "args": {
710
+ "should_epoch_stop": false,
711
+ "should_evaluate": false,
712
+ "should_log": false,
713
+ "should_save": true,
714
+ "should_training_stop": true
715
+ },
716
+ "attributes": {}
717
+ }
718
+ },
719
+ "total_flos": 6.102198151010058e+17,
720
+ "train_batch_size": 16,
721
+ "trial_name": null,
722
+ "trial_params": null
723
+ }