RobertoSonic committed on
Commit
f5286e8
·
verified ·
1 Parent(s): 0df222c

End of training

Browse files
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.5819
22
- - Accuracy: 0.6346
23
 
24
  ## Model description
25
 
 
18
 
19
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.9577
22
+ - Accuracy: 0.7308
23
 
24
  ## Model description
25
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 41.93023255813954,
3
+ "eval_accuracy": 0.7307692307692307,
4
+ "eval_loss": 0.9577280879020691,
5
+ "eval_runtime": 1.0772,
6
+ "eval_samples_per_second": 48.273,
7
+ "eval_steps_per_second": 1.857,
8
+ "total_flos": 1.8345690247389512e+18,
9
+ "train_loss": 1.2270459805216107,
10
+ "train_runtime": 1356.3174,
11
+ "train_samples_per_second": 42.176,
12
+ "train_steps_per_second": 0.31
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 41.93023255813954,
3
+ "eval_accuracy": 0.7307692307692307,
4
+ "eval_loss": 0.9577280879020691,
5
+ "eval_runtime": 1.0772,
6
+ "eval_samples_per_second": 48.273,
7
+ "eval_steps_per_second": 1.857
8
+ }
runs/Jan21_15-51-11_aabb36fdf211/events.out.tfevents.1737476616.aabb36fdf211.212.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db4350dd8e40a29cbb42e2f4443060de81f2261a34f39edd86eb8f9062e73f1
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 41.93023255813954,
3
+ "total_flos": 1.8345690247389512e+18,
4
+ "train_loss": 1.2270459805216107,
5
+ "train_runtime": 1356.3174,
6
+ "train_samples_per_second": 42.176,
7
+ "train_steps_per_second": 0.31
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,665 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7307692307692307,
3
+ "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV21/checkpoint-90",
4
+ "epoch": 41.93023255813954,
5
+ "eval_steps": 500,
6
+ "global_step": 420,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.9302325581395349,
13
+ "eval_accuracy": 0.23076923076923078,
14
+ "eval_loss": 1.600624680519104,
15
+ "eval_runtime": 0.9466,
16
+ "eval_samples_per_second": 54.935,
17
+ "eval_steps_per_second": 2.113,
18
+ "step": 10
19
+ },
20
+ {
21
+ "epoch": 1.1860465116279069,
22
+ "grad_norm": 11.869792938232422,
23
+ "learning_rate": 1.4285714285714285e-05,
24
+ "loss": 6.7273,
25
+ "step": 12
26
+ },
27
+ {
28
+ "epoch": 1.9302325581395348,
29
+ "eval_accuracy": 0.3076923076923077,
30
+ "eval_loss": 1.4678974151611328,
31
+ "eval_runtime": 0.9407,
32
+ "eval_samples_per_second": 55.281,
33
+ "eval_steps_per_second": 2.126,
34
+ "step": 20
35
+ },
36
+ {
37
+ "epoch": 2.3720930232558137,
38
+ "grad_norm": 21.593393325805664,
39
+ "learning_rate": 2.857142857142857e-05,
40
+ "loss": 5.9474,
41
+ "step": 24
42
+ },
43
+ {
44
+ "epoch": 2.9302325581395348,
45
+ "eval_accuracy": 0.5384615384615384,
46
+ "eval_loss": 1.1914443969726562,
47
+ "eval_runtime": 0.9141,
48
+ "eval_samples_per_second": 56.886,
49
+ "eval_steps_per_second": 2.188,
50
+ "step": 30
51
+ },
52
+ {
53
+ "epoch": 3.558139534883721,
54
+ "grad_norm": 30.464523315429688,
55
+ "learning_rate": 4.2857142857142856e-05,
56
+ "loss": 4.3002,
57
+ "step": 36
58
+ },
59
+ {
60
+ "epoch": 3.9302325581395348,
61
+ "eval_accuracy": 0.46153846153846156,
62
+ "eval_loss": 1.1951944828033447,
63
+ "eval_runtime": 1.2786,
64
+ "eval_samples_per_second": 40.671,
65
+ "eval_steps_per_second": 1.564,
66
+ "step": 40
67
+ },
68
+ {
69
+ "epoch": 4.7441860465116275,
70
+ "grad_norm": 25.736072540283203,
71
+ "learning_rate": 4.9206349206349204e-05,
72
+ "loss": 2.9717,
73
+ "step": 48
74
+ },
75
+ {
76
+ "epoch": 4.930232558139535,
77
+ "eval_accuracy": 0.5769230769230769,
78
+ "eval_loss": 0.9782841205596924,
79
+ "eval_runtime": 0.9391,
80
+ "eval_samples_per_second": 55.375,
81
+ "eval_steps_per_second": 2.13,
82
+ "step": 50
83
+ },
84
+ {
85
+ "epoch": 5.930232558139535,
86
+ "grad_norm": 54.5507698059082,
87
+ "learning_rate": 4.761904761904762e-05,
88
+ "loss": 2.4828,
89
+ "step": 60
90
+ },
91
+ {
92
+ "epoch": 5.930232558139535,
93
+ "eval_accuracy": 0.5769230769230769,
94
+ "eval_loss": 0.9832176566123962,
95
+ "eval_runtime": 0.9619,
96
+ "eval_samples_per_second": 54.06,
97
+ "eval_steps_per_second": 2.079,
98
+ "step": 60
99
+ },
100
+ {
101
+ "epoch": 6.930232558139535,
102
+ "eval_accuracy": 0.5,
103
+ "eval_loss": 1.0621165037155151,
104
+ "eval_runtime": 1.3284,
105
+ "eval_samples_per_second": 39.145,
106
+ "eval_steps_per_second": 1.506,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 7.186046511627907,
111
+ "grad_norm": 26.069013595581055,
112
+ "learning_rate": 4.603174603174603e-05,
113
+ "loss": 2.0276,
114
+ "step": 72
115
+ },
116
+ {
117
+ "epoch": 7.930232558139535,
118
+ "eval_accuracy": 0.6538461538461539,
119
+ "eval_loss": 0.9040729999542236,
120
+ "eval_runtime": 0.9561,
121
+ "eval_samples_per_second": 54.39,
122
+ "eval_steps_per_second": 2.092,
123
+ "step": 80
124
+ },
125
+ {
126
+ "epoch": 8.372093023255815,
127
+ "grad_norm": 43.163116455078125,
128
+ "learning_rate": 4.4444444444444447e-05,
129
+ "loss": 1.7041,
130
+ "step": 84
131
+ },
132
+ {
133
+ "epoch": 8.930232558139535,
134
+ "eval_accuracy": 0.7307692307692307,
135
+ "eval_loss": 0.9577280879020691,
136
+ "eval_runtime": 0.9834,
137
+ "eval_samples_per_second": 52.878,
138
+ "eval_steps_per_second": 2.034,
139
+ "step": 90
140
+ },
141
+ {
142
+ "epoch": 9.55813953488372,
143
+ "grad_norm": 45.423728942871094,
144
+ "learning_rate": 4.2857142857142856e-05,
145
+ "loss": 1.3967,
146
+ "step": 96
147
+ },
148
+ {
149
+ "epoch": 9.930232558139535,
150
+ "eval_accuracy": 0.6730769230769231,
151
+ "eval_loss": 1.0290472507476807,
152
+ "eval_runtime": 0.9566,
153
+ "eval_samples_per_second": 54.362,
154
+ "eval_steps_per_second": 2.091,
155
+ "step": 100
156
+ },
157
+ {
158
+ "epoch": 10.744186046511627,
159
+ "grad_norm": 29.1428279876709,
160
+ "learning_rate": 4.126984126984127e-05,
161
+ "loss": 1.3079,
162
+ "step": 108
163
+ },
164
+ {
165
+ "epoch": 10.930232558139535,
166
+ "eval_accuracy": 0.7115384615384616,
167
+ "eval_loss": 0.9863845109939575,
168
+ "eval_runtime": 1.3391,
169
+ "eval_samples_per_second": 38.832,
170
+ "eval_steps_per_second": 1.494,
171
+ "step": 110
172
+ },
173
+ {
174
+ "epoch": 11.930232558139535,
175
+ "grad_norm": 25.176292419433594,
176
+ "learning_rate": 3.968253968253968e-05,
177
+ "loss": 1.0486,
178
+ "step": 120
179
+ },
180
+ {
181
+ "epoch": 11.930232558139535,
182
+ "eval_accuracy": 0.6730769230769231,
183
+ "eval_loss": 1.0005784034729004,
184
+ "eval_runtime": 0.9808,
185
+ "eval_samples_per_second": 53.016,
186
+ "eval_steps_per_second": 2.039,
187
+ "step": 120
188
+ },
189
+ {
190
+ "epoch": 12.930232558139535,
191
+ "eval_accuracy": 0.6153846153846154,
192
+ "eval_loss": 0.9503486156463623,
193
+ "eval_runtime": 0.9746,
194
+ "eval_samples_per_second": 53.353,
195
+ "eval_steps_per_second": 2.052,
196
+ "step": 130
197
+ },
198
+ {
199
+ "epoch": 13.186046511627907,
200
+ "grad_norm": 55.16592025756836,
201
+ "learning_rate": 3.809523809523809e-05,
202
+ "loss": 1.0704,
203
+ "step": 132
204
+ },
205
+ {
206
+ "epoch": 13.930232558139535,
207
+ "eval_accuracy": 0.7307692307692307,
208
+ "eval_loss": 0.9434700012207031,
209
+ "eval_runtime": 1.3209,
210
+ "eval_samples_per_second": 39.368,
211
+ "eval_steps_per_second": 1.514,
212
+ "step": 140
213
+ },
214
+ {
215
+ "epoch": 14.372093023255815,
216
+ "grad_norm": 31.888456344604492,
217
+ "learning_rate": 3.650793650793651e-05,
218
+ "loss": 0.9929,
219
+ "step": 144
220
+ },
221
+ {
222
+ "epoch": 14.930232558139535,
223
+ "eval_accuracy": 0.6346153846153846,
224
+ "eval_loss": 1.0601013898849487,
225
+ "eval_runtime": 0.9793,
226
+ "eval_samples_per_second": 53.098,
227
+ "eval_steps_per_second": 2.042,
228
+ "step": 150
229
+ },
230
+ {
231
+ "epoch": 15.55813953488372,
232
+ "grad_norm": 29.11701202392578,
233
+ "learning_rate": 3.492063492063492e-05,
234
+ "loss": 0.8911,
235
+ "step": 156
236
+ },
237
+ {
238
+ "epoch": 15.930232558139535,
239
+ "eval_accuracy": 0.6730769230769231,
240
+ "eval_loss": 1.0281590223312378,
241
+ "eval_runtime": 0.9645,
242
+ "eval_samples_per_second": 53.912,
243
+ "eval_steps_per_second": 2.074,
244
+ "step": 160
245
+ },
246
+ {
247
+ "epoch": 16.74418604651163,
248
+ "grad_norm": 32.15597152709961,
249
+ "learning_rate": 3.3333333333333335e-05,
250
+ "loss": 0.7702,
251
+ "step": 168
252
+ },
253
+ {
254
+ "epoch": 16.930232558139537,
255
+ "eval_accuracy": 0.6923076923076923,
256
+ "eval_loss": 1.1857846975326538,
257
+ "eval_runtime": 0.9593,
258
+ "eval_samples_per_second": 54.207,
259
+ "eval_steps_per_second": 2.085,
260
+ "step": 170
261
+ },
262
+ {
263
+ "epoch": 17.930232558139537,
264
+ "grad_norm": 32.97303771972656,
265
+ "learning_rate": 3.1746031746031745e-05,
266
+ "loss": 0.718,
267
+ "step": 180
268
+ },
269
+ {
270
+ "epoch": 17.930232558139537,
271
+ "eval_accuracy": 0.6538461538461539,
272
+ "eval_loss": 1.2644145488739014,
273
+ "eval_runtime": 1.3041,
274
+ "eval_samples_per_second": 39.875,
275
+ "eval_steps_per_second": 1.534,
276
+ "step": 180
277
+ },
278
+ {
279
+ "epoch": 18.930232558139537,
280
+ "eval_accuracy": 0.6346153846153846,
281
+ "eval_loss": 1.1403751373291016,
282
+ "eval_runtime": 0.957,
283
+ "eval_samples_per_second": 54.339,
284
+ "eval_steps_per_second": 2.09,
285
+ "step": 190
286
+ },
287
+ {
288
+ "epoch": 19.186046511627907,
289
+ "grad_norm": 18.562591552734375,
290
+ "learning_rate": 3.0158730158730158e-05,
291
+ "loss": 0.6526,
292
+ "step": 192
293
+ },
294
+ {
295
+ "epoch": 19.930232558139537,
296
+ "eval_accuracy": 0.6923076923076923,
297
+ "eval_loss": 1.2202544212341309,
298
+ "eval_runtime": 0.981,
299
+ "eval_samples_per_second": 53.008,
300
+ "eval_steps_per_second": 2.039,
301
+ "step": 200
302
+ },
303
+ {
304
+ "epoch": 20.372093023255815,
305
+ "grad_norm": 41.22922897338867,
306
+ "learning_rate": 2.857142857142857e-05,
307
+ "loss": 0.6617,
308
+ "step": 204
309
+ },
310
+ {
311
+ "epoch": 20.930232558139537,
312
+ "eval_accuracy": 0.6730769230769231,
313
+ "eval_loss": 1.3465415239334106,
314
+ "eval_runtime": 1.3212,
315
+ "eval_samples_per_second": 39.359,
316
+ "eval_steps_per_second": 1.514,
317
+ "step": 210
318
+ },
319
+ {
320
+ "epoch": 21.558139534883722,
321
+ "grad_norm": 28.829620361328125,
322
+ "learning_rate": 2.6984126984126984e-05,
323
+ "loss": 0.5699,
324
+ "step": 216
325
+ },
326
+ {
327
+ "epoch": 21.930232558139537,
328
+ "eval_accuracy": 0.6538461538461539,
329
+ "eval_loss": 1.3862558603286743,
330
+ "eval_runtime": 0.9516,
331
+ "eval_samples_per_second": 54.644,
332
+ "eval_steps_per_second": 2.102,
333
+ "step": 220
334
+ },
335
+ {
336
+ "epoch": 22.74418604651163,
337
+ "grad_norm": 23.03827476501465,
338
+ "learning_rate": 2.5396825396825397e-05,
339
+ "loss": 0.5228,
340
+ "step": 228
341
+ },
342
+ {
343
+ "epoch": 22.930232558139537,
344
+ "eval_accuracy": 0.7115384615384616,
345
+ "eval_loss": 1.272707223892212,
346
+ "eval_runtime": 0.985,
347
+ "eval_samples_per_second": 52.794,
348
+ "eval_steps_per_second": 2.031,
349
+ "step": 230
350
+ },
351
+ {
352
+ "epoch": 23.930232558139537,
353
+ "grad_norm": 27.27239990234375,
354
+ "learning_rate": 2.380952380952381e-05,
355
+ "loss": 0.4321,
356
+ "step": 240
357
+ },
358
+ {
359
+ "epoch": 23.930232558139537,
360
+ "eval_accuracy": 0.7115384615384616,
361
+ "eval_loss": 1.194717288017273,
362
+ "eval_runtime": 0.9431,
363
+ "eval_samples_per_second": 55.135,
364
+ "eval_steps_per_second": 2.121,
365
+ "step": 240
366
+ },
367
+ {
368
+ "epoch": 24.930232558139537,
369
+ "eval_accuracy": 0.6346153846153846,
370
+ "eval_loss": 1.397331714630127,
371
+ "eval_runtime": 1.2955,
372
+ "eval_samples_per_second": 40.139,
373
+ "eval_steps_per_second": 1.544,
374
+ "step": 250
375
+ },
376
+ {
377
+ "epoch": 25.186046511627907,
378
+ "grad_norm": 24.13581085205078,
379
+ "learning_rate": 2.2222222222222223e-05,
380
+ "loss": 0.557,
381
+ "step": 252
382
+ },
383
+ {
384
+ "epoch": 25.930232558139537,
385
+ "eval_accuracy": 0.6538461538461539,
386
+ "eval_loss": 1.4321506023406982,
387
+ "eval_runtime": 1.3216,
388
+ "eval_samples_per_second": 39.345,
389
+ "eval_steps_per_second": 1.513,
390
+ "step": 260
391
+ },
392
+ {
393
+ "epoch": 26.372093023255815,
394
+ "grad_norm": 34.4373664855957,
395
+ "learning_rate": 2.0634920634920636e-05,
396
+ "loss": 0.4569,
397
+ "step": 264
398
+ },
399
+ {
400
+ "epoch": 26.930232558139537,
401
+ "eval_accuracy": 0.6538461538461539,
402
+ "eval_loss": 1.3255730867385864,
403
+ "eval_runtime": 0.9379,
404
+ "eval_samples_per_second": 55.443,
405
+ "eval_steps_per_second": 2.132,
406
+ "step": 270
407
+ },
408
+ {
409
+ "epoch": 27.558139534883722,
410
+ "grad_norm": 19.65439224243164,
411
+ "learning_rate": 1.9047619047619046e-05,
412
+ "loss": 0.4395,
413
+ "step": 276
414
+ },
415
+ {
416
+ "epoch": 27.930232558139537,
417
+ "eval_accuracy": 0.6730769230769231,
418
+ "eval_loss": 1.3907530307769775,
419
+ "eval_runtime": 1.2894,
420
+ "eval_samples_per_second": 40.33,
421
+ "eval_steps_per_second": 1.551,
422
+ "step": 280
423
+ },
424
+ {
425
+ "epoch": 28.74418604651163,
426
+ "grad_norm": 31.272125244140625,
427
+ "learning_rate": 1.746031746031746e-05,
428
+ "loss": 0.5587,
429
+ "step": 288
430
+ },
431
+ {
432
+ "epoch": 28.930232558139537,
433
+ "eval_accuracy": 0.6923076923076923,
434
+ "eval_loss": 1.5055606365203857,
435
+ "eval_runtime": 0.9347,
436
+ "eval_samples_per_second": 55.632,
437
+ "eval_steps_per_second": 2.14,
438
+ "step": 290
439
+ },
440
+ {
441
+ "epoch": 29.930232558139537,
442
+ "grad_norm": 21.733734130859375,
443
+ "learning_rate": 1.5873015873015872e-05,
444
+ "loss": 0.439,
445
+ "step": 300
446
+ },
447
+ {
448
+ "epoch": 29.930232558139537,
449
+ "eval_accuracy": 0.6538461538461539,
450
+ "eval_loss": 1.3810527324676514,
451
+ "eval_runtime": 0.9297,
452
+ "eval_samples_per_second": 55.934,
453
+ "eval_steps_per_second": 2.151,
454
+ "step": 300
455
+ },
456
+ {
457
+ "epoch": 30.930232558139537,
458
+ "eval_accuracy": 0.6153846153846154,
459
+ "eval_loss": 1.3693809509277344,
460
+ "eval_runtime": 1.2587,
461
+ "eval_samples_per_second": 41.311,
462
+ "eval_steps_per_second": 1.589,
463
+ "step": 310
464
+ },
465
+ {
466
+ "epoch": 31.186046511627907,
467
+ "grad_norm": 24.466344833374023,
468
+ "learning_rate": 1.4285714285714285e-05,
469
+ "loss": 0.3989,
470
+ "step": 312
471
+ },
472
+ {
473
+ "epoch": 31.930232558139537,
474
+ "eval_accuracy": 0.6538461538461539,
475
+ "eval_loss": 1.4759361743927002,
476
+ "eval_runtime": 0.9328,
477
+ "eval_samples_per_second": 55.746,
478
+ "eval_steps_per_second": 2.144,
479
+ "step": 320
480
+ },
481
+ {
482
+ "epoch": 32.372093023255815,
483
+ "grad_norm": 35.87159729003906,
484
+ "learning_rate": 1.2698412698412699e-05,
485
+ "loss": 0.312,
486
+ "step": 324
487
+ },
488
+ {
489
+ "epoch": 32.93023255813954,
490
+ "eval_accuracy": 0.6346153846153846,
491
+ "eval_loss": 1.5513783693313599,
492
+ "eval_runtime": 0.9364,
493
+ "eval_samples_per_second": 55.53,
494
+ "eval_steps_per_second": 2.136,
495
+ "step": 330
496
+ },
497
+ {
498
+ "epoch": 33.55813953488372,
499
+ "grad_norm": 14.943767547607422,
500
+ "learning_rate": 1.1111111111111112e-05,
501
+ "loss": 0.3728,
502
+ "step": 336
503
+ },
504
+ {
505
+ "epoch": 33.93023255813954,
506
+ "eval_accuracy": 0.6538461538461539,
507
+ "eval_loss": 1.5128782987594604,
508
+ "eval_runtime": 1.2844,
509
+ "eval_samples_per_second": 40.485,
510
+ "eval_steps_per_second": 1.557,
511
+ "step": 340
512
+ },
513
+ {
514
+ "epoch": 34.74418604651163,
515
+ "grad_norm": 15.3562593460083,
516
+ "learning_rate": 9.523809523809523e-06,
517
+ "loss": 0.4031,
518
+ "step": 348
519
+ },
520
+ {
521
+ "epoch": 34.93023255813954,
522
+ "eval_accuracy": 0.6538461538461539,
523
+ "eval_loss": 1.4914475679397583,
524
+ "eval_runtime": 0.948,
525
+ "eval_samples_per_second": 54.852,
526
+ "eval_steps_per_second": 2.11,
527
+ "step": 350
528
+ },
529
+ {
530
+ "epoch": 35.93023255813954,
531
+ "grad_norm": 23.48607635498047,
532
+ "learning_rate": 7.936507936507936e-06,
533
+ "loss": 0.3523,
534
+ "step": 360
535
+ },
536
+ {
537
+ "epoch": 35.93023255813954,
538
+ "eval_accuracy": 0.6730769230769231,
539
+ "eval_loss": 1.5588839054107666,
540
+ "eval_runtime": 0.9436,
541
+ "eval_samples_per_second": 55.108,
542
+ "eval_steps_per_second": 2.12,
543
+ "step": 360
544
+ },
545
+ {
546
+ "epoch": 36.93023255813954,
547
+ "eval_accuracy": 0.6730769230769231,
548
+ "eval_loss": 1.568899154663086,
549
+ "eval_runtime": 1.2885,
550
+ "eval_samples_per_second": 40.359,
551
+ "eval_steps_per_second": 1.552,
552
+ "step": 370
553
+ },
554
+ {
555
+ "epoch": 37.18604651162791,
556
+ "grad_norm": 31.557363510131836,
557
+ "learning_rate": 6.349206349206349e-06,
558
+ "loss": 0.265,
559
+ "step": 372
560
+ },
561
+ {
562
+ "epoch": 37.93023255813954,
563
+ "eval_accuracy": 0.6730769230769231,
564
+ "eval_loss": 1.6178526878356934,
565
+ "eval_runtime": 0.9634,
566
+ "eval_samples_per_second": 53.974,
567
+ "eval_steps_per_second": 2.076,
568
+ "step": 380
569
+ },
570
+ {
571
+ "epoch": 38.372093023255815,
572
+ "grad_norm": 27.208581924438477,
573
+ "learning_rate": 4.7619047619047615e-06,
574
+ "loss": 0.3189,
575
+ "step": 384
576
+ },
577
+ {
578
+ "epoch": 38.93023255813954,
579
+ "eval_accuracy": 0.6538461538461539,
580
+ "eval_loss": 1.5962971448898315,
581
+ "eval_runtime": 0.9485,
582
+ "eval_samples_per_second": 54.826,
583
+ "eval_steps_per_second": 2.109,
584
+ "step": 390
585
+ },
586
+ {
587
+ "epoch": 39.55813953488372,
588
+ "grad_norm": 30.211496353149414,
589
+ "learning_rate": 3.1746031746031746e-06,
590
+ "loss": 0.2976,
591
+ "step": 396
592
+ },
593
+ {
594
+ "epoch": 39.93023255813954,
595
+ "eval_accuracy": 0.6346153846153846,
596
+ "eval_loss": 1.5738095045089722,
597
+ "eval_runtime": 1.1624,
598
+ "eval_samples_per_second": 44.733,
599
+ "eval_steps_per_second": 1.721,
600
+ "step": 400
601
+ },
602
+ {
603
+ "epoch": 40.74418604651163,
604
+ "grad_norm": 16.808286666870117,
605
+ "learning_rate": 1.5873015873015873e-06,
606
+ "loss": 0.3097,
607
+ "step": 408
608
+ },
609
+ {
610
+ "epoch": 40.93023255813954,
611
+ "eval_accuracy": 0.6346153846153846,
612
+ "eval_loss": 1.578661561012268,
613
+ "eval_runtime": 0.9279,
614
+ "eval_samples_per_second": 56.041,
615
+ "eval_steps_per_second": 2.155,
616
+ "step": 410
617
+ },
618
+ {
619
+ "epoch": 41.93023255813954,
620
+ "grad_norm": 11.15967082977295,
621
+ "learning_rate": 0.0,
622
+ "loss": 0.269,
623
+ "step": 420
624
+ },
625
+ {
626
+ "epoch": 41.93023255813954,
627
+ "eval_accuracy": 0.6346153846153846,
628
+ "eval_loss": 1.5818698406219482,
629
+ "eval_runtime": 1.6499,
630
+ "eval_samples_per_second": 31.516,
631
+ "eval_steps_per_second": 1.212,
632
+ "step": 420
633
+ },
634
+ {
635
+ "epoch": 41.93023255813954,
636
+ "step": 420,
637
+ "total_flos": 1.8345690247389512e+18,
638
+ "train_loss": 1.2270459805216107,
639
+ "train_runtime": 1356.3174,
640
+ "train_samples_per_second": 42.176,
641
+ "train_steps_per_second": 0.31
642
+ }
643
+ ],
644
+ "logging_steps": 12,
645
+ "max_steps": 420,
646
+ "num_input_tokens_seen": 0,
647
+ "num_train_epochs": 42,
648
+ "save_steps": 500,
649
+ "stateful_callbacks": {
650
+ "TrainerControl": {
651
+ "args": {
652
+ "should_epoch_stop": false,
653
+ "should_evaluate": false,
654
+ "should_log": false,
655
+ "should_save": true,
656
+ "should_training_stop": true
657
+ },
658
+ "attributes": {}
659
+ }
660
+ },
661
+ "total_flos": 1.8345690247389512e+18,
662
+ "train_batch_size": 32,
663
+ "trial_name": null,
664
+ "trial_params": null
665
+ }