Shawon16 commited on
Commit
f421e60
·
verified ·
1 Parent(s): 523d376

End of training

Browse files
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [facebook/timesformer-base-finetuned-k400](https://huggingface.co/facebook/timesformer-base-finetuned-k400) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.7324
22
- - Accuracy: 0.7868
23
 
24
  ## Model description
25
 
@@ -51,16 +51,16 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:------:|:----:|:---------------:|:--------:|
54
- | 0.2094 | 0.1001 | 904 | 0.1862 | 0.9676 |
55
- | 0.0148 | 1.1001 | 1808 | 0.0287 | 0.9913 |
56
- | 0.013 | 2.1001 | 2712 | 0.0247 | 0.9963 |
57
- | 0.0092 | 3.1001 | 3616 | 0.0133 | 0.9988 |
58
- | 0.0003 | 4.1001 | 4520 | 0.0111 | 0.9988 |
59
- | 0.0002 | 5.1001 | 5424 | 0.0138 | 0.9988 |
60
- | 0.0001 | 6.1001 | 6328 | 0.0142 | 0.9988 |
61
- | 0.0001 | 7.1001 | 7232 | 0.0143 | 0.9988 |
62
- | 0.0001 | 8.1001 | 8136 | 0.0144 | 0.9988 |
63
- | 0.0001 | 9.0990 | 9030 | 0.0144 | 0.9988 |
64
 
65
 
66
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [facebook/timesformer-base-finetuned-k400](https://huggingface.co/facebook/timesformer-base-finetuned-k400) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.8547
22
+ - Accuracy: 0.7665
23
 
24
  ## Model description
25
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:------:|:----:|:---------------:|:--------:|
54
+ | 0.2384 | 0.1001 | 904 | 0.2019 | 0.9639 |
55
+ | 0.0207 | 1.1001 | 1808 | 0.0325 | 0.9900 |
56
+ | 0.0116 | 2.1001 | 2712 | 0.0229 | 0.9963 |
57
+ | 0.0017 | 3.1001 | 3616 | 0.0222 | 0.9950 |
58
+ | 0.0156 | 4.1001 | 4520 | 0.0402 | 0.9900 |
59
+ | 0.0002 | 5.1001 | 5424 | 0.0083 | 0.9975 |
60
+ | 0.0001 | 6.1001 | 6328 | 0.0088 | 0.9975 |
61
+ | 0.0001 | 7.1001 | 7232 | 0.0094 | 0.9975 |
62
+ | 0.0001 | 8.1001 | 8136 | 0.0097 | 0.9975 |
63
+ | 0.0001 | 9.0990 | 9030 | 0.0098 | 0.9975 |
64
 
65
 
66
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.099003322259136,
3
- "eval_accuracy": 0.786833855799373,
4
- "eval_loss": 0.7324321866035461,
5
- "eval_runtime": 466.9382,
6
- "eval_samples_per_second": 2.733,
7
- "eval_steps_per_second": 0.343
8
  }
 
1
  {
2
  "epoch": 9.099003322259136,
3
+ "eval_accuracy": 0.7664576802507836,
4
+ "eval_loss": 0.8547028303146362,
5
+ "eval_runtime": 304.3291,
6
+ "eval_samples_per_second": 4.193,
7
+ "eval_steps_per_second": 0.526
8
  }
confusion_matrix_test_fold_3.png ADDED
confusion_matrix_valid_fold_3.png ADDED
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb470187a83ffe3c3b39648b83f147be196b17920fb192e45ea4169441d33ebe
3
  size 485250680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a92405eb1a2048bc73411fe4a8f83de063bcb7ea17e092cef106be338573e70d
3
  size 485250680
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.099003322259136,
3
- "eval_accuracy": 0.786833855799373,
4
- "eval_loss": 0.7324321866035461,
5
- "eval_runtime": 466.9382,
6
- "eval_samples_per_second": 2.733,
7
- "eval_steps_per_second": 0.343
8
  }
 
1
  {
2
  "epoch": 9.099003322259136,
3
+ "eval_accuracy": 0.7664576802507836,
4
+ "eval_loss": 0.8547028303146362,
5
+ "eval_runtime": 304.3291,
6
+ "eval_samples_per_second": 4.193,
7
+ "eval_steps_per_second": 0.526
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9987546699875467,
3
- "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/10 fold timesformer/Timesformer_default_fold_10_10_epoch_noAug_batch8/checkpoint-3616",
4
  "epoch": 9.099003322259136,
5
  "eval_steps": 500,
6
  "global_step": 9030,
@@ -10,740 +10,740 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.01107419712070875,
13
- "grad_norm": 12.277300834655762,
14
  "learning_rate": 5.537098560354375e-06,
15
- "loss": 4.1981,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.0221483942414175,
20
- "grad_norm": 13.852692604064941,
21
  "learning_rate": 1.107419712070875e-05,
22
- "loss": 3.8197,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.03322259136212625,
27
- "grad_norm": 16.045228958129883,
28
  "learning_rate": 1.6611295681063124e-05,
29
- "loss": 2.9761,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.044296788482835,
34
- "grad_norm": 12.293411254882812,
35
  "learning_rate": 2.21483942414175e-05,
36
- "loss": 2.068,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.05537098560354374,
41
- "grad_norm": 13.963603019714355,
42
  "learning_rate": 2.7685492801771873e-05,
43
- "loss": 1.2657,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.0664451827242525,
48
- "grad_norm": 9.983071327209473,
49
  "learning_rate": 3.322259136212625e-05,
50
- "loss": 0.8121,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.07751937984496124,
55
- "grad_norm": 9.858047485351562,
56
  "learning_rate": 3.875968992248062e-05,
57
- "loss": 0.4991,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.08859357696567,
62
- "grad_norm": 4.425586223602295,
63
  "learning_rate": 4.4296788482835e-05,
64
- "loss": 0.2899,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.09966777408637874,
69
- "grad_norm": 9.35398006439209,
70
  "learning_rate": 4.983388704318937e-05,
71
- "loss": 0.2094,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.10011074197120709,
76
- "eval_accuracy": 0.9676214196762142,
77
- "eval_loss": 0.1862427294254303,
78
- "eval_runtime": 254.6757,
79
- "eval_samples_per_second": 3.153,
80
- "eval_steps_per_second": 0.397,
81
  "step": 904
82
  },
83
  {
84
  "epoch": 1.0106312292358803,
85
- "grad_norm": 4.818030834197998,
86
  "learning_rate": 4.940322382182848e-05,
87
- "loss": 0.1397,
88
  "step": 1000
89
  },
90
  {
91
  "epoch": 1.0217054263565892,
92
- "grad_norm": 0.25109994411468506,
93
  "learning_rate": 4.878799064845577e-05,
94
- "loss": 0.0963,
95
  "step": 1100
96
  },
97
  {
98
  "epoch": 1.032779623477298,
99
- "grad_norm": 1.5516589879989624,
100
  "learning_rate": 4.8172757475083056e-05,
101
- "loss": 0.0711,
102
  "step": 1200
103
  },
104
  {
105
  "epoch": 1.0438538205980066,
106
- "grad_norm": 6.34712553024292,
107
  "learning_rate": 4.755752430171035e-05,
108
- "loss": 0.0844,
109
  "step": 1300
110
  },
111
  {
112
  "epoch": 1.0549280177187155,
113
- "grad_norm": 0.4094001352787018,
114
  "learning_rate": 4.694229112833764e-05,
115
- "loss": 0.0452,
116
  "step": 1400
117
  },
118
  {
119
  "epoch": 1.0660022148394241,
120
- "grad_norm": 0.5541088581085205,
121
  "learning_rate": 4.6327057954964936e-05,
122
- "loss": 0.0554,
123
  "step": 1500
124
  },
125
  {
126
  "epoch": 1.0770764119601328,
127
- "grad_norm": 0.06750523298978806,
128
  "learning_rate": 4.571182478159223e-05,
129
- "loss": 0.027,
130
  "step": 1600
131
  },
132
  {
133
  "epoch": 1.0881506090808417,
134
- "grad_norm": 0.0328923799097538,
135
  "learning_rate": 4.5096591608219516e-05,
136
- "loss": 0.0283,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 1.0992248062015504,
141
- "grad_norm": 0.020282607525587082,
142
  "learning_rate": 4.448135843484681e-05,
143
- "loss": 0.0148,
144
  "step": 1800
145
  },
146
  {
147
  "epoch": 1.100110741971207,
148
- "eval_accuracy": 0.9912826899128269,
149
- "eval_loss": 0.02874130941927433,
150
- "eval_runtime": 268.3856,
151
- "eval_samples_per_second": 2.992,
152
- "eval_steps_per_second": 0.376,
153
  "step": 1808
154
  },
155
  {
156
  "epoch": 2.010188261351052,
157
- "grad_norm": 0.013696623966097832,
158
  "learning_rate": 4.38661252614741e-05,
159
- "loss": 0.0125,
160
  "step": 1900
161
  },
162
  {
163
  "epoch": 2.0212624584717607,
164
- "grad_norm": 0.02626732923090458,
165
  "learning_rate": 4.325089208810139e-05,
166
- "loss": 0.0016,
167
  "step": 2000
168
  },
169
  {
170
  "epoch": 2.0323366555924696,
171
- "grad_norm": 0.021943334490060806,
172
  "learning_rate": 4.263565891472868e-05,
173
- "loss": 0.0028,
174
  "step": 2100
175
  },
176
  {
177
  "epoch": 2.0434108527131785,
178
- "grad_norm": 0.8382033109664917,
179
  "learning_rate": 4.2020425741355975e-05,
180
- "loss": 0.0286,
181
  "step": 2200
182
  },
183
  {
184
  "epoch": 2.054485049833887,
185
- "grad_norm": 0.06917964667081833,
186
  "learning_rate": 4.140519256798327e-05,
187
- "loss": 0.0148,
188
  "step": 2300
189
  },
190
  {
191
  "epoch": 2.065559246954596,
192
- "grad_norm": 0.0369587242603302,
193
  "learning_rate": 4.078995939461056e-05,
194
- "loss": 0.0137,
195
  "step": 2400
196
  },
197
  {
198
  "epoch": 2.0766334440753047,
199
- "grad_norm": 0.009241752326488495,
200
  "learning_rate": 4.0174726221237855e-05,
201
- "loss": 0.0111,
202
  "step": 2500
203
  },
204
  {
205
  "epoch": 2.087707641196013,
206
- "grad_norm": 0.023627281188964844,
207
  "learning_rate": 3.955949304786514e-05,
208
- "loss": 0.0074,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 2.098781838316722,
213
- "grad_norm": 0.014712699688971043,
214
  "learning_rate": 3.8944259874492434e-05,
215
- "loss": 0.013,
216
  "step": 2700
217
  },
218
  {
219
  "epoch": 2.100110741971207,
220
  "eval_accuracy": 0.9962640099626401,
221
- "eval_loss": 0.0246839988976717,
222
- "eval_runtime": 282.3356,
223
- "eval_samples_per_second": 2.844,
224
- "eval_steps_per_second": 0.358,
225
  "step": 2712
226
  },
227
  {
228
  "epoch": 3.009745293466224,
229
- "grad_norm": 0.016267647966742516,
230
  "learning_rate": 3.832902670111973e-05,
231
- "loss": 0.0011,
232
  "step": 2800
233
  },
234
  {
235
  "epoch": 3.0208194905869323,
236
- "grad_norm": 0.017402295023202896,
237
  "learning_rate": 3.7713793527747014e-05,
238
- "loss": 0.0056,
239
  "step": 2900
240
  },
241
  {
242
  "epoch": 3.0318936877076412,
243
- "grad_norm": 0.011280537582933903,
244
  "learning_rate": 3.7098560354374314e-05,
245
- "loss": 0.0326,
246
  "step": 3000
247
  },
248
  {
249
  "epoch": 3.04296788482835,
250
- "grad_norm": 0.021694917231798172,
251
  "learning_rate": 3.64833271810016e-05,
252
- "loss": 0.0096,
253
  "step": 3100
254
  },
255
  {
256
  "epoch": 3.0540420819490586,
257
- "grad_norm": 0.038911569863557816,
258
  "learning_rate": 3.5868094007628894e-05,
259
- "loss": 0.0182,
260
  "step": 3200
261
  },
262
  {
263
  "epoch": 3.0651162790697675,
264
- "grad_norm": 0.008056244812905788,
265
  "learning_rate": 3.525286083425619e-05,
266
- "loss": 0.0062,
267
  "step": 3300
268
  },
269
  {
270
  "epoch": 3.0761904761904764,
271
- "grad_norm": 0.05477767437696457,
272
  "learning_rate": 3.463762766088347e-05,
273
- "loss": 0.0108,
274
  "step": 3400
275
  },
276
  {
277
  "epoch": 3.087264673311185,
278
- "grad_norm": 0.025373445823788643,
279
  "learning_rate": 3.4022394487510767e-05,
280
- "loss": 0.0092,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 3.0983388704318937,
285
- "grad_norm": 0.005799598526209593,
286
  "learning_rate": 3.340716131413806e-05,
287
- "loss": 0.0092,
288
  "step": 3600
289
  },
290
  {
291
  "epoch": 3.100110741971207,
292
- "eval_accuracy": 0.9987546699875467,
293
- "eval_loss": 0.013343892991542816,
294
- "eval_runtime": 190.8463,
295
- "eval_samples_per_second": 4.208,
296
- "eval_steps_per_second": 0.529,
297
  "step": 3616
298
  },
299
  {
300
  "epoch": 4.0093023255813955,
301
- "grad_norm": 0.004197492729872465,
302
  "learning_rate": 3.2791928140765346e-05,
303
  "loss": 0.0005,
304
  "step": 3700
305
  },
306
  {
307
  "epoch": 4.020376522702104,
308
- "grad_norm": 0.004163427744060755,
309
  "learning_rate": 3.2176694967392646e-05,
310
- "loss": 0.0003,
311
  "step": 3800
312
  },
313
  {
314
  "epoch": 4.0314507198228124,
315
- "grad_norm": 0.006535384338349104,
316
  "learning_rate": 3.156146179401994e-05,
317
  "loss": 0.0003,
318
  "step": 3900
319
  },
320
  {
321
  "epoch": 4.042524916943521,
322
- "grad_norm": 0.0030044761952012777,
323
  "learning_rate": 3.0946228620647226e-05,
324
- "loss": 0.0003,
325
  "step": 4000
326
  },
327
  {
328
  "epoch": 4.05359911406423,
329
- "grad_norm": 0.0030457417014986277,
330
  "learning_rate": 3.033099544727452e-05,
331
- "loss": 0.0002,
332
  "step": 4100
333
  },
334
  {
335
  "epoch": 4.064673311184939,
336
- "grad_norm": 0.00839043315500021,
337
  "learning_rate": 2.971576227390181e-05,
338
- "loss": 0.011,
339
  "step": 4200
340
  },
341
  {
342
  "epoch": 4.075747508305648,
343
- "grad_norm": 0.0101741598919034,
344
  "learning_rate": 2.91005291005291e-05,
345
- "loss": 0.0045,
346
  "step": 4300
347
  },
348
  {
349
  "epoch": 4.086821705426357,
350
- "grad_norm": 0.0031424041371792555,
351
  "learning_rate": 2.8485295927156392e-05,
352
- "loss": 0.0006,
353
  "step": 4400
354
  },
355
  {
356
  "epoch": 4.097895902547065,
357
- "grad_norm": 0.0021799150854349136,
358
  "learning_rate": 2.787006275378369e-05,
359
- "loss": 0.0003,
360
  "step": 4500
361
  },
362
  {
363
  "epoch": 4.100110741971207,
364
- "eval_accuracy": 0.9987546699875467,
365
- "eval_loss": 0.01112725306302309,
366
- "eval_runtime": 187.0791,
367
- "eval_samples_per_second": 4.292,
368
- "eval_steps_per_second": 0.54,
369
  "step": 4520
370
  },
371
  {
372
  "epoch": 5.008859357696567,
373
- "grad_norm": 0.0032888855785131454,
374
  "learning_rate": 2.725482958041098e-05,
375
- "loss": 0.0002,
376
  "step": 4600
377
  },
378
  {
379
  "epoch": 5.019933554817276,
380
- "grad_norm": 0.0020696651190519333,
381
  "learning_rate": 2.6639596407038268e-05,
382
- "loss": 0.0002,
383
  "step": 4700
384
  },
385
  {
386
  "epoch": 5.0310077519379846,
387
- "grad_norm": 0.008721988648176193,
388
  "learning_rate": 2.602436323366556e-05,
389
- "loss": 0.0141,
390
  "step": 4800
391
  },
392
  {
393
  "epoch": 5.0420819490586934,
394
- "grad_norm": 0.004613637924194336,
395
  "learning_rate": 2.540913006029285e-05,
396
- "loss": 0.0008,
397
  "step": 4900
398
  },
399
  {
400
  "epoch": 5.053156146179402,
401
- "grad_norm": 0.013342674821615219,
402
  "learning_rate": 2.479389688692014e-05,
403
- "loss": 0.0052,
404
  "step": 5000
405
  },
406
  {
407
  "epoch": 5.06423034330011,
408
- "grad_norm": 0.008498313836753368,
409
  "learning_rate": 2.4178663713547438e-05,
410
- "loss": 0.0003,
411
  "step": 5100
412
  },
413
  {
414
  "epoch": 5.075304540420819,
415
- "grad_norm": 0.004820178262889385,
416
  "learning_rate": 2.3563430540174727e-05,
417
- "loss": 0.0002,
418
  "step": 5200
419
  },
420
  {
421
  "epoch": 5.086378737541528,
422
- "grad_norm": 0.0017530409386381507,
423
  "learning_rate": 2.2948197366802017e-05,
424
  "loss": 0.0002,
425
  "step": 5300
426
  },
427
  {
428
  "epoch": 5.097452934662237,
429
- "grad_norm": 0.0029693867545574903,
430
  "learning_rate": 2.233296419342931e-05,
431
  "loss": 0.0002,
432
  "step": 5400
433
  },
434
  {
435
  "epoch": 5.100110741971207,
436
- "eval_accuracy": 0.9987546699875467,
437
- "eval_loss": 0.01381352636963129,
438
- "eval_runtime": 190.9557,
439
- "eval_samples_per_second": 4.205,
440
- "eval_steps_per_second": 0.529,
441
  "step": 5424
442
  },
443
  {
444
  "epoch": 6.008416389811739,
445
- "grad_norm": 0.0020572063513100147,
446
  "learning_rate": 2.1717731020056604e-05,
447
  "loss": 0.0002,
448
  "step": 5500
449
  },
450
  {
451
  "epoch": 6.019490586932448,
452
- "grad_norm": 0.0013224915601313114,
453
  "learning_rate": 2.1102497846683894e-05,
454
  "loss": 0.0001,
455
  "step": 5600
456
  },
457
  {
458
  "epoch": 6.030564784053156,
459
- "grad_norm": 0.0022878097370266914,
460
  "learning_rate": 2.0487264673311183e-05,
461
  "loss": 0.0001,
462
  "step": 5700
463
  },
464
  {
465
  "epoch": 6.041638981173865,
466
- "grad_norm": 0.001476996229030192,
467
  "learning_rate": 1.987203149993848e-05,
468
  "loss": 0.0001,
469
  "step": 5800
470
  },
471
  {
472
  "epoch": 6.052713178294574,
473
- "grad_norm": 0.001298259710893035,
474
  "learning_rate": 1.925679832656577e-05,
475
  "loss": 0.0001,
476
  "step": 5900
477
  },
478
  {
479
  "epoch": 6.0637873754152825,
480
- "grad_norm": 0.0013261692365631461,
481
  "learning_rate": 1.864156515319306e-05,
482
  "loss": 0.0001,
483
  "step": 6000
484
  },
485
  {
486
  "epoch": 6.074861572535991,
487
- "grad_norm": 0.001477518817409873,
488
  "learning_rate": 1.8026331979820353e-05,
489
  "loss": 0.0001,
490
  "step": 6100
491
  },
492
  {
493
  "epoch": 6.0859357696567,
494
- "grad_norm": 0.00216505816206336,
495
  "learning_rate": 1.7411098806447646e-05,
496
  "loss": 0.0001,
497
  "step": 6200
498
  },
499
  {
500
  "epoch": 6.097009966777408,
501
- "grad_norm": 0.0018889505881816149,
502
  "learning_rate": 1.6795865633074936e-05,
503
  "loss": 0.0001,
504
  "step": 6300
505
  },
506
  {
507
  "epoch": 6.100110741971207,
508
- "eval_accuracy": 0.9987546699875467,
509
- "eval_loss": 0.014198515564203262,
510
- "eval_runtime": 183.8325,
511
- "eval_samples_per_second": 4.368,
512
- "eval_steps_per_second": 0.549,
513
  "step": 6328
514
  },
515
  {
516
  "epoch": 7.00797342192691,
517
- "grad_norm": 0.0011213194811716676,
518
  "learning_rate": 1.6180632459702226e-05,
519
  "loss": 0.0001,
520
  "step": 6400
521
  },
522
  {
523
  "epoch": 7.019047619047619,
524
- "grad_norm": 0.0013905749656260014,
525
  "learning_rate": 1.556539928632952e-05,
526
  "loss": 0.0001,
527
  "step": 6500
528
  },
529
  {
530
  "epoch": 7.030121816168328,
531
- "grad_norm": 0.0012657454935833812,
532
  "learning_rate": 1.4950166112956812e-05,
533
  "loss": 0.0001,
534
  "step": 6600
535
  },
536
  {
537
  "epoch": 7.041196013289037,
538
- "grad_norm": 0.0014162349980324507,
539
  "learning_rate": 1.4334932939584104e-05,
540
  "loss": 0.0001,
541
  "step": 6700
542
  },
543
  {
544
  "epoch": 7.052270210409746,
545
- "grad_norm": 0.0011958391405642033,
546
  "learning_rate": 1.3719699766211393e-05,
547
  "loss": 0.0001,
548
  "step": 6800
549
  },
550
  {
551
  "epoch": 7.063344407530454,
552
- "grad_norm": 0.0014284063363447785,
553
  "learning_rate": 1.3104466592838688e-05,
554
  "loss": 0.0001,
555
  "step": 6900
556
  },
557
  {
558
  "epoch": 7.074418604651163,
559
- "grad_norm": 0.003346965415403247,
560
  "learning_rate": 1.2489233419465978e-05,
561
  "loss": 0.0001,
562
  "step": 7000
563
  },
564
  {
565
  "epoch": 7.0854928017718715,
566
- "grad_norm": 0.0008867617580108345,
567
  "learning_rate": 1.187400024609327e-05,
568
  "loss": 0.0001,
569
  "step": 7100
570
  },
571
  {
572
  "epoch": 7.09656699889258,
573
- "grad_norm": 0.0012929540826007724,
574
  "learning_rate": 1.1258767072720563e-05,
575
  "loss": 0.0001,
576
  "step": 7200
577
  },
578
  {
579
  "epoch": 7.100110741971207,
580
- "eval_accuracy": 0.9987546699875467,
581
- "eval_loss": 0.014337223954498768,
582
- "eval_runtime": 191.8755,
583
- "eval_samples_per_second": 4.185,
584
- "eval_steps_per_second": 0.526,
585
  "step": 7232
586
  },
587
  {
588
  "epoch": 8.007530454042081,
589
- "grad_norm": 0.0015993459383025765,
590
  "learning_rate": 1.0643533899347853e-05,
591
  "loss": 0.0001,
592
  "step": 7300
593
  },
594
  {
595
  "epoch": 8.018604651162791,
596
- "grad_norm": 0.0008497968083247542,
597
  "learning_rate": 1.0028300725975146e-05,
598
  "loss": 0.0001,
599
  "step": 7400
600
  },
601
  {
602
  "epoch": 8.029678848283499,
603
- "grad_norm": 0.0010410414543002844,
604
  "learning_rate": 9.413067552602436e-06,
605
  "loss": 0.0001,
606
  "step": 7500
607
  },
608
  {
609
  "epoch": 8.040753045404209,
610
- "grad_norm": 0.0010427006054669619,
611
  "learning_rate": 8.797834379229729e-06,
612
  "loss": 0.0001,
613
  "step": 7600
614
  },
615
  {
616
  "epoch": 8.051827242524917,
617
- "grad_norm": 0.0008805838297121227,
618
  "learning_rate": 8.18260120585702e-06,
619
  "loss": 0.0001,
620
  "step": 7700
621
  },
622
  {
623
  "epoch": 8.062901439645625,
624
- "grad_norm": 0.0008669462986290455,
625
  "learning_rate": 7.567368032484312e-06,
626
  "loss": 0.0001,
627
  "step": 7800
628
  },
629
  {
630
  "epoch": 8.073975636766335,
631
- "grad_norm": 0.0008293281425721943,
632
  "learning_rate": 6.952134859111603e-06,
633
  "loss": 0.0001,
634
  "step": 7900
635
  },
636
  {
637
  "epoch": 8.085049833887043,
638
- "grad_norm": 0.0006111183320172131,
639
  "learning_rate": 6.336901685738895e-06,
640
  "loss": 0.0001,
641
  "step": 8000
642
  },
643
  {
644
  "epoch": 8.096124031007752,
645
- "grad_norm": 0.000657613156363368,
646
  "learning_rate": 5.7216685123661875e-06,
647
  "loss": 0.0001,
648
  "step": 8100
649
  },
650
  {
651
  "epoch": 8.100110741971207,
652
- "eval_accuracy": 0.9987546699875467,
653
- "eval_loss": 0.014379492029547691,
654
- "eval_runtime": 202.2981,
655
- "eval_samples_per_second": 3.969,
656
- "eval_steps_per_second": 0.499,
657
  "step": 8136
658
  },
659
  {
660
  "epoch": 9.007087486157253,
661
- "grad_norm": 0.0013073045993223786,
662
  "learning_rate": 5.106435338993479e-06,
663
  "loss": 0.0001,
664
  "step": 8200
665
  },
666
  {
667
  "epoch": 9.018161683277963,
668
- "grad_norm": 0.0009112409316003323,
669
  "learning_rate": 4.4912021656207705e-06,
670
  "loss": 0.0001,
671
  "step": 8300
672
  },
673
  {
674
  "epoch": 9.029235880398671,
675
- "grad_norm": 0.0008883228874765337,
676
  "learning_rate": 3.875968992248062e-06,
677
  "loss": 0.0001,
678
  "step": 8400
679
  },
680
  {
681
  "epoch": 9.04031007751938,
682
- "grad_norm": 0.0008643298642709851,
683
  "learning_rate": 3.260735818875354e-06,
684
  "loss": 0.0001,
685
  "step": 8500
686
  },
687
  {
688
  "epoch": 9.051384274640089,
689
- "grad_norm": 0.0008729179389774799,
690
  "learning_rate": 2.6455026455026455e-06,
691
  "loss": 0.0001,
692
  "step": 8600
693
  },
694
  {
695
  "epoch": 9.062458471760797,
696
- "grad_norm": 0.0009373608627356589,
697
  "learning_rate": 2.0302694721299375e-06,
698
  "loss": 0.0001,
699
  "step": 8700
700
  },
701
  {
702
  "epoch": 9.073532668881507,
703
- "grad_norm": 0.000793347368016839,
704
  "learning_rate": 1.415036298757229e-06,
705
  "loss": 0.0001,
706
  "step": 8800
707
  },
708
  {
709
  "epoch": 9.084606866002215,
710
- "grad_norm": 0.0008423990220762789,
711
  "learning_rate": 7.998031253845208e-07,
712
  "loss": 0.0001,
713
  "step": 8900
714
  },
715
  {
716
  "epoch": 9.095681063122923,
717
- "grad_norm": 0.000606866495218128,
718
  "learning_rate": 1.8456995201181249e-07,
719
  "loss": 0.0001,
720
  "step": 9000
721
  },
722
  {
723
  "epoch": 9.099003322259136,
724
- "eval_accuracy": 0.9987546699875467,
725
- "eval_loss": 0.014405355788767338,
726
- "eval_runtime": 197.2284,
727
- "eval_samples_per_second": 4.071,
728
- "eval_steps_per_second": 0.512,
729
  "step": 9030
730
  },
731
  {
732
  "epoch": 9.099003322259136,
733
  "step": 9030,
734
  "total_flos": 6.328460909097596e+19,
735
- "train_loss": 0.18771534425460198,
736
- "train_runtime": 26926.0352,
737
- "train_samples_per_second": 2.683,
738
- "train_steps_per_second": 0.335
739
  },
740
  {
741
  "epoch": 9.099003322259136,
742
- "eval_accuracy": 0.786833855799373,
743
- "eval_loss": 0.7324321866035461,
744
- "eval_runtime": 466.9382,
745
- "eval_samples_per_second": 2.733,
746
- "eval_steps_per_second": 0.343,
747
  "step": 9030
748
  }
749
  ],
 
1
  {
2
+ "best_metric": 0.9975093399750934,
3
+ "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/10 fold timesformer/Timesformer_default_fold_10_10_epoch_noAug_batch8/checkpoint-5424",
4
  "epoch": 9.099003322259136,
5
  "eval_steps": 500,
6
  "global_step": 9030,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.01107419712070875,
13
+ "grad_norm": 12.339608192443848,
14
  "learning_rate": 5.537098560354375e-06,
15
+ "loss": 4.1774,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.0221483942414175,
20
+ "grad_norm": 14.543400764465332,
21
  "learning_rate": 1.107419712070875e-05,
22
+ "loss": 3.8205,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.03322259136212625,
27
+ "grad_norm": 15.178791046142578,
28
  "learning_rate": 1.6611295681063124e-05,
29
+ "loss": 2.9812,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.044296788482835,
34
+ "grad_norm": 12.90939712524414,
35
  "learning_rate": 2.21483942414175e-05,
36
+ "loss": 2.072,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.05537098560354374,
41
+ "grad_norm": 12.425464630126953,
42
  "learning_rate": 2.7685492801771873e-05,
43
+ "loss": 1.265,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.0664451827242525,
48
+ "grad_norm": 7.096066474914551,
49
  "learning_rate": 3.322259136212625e-05,
50
+ "loss": 0.8079,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.07751937984496124,
55
+ "grad_norm": 9.374312400817871,
56
  "learning_rate": 3.875968992248062e-05,
57
+ "loss": 0.4583,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.08859357696567,
62
+ "grad_norm": 2.866607904434204,
63
  "learning_rate": 4.4296788482835e-05,
64
+ "loss": 0.2863,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.09966777408637874,
69
+ "grad_norm": 9.150209426879883,
70
  "learning_rate": 4.983388704318937e-05,
71
+ "loss": 0.2384,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.10011074197120709,
76
+ "eval_accuracy": 0.9638854296388543,
77
+ "eval_loss": 0.20189498364925385,
78
+ "eval_runtime": 306.3851,
79
+ "eval_samples_per_second": 2.621,
80
+ "eval_steps_per_second": 0.33,
81
  "step": 904
82
  },
83
  {
84
  "epoch": 1.0106312292358803,
85
+ "grad_norm": 0.4338759779930115,
86
  "learning_rate": 4.940322382182848e-05,
87
+ "loss": 0.1309,
88
  "step": 1000
89
  },
90
  {
91
  "epoch": 1.0217054263565892,
92
+ "grad_norm": 0.7621960639953613,
93
  "learning_rate": 4.878799064845577e-05,
94
+ "loss": 0.1036,
95
  "step": 1100
96
  },
97
  {
98
  "epoch": 1.032779623477298,
99
+ "grad_norm": 0.665744960308075,
100
  "learning_rate": 4.8172757475083056e-05,
101
+ "loss": 0.075,
102
  "step": 1200
103
  },
104
  {
105
  "epoch": 1.0438538205980066,
106
+ "grad_norm": 11.760805130004883,
107
  "learning_rate": 4.755752430171035e-05,
108
+ "loss": 0.0808,
109
  "step": 1300
110
  },
111
  {
112
  "epoch": 1.0549280177187155,
113
+ "grad_norm": 2.482733726501465,
114
  "learning_rate": 4.694229112833764e-05,
115
+ "loss": 0.0308,
116
  "step": 1400
117
  },
118
  {
119
  "epoch": 1.0660022148394241,
120
+ "grad_norm": 0.05557706952095032,
121
  "learning_rate": 4.6327057954964936e-05,
122
+ "loss": 0.0475,
123
  "step": 1500
124
  },
125
  {
126
  "epoch": 1.0770764119601328,
127
+ "grad_norm": 0.05685936659574509,
128
  "learning_rate": 4.571182478159223e-05,
129
+ "loss": 0.0211,
130
  "step": 1600
131
  },
132
  {
133
  "epoch": 1.0881506090808417,
134
+ "grad_norm": 0.07164409011602402,
135
  "learning_rate": 4.5096591608219516e-05,
136
+ "loss": 0.0367,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 1.0992248062015504,
141
+ "grad_norm": 0.07703804969787598,
142
  "learning_rate": 4.448135843484681e-05,
143
+ "loss": 0.0207,
144
  "step": 1800
145
  },
146
  {
147
  "epoch": 1.100110741971207,
148
+ "eval_accuracy": 0.9900373599003736,
149
+ "eval_loss": 0.03251923993229866,
150
+ "eval_runtime": 319.7386,
151
+ "eval_samples_per_second": 2.511,
152
+ "eval_steps_per_second": 0.316,
153
  "step": 1808
154
  },
155
  {
156
  "epoch": 2.010188261351052,
157
+ "grad_norm": 0.049545079469680786,
158
  "learning_rate": 4.38661252614741e-05,
159
+ "loss": 0.0066,
160
  "step": 1900
161
  },
162
  {
163
  "epoch": 2.0212624584717607,
164
+ "grad_norm": 0.023045338690280914,
165
  "learning_rate": 4.325089208810139e-05,
166
+ "loss": 0.0035,
167
  "step": 2000
168
  },
169
  {
170
  "epoch": 2.0323366555924696,
171
+ "grad_norm": 0.016869375482201576,
172
  "learning_rate": 4.263565891472868e-05,
173
+ "loss": 0.0162,
174
  "step": 2100
175
  },
176
  {
177
  "epoch": 2.0434108527131785,
178
+ "grad_norm": 0.029845820739865303,
179
  "learning_rate": 4.2020425741355975e-05,
180
+ "loss": 0.0185,
181
  "step": 2200
182
  },
183
  {
184
  "epoch": 2.054485049833887,
185
+ "grad_norm": 0.009218129329383373,
186
  "learning_rate": 4.140519256798327e-05,
187
+ "loss": 0.0109,
188
  "step": 2300
189
  },
190
  {
191
  "epoch": 2.065559246954596,
192
+ "grad_norm": 0.07040040194988251,
193
  "learning_rate": 4.078995939461056e-05,
194
+ "loss": 0.0023,
195
  "step": 2400
196
  },
197
  {
198
  "epoch": 2.0766334440753047,
199
+ "grad_norm": 0.12176311016082764,
200
  "learning_rate": 4.0174726221237855e-05,
201
+ "loss": 0.0258,
202
  "step": 2500
203
  },
204
  {
205
  "epoch": 2.087707641196013,
206
+ "grad_norm": 0.01268716063350439,
207
  "learning_rate": 3.955949304786514e-05,
208
+ "loss": 0.0098,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 2.098781838316722,
213
+ "grad_norm": 0.03558783233165741,
214
  "learning_rate": 3.8944259874492434e-05,
215
+ "loss": 0.0116,
216
  "step": 2700
217
  },
218
  {
219
  "epoch": 2.100110741971207,
220
  "eval_accuracy": 0.9962640099626401,
221
+ "eval_loss": 0.02290569432079792,
222
+ "eval_runtime": 324.5071,
223
+ "eval_samples_per_second": 2.475,
224
+ "eval_steps_per_second": 0.311,
225
  "step": 2712
226
  },
227
  {
228
  "epoch": 3.009745293466224,
229
+ "grad_norm": 0.021466247737407684,
230
  "learning_rate": 3.832902670111973e-05,
231
+ "loss": 0.0169,
232
  "step": 2800
233
  },
234
  {
235
  "epoch": 3.0208194905869323,
236
+ "grad_norm": 0.014765599742531776,
237
  "learning_rate": 3.7713793527747014e-05,
238
+ "loss": 0.0008,
239
  "step": 2900
240
  },
241
  {
242
  "epoch": 3.0318936877076412,
243
+ "grad_norm": 0.012668099254369736,
244
  "learning_rate": 3.7098560354374314e-05,
245
+ "loss": 0.0021,
246
  "step": 3000
247
  },
248
  {
249
  "epoch": 3.04296788482835,
250
+ "grad_norm": 0.007325606886297464,
251
  "learning_rate": 3.64833271810016e-05,
252
+ "loss": 0.0043,
253
  "step": 3100
254
  },
255
  {
256
  "epoch": 3.0540420819490586,
257
+ "grad_norm": 0.010318818502128124,
258
  "learning_rate": 3.5868094007628894e-05,
259
+ "loss": 0.0119,
260
  "step": 3200
261
  },
262
  {
263
  "epoch": 3.0651162790697675,
264
+ "grad_norm": 0.00951201282441616,
265
  "learning_rate": 3.525286083425619e-05,
266
+ "loss": 0.0006,
267
  "step": 3300
268
  },
269
  {
270
  "epoch": 3.0761904761904764,
271
+ "grad_norm": 0.0036382139660418034,
272
  "learning_rate": 3.463762766088347e-05,
273
+ "loss": 0.0006,
274
  "step": 3400
275
  },
276
  {
277
  "epoch": 3.087264673311185,
278
+ "grad_norm": 0.014880606904625893,
279
  "learning_rate": 3.4022394487510767e-05,
280
+ "loss": 0.0112,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 3.0983388704318937,
285
+ "grad_norm": 0.02532036043703556,
286
  "learning_rate": 3.340716131413806e-05,
287
+ "loss": 0.0017,
288
  "step": 3600
289
  },
290
  {
291
  "epoch": 3.100110741971207,
292
+ "eval_accuracy": 0.9950186799501868,
293
+ "eval_loss": 0.022232333198189735,
294
+ "eval_runtime": 321.5404,
295
+ "eval_samples_per_second": 2.497,
296
+ "eval_steps_per_second": 0.314,
297
  "step": 3616
298
  },
299
  {
300
  "epoch": 4.0093023255813955,
301
+ "grad_norm": 0.006754144560545683,
302
  "learning_rate": 3.2791928140765346e-05,
303
  "loss": 0.0005,
304
  "step": 3700
305
  },
306
  {
307
  "epoch": 4.020376522702104,
308
+ "grad_norm": 0.005147715099155903,
309
  "learning_rate": 3.2176694967392646e-05,
310
+ "loss": 0.0149,
311
  "step": 3800
312
  },
313
  {
314
  "epoch": 4.0314507198228124,
315
+ "grad_norm": 0.007229967508465052,
316
  "learning_rate": 3.156146179401994e-05,
317
  "loss": 0.0003,
318
  "step": 3900
319
  },
320
  {
321
  "epoch": 4.042524916943521,
322
+ "grad_norm": 0.014596535824239254,
323
  "learning_rate": 3.0946228620647226e-05,
324
+ "loss": 0.0004,
325
  "step": 4000
326
  },
327
  {
328
  "epoch": 4.05359911406423,
329
+ "grad_norm": 0.0032493751496076584,
330
  "learning_rate": 3.033099544727452e-05,
331
+ "loss": 0.0003,
332
  "step": 4100
333
  },
334
  {
335
  "epoch": 4.064673311184939,
336
+ "grad_norm": 0.004764176905155182,
337
  "learning_rate": 2.971576227390181e-05,
338
+ "loss": 0.0002,
339
  "step": 4200
340
  },
341
  {
342
  "epoch": 4.075747508305648,
343
+ "grad_norm": 0.005187960807234049,
344
  "learning_rate": 2.91005291005291e-05,
345
+ "loss": 0.0002,
346
  "step": 4300
347
  },
348
  {
349
  "epoch": 4.086821705426357,
350
+ "grad_norm": 0.008601618930697441,
351
  "learning_rate": 2.8485295927156392e-05,
352
+ "loss": 0.0004,
353
  "step": 4400
354
  },
355
  {
356
  "epoch": 4.097895902547065,
357
+ "grad_norm": 0.5019575357437134,
358
  "learning_rate": 2.787006275378369e-05,
359
+ "loss": 0.0156,
360
  "step": 4500
361
  },
362
  {
363
  "epoch": 4.100110741971207,
364
+ "eval_accuracy": 0.9900373599003736,
365
+ "eval_loss": 0.04016907513141632,
366
+ "eval_runtime": 316.7446,
367
+ "eval_samples_per_second": 2.535,
368
+ "eval_steps_per_second": 0.319,
369
  "step": 4520
370
  },
371
  {
372
  "epoch": 5.008859357696567,
373
+ "grad_norm": 0.09171910583972931,
374
  "learning_rate": 2.725482958041098e-05,
375
+ "loss": 0.0379,
376
  "step": 4600
377
  },
378
  {
379
  "epoch": 5.019933554817276,
380
+ "grad_norm": 0.012514377012848854,
381
  "learning_rate": 2.6639596407038268e-05,
382
+ "loss": 0.0049,
383
  "step": 4700
384
  },
385
  {
386
  "epoch": 5.0310077519379846,
387
+ "grad_norm": 0.009760179556906223,
388
  "learning_rate": 2.602436323366556e-05,
389
+ "loss": 0.0008,
390
  "step": 4800
391
  },
392
  {
393
  "epoch": 5.0420819490586934,
394
+ "grad_norm": 0.00494693499058485,
395
  "learning_rate": 2.540913006029285e-05,
396
+ "loss": 0.0094,
397
  "step": 4900
398
  },
399
  {
400
  "epoch": 5.053156146179402,
401
+ "grad_norm": 0.003839722601696849,
402
  "learning_rate": 2.479389688692014e-05,
403
+ "loss": 0.0003,
404
  "step": 5000
405
  },
406
  {
407
  "epoch": 5.06423034330011,
408
+ "grad_norm": 0.004592357203364372,
409
  "learning_rate": 2.4178663713547438e-05,
410
+ "loss": 0.0002,
411
  "step": 5100
412
  },
413
  {
414
  "epoch": 5.075304540420819,
415
+ "grad_norm": 0.00357171637006104,
416
  "learning_rate": 2.3563430540174727e-05,
417
+ "loss": 0.0003,
418
  "step": 5200
419
  },
420
  {
421
  "epoch": 5.086378737541528,
422
+ "grad_norm": 0.004462802782654762,
423
  "learning_rate": 2.2948197366802017e-05,
424
  "loss": 0.0002,
425
  "step": 5300
426
  },
427
  {
428
  "epoch": 5.097452934662237,
429
+ "grad_norm": 0.002604804467409849,
430
  "learning_rate": 2.233296419342931e-05,
431
  "loss": 0.0002,
432
  "step": 5400
433
  },
434
  {
435
  "epoch": 5.100110741971207,
436
+ "eval_accuracy": 0.9975093399750934,
437
+ "eval_loss": 0.008347271010279655,
438
+ "eval_runtime": 311.1635,
439
+ "eval_samples_per_second": 2.581,
440
+ "eval_steps_per_second": 0.325,
441
  "step": 5424
442
  },
443
  {
444
  "epoch": 6.008416389811739,
445
+ "grad_norm": 0.0028000217862427235,
446
  "learning_rate": 2.1717731020056604e-05,
447
  "loss": 0.0002,
448
  "step": 5500
449
  },
450
  {
451
  "epoch": 6.019490586932448,
452
+ "grad_norm": 0.0019141006050631404,
453
  "learning_rate": 2.1102497846683894e-05,
454
  "loss": 0.0001,
455
  "step": 5600
456
  },
457
  {
458
  "epoch": 6.030564784053156,
459
+ "grad_norm": 0.0038908650167286396,
460
  "learning_rate": 2.0487264673311183e-05,
461
  "loss": 0.0001,
462
  "step": 5700
463
  },
464
  {
465
  "epoch": 6.041638981173865,
466
+ "grad_norm": 0.0013375241542235017,
467
  "learning_rate": 1.987203149993848e-05,
468
  "loss": 0.0001,
469
  "step": 5800
470
  },
471
  {
472
  "epoch": 6.052713178294574,
473
+ "grad_norm": 0.0016891614068299532,
474
  "learning_rate": 1.925679832656577e-05,
475
  "loss": 0.0001,
476
  "step": 5900
477
  },
478
  {
479
  "epoch": 6.0637873754152825,
480
+ "grad_norm": 0.0015141346957534552,
481
  "learning_rate": 1.864156515319306e-05,
482
  "loss": 0.0001,
483
  "step": 6000
484
  },
485
  {
486
  "epoch": 6.074861572535991,
487
+ "grad_norm": 0.0012503410689532757,
488
  "learning_rate": 1.8026331979820353e-05,
489
  "loss": 0.0001,
490
  "step": 6100
491
  },
492
  {
493
  "epoch": 6.0859357696567,
494
+ "grad_norm": 0.0019348779460415244,
495
  "learning_rate": 1.7411098806447646e-05,
496
  "loss": 0.0001,
497
  "step": 6200
498
  },
499
  {
500
  "epoch": 6.097009966777408,
501
+ "grad_norm": 0.001840105396695435,
502
  "learning_rate": 1.6795865633074936e-05,
503
  "loss": 0.0001,
504
  "step": 6300
505
  },
506
  {
507
  "epoch": 6.100110741971207,
508
+ "eval_accuracy": 0.9975093399750934,
509
+ "eval_loss": 0.008798662573099136,
510
+ "eval_runtime": 315.9616,
511
+ "eval_samples_per_second": 2.541,
512
+ "eval_steps_per_second": 0.32,
513
  "step": 6328
514
  },
515
  {
516
  "epoch": 7.00797342192691,
517
+ "grad_norm": 0.0018666000105440617,
518
  "learning_rate": 1.6180632459702226e-05,
519
  "loss": 0.0001,
520
  "step": 6400
521
  },
522
  {
523
  "epoch": 7.019047619047619,
524
+ "grad_norm": 0.0011329470435157418,
525
  "learning_rate": 1.556539928632952e-05,
526
  "loss": 0.0001,
527
  "step": 6500
528
  },
529
  {
530
  "epoch": 7.030121816168328,
531
+ "grad_norm": 0.0014819415519014,
532
  "learning_rate": 1.4950166112956812e-05,
533
  "loss": 0.0001,
534
  "step": 6600
535
  },
536
  {
537
  "epoch": 7.041196013289037,
538
+ "grad_norm": 0.0013377605937421322,
539
  "learning_rate": 1.4334932939584104e-05,
540
  "loss": 0.0001,
541
  "step": 6700
542
  },
543
  {
544
  "epoch": 7.052270210409746,
545
+ "grad_norm": 0.0014949695905670524,
546
  "learning_rate": 1.3719699766211393e-05,
547
  "loss": 0.0001,
548
  "step": 6800
549
  },
550
  {
551
  "epoch": 7.063344407530454,
552
+ "grad_norm": 0.0020517068915069103,
553
  "learning_rate": 1.3104466592838688e-05,
554
  "loss": 0.0001,
555
  "step": 6900
556
  },
557
  {
558
  "epoch": 7.074418604651163,
559
+ "grad_norm": 0.0016913407016545534,
560
  "learning_rate": 1.2489233419465978e-05,
561
  "loss": 0.0001,
562
  "step": 7000
563
  },
564
  {
565
  "epoch": 7.0854928017718715,
566
+ "grad_norm": 0.0019159069051966071,
567
  "learning_rate": 1.187400024609327e-05,
568
  "loss": 0.0001,
569
  "step": 7100
570
  },
571
  {
572
  "epoch": 7.09656699889258,
573
+ "grad_norm": 0.0017233892576768994,
574
  "learning_rate": 1.1258767072720563e-05,
575
  "loss": 0.0001,
576
  "step": 7200
577
  },
578
  {
579
  "epoch": 7.100110741971207,
580
+ "eval_accuracy": 0.9975093399750934,
581
+ "eval_loss": 0.009401123970746994,
582
+ "eval_runtime": 205.4199,
583
+ "eval_samples_per_second": 3.909,
584
+ "eval_steps_per_second": 0.492,
585
  "step": 7232
586
  },
587
  {
588
  "epoch": 8.007530454042081,
589
+ "grad_norm": 0.0016070300480350852,
590
  "learning_rate": 1.0643533899347853e-05,
591
  "loss": 0.0001,
592
  "step": 7300
593
  },
594
  {
595
  "epoch": 8.018604651162791,
596
+ "grad_norm": 0.0009037270210683346,
597
  "learning_rate": 1.0028300725975146e-05,
598
  "loss": 0.0001,
599
  "step": 7400
600
  },
601
  {
602
  "epoch": 8.029678848283499,
603
+ "grad_norm": 0.0009796767262741923,
604
  "learning_rate": 9.413067552602436e-06,
605
  "loss": 0.0001,
606
  "step": 7500
607
  },
608
  {
609
  "epoch": 8.040753045404209,
610
+ "grad_norm": 0.009153931401669979,
611
  "learning_rate": 8.797834379229729e-06,
612
  "loss": 0.0001,
613
  "step": 7600
614
  },
615
  {
616
  "epoch": 8.051827242524917,
617
+ "grad_norm": 0.0009286152780987322,
618
  "learning_rate": 8.18260120585702e-06,
619
  "loss": 0.0001,
620
  "step": 7700
621
  },
622
  {
623
  "epoch": 8.062901439645625,
624
+ "grad_norm": 0.0012453129747882485,
625
  "learning_rate": 7.567368032484312e-06,
626
  "loss": 0.0001,
627
  "step": 7800
628
  },
629
  {
630
  "epoch": 8.073975636766335,
631
+ "grad_norm": 0.000763101561460644,
632
  "learning_rate": 6.952134859111603e-06,
633
  "loss": 0.0001,
634
  "step": 7900
635
  },
636
  {
637
  "epoch": 8.085049833887043,
638
+ "grad_norm": 0.0010468490654602647,
639
  "learning_rate": 6.336901685738895e-06,
640
  "loss": 0.0001,
641
  "step": 8000
642
  },
643
  {
644
  "epoch": 8.096124031007752,
645
+ "grad_norm": 0.0007441536872647703,
646
  "learning_rate": 5.7216685123661875e-06,
647
  "loss": 0.0001,
648
  "step": 8100
649
  },
650
  {
651
  "epoch": 8.100110741971207,
652
+ "eval_accuracy": 0.9975093399750934,
653
+ "eval_loss": 0.009709909558296204,
654
+ "eval_runtime": 213.0126,
655
+ "eval_samples_per_second": 3.77,
656
+ "eval_steps_per_second": 0.474,
657
  "step": 8136
658
  },
659
  {
660
  "epoch": 9.007087486157253,
661
+ "grad_norm": 0.0011277415324002504,
662
  "learning_rate": 5.106435338993479e-06,
663
  "loss": 0.0001,
664
  "step": 8200
665
  },
666
  {
667
  "epoch": 9.018161683277963,
668
+ "grad_norm": 0.0012697929050773382,
669
  "learning_rate": 4.4912021656207705e-06,
670
  "loss": 0.0001,
671
  "step": 8300
672
  },
673
  {
674
  "epoch": 9.029235880398671,
675
+ "grad_norm": 0.0028499774634838104,
676
  "learning_rate": 3.875968992248062e-06,
677
  "loss": 0.0001,
678
  "step": 8400
679
  },
680
  {
681
  "epoch": 9.04031007751938,
682
+ "grad_norm": 0.0017348791006952524,
683
  "learning_rate": 3.260735818875354e-06,
684
  "loss": 0.0001,
685
  "step": 8500
686
  },
687
  {
688
  "epoch": 9.051384274640089,
689
+ "grad_norm": 0.0006883046007715166,
690
  "learning_rate": 2.6455026455026455e-06,
691
  "loss": 0.0001,
692
  "step": 8600
693
  },
694
  {
695
  "epoch": 9.062458471760797,
696
+ "grad_norm": 0.00142951391171664,
697
  "learning_rate": 2.0302694721299375e-06,
698
  "loss": 0.0001,
699
  "step": 8700
700
  },
701
  {
702
  "epoch": 9.073532668881507,
703
+ "grad_norm": 0.000702825200278312,
704
  "learning_rate": 1.415036298757229e-06,
705
  "loss": 0.0001,
706
  "step": 8800
707
  },
708
  {
709
  "epoch": 9.084606866002215,
710
+ "grad_norm": 0.0009480651351623237,
711
  "learning_rate": 7.998031253845208e-07,
712
  "loss": 0.0001,
713
  "step": 8900
714
  },
715
  {
716
  "epoch": 9.095681063122923,
717
+ "grad_norm": 0.0010264083975926042,
718
  "learning_rate": 1.8456995201181249e-07,
719
  "loss": 0.0001,
720
  "step": 9000
721
  },
722
  {
723
  "epoch": 9.099003322259136,
724
+ "eval_accuracy": 0.9975093399750934,
725
+ "eval_loss": 0.009837848134338856,
726
+ "eval_runtime": 215.3394,
727
+ "eval_samples_per_second": 3.729,
728
+ "eval_steps_per_second": 0.469,
729
  "step": 9030
730
  },
731
  {
732
  "epoch": 9.099003322259136,
733
  "step": 9030,
734
  "total_flos": 6.328460909097596e+19,
735
+ "train_loss": 0.18714786747627524,
736
+ "train_runtime": 33340.824,
737
+ "train_samples_per_second": 2.167,
738
+ "train_steps_per_second": 0.271
739
  },
740
  {
741
  "epoch": 9.099003322259136,
742
+ "eval_accuracy": 0.7664576802507836,
743
+ "eval_loss": 0.8547028303146362,
744
+ "eval_runtime": 304.3291,
745
+ "eval_samples_per_second": 4.193,
746
+ "eval_steps_per_second": 0.526,
747
  "step": 9030
748
  }
749
  ],