Shawon16 commited on
Commit
881a317
·
verified ·
1 Parent(s): 29544ff

End of training

Browse files
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [facebook/timesformer-base-finetuned-k400](https://huggingface.co/facebook/timesformer-base-finetuned-k400) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.8547
22
- - Accuracy: 0.7665
23
 
24
  ## Model description
25
 
@@ -51,16 +51,16 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:------:|:----:|:---------------:|:--------:|
54
- | 0.2384 | 0.1001 | 904 | 0.2019 | 0.9639 |
55
- | 0.0207 | 1.1001 | 1808 | 0.0325 | 0.9900 |
56
- | 0.0116 | 2.1001 | 2712 | 0.0229 | 0.9963 |
57
- | 0.0017 | 3.1001 | 3616 | 0.0222 | 0.9950 |
58
- | 0.0156 | 4.1001 | 4520 | 0.0402 | 0.9900 |
59
- | 0.0002 | 5.1001 | 5424 | 0.0083 | 0.9975 |
60
- | 0.0001 | 6.1001 | 6328 | 0.0088 | 0.9975 |
61
- | 0.0001 | 7.1001 | 7232 | 0.0094 | 0.9975 |
62
- | 0.0001 | 8.1001 | 8136 | 0.0097 | 0.9975 |
63
- | 0.0001 | 9.0990 | 9030 | 0.0098 | 0.9975 |
64
 
65
 
66
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [facebook/timesformer-base-finetuned-k400](https://huggingface.co/facebook/timesformer-base-finetuned-k400) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.7984
22
+ - Accuracy: 0.7774
23
 
24
  ## Model description
25
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:------:|:----:|:---------------:|:--------:|
54
+ | 0.2704 | 0.1001 | 904 | 0.2209 | 0.9552 |
55
+ | 0.0155 | 1.1001 | 1808 | 0.0417 | 0.9913 |
56
+ | 0.0021 | 2.1001 | 2712 | 0.0268 | 0.9938 |
57
+ | 0.0004 | 3.1001 | 3616 | 0.0116 | 0.9975 |
58
+ | 0.0125 | 4.1001 | 4520 | 0.0085 | 0.9963 |
59
+ | 0.0043 | 5.1001 | 5424 | 0.0280 | 0.9913 |
60
+ | 0.0002 | 6.1001 | 6328 | 0.0100 | 0.9975 |
61
+ | 0.0001 | 7.1001 | 7232 | 0.0030 | 0.9988 |
62
+ | 0.0001 | 8.1001 | 8136 | 0.0023 | 1.0 |
63
+ | 0.0001 | 9.0990 | 9030 | 0.0023 | 1.0 |
64
 
65
 
66
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.099003322259136,
3
- "eval_accuracy": 0.7664576802507836,
4
- "eval_loss": 0.8547028303146362,
5
- "eval_runtime": 304.3291,
6
- "eval_samples_per_second": 4.193,
7
- "eval_steps_per_second": 0.526
8
  }
 
1
  {
2
  "epoch": 9.099003322259136,
3
+ "eval_accuracy": 0.7774294670846394,
4
+ "eval_loss": 0.7984064817428589,
5
+ "eval_runtime": 302.1425,
6
+ "eval_samples_per_second": 4.223,
7
+ "eval_steps_per_second": 0.53
8
  }
confusion_matrix_test_fold_4.png ADDED
confusion_matrix_valid_fold_4.png ADDED
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0b19d4679f3e973d69e0d98a227b309b37c1cfc9cc13c05de9e0161d3bb10f3
3
  size 485250680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67d7a44bf7d4a571e9fd58edc04d4d487ed0e0483490e331ed8db5f61972b7b0
3
  size 485250680
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.099003322259136,
3
- "eval_accuracy": 0.7664576802507836,
4
- "eval_loss": 0.8547028303146362,
5
- "eval_runtime": 304.3291,
6
- "eval_samples_per_second": 4.193,
7
- "eval_steps_per_second": 0.526
8
  }
 
1
  {
2
  "epoch": 9.099003322259136,
3
+ "eval_accuracy": 0.7774294670846394,
4
+ "eval_loss": 0.7984064817428589,
5
+ "eval_runtime": 302.1425,
6
+ "eval_samples_per_second": 4.223,
7
+ "eval_steps_per_second": 0.53
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9975093399750934,
3
- "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/10 fold timesformer/Timesformer_default_fold_10_10_epoch_noAug_batch8/checkpoint-5424",
4
  "epoch": 9.099003322259136,
5
  "eval_steps": 500,
6
  "global_step": 9030,
@@ -10,740 +10,740 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.01107419712070875,
13
- "grad_norm": 12.339608192443848,
14
  "learning_rate": 5.537098560354375e-06,
15
- "loss": 4.1774,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.0221483942414175,
20
- "grad_norm": 14.543400764465332,
21
  "learning_rate": 1.107419712070875e-05,
22
- "loss": 3.8205,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.03322259136212625,
27
- "grad_norm": 15.178791046142578,
28
  "learning_rate": 1.6611295681063124e-05,
29
- "loss": 2.9812,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.044296788482835,
34
- "grad_norm": 12.90939712524414,
35
  "learning_rate": 2.21483942414175e-05,
36
- "loss": 2.072,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.05537098560354374,
41
- "grad_norm": 12.425464630126953,
42
  "learning_rate": 2.7685492801771873e-05,
43
- "loss": 1.265,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.0664451827242525,
48
- "grad_norm": 7.096066474914551,
49
  "learning_rate": 3.322259136212625e-05,
50
- "loss": 0.8079,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.07751937984496124,
55
- "grad_norm": 9.374312400817871,
56
  "learning_rate": 3.875968992248062e-05,
57
- "loss": 0.4583,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.08859357696567,
62
- "grad_norm": 2.866607904434204,
63
  "learning_rate": 4.4296788482835e-05,
64
- "loss": 0.2863,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.09966777408637874,
69
- "grad_norm": 9.150209426879883,
70
  "learning_rate": 4.983388704318937e-05,
71
- "loss": 0.2384,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.10011074197120709,
76
- "eval_accuracy": 0.9638854296388543,
77
- "eval_loss": 0.20189498364925385,
78
- "eval_runtime": 306.3851,
79
- "eval_samples_per_second": 2.621,
80
- "eval_steps_per_second": 0.33,
81
  "step": 904
82
  },
83
  {
84
  "epoch": 1.0106312292358803,
85
- "grad_norm": 0.4338759779930115,
86
  "learning_rate": 4.940322382182848e-05,
87
- "loss": 0.1309,
88
  "step": 1000
89
  },
90
  {
91
  "epoch": 1.0217054263565892,
92
- "grad_norm": 0.7621960639953613,
93
  "learning_rate": 4.878799064845577e-05,
94
- "loss": 0.1036,
95
  "step": 1100
96
  },
97
  {
98
  "epoch": 1.032779623477298,
99
- "grad_norm": 0.665744960308075,
100
  "learning_rate": 4.8172757475083056e-05,
101
- "loss": 0.075,
102
  "step": 1200
103
  },
104
  {
105
  "epoch": 1.0438538205980066,
106
- "grad_norm": 11.760805130004883,
107
  "learning_rate": 4.755752430171035e-05,
108
- "loss": 0.0808,
109
  "step": 1300
110
  },
111
  {
112
  "epoch": 1.0549280177187155,
113
- "grad_norm": 2.482733726501465,
114
  "learning_rate": 4.694229112833764e-05,
115
- "loss": 0.0308,
116
  "step": 1400
117
  },
118
  {
119
  "epoch": 1.0660022148394241,
120
- "grad_norm": 0.05557706952095032,
121
  "learning_rate": 4.6327057954964936e-05,
122
- "loss": 0.0475,
123
  "step": 1500
124
  },
125
  {
126
  "epoch": 1.0770764119601328,
127
- "grad_norm": 0.05685936659574509,
128
  "learning_rate": 4.571182478159223e-05,
129
- "loss": 0.0211,
130
  "step": 1600
131
  },
132
  {
133
  "epoch": 1.0881506090808417,
134
- "grad_norm": 0.07164409011602402,
135
  "learning_rate": 4.5096591608219516e-05,
136
- "loss": 0.0367,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 1.0992248062015504,
141
- "grad_norm": 0.07703804969787598,
142
  "learning_rate": 4.448135843484681e-05,
143
- "loss": 0.0207,
144
  "step": 1800
145
  },
146
  {
147
  "epoch": 1.100110741971207,
148
- "eval_accuracy": 0.9900373599003736,
149
- "eval_loss": 0.03251923993229866,
150
- "eval_runtime": 319.7386,
151
- "eval_samples_per_second": 2.511,
152
- "eval_steps_per_second": 0.316,
153
  "step": 1808
154
  },
155
  {
156
  "epoch": 2.010188261351052,
157
- "grad_norm": 0.049545079469680786,
158
  "learning_rate": 4.38661252614741e-05,
159
- "loss": 0.0066,
160
  "step": 1900
161
  },
162
  {
163
  "epoch": 2.0212624584717607,
164
- "grad_norm": 0.023045338690280914,
165
  "learning_rate": 4.325089208810139e-05,
166
- "loss": 0.0035,
167
  "step": 2000
168
  },
169
  {
170
  "epoch": 2.0323366555924696,
171
- "grad_norm": 0.016869375482201576,
172
  "learning_rate": 4.263565891472868e-05,
173
- "loss": 0.0162,
174
  "step": 2100
175
  },
176
  {
177
  "epoch": 2.0434108527131785,
178
- "grad_norm": 0.029845820739865303,
179
  "learning_rate": 4.2020425741355975e-05,
180
- "loss": 0.0185,
181
  "step": 2200
182
  },
183
  {
184
  "epoch": 2.054485049833887,
185
- "grad_norm": 0.009218129329383373,
186
  "learning_rate": 4.140519256798327e-05,
187
- "loss": 0.0109,
188
  "step": 2300
189
  },
190
  {
191
  "epoch": 2.065559246954596,
192
- "grad_norm": 0.07040040194988251,
193
  "learning_rate": 4.078995939461056e-05,
194
- "loss": 0.0023,
195
  "step": 2400
196
  },
197
  {
198
  "epoch": 2.0766334440753047,
199
- "grad_norm": 0.12176311016082764,
200
  "learning_rate": 4.0174726221237855e-05,
201
- "loss": 0.0258,
202
  "step": 2500
203
  },
204
  {
205
  "epoch": 2.087707641196013,
206
- "grad_norm": 0.01268716063350439,
207
  "learning_rate": 3.955949304786514e-05,
208
- "loss": 0.0098,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 2.098781838316722,
213
- "grad_norm": 0.03558783233165741,
214
  "learning_rate": 3.8944259874492434e-05,
215
- "loss": 0.0116,
216
  "step": 2700
217
  },
218
  {
219
  "epoch": 2.100110741971207,
220
- "eval_accuracy": 0.9962640099626401,
221
- "eval_loss": 0.02290569432079792,
222
- "eval_runtime": 324.5071,
223
- "eval_samples_per_second": 2.475,
224
- "eval_steps_per_second": 0.311,
225
  "step": 2712
226
  },
227
  {
228
  "epoch": 3.009745293466224,
229
- "grad_norm": 0.021466247737407684,
230
  "learning_rate": 3.832902670111973e-05,
231
- "loss": 0.0169,
232
  "step": 2800
233
  },
234
  {
235
  "epoch": 3.0208194905869323,
236
- "grad_norm": 0.014765599742531776,
237
  "learning_rate": 3.7713793527747014e-05,
238
- "loss": 0.0008,
239
  "step": 2900
240
  },
241
  {
242
  "epoch": 3.0318936877076412,
243
- "grad_norm": 0.012668099254369736,
244
  "learning_rate": 3.7098560354374314e-05,
245
- "loss": 0.0021,
246
  "step": 3000
247
  },
248
  {
249
  "epoch": 3.04296788482835,
250
- "grad_norm": 0.007325606886297464,
251
  "learning_rate": 3.64833271810016e-05,
252
- "loss": 0.0043,
253
  "step": 3100
254
  },
255
  {
256
  "epoch": 3.0540420819490586,
257
- "grad_norm": 0.010318818502128124,
258
  "learning_rate": 3.5868094007628894e-05,
259
- "loss": 0.0119,
260
  "step": 3200
261
  },
262
  {
263
  "epoch": 3.0651162790697675,
264
- "grad_norm": 0.00951201282441616,
265
  "learning_rate": 3.525286083425619e-05,
266
- "loss": 0.0006,
267
  "step": 3300
268
  },
269
  {
270
  "epoch": 3.0761904761904764,
271
- "grad_norm": 0.0036382139660418034,
272
  "learning_rate": 3.463762766088347e-05,
273
- "loss": 0.0006,
274
  "step": 3400
275
  },
276
  {
277
  "epoch": 3.087264673311185,
278
- "grad_norm": 0.014880606904625893,
279
  "learning_rate": 3.4022394487510767e-05,
280
- "loss": 0.0112,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 3.0983388704318937,
285
- "grad_norm": 0.02532036043703556,
286
  "learning_rate": 3.340716131413806e-05,
287
- "loss": 0.0017,
288
  "step": 3600
289
  },
290
  {
291
  "epoch": 3.100110741971207,
292
- "eval_accuracy": 0.9950186799501868,
293
- "eval_loss": 0.022232333198189735,
294
- "eval_runtime": 321.5404,
295
- "eval_samples_per_second": 2.497,
296
- "eval_steps_per_second": 0.314,
297
  "step": 3616
298
  },
299
  {
300
  "epoch": 4.0093023255813955,
301
- "grad_norm": 0.006754144560545683,
302
  "learning_rate": 3.2791928140765346e-05,
303
- "loss": 0.0005,
304
  "step": 3700
305
  },
306
  {
307
  "epoch": 4.020376522702104,
308
- "grad_norm": 0.005147715099155903,
309
  "learning_rate": 3.2176694967392646e-05,
310
- "loss": 0.0149,
311
  "step": 3800
312
  },
313
  {
314
  "epoch": 4.0314507198228124,
315
- "grad_norm": 0.007229967508465052,
316
  "learning_rate": 3.156146179401994e-05,
317
  "loss": 0.0003,
318
  "step": 3900
319
  },
320
  {
321
  "epoch": 4.042524916943521,
322
- "grad_norm": 0.014596535824239254,
323
  "learning_rate": 3.0946228620647226e-05,
324
- "loss": 0.0004,
325
  "step": 4000
326
  },
327
  {
328
  "epoch": 4.05359911406423,
329
- "grad_norm": 0.0032493751496076584,
330
  "learning_rate": 3.033099544727452e-05,
331
  "loss": 0.0003,
332
  "step": 4100
333
  },
334
  {
335
  "epoch": 4.064673311184939,
336
- "grad_norm": 0.004764176905155182,
337
  "learning_rate": 2.971576227390181e-05,
338
  "loss": 0.0002,
339
  "step": 4200
340
  },
341
  {
342
  "epoch": 4.075747508305648,
343
- "grad_norm": 0.005187960807234049,
344
  "learning_rate": 2.91005291005291e-05,
345
  "loss": 0.0002,
346
  "step": 4300
347
  },
348
  {
349
  "epoch": 4.086821705426357,
350
- "grad_norm": 0.008601618930697441,
351
  "learning_rate": 2.8485295927156392e-05,
352
- "loss": 0.0004,
353
  "step": 4400
354
  },
355
  {
356
  "epoch": 4.097895902547065,
357
- "grad_norm": 0.5019575357437134,
358
  "learning_rate": 2.787006275378369e-05,
359
- "loss": 0.0156,
360
  "step": 4500
361
  },
362
  {
363
  "epoch": 4.100110741971207,
364
- "eval_accuracy": 0.9900373599003736,
365
- "eval_loss": 0.04016907513141632,
366
- "eval_runtime": 316.7446,
367
- "eval_samples_per_second": 2.535,
368
- "eval_steps_per_second": 0.319,
369
  "step": 4520
370
  },
371
  {
372
  "epoch": 5.008859357696567,
373
- "grad_norm": 0.09171910583972931,
374
  "learning_rate": 2.725482958041098e-05,
375
- "loss": 0.0379,
376
  "step": 4600
377
  },
378
  {
379
  "epoch": 5.019933554817276,
380
- "grad_norm": 0.012514377012848854,
381
  "learning_rate": 2.6639596407038268e-05,
382
- "loss": 0.0049,
383
  "step": 4700
384
  },
385
  {
386
  "epoch": 5.0310077519379846,
387
- "grad_norm": 0.009760179556906223,
388
  "learning_rate": 2.602436323366556e-05,
389
- "loss": 0.0008,
390
  "step": 4800
391
  },
392
  {
393
  "epoch": 5.0420819490586934,
394
- "grad_norm": 0.00494693499058485,
395
  "learning_rate": 2.540913006029285e-05,
396
- "loss": 0.0094,
397
  "step": 4900
398
  },
399
  {
400
  "epoch": 5.053156146179402,
401
- "grad_norm": 0.003839722601696849,
402
  "learning_rate": 2.479389688692014e-05,
403
- "loss": 0.0003,
404
  "step": 5000
405
  },
406
  {
407
  "epoch": 5.06423034330011,
408
- "grad_norm": 0.004592357203364372,
409
  "learning_rate": 2.4178663713547438e-05,
410
- "loss": 0.0002,
411
  "step": 5100
412
  },
413
  {
414
  "epoch": 5.075304540420819,
415
- "grad_norm": 0.00357171637006104,
416
  "learning_rate": 2.3563430540174727e-05,
417
- "loss": 0.0003,
418
  "step": 5200
419
  },
420
  {
421
  "epoch": 5.086378737541528,
422
- "grad_norm": 0.004462802782654762,
423
  "learning_rate": 2.2948197366802017e-05,
424
- "loss": 0.0002,
425
  "step": 5300
426
  },
427
  {
428
  "epoch": 5.097452934662237,
429
- "grad_norm": 0.002604804467409849,
430
  "learning_rate": 2.233296419342931e-05,
431
- "loss": 0.0002,
432
  "step": 5400
433
  },
434
  {
435
  "epoch": 5.100110741971207,
436
- "eval_accuracy": 0.9975093399750934,
437
- "eval_loss": 0.008347271010279655,
438
- "eval_runtime": 311.1635,
439
- "eval_samples_per_second": 2.581,
440
- "eval_steps_per_second": 0.325,
441
  "step": 5424
442
  },
443
  {
444
  "epoch": 6.008416389811739,
445
- "grad_norm": 0.0028000217862427235,
446
  "learning_rate": 2.1717731020056604e-05,
447
- "loss": 0.0002,
448
  "step": 5500
449
  },
450
  {
451
  "epoch": 6.019490586932448,
452
- "grad_norm": 0.0019141006050631404,
453
  "learning_rate": 2.1102497846683894e-05,
454
- "loss": 0.0001,
455
  "step": 5600
456
  },
457
  {
458
  "epoch": 6.030564784053156,
459
- "grad_norm": 0.0038908650167286396,
460
  "learning_rate": 2.0487264673311183e-05,
461
- "loss": 0.0001,
462
  "step": 5700
463
  },
464
  {
465
  "epoch": 6.041638981173865,
466
- "grad_norm": 0.0013375241542235017,
467
  "learning_rate": 1.987203149993848e-05,
468
- "loss": 0.0001,
469
  "step": 5800
470
  },
471
  {
472
  "epoch": 6.052713178294574,
473
- "grad_norm": 0.0016891614068299532,
474
  "learning_rate": 1.925679832656577e-05,
475
- "loss": 0.0001,
476
  "step": 5900
477
  },
478
  {
479
  "epoch": 6.0637873754152825,
480
- "grad_norm": 0.0015141346957534552,
481
  "learning_rate": 1.864156515319306e-05,
482
- "loss": 0.0001,
483
  "step": 6000
484
  },
485
  {
486
  "epoch": 6.074861572535991,
487
- "grad_norm": 0.0012503410689532757,
488
  "learning_rate": 1.8026331979820353e-05,
489
- "loss": 0.0001,
490
  "step": 6100
491
  },
492
  {
493
  "epoch": 6.0859357696567,
494
- "grad_norm": 0.0019348779460415244,
495
  "learning_rate": 1.7411098806447646e-05,
496
- "loss": 0.0001,
497
  "step": 6200
498
  },
499
  {
500
  "epoch": 6.097009966777408,
501
- "grad_norm": 0.001840105396695435,
502
  "learning_rate": 1.6795865633074936e-05,
503
- "loss": 0.0001,
504
  "step": 6300
505
  },
506
  {
507
  "epoch": 6.100110741971207,
508
  "eval_accuracy": 0.9975093399750934,
509
- "eval_loss": 0.008798662573099136,
510
- "eval_runtime": 315.9616,
511
- "eval_samples_per_second": 2.541,
512
- "eval_steps_per_second": 0.32,
513
  "step": 6328
514
  },
515
  {
516
  "epoch": 7.00797342192691,
517
- "grad_norm": 0.0018666000105440617,
518
  "learning_rate": 1.6180632459702226e-05,
519
- "loss": 0.0001,
520
  "step": 6400
521
  },
522
  {
523
  "epoch": 7.019047619047619,
524
- "grad_norm": 0.0011329470435157418,
525
  "learning_rate": 1.556539928632952e-05,
526
- "loss": 0.0001,
527
  "step": 6500
528
  },
529
  {
530
  "epoch": 7.030121816168328,
531
- "grad_norm": 0.0014819415519014,
532
  "learning_rate": 1.4950166112956812e-05,
533
- "loss": 0.0001,
534
  "step": 6600
535
  },
536
  {
537
  "epoch": 7.041196013289037,
538
- "grad_norm": 0.0013377605937421322,
539
  "learning_rate": 1.4334932939584104e-05,
540
- "loss": 0.0001,
541
  "step": 6700
542
  },
543
  {
544
  "epoch": 7.052270210409746,
545
- "grad_norm": 0.0014949695905670524,
546
  "learning_rate": 1.3719699766211393e-05,
547
- "loss": 0.0001,
548
  "step": 6800
549
  },
550
  {
551
  "epoch": 7.063344407530454,
552
- "grad_norm": 0.0020517068915069103,
553
  "learning_rate": 1.3104466592838688e-05,
554
- "loss": 0.0001,
555
  "step": 6900
556
  },
557
  {
558
  "epoch": 7.074418604651163,
559
- "grad_norm": 0.0016913407016545534,
560
  "learning_rate": 1.2489233419465978e-05,
561
- "loss": 0.0001,
562
  "step": 7000
563
  },
564
  {
565
  "epoch": 7.0854928017718715,
566
- "grad_norm": 0.0019159069051966071,
567
  "learning_rate": 1.187400024609327e-05,
568
- "loss": 0.0001,
569
  "step": 7100
570
  },
571
  {
572
  "epoch": 7.09656699889258,
573
- "grad_norm": 0.0017233892576768994,
574
  "learning_rate": 1.1258767072720563e-05,
575
  "loss": 0.0001,
576
  "step": 7200
577
  },
578
  {
579
  "epoch": 7.100110741971207,
580
- "eval_accuracy": 0.9975093399750934,
581
- "eval_loss": 0.009401123970746994,
582
- "eval_runtime": 205.4199,
583
- "eval_samples_per_second": 3.909,
584
- "eval_steps_per_second": 0.492,
585
  "step": 7232
586
  },
587
  {
588
  "epoch": 8.007530454042081,
589
- "grad_norm": 0.0016070300480350852,
590
  "learning_rate": 1.0643533899347853e-05,
591
  "loss": 0.0001,
592
  "step": 7300
593
  },
594
  {
595
  "epoch": 8.018604651162791,
596
- "grad_norm": 0.0009037270210683346,
597
  "learning_rate": 1.0028300725975146e-05,
598
  "loss": 0.0001,
599
  "step": 7400
600
  },
601
  {
602
  "epoch": 8.029678848283499,
603
- "grad_norm": 0.0009796767262741923,
604
  "learning_rate": 9.413067552602436e-06,
605
  "loss": 0.0001,
606
  "step": 7500
607
  },
608
  {
609
  "epoch": 8.040753045404209,
610
- "grad_norm": 0.009153931401669979,
611
  "learning_rate": 8.797834379229729e-06,
612
  "loss": 0.0001,
613
  "step": 7600
614
  },
615
  {
616
  "epoch": 8.051827242524917,
617
- "grad_norm": 0.0009286152780987322,
618
  "learning_rate": 8.18260120585702e-06,
619
  "loss": 0.0001,
620
  "step": 7700
621
  },
622
  {
623
  "epoch": 8.062901439645625,
624
- "grad_norm": 0.0012453129747882485,
625
  "learning_rate": 7.567368032484312e-06,
626
  "loss": 0.0001,
627
  "step": 7800
628
  },
629
  {
630
  "epoch": 8.073975636766335,
631
- "grad_norm": 0.000763101561460644,
632
  "learning_rate": 6.952134859111603e-06,
633
  "loss": 0.0001,
634
  "step": 7900
635
  },
636
  {
637
  "epoch": 8.085049833887043,
638
- "grad_norm": 0.0010468490654602647,
639
  "learning_rate": 6.336901685738895e-06,
640
  "loss": 0.0001,
641
  "step": 8000
642
  },
643
  {
644
  "epoch": 8.096124031007752,
645
- "grad_norm": 0.0007441536872647703,
646
  "learning_rate": 5.7216685123661875e-06,
647
  "loss": 0.0001,
648
  "step": 8100
649
  },
650
  {
651
  "epoch": 8.100110741971207,
652
- "eval_accuracy": 0.9975093399750934,
653
- "eval_loss": 0.009709909558296204,
654
- "eval_runtime": 213.0126,
655
- "eval_samples_per_second": 3.77,
656
- "eval_steps_per_second": 0.474,
657
  "step": 8136
658
  },
659
  {
660
  "epoch": 9.007087486157253,
661
- "grad_norm": 0.0011277415324002504,
662
  "learning_rate": 5.106435338993479e-06,
663
  "loss": 0.0001,
664
  "step": 8200
665
  },
666
  {
667
  "epoch": 9.018161683277963,
668
- "grad_norm": 0.0012697929050773382,
669
  "learning_rate": 4.4912021656207705e-06,
670
  "loss": 0.0001,
671
  "step": 8300
672
  },
673
  {
674
  "epoch": 9.029235880398671,
675
- "grad_norm": 0.0028499774634838104,
676
  "learning_rate": 3.875968992248062e-06,
677
  "loss": 0.0001,
678
  "step": 8400
679
  },
680
  {
681
  "epoch": 9.04031007751938,
682
- "grad_norm": 0.0017348791006952524,
683
  "learning_rate": 3.260735818875354e-06,
684
  "loss": 0.0001,
685
  "step": 8500
686
  },
687
  {
688
  "epoch": 9.051384274640089,
689
- "grad_norm": 0.0006883046007715166,
690
  "learning_rate": 2.6455026455026455e-06,
691
  "loss": 0.0001,
692
  "step": 8600
693
  },
694
  {
695
  "epoch": 9.062458471760797,
696
- "grad_norm": 0.00142951391171664,
697
  "learning_rate": 2.0302694721299375e-06,
698
  "loss": 0.0001,
699
  "step": 8700
700
  },
701
  {
702
  "epoch": 9.073532668881507,
703
- "grad_norm": 0.000702825200278312,
704
  "learning_rate": 1.415036298757229e-06,
705
  "loss": 0.0001,
706
  "step": 8800
707
  },
708
  {
709
  "epoch": 9.084606866002215,
710
- "grad_norm": 0.0009480651351623237,
711
  "learning_rate": 7.998031253845208e-07,
712
  "loss": 0.0001,
713
  "step": 8900
714
  },
715
  {
716
  "epoch": 9.095681063122923,
717
- "grad_norm": 0.0010264083975926042,
718
  "learning_rate": 1.8456995201181249e-07,
719
  "loss": 0.0001,
720
  "step": 9000
721
  },
722
  {
723
  "epoch": 9.099003322259136,
724
- "eval_accuracy": 0.9975093399750934,
725
- "eval_loss": 0.009837848134338856,
726
- "eval_runtime": 215.3394,
727
- "eval_samples_per_second": 3.729,
728
- "eval_steps_per_second": 0.469,
729
  "step": 9030
730
  },
731
  {
732
  "epoch": 9.099003322259136,
733
  "step": 9030,
734
  "total_flos": 6.328460909097596e+19,
735
- "train_loss": 0.18714786747627524,
736
- "train_runtime": 33340.824,
737
- "train_samples_per_second": 2.167,
738
- "train_steps_per_second": 0.271
739
  },
740
  {
741
  "epoch": 9.099003322259136,
742
- "eval_accuracy": 0.7664576802507836,
743
- "eval_loss": 0.8547028303146362,
744
- "eval_runtime": 304.3291,
745
- "eval_samples_per_second": 4.193,
746
- "eval_steps_per_second": 0.526,
747
  "step": 9030
748
  }
749
  ],
 
1
  {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/10 fold timesformer/Timesformer_default_fold_10_10_epoch_noAug_batch8/checkpoint-8136",
4
  "epoch": 9.099003322259136,
5
  "eval_steps": 500,
6
  "global_step": 9030,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.01107419712070875,
13
+ "grad_norm": 12.285748481750488,
14
  "learning_rate": 5.537098560354375e-06,
15
+ "loss": 4.1825,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.0221483942414175,
20
+ "grad_norm": 13.900195121765137,
21
  "learning_rate": 1.107419712070875e-05,
22
+ "loss": 3.8327,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.03322259136212625,
27
+ "grad_norm": 16.053707122802734,
28
  "learning_rate": 1.6611295681063124e-05,
29
+ "loss": 3.0094,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.044296788482835,
34
+ "grad_norm": 14.44247817993164,
35
  "learning_rate": 2.21483942414175e-05,
36
+ "loss": 2.0742,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.05537098560354374,
41
+ "grad_norm": 11.22198486328125,
42
  "learning_rate": 2.7685492801771873e-05,
43
+ "loss": 1.3003,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.0664451827242525,
48
+ "grad_norm": 8.951163291931152,
49
  "learning_rate": 3.322259136212625e-05,
50
+ "loss": 0.7913,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.07751937984496124,
55
+ "grad_norm": 6.106560707092285,
56
  "learning_rate": 3.875968992248062e-05,
57
+ "loss": 0.4525,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.08859357696567,
62
+ "grad_norm": 3.4112484455108643,
63
  "learning_rate": 4.4296788482835e-05,
64
+ "loss": 0.2954,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.09966777408637874,
69
+ "grad_norm": 5.202641487121582,
70
  "learning_rate": 4.983388704318937e-05,
71
+ "loss": 0.2704,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.10011074197120709,
76
+ "eval_accuracy": 0.9551681195516812,
77
+ "eval_loss": 0.22091799974441528,
78
+ "eval_runtime": 204.9768,
79
+ "eval_samples_per_second": 3.918,
80
+ "eval_steps_per_second": 0.493,
81
  "step": 904
82
  },
83
  {
84
  "epoch": 1.0106312292358803,
85
+ "grad_norm": 2.3844780921936035,
86
  "learning_rate": 4.940322382182848e-05,
87
+ "loss": 0.1136,
88
  "step": 1000
89
  },
90
  {
91
  "epoch": 1.0217054263565892,
92
+ "grad_norm": 0.3309445083141327,
93
  "learning_rate": 4.878799064845577e-05,
94
+ "loss": 0.1143,
95
  "step": 1100
96
  },
97
  {
98
  "epoch": 1.032779623477298,
99
+ "grad_norm": 1.7956221103668213,
100
  "learning_rate": 4.8172757475083056e-05,
101
+ "loss": 0.0711,
102
  "step": 1200
103
  },
104
  {
105
  "epoch": 1.0438538205980066,
106
+ "grad_norm": 4.114385604858398,
107
  "learning_rate": 4.755752430171035e-05,
108
+ "loss": 0.0928,
109
  "step": 1300
110
  },
111
  {
112
  "epoch": 1.0549280177187155,
113
+ "grad_norm": 13.343818664550781,
114
  "learning_rate": 4.694229112833764e-05,
115
+ "loss": 0.072,
116
  "step": 1400
117
  },
118
  {
119
  "epoch": 1.0660022148394241,
120
+ "grad_norm": 0.0704650729894638,
121
  "learning_rate": 4.6327057954964936e-05,
122
+ "loss": 0.0522,
123
  "step": 1500
124
  },
125
  {
126
  "epoch": 1.0770764119601328,
127
+ "grad_norm": 0.06872345507144928,
128
  "learning_rate": 4.571182478159223e-05,
129
+ "loss": 0.0108,
130
  "step": 1600
131
  },
132
  {
133
  "epoch": 1.0881506090808417,
134
+ "grad_norm": 0.07814284414052963,
135
  "learning_rate": 4.5096591608219516e-05,
136
+ "loss": 0.0261,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 1.0992248062015504,
141
+ "grad_norm": 0.09340647608041763,
142
  "learning_rate": 4.448135843484681e-05,
143
+ "loss": 0.0155,
144
  "step": 1800
145
  },
146
  {
147
  "epoch": 1.100110741971207,
148
+ "eval_accuracy": 0.9912826899128269,
149
+ "eval_loss": 0.041718773543834686,
150
+ "eval_runtime": 205.7202,
151
+ "eval_samples_per_second": 3.903,
152
+ "eval_steps_per_second": 0.491,
153
  "step": 1808
154
  },
155
  {
156
  "epoch": 2.010188261351052,
157
+ "grad_norm": 0.061494044959545135,
158
  "learning_rate": 4.38661252614741e-05,
159
+ "loss": 0.0069,
160
  "step": 1900
161
  },
162
  {
163
  "epoch": 2.0212624584717607,
164
+ "grad_norm": 0.024138756096363068,
165
  "learning_rate": 4.325089208810139e-05,
166
+ "loss": 0.0161,
167
  "step": 2000
168
  },
169
  {
170
  "epoch": 2.0323366555924696,
171
+ "grad_norm": 0.17499598860740662,
172
  "learning_rate": 4.263565891472868e-05,
173
+ "loss": 0.0145,
174
  "step": 2100
175
  },
176
  {
177
  "epoch": 2.0434108527131785,
178
+ "grad_norm": 10.606599807739258,
179
  "learning_rate": 4.2020425741355975e-05,
180
+ "loss": 0.0575,
181
  "step": 2200
182
  },
183
  {
184
  "epoch": 2.054485049833887,
185
+ "grad_norm": 0.1084849014878273,
186
  "learning_rate": 4.140519256798327e-05,
187
+ "loss": 0.0037,
188
  "step": 2300
189
  },
190
  {
191
  "epoch": 2.065559246954596,
192
+ "grad_norm": 0.06579738110303879,
193
  "learning_rate": 4.078995939461056e-05,
194
+ "loss": 0.0033,
195
  "step": 2400
196
  },
197
  {
198
  "epoch": 2.0766334440753047,
199
+ "grad_norm": 0.0159724373370409,
200
  "learning_rate": 4.0174726221237855e-05,
201
+ "loss": 0.0038,
202
  "step": 2500
203
  },
204
  {
205
  "epoch": 2.087707641196013,
206
+ "grad_norm": 0.032099511474370956,
207
  "learning_rate": 3.955949304786514e-05,
208
+ "loss": 0.0106,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 2.098781838316722,
213
+ "grad_norm": 0.18046538531780243,
214
  "learning_rate": 3.8944259874492434e-05,
215
+ "loss": 0.0021,
216
  "step": 2700
217
  },
218
  {
219
  "epoch": 2.100110741971207,
220
+ "eval_accuracy": 0.9937733499377335,
221
+ "eval_loss": 0.026835119351744652,
222
+ "eval_runtime": 217.9844,
223
+ "eval_samples_per_second": 3.684,
224
+ "eval_steps_per_second": 0.463,
225
  "step": 2712
226
  },
227
  {
228
  "epoch": 3.009745293466224,
229
+ "grad_norm": 0.01824910193681717,
230
  "learning_rate": 3.832902670111973e-05,
231
+ "loss": 0.013,
232
  "step": 2800
233
  },
234
  {
235
  "epoch": 3.0208194905869323,
236
+ "grad_norm": 0.05538594350218773,
237
  "learning_rate": 3.7713793527747014e-05,
238
+ "loss": 0.0087,
239
  "step": 2900
240
  },
241
  {
242
  "epoch": 3.0318936877076412,
243
+ "grad_norm": 0.018902570009231567,
244
  "learning_rate": 3.7098560354374314e-05,
245
+ "loss": 0.0478,
246
  "step": 3000
247
  },
248
  {
249
  "epoch": 3.04296788482835,
250
+ "grad_norm": 0.06631331145763397,
251
  "learning_rate": 3.64833271810016e-05,
252
+ "loss": 0.0109,
253
  "step": 3100
254
  },
255
  {
256
  "epoch": 3.0540420819490586,
257
+ "grad_norm": 0.017673442140221596,
258
  "learning_rate": 3.5868094007628894e-05,
259
+ "loss": 0.0175,
260
  "step": 3200
261
  },
262
  {
263
  "epoch": 3.0651162790697675,
264
+ "grad_norm": 0.007972619496285915,
265
  "learning_rate": 3.525286083425619e-05,
266
+ "loss": 0.0009,
267
  "step": 3300
268
  },
269
  {
270
  "epoch": 3.0761904761904764,
271
+ "grad_norm": 0.005024532321840525,
272
  "learning_rate": 3.463762766088347e-05,
273
+ "loss": 0.0089,
274
  "step": 3400
275
  },
276
  {
277
  "epoch": 3.087264673311185,
278
+ "grad_norm": 0.008405894972383976,
279
  "learning_rate": 3.4022394487510767e-05,
280
+ "loss": 0.0005,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 3.0983388704318937,
285
+ "grad_norm": 0.004133996553719044,
286
  "learning_rate": 3.340716131413806e-05,
287
+ "loss": 0.0004,
288
  "step": 3600
289
  },
290
  {
291
  "epoch": 3.100110741971207,
292
+ "eval_accuracy": 0.9975093399750934,
293
+ "eval_loss": 0.011567194014787674,
294
+ "eval_runtime": 211.0278,
295
+ "eval_samples_per_second": 3.805,
296
+ "eval_steps_per_second": 0.479,
297
  "step": 3616
298
  },
299
  {
300
  "epoch": 4.0093023255813955,
301
+ "grad_norm": 0.01580865867435932,
302
  "learning_rate": 3.2791928140765346e-05,
303
+ "loss": 0.0153,
304
  "step": 3700
305
  },
306
  {
307
  "epoch": 4.020376522702104,
308
+ "grad_norm": 0.003808818757534027,
309
  "learning_rate": 3.2176694967392646e-05,
310
+ "loss": 0.0004,
311
  "step": 3800
312
  },
313
  {
314
  "epoch": 4.0314507198228124,
315
+ "grad_norm": 0.006102267187088728,
316
  "learning_rate": 3.156146179401994e-05,
317
  "loss": 0.0003,
318
  "step": 3900
319
  },
320
  {
321
  "epoch": 4.042524916943521,
322
+ "grad_norm": 0.005705439019948244,
323
  "learning_rate": 3.0946228620647226e-05,
324
+ "loss": 0.0002,
325
  "step": 4000
326
  },
327
  {
328
  "epoch": 4.05359911406423,
329
+ "grad_norm": 0.0029700251761823893,
330
  "learning_rate": 3.033099544727452e-05,
331
  "loss": 0.0003,
332
  "step": 4100
333
  },
334
  {
335
  "epoch": 4.064673311184939,
336
+ "grad_norm": 0.0034872761461883783,
337
  "learning_rate": 2.971576227390181e-05,
338
  "loss": 0.0002,
339
  "step": 4200
340
  },
341
  {
342
  "epoch": 4.075747508305648,
343
+ "grad_norm": 0.00403960794210434,
344
  "learning_rate": 2.91005291005291e-05,
345
  "loss": 0.0002,
346
  "step": 4300
347
  },
348
  {
349
  "epoch": 4.086821705426357,
350
+ "grad_norm": 0.0029434591997414827,
351
  "learning_rate": 2.8485295927156392e-05,
352
+ "loss": 0.0028,
353
  "step": 4400
354
  },
355
  {
356
  "epoch": 4.097895902547065,
357
+ "grad_norm": 0.003810062538832426,
358
  "learning_rate": 2.787006275378369e-05,
359
+ "loss": 0.0125,
360
  "step": 4500
361
  },
362
  {
363
  "epoch": 4.100110741971207,
364
+ "eval_accuracy": 0.9962640099626401,
365
+ "eval_loss": 0.008478819392621517,
366
+ "eval_runtime": 207.8373,
367
+ "eval_samples_per_second": 3.864,
368
+ "eval_steps_per_second": 0.486,
369
  "step": 4520
370
  },
371
  {
372
  "epoch": 5.008859357696567,
373
+ "grad_norm": 0.004100952763110399,
374
  "learning_rate": 2.725482958041098e-05,
375
+ "loss": 0.0002,
376
  "step": 4600
377
  },
378
  {
379
  "epoch": 5.019933554817276,
380
+ "grad_norm": 0.006221433635801077,
381
  "learning_rate": 2.6639596407038268e-05,
382
+ "loss": 0.0002,
383
  "step": 4700
384
  },
385
  {
386
  "epoch": 5.0310077519379846,
387
+ "grad_norm": 0.0044784145429730415,
388
  "learning_rate": 2.602436323366556e-05,
389
+ "loss": 0.0002,
390
  "step": 4800
391
  },
392
  {
393
  "epoch": 5.0420819490586934,
394
+ "grad_norm": 0.0028812645468860865,
395
  "learning_rate": 2.540913006029285e-05,
396
+ "loss": 0.0002,
397
  "step": 4900
398
  },
399
  {
400
  "epoch": 5.053156146179402,
401
+ "grad_norm": 0.0024458877742290497,
402
  "learning_rate": 2.479389688692014e-05,
403
+ "loss": 0.0002,
404
  "step": 5000
405
  },
406
  {
407
  "epoch": 5.06423034330011,
408
+ "grad_norm": 0.00314136128872633,
409
  "learning_rate": 2.4178663713547438e-05,
410
+ "loss": 0.0001,
411
  "step": 5100
412
  },
413
  {
414
  "epoch": 5.075304540420819,
415
+ "grad_norm": 0.0019707216415554285,
416
  "learning_rate": 2.3563430540174727e-05,
417
+ "loss": 0.0001,
418
  "step": 5200
419
  },
420
  {
421
  "epoch": 5.086378737541528,
422
+ "grad_norm": 0.0017920031677931547,
423
  "learning_rate": 2.2948197366802017e-05,
424
+ "loss": 0.0001,
425
  "step": 5300
426
  },
427
  {
428
  "epoch": 5.097452934662237,
429
+ "grad_norm": 0.008118866011500359,
430
  "learning_rate": 2.233296419342931e-05,
431
+ "loss": 0.0043,
432
  "step": 5400
433
  },
434
  {
435
  "epoch": 5.100110741971207,
436
+ "eval_accuracy": 0.9912826899128269,
437
+ "eval_loss": 0.027965275570750237,
438
+ "eval_runtime": 213.2679,
439
+ "eval_samples_per_second": 3.765,
440
+ "eval_steps_per_second": 0.474,
441
  "step": 5424
442
  },
443
  {
444
  "epoch": 6.008416389811739,
445
+ "grad_norm": 0.01948702521622181,
446
  "learning_rate": 2.1717731020056604e-05,
447
+ "loss": 0.0021,
448
  "step": 5500
449
  },
450
  {
451
  "epoch": 6.019490586932448,
452
+ "grad_norm": 0.005027715116739273,
453
  "learning_rate": 2.1102497846683894e-05,
454
+ "loss": 0.0018,
455
  "step": 5600
456
  },
457
  {
458
  "epoch": 6.030564784053156,
459
+ "grad_norm": 0.8039076924324036,
460
  "learning_rate": 2.0487264673311183e-05,
461
+ "loss": 0.0125,
462
  "step": 5700
463
  },
464
  {
465
  "epoch": 6.041638981173865,
466
+ "grad_norm": 0.007823876105248928,
467
  "learning_rate": 1.987203149993848e-05,
468
+ "loss": 0.0164,
469
  "step": 5800
470
  },
471
  {
472
  "epoch": 6.052713178294574,
473
+ "grad_norm": 0.020579500123858452,
474
  "learning_rate": 1.925679832656577e-05,
475
+ "loss": 0.0059,
476
  "step": 5900
477
  },
478
  {
479
  "epoch": 6.0637873754152825,
480
+ "grad_norm": 0.003875893075019121,
481
  "learning_rate": 1.864156515319306e-05,
482
+ "loss": 0.0004,
483
  "step": 6000
484
  },
485
  {
486
  "epoch": 6.074861572535991,
487
+ "grad_norm": 0.009862402454018593,
488
  "learning_rate": 1.8026331979820353e-05,
489
+ "loss": 0.0058,
490
  "step": 6100
491
  },
492
  {
493
  "epoch": 6.0859357696567,
494
+ "grad_norm": 0.005517066456377506,
495
  "learning_rate": 1.7411098806447646e-05,
496
+ "loss": 0.0004,
497
  "step": 6200
498
  },
499
  {
500
  "epoch": 6.097009966777408,
501
+ "grad_norm": 0.007121366914361715,
502
  "learning_rate": 1.6795865633074936e-05,
503
+ "loss": 0.0002,
504
  "step": 6300
505
  },
506
  {
507
  "epoch": 6.100110741971207,
508
  "eval_accuracy": 0.9975093399750934,
509
+ "eval_loss": 0.009998604655265808,
510
+ "eval_runtime": 204.2684,
511
+ "eval_samples_per_second": 3.931,
512
+ "eval_steps_per_second": 0.494,
513
  "step": 6328
514
  },
515
  {
516
  "epoch": 7.00797342192691,
517
+ "grad_norm": 0.005023865960538387,
518
  "learning_rate": 1.6180632459702226e-05,
519
+ "loss": 0.0002,
520
  "step": 6400
521
  },
522
  {
523
  "epoch": 7.019047619047619,
524
+ "grad_norm": 0.001879742369055748,
525
  "learning_rate": 1.556539928632952e-05,
526
+ "loss": 0.0002,
527
  "step": 6500
528
  },
529
  {
530
  "epoch": 7.030121816168328,
531
+ "grad_norm": 0.003781010629609227,
532
  "learning_rate": 1.4950166112956812e-05,
533
+ "loss": 0.005,
534
  "step": 6600
535
  },
536
  {
537
  "epoch": 7.041196013289037,
538
+ "grad_norm": 0.025241386145353317,
539
  "learning_rate": 1.4334932939584104e-05,
540
+ "loss": 0.0005,
541
  "step": 6700
542
  },
543
  {
544
  "epoch": 7.052270210409746,
545
+ "grad_norm": 0.0018404372967779636,
546
  "learning_rate": 1.3719699766211393e-05,
547
+ "loss": 0.0002,
548
  "step": 6800
549
  },
550
  {
551
  "epoch": 7.063344407530454,
552
+ "grad_norm": 0.004017640370875597,
553
  "learning_rate": 1.3104466592838688e-05,
554
+ "loss": 0.0005,
555
  "step": 6900
556
  },
557
  {
558
  "epoch": 7.074418604651163,
559
+ "grad_norm": 0.0032308073714375496,
560
  "learning_rate": 1.2489233419465978e-05,
561
+ "loss": 0.0004,
562
  "step": 7000
563
  },
564
  {
565
  "epoch": 7.0854928017718715,
566
+ "grad_norm": 0.0011970199411734939,
567
  "learning_rate": 1.187400024609327e-05,
568
+ "loss": 0.0002,
569
  "step": 7100
570
  },
571
  {
572
  "epoch": 7.09656699889258,
573
+ "grad_norm": 0.0023619933053851128,
574
  "learning_rate": 1.1258767072720563e-05,
575
  "loss": 0.0001,
576
  "step": 7200
577
  },
578
  {
579
  "epoch": 7.100110741971207,
580
+ "eval_accuracy": 0.9987546699875467,
581
+ "eval_loss": 0.0030333329923450947,
582
+ "eval_runtime": 201.1888,
583
+ "eval_samples_per_second": 3.991,
584
+ "eval_steps_per_second": 0.502,
585
  "step": 7232
586
  },
587
  {
588
  "epoch": 8.007530454042081,
589
+ "grad_norm": 0.0018289505969733,
590
  "learning_rate": 1.0643533899347853e-05,
591
  "loss": 0.0001,
592
  "step": 7300
593
  },
594
  {
595
  "epoch": 8.018604651162791,
596
+ "grad_norm": 0.01480843871831894,
597
  "learning_rate": 1.0028300725975146e-05,
598
  "loss": 0.0001,
599
  "step": 7400
600
  },
601
  {
602
  "epoch": 8.029678848283499,
603
+ "grad_norm": 0.0018123927293345332,
604
  "learning_rate": 9.413067552602436e-06,
605
  "loss": 0.0001,
606
  "step": 7500
607
  },
608
  {
609
  "epoch": 8.040753045404209,
610
+ "grad_norm": 0.0014256143476814032,
611
  "learning_rate": 8.797834379229729e-06,
612
  "loss": 0.0001,
613
  "step": 7600
614
  },
615
  {
616
  "epoch": 8.051827242524917,
617
+ "grad_norm": 0.0011715757427737117,
618
  "learning_rate": 8.18260120585702e-06,
619
  "loss": 0.0001,
620
  "step": 7700
621
  },
622
  {
623
  "epoch": 8.062901439645625,
624
+ "grad_norm": 0.002099097240716219,
625
  "learning_rate": 7.567368032484312e-06,
626
  "loss": 0.0001,
627
  "step": 7800
628
  },
629
  {
630
  "epoch": 8.073975636766335,
631
+ "grad_norm": 0.000933408213313669,
632
  "learning_rate": 6.952134859111603e-06,
633
  "loss": 0.0001,
634
  "step": 7900
635
  },
636
  {
637
  "epoch": 8.085049833887043,
638
+ "grad_norm": 0.0009701464441604912,
639
  "learning_rate": 6.336901685738895e-06,
640
  "loss": 0.0001,
641
  "step": 8000
642
  },
643
  {
644
  "epoch": 8.096124031007752,
645
+ "grad_norm": 0.001506564673036337,
646
  "learning_rate": 5.7216685123661875e-06,
647
  "loss": 0.0001,
648
  "step": 8100
649
  },
650
  {
651
  "epoch": 8.100110741971207,
652
+ "eval_accuracy": 1.0,
653
+ "eval_loss": 0.002309577539563179,
654
+ "eval_runtime": 208.0157,
655
+ "eval_samples_per_second": 3.86,
656
+ "eval_steps_per_second": 0.486,
657
  "step": 8136
658
  },
659
  {
660
  "epoch": 9.007087486157253,
661
+ "grad_norm": 0.0019045774824917316,
662
  "learning_rate": 5.106435338993479e-06,
663
  "loss": 0.0001,
664
  "step": 8200
665
  },
666
  {
667
  "epoch": 9.018161683277963,
668
+ "grad_norm": 0.0013489355333149433,
669
  "learning_rate": 4.4912021656207705e-06,
670
  "loss": 0.0001,
671
  "step": 8300
672
  },
673
  {
674
  "epoch": 9.029235880398671,
675
+ "grad_norm": 0.0015209962148219347,
676
  "learning_rate": 3.875968992248062e-06,
677
  "loss": 0.0001,
678
  "step": 8400
679
  },
680
  {
681
  "epoch": 9.04031007751938,
682
+ "grad_norm": 0.0016524152597412467,
683
  "learning_rate": 3.260735818875354e-06,
684
  "loss": 0.0001,
685
  "step": 8500
686
  },
687
  {
688
  "epoch": 9.051384274640089,
689
+ "grad_norm": 0.0015727116260677576,
690
  "learning_rate": 2.6455026455026455e-06,
691
  "loss": 0.0001,
692
  "step": 8600
693
  },
694
  {
695
  "epoch": 9.062458471760797,
696
+ "grad_norm": 0.002659299410879612,
697
  "learning_rate": 2.0302694721299375e-06,
698
  "loss": 0.0001,
699
  "step": 8700
700
  },
701
  {
702
  "epoch": 9.073532668881507,
703
+ "grad_norm": 0.00093603425193578,
704
  "learning_rate": 1.415036298757229e-06,
705
  "loss": 0.0001,
706
  "step": 8800
707
  },
708
  {
709
  "epoch": 9.084606866002215,
710
+ "grad_norm": 0.001421812572516501,
711
  "learning_rate": 7.998031253845208e-07,
712
  "loss": 0.0001,
713
  "step": 8900
714
  },
715
  {
716
  "epoch": 9.095681063122923,
717
+ "grad_norm": 0.001551046734675765,
718
  "learning_rate": 1.8456995201181249e-07,
719
  "loss": 0.0001,
720
  "step": 9000
721
  },
722
  {
723
  "epoch": 9.099003322259136,
724
+ "eval_accuracy": 1.0,
725
+ "eval_loss": 0.002318102866411209,
726
+ "eval_runtime": 200.8195,
727
+ "eval_samples_per_second": 3.999,
728
+ "eval_steps_per_second": 0.503,
729
  "step": 9030
730
  },
731
  {
732
  "epoch": 9.099003322259136,
733
  "step": 9030,
734
  "total_flos": 6.328460909097596e+19,
735
+ "train_loss": 0.18933373247551735,
736
+ "train_runtime": 25561.7958,
737
+ "train_samples_per_second": 2.826,
738
+ "train_steps_per_second": 0.353
739
  },
740
  {
741
  "epoch": 9.099003322259136,
742
+ "eval_accuracy": 0.7774294670846394,
743
+ "eval_loss": 0.7984064817428589,
744
+ "eval_runtime": 302.1425,
745
+ "eval_samples_per_second": 4.223,
746
+ "eval_steps_per_second": 0.53,
747
  "step": 9030
748
  }
749
  ],