Shawon16 commited on
Commit
67345cf
·
verified ·
1 Parent(s): d9a81bf

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -0
  2. test_results.json +6 -0
  3. trainer_state.json +334 -0
all_results.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "accuracy": 0.971875,
3
+ "f1": 0.970599219660647,
4
+ "precision": 0.9766995157620159,
5
+ "recall": 0.971875
6
+ }
test_results.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "accuracy": 0.971875,
3
+ "f1": 0.970599219660647,
4
+ "precision": 0.9766995157620159,
5
+ "recall": 0.971875
6
+ }
trainer_state.json ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/ViViT_LSA64_SR_6/checkpoint-1152",
4
+ "epoch": 8.033333333333333,
5
+ "eval_steps": 500,
6
+ "global_step": 2592,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.011574074074074073,
13
+ "grad_norm": 44.73263931274414,
14
+ "learning_rate": 5.613425925925926e-06,
15
+ "loss": 17.0729,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.023148148148148147,
20
+ "grad_norm": 47.833282470703125,
21
+ "learning_rate": 1.1400462962962963e-05,
22
+ "loss": 15.7817,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.03333333333333333,
27
+ "eval_accuracy": 0.4609375,
28
+ "eval_f1": 0.43072370107622715,
29
+ "eval_loss": 2.8823623657226562,
30
+ "eval_precision": 0.5308730476699226,
31
+ "eval_recall": 0.4609375,
32
+ "eval_runtime": 294.5123,
33
+ "eval_samples_per_second": 0.869,
34
+ "eval_steps_per_second": 0.435,
35
+ "step": 288
36
+ },
37
+ {
38
+ "epoch": 1.0013888888888889,
39
+ "grad_norm": 49.69872283935547,
40
+ "learning_rate": 1.71875e-05,
41
+ "loss": 12.8142,
42
+ "step": 300
43
+ },
44
+ {
45
+ "epoch": 1.012962962962963,
46
+ "grad_norm": 53.043094635009766,
47
+ "learning_rate": 2.297453703703704e-05,
48
+ "loss": 8.4497,
49
+ "step": 400
50
+ },
51
+ {
52
+ "epoch": 1.024537037037037,
53
+ "grad_norm": 30.541913986206055,
54
+ "learning_rate": 2.8761574074074076e-05,
55
+ "loss": 4.7558,
56
+ "step": 500
57
+ },
58
+ {
59
+ "epoch": 1.0333333333333334,
60
+ "eval_accuracy": 0.94921875,
61
+ "eval_f1": 0.9469866071428571,
62
+ "eval_loss": 0.5581986904144287,
63
+ "eval_precision": 0.9585937499999999,
64
+ "eval_recall": 0.94921875,
65
+ "eval_runtime": 294.5325,
66
+ "eval_samples_per_second": 0.869,
67
+ "eval_steps_per_second": 0.435,
68
+ "step": 576
69
+ },
70
+ {
71
+ "epoch": 2.0027777777777778,
72
+ "grad_norm": 16.023212432861328,
73
+ "learning_rate": 3.454861111111111e-05,
74
+ "loss": 2.1506,
75
+ "step": 600
76
+ },
77
+ {
78
+ "epoch": 2.0143518518518517,
79
+ "grad_norm": 9.642929077148438,
80
+ "learning_rate": 4.033564814814815e-05,
81
+ "loss": 0.968,
82
+ "step": 700
83
+ },
84
+ {
85
+ "epoch": 2.025925925925926,
86
+ "grad_norm": 2.5669755935668945,
87
+ "learning_rate": 4.612268518518519e-05,
88
+ "loss": 0.5173,
89
+ "step": 800
90
+ },
91
+ {
92
+ "epoch": 2.033333333333333,
93
+ "eval_accuracy": 0.98046875,
94
+ "eval_f1": 0.9797619047619047,
95
+ "eval_loss": 0.09987345337867737,
96
+ "eval_precision": 0.9854166666666667,
97
+ "eval_recall": 0.98046875,
98
+ "eval_runtime": 293.8692,
99
+ "eval_samples_per_second": 0.871,
100
+ "eval_steps_per_second": 0.436,
101
+ "step": 864
102
+ },
103
+ {
104
+ "epoch": 3.004166666666667,
105
+ "grad_norm": 49.025569915771484,
106
+ "learning_rate": 4.978780864197531e-05,
107
+ "loss": 0.2348,
108
+ "step": 900
109
+ },
110
+ {
111
+ "epoch": 3.015740740740741,
112
+ "grad_norm": 4.306143283843994,
113
+ "learning_rate": 4.9144804526748975e-05,
114
+ "loss": 0.0649,
115
+ "step": 1000
116
+ },
117
+ {
118
+ "epoch": 3.027314814814815,
119
+ "grad_norm": 52.926971435546875,
120
+ "learning_rate": 4.850180041152263e-05,
121
+ "loss": 0.1244,
122
+ "step": 1100
123
+ },
124
+ {
125
+ "epoch": 3.033333333333333,
126
+ "eval_accuracy": 1.0,
127
+ "eval_f1": 1.0,
128
+ "eval_loss": 0.010159317404031754,
129
+ "eval_precision": 1.0,
130
+ "eval_recall": 1.0,
131
+ "eval_runtime": 358.4985,
132
+ "eval_samples_per_second": 0.714,
133
+ "eval_steps_per_second": 0.357,
134
+ "step": 1152
135
+ },
136
+ {
137
+ "epoch": 4.0055555555555555,
138
+ "grad_norm": 0.2205909937620163,
139
+ "learning_rate": 4.78587962962963e-05,
140
+ "loss": 0.0688,
141
+ "step": 1200
142
+ },
143
+ {
144
+ "epoch": 4.01712962962963,
145
+ "grad_norm": 0.06853680312633514,
146
+ "learning_rate": 4.721579218106996e-05,
147
+ "loss": 0.018,
148
+ "step": 1300
149
+ },
150
+ {
151
+ "epoch": 4.0287037037037035,
152
+ "grad_norm": 0.06609813868999481,
153
+ "learning_rate": 4.657278806584363e-05,
154
+ "loss": 0.0043,
155
+ "step": 1400
156
+ },
157
+ {
158
+ "epoch": 4.033333333333333,
159
+ "eval_accuracy": 0.9921875,
160
+ "eval_f1": 0.9920634920634921,
161
+ "eval_loss": 0.026480242609977722,
162
+ "eval_precision": 0.99375,
163
+ "eval_recall": 0.9921875,
164
+ "eval_runtime": 327.6241,
165
+ "eval_samples_per_second": 0.781,
166
+ "eval_steps_per_second": 0.391,
167
+ "step": 1440
168
+ },
169
+ {
170
+ "epoch": 5.006944444444445,
171
+ "grad_norm": 0.09487422555685043,
172
+ "learning_rate": 4.5929783950617286e-05,
173
+ "loss": 0.0178,
174
+ "step": 1500
175
+ },
176
+ {
177
+ "epoch": 5.018518518518518,
178
+ "grad_norm": 0.06308699399232864,
179
+ "learning_rate": 4.5286779835390944e-05,
180
+ "loss": 0.0033,
181
+ "step": 1600
182
+ },
183
+ {
184
+ "epoch": 5.030092592592593,
185
+ "grad_norm": 0.4189186692237854,
186
+ "learning_rate": 4.464377572016461e-05,
187
+ "loss": 0.021,
188
+ "step": 1700
189
+ },
190
+ {
191
+ "epoch": 5.033333333333333,
192
+ "eval_accuracy": 0.9921875,
193
+ "eval_f1": 0.9920634920634921,
194
+ "eval_loss": 0.019995149224996567,
195
+ "eval_precision": 0.99375,
196
+ "eval_recall": 0.9921875,
197
+ "eval_runtime": 314.0145,
198
+ "eval_samples_per_second": 0.815,
199
+ "eval_steps_per_second": 0.408,
200
+ "step": 1728
201
+ },
202
+ {
203
+ "epoch": 6.008333333333334,
204
+ "grad_norm": 0.022702256217598915,
205
+ "learning_rate": 4.4000771604938274e-05,
206
+ "loss": 0.0144,
207
+ "step": 1800
208
+ },
209
+ {
210
+ "epoch": 6.019907407407407,
211
+ "grad_norm": 0.019604824483394623,
212
+ "learning_rate": 4.335776748971194e-05,
213
+ "loss": 0.0016,
214
+ "step": 1900
215
+ },
216
+ {
217
+ "epoch": 6.031481481481482,
218
+ "grad_norm": 0.017143510282039642,
219
+ "learning_rate": 4.27147633744856e-05,
220
+ "loss": 0.0014,
221
+ "step": 2000
222
+ },
223
+ {
224
+ "epoch": 6.033333333333333,
225
+ "eval_accuracy": 1.0,
226
+ "eval_f1": 1.0,
227
+ "eval_loss": 0.0011858218349516392,
228
+ "eval_precision": 1.0,
229
+ "eval_recall": 1.0,
230
+ "eval_runtime": 318.1849,
231
+ "eval_samples_per_second": 0.805,
232
+ "eval_steps_per_second": 0.402,
233
+ "step": 2016
234
+ },
235
+ {
236
+ "epoch": 7.009722222222222,
237
+ "grad_norm": 0.1380627602338791,
238
+ "learning_rate": 4.207175925925926e-05,
239
+ "loss": 0.0017,
240
+ "step": 2100
241
+ },
242
+ {
243
+ "epoch": 7.021296296296296,
244
+ "grad_norm": 0.02778031677007675,
245
+ "learning_rate": 4.142875514403292e-05,
246
+ "loss": 0.0251,
247
+ "step": 2200
248
+ },
249
+ {
250
+ "epoch": 7.032870370370371,
251
+ "grad_norm": 0.0441974513232708,
252
+ "learning_rate": 4.0785751028806585e-05,
253
+ "loss": 0.0414,
254
+ "step": 2300
255
+ },
256
+ {
257
+ "epoch": 7.033333333333333,
258
+ "eval_accuracy": 0.99609375,
259
+ "eval_f1": 0.996031746031746,
260
+ "eval_loss": 0.007463962305337191,
261
+ "eval_precision": 0.996875,
262
+ "eval_recall": 0.99609375,
263
+ "eval_runtime": 362.0912,
264
+ "eval_samples_per_second": 0.707,
265
+ "eval_steps_per_second": 0.354,
266
+ "step": 2304
267
+ },
268
+ {
269
+ "epoch": 8.011111111111111,
270
+ "grad_norm": 0.018177902325987816,
271
+ "learning_rate": 4.0149176954732515e-05,
272
+ "loss": 0.0836,
273
+ "step": 2400
274
+ },
275
+ {
276
+ "epoch": 8.022685185185185,
277
+ "grad_norm": 0.04126301780343056,
278
+ "learning_rate": 3.950617283950617e-05,
279
+ "loss": 0.0386,
280
+ "step": 2500
281
+ },
282
+ {
283
+ "epoch": 8.033333333333333,
284
+ "eval_accuracy": 0.99609375,
285
+ "eval_f1": 0.996031746031746,
286
+ "eval_loss": 0.018984569236636162,
287
+ "eval_precision": 0.996875,
288
+ "eval_recall": 0.99609375,
289
+ "eval_runtime": 313.7776,
290
+ "eval_samples_per_second": 0.816,
291
+ "eval_steps_per_second": 0.408,
292
+ "step": 2592
293
+ },
294
+ {
295
+ "epoch": 8.033333333333333,
296
+ "step": 2592,
297
+ "total_flos": 5.3155188007162085e+19,
298
+ "train_loss": 2.444476819677669,
299
+ "train_runtime": 29887.626,
300
+ "train_samples_per_second": 2.313,
301
+ "train_steps_per_second": 0.289
302
+ }
303
+ ],
304
+ "logging_steps": 100,
305
+ "max_steps": 8640,
306
+ "num_input_tokens_seen": 0,
307
+ "num_train_epochs": 9223372036854775807,
308
+ "save_steps": 500,
309
+ "stateful_callbacks": {
310
+ "EarlyStoppingCallback": {
311
+ "args": {
312
+ "early_stopping_patience": 5,
313
+ "early_stopping_threshold": 0.0
314
+ },
315
+ "attributes": {
316
+ "early_stopping_patience_counter": 5
317
+ }
318
+ },
319
+ "TrainerControl": {
320
+ "args": {
321
+ "should_epoch_stop": false,
322
+ "should_evaluate": false,
323
+ "should_log": false,
324
+ "should_save": true,
325
+ "should_training_stop": true
326
+ },
327
+ "attributes": {}
328
+ }
329
+ },
330
+ "total_flos": 5.3155188007162085e+19,
331
+ "train_batch_size": 2,
332
+ "trial_name": null,
333
+ "trial_params": null
334
+ }