miladfa7 commited on
Commit
026dcf1
·
verified ·
1 Parent(s): cf77e3f

Training in progress, epoch 1

Browse files
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.142857142857143,
3
+ "eval_accuracy": 0.7804878048780488,
4
+ "eval_loss": 1.077329158782959,
5
+ "eval_runtime": 5.6706,
6
+ "eval_samples_per_second": 14.461,
7
+ "eval_steps_per_second": 3.703
8
+ }
confusion_matrix.jpg CHANGED
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59292649f7eb3d4ee8f49a34dd43ec407f534a3f98d4ef95687ba5900ee5707a
3
  size 344952716
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04c7b5d0cb1d714a52696a8ff0f6dc1293167be17242fb1b444307772c973ad9
3
  size 344952716
runs/Aug11_13-22-52_prod3/events.out.tfevents.1754890678.prod3.730045.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a72647c4ece89c4638cf84c0ef336f15a835e947a7591c59a84e5e9a6945ec16
3
+ size 411
runs/Aug11_13-45-55_prod3/events.out.tfevents.1754891163.prod3.753305.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c2c7e858cddad8e172f12987e7e173a48429d302a8207374e46ace10fec806
3
+ size 8424
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.142857142857143,
3
+ "eval_accuracy": 0.7804878048780488,
4
+ "eval_loss": 1.077329158782959,
5
+ "eval_runtime": 5.6706,
6
+ "eval_samples_per_second": 14.461,
7
+ "eval_steps_per_second": 3.703
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 230,
3
+ "best_metric": 0.7804878048780488,
4
+ "best_model_checkpoint": "./Models/Matcha_clips_224_fintuned_5/checkpoint-230",
5
+ "epoch": 6.142857142857143,
6
+ "eval_steps": 500,
7
+ "global_step": 805,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.024844720496894408,
14
+ "grad_norm": 11.596962928771973,
15
+ "learning_rate": 1.1728395061728396e-05,
16
+ "loss": 1.6871,
17
+ "step": 20
18
+ },
19
+ {
20
+ "epoch": 0.049689440993788817,
21
+ "grad_norm": 15.900735855102539,
22
+ "learning_rate": 2.4074074074074074e-05,
23
+ "loss": 0.9615,
24
+ "step": 40
25
+ },
26
+ {
27
+ "epoch": 0.07453416149068323,
28
+ "grad_norm": 4.670846939086914,
29
+ "learning_rate": 3.6419753086419754e-05,
30
+ "loss": 0.5851,
31
+ "step": 60
32
+ },
33
+ {
34
+ "epoch": 0.09937888198757763,
35
+ "grad_norm": 6.628495216369629,
36
+ "learning_rate": 4.876543209876544e-05,
37
+ "loss": 0.3508,
38
+ "step": 80
39
+ },
40
+ {
41
+ "epoch": 0.12422360248447205,
42
+ "grad_norm": 10.711517333984375,
43
+ "learning_rate": 4.875690607734807e-05,
44
+ "loss": 0.3922,
45
+ "step": 100
46
+ },
47
+ {
48
+ "epoch": 0.14285714285714285,
49
+ "eval_accuracy": 0.7682926829268293,
50
+ "eval_loss": 0.996225118637085,
51
+ "eval_runtime": 6.4019,
52
+ "eval_samples_per_second": 12.809,
53
+ "eval_steps_per_second": 3.28,
54
+ "step": 115
55
+ },
56
+ {
57
+ "epoch": 1.0062111801242235,
58
+ "grad_norm": 0.29712462425231934,
59
+ "learning_rate": 4.737569060773481e-05,
60
+ "loss": 0.1851,
61
+ "step": 120
62
+ },
63
+ {
64
+ "epoch": 1.031055900621118,
65
+ "grad_norm": 3.168236494064331,
66
+ "learning_rate": 4.599447513812155e-05,
67
+ "loss": 0.1939,
68
+ "step": 140
69
+ },
70
+ {
71
+ "epoch": 1.0559006211180124,
72
+ "grad_norm": 0.13498322665691376,
73
+ "learning_rate": 4.461325966850829e-05,
74
+ "loss": 0.3082,
75
+ "step": 160
76
+ },
77
+ {
78
+ "epoch": 1.0807453416149069,
79
+ "grad_norm": 0.22494107484817505,
80
+ "learning_rate": 4.323204419889503e-05,
81
+ "loss": 0.0559,
82
+ "step": 180
83
+ },
84
+ {
85
+ "epoch": 1.1055900621118013,
86
+ "grad_norm": 0.5178938508033752,
87
+ "learning_rate": 4.1850828729281773e-05,
88
+ "loss": 0.2148,
89
+ "step": 200
90
+ },
91
+ {
92
+ "epoch": 1.1304347826086956,
93
+ "grad_norm": 0.1849067360162735,
94
+ "learning_rate": 4.046961325966851e-05,
95
+ "loss": 0.208,
96
+ "step": 220
97
+ },
98
+ {
99
+ "epoch": 1.1428571428571428,
100
+ "eval_accuracy": 0.7804878048780488,
101
+ "eval_loss": 1.0773290395736694,
102
+ "eval_runtime": 6.2229,
103
+ "eval_samples_per_second": 13.177,
104
+ "eval_steps_per_second": 3.375,
105
+ "step": 230
106
+ },
107
+ {
108
+ "epoch": 2.012422360248447,
109
+ "grad_norm": 2.8872368335723877,
110
+ "learning_rate": 3.9088397790055245e-05,
111
+ "loss": 0.1527,
112
+ "step": 240
113
+ },
114
+ {
115
+ "epoch": 2.0372670807453415,
116
+ "grad_norm": 0.04912768676877022,
117
+ "learning_rate": 3.770718232044199e-05,
118
+ "loss": 0.0707,
119
+ "step": 260
120
+ },
121
+ {
122
+ "epoch": 2.062111801242236,
123
+ "grad_norm": 0.0381060354411602,
124
+ "learning_rate": 3.632596685082873e-05,
125
+ "loss": 0.0086,
126
+ "step": 280
127
+ },
128
+ {
129
+ "epoch": 2.0869565217391304,
130
+ "grad_norm": 42.93180465698242,
131
+ "learning_rate": 3.4944751381215476e-05,
132
+ "loss": 0.022,
133
+ "step": 300
134
+ },
135
+ {
136
+ "epoch": 2.111801242236025,
137
+ "grad_norm": 9.570910453796387,
138
+ "learning_rate": 3.3563535911602215e-05,
139
+ "loss": 0.0579,
140
+ "step": 320
141
+ },
142
+ {
143
+ "epoch": 2.1366459627329193,
144
+ "grad_norm": 0.1904195100069046,
145
+ "learning_rate": 3.218232044198895e-05,
146
+ "loss": 0.0868,
147
+ "step": 340
148
+ },
149
+ {
150
+ "epoch": 2.142857142857143,
151
+ "eval_accuracy": 0.7317073170731707,
152
+ "eval_loss": 1.2267667055130005,
153
+ "eval_runtime": 6.118,
154
+ "eval_samples_per_second": 13.403,
155
+ "eval_steps_per_second": 3.432,
156
+ "step": 345
157
+ },
158
+ {
159
+ "epoch": 3.018633540372671,
160
+ "grad_norm": 0.011533539742231369,
161
+ "learning_rate": 3.0801104972375693e-05,
162
+ "loss": 0.0023,
163
+ "step": 360
164
+ },
165
+ {
166
+ "epoch": 3.0434782608695654,
167
+ "grad_norm": 0.018733657896518707,
168
+ "learning_rate": 2.9419889502762433e-05,
169
+ "loss": 0.0034,
170
+ "step": 380
171
+ },
172
+ {
173
+ "epoch": 3.0683229813664594,
174
+ "grad_norm": 0.02364545315504074,
175
+ "learning_rate": 2.8038674033149172e-05,
176
+ "loss": 0.0041,
177
+ "step": 400
178
+ },
179
+ {
180
+ "epoch": 3.093167701863354,
181
+ "grad_norm": 0.011309165507555008,
182
+ "learning_rate": 2.6657458563535914e-05,
183
+ "loss": 0.0014,
184
+ "step": 420
185
+ },
186
+ {
187
+ "epoch": 3.1180124223602483,
188
+ "grad_norm": 0.01748155988752842,
189
+ "learning_rate": 2.5276243093922653e-05,
190
+ "loss": 0.0042,
191
+ "step": 440
192
+ },
193
+ {
194
+ "epoch": 3.142857142857143,
195
+ "grad_norm": 0.025904180482029915,
196
+ "learning_rate": 2.3895027624309393e-05,
197
+ "loss": 0.0032,
198
+ "step": 460
199
+ },
200
+ {
201
+ "epoch": 3.142857142857143,
202
+ "eval_accuracy": 0.7317073170731707,
203
+ "eval_loss": 1.3057693243026733,
204
+ "eval_runtime": 6.2762,
205
+ "eval_samples_per_second": 13.065,
206
+ "eval_steps_per_second": 3.346,
207
+ "step": 460
208
+ },
209
+ {
210
+ "epoch": 4.024844720496894,
211
+ "grad_norm": 0.04104848951101303,
212
+ "learning_rate": 2.2513812154696135e-05,
213
+ "loss": 0.0012,
214
+ "step": 480
215
+ },
216
+ {
217
+ "epoch": 4.049689440993789,
218
+ "grad_norm": 0.013835583813488483,
219
+ "learning_rate": 2.1132596685082874e-05,
220
+ "loss": 0.0009,
221
+ "step": 500
222
+ },
223
+ {
224
+ "epoch": 4.074534161490683,
225
+ "grad_norm": 0.016223575919866562,
226
+ "learning_rate": 1.9751381215469613e-05,
227
+ "loss": 0.001,
228
+ "step": 520
229
+ },
230
+ {
231
+ "epoch": 4.099378881987578,
232
+ "grad_norm": 0.010322828777134418,
233
+ "learning_rate": 1.8370165745856356e-05,
234
+ "loss": 0.0008,
235
+ "step": 540
236
+ },
237
+ {
238
+ "epoch": 4.124223602484472,
239
+ "grad_norm": 0.012116617523133755,
240
+ "learning_rate": 1.6988950276243095e-05,
241
+ "loss": 0.0014,
242
+ "step": 560
243
+ },
244
+ {
245
+ "epoch": 4.142857142857143,
246
+ "eval_accuracy": 0.7317073170731707,
247
+ "eval_loss": 1.3161958456039429,
248
+ "eval_runtime": 6.3153,
249
+ "eval_samples_per_second": 12.984,
250
+ "eval_steps_per_second": 3.325,
251
+ "step": 575
252
+ },
253
+ {
254
+ "epoch": 5.0062111801242235,
255
+ "grad_norm": 0.012491249479353428,
256
+ "learning_rate": 1.5607734806629834e-05,
257
+ "loss": 0.0008,
258
+ "step": 580
259
+ },
260
+ {
261
+ "epoch": 5.031055900621118,
262
+ "grad_norm": 0.040033698081970215,
263
+ "learning_rate": 1.4226519337016575e-05,
264
+ "loss": 0.0007,
265
+ "step": 600
266
+ },
267
+ {
268
+ "epoch": 5.055900621118012,
269
+ "grad_norm": 0.010547437705099583,
270
+ "learning_rate": 1.2845303867403316e-05,
271
+ "loss": 0.0006,
272
+ "step": 620
273
+ },
274
+ {
275
+ "epoch": 5.080745341614906,
276
+ "grad_norm": 0.016931477934122086,
277
+ "learning_rate": 1.1464088397790055e-05,
278
+ "loss": 0.0009,
279
+ "step": 640
280
+ },
281
+ {
282
+ "epoch": 5.105590062111801,
283
+ "grad_norm": 0.021733667701482773,
284
+ "learning_rate": 1.0082872928176797e-05,
285
+ "loss": 0.0006,
286
+ "step": 660
287
+ },
288
+ {
289
+ "epoch": 5.130434782608695,
290
+ "grad_norm": 0.053632985800504684,
291
+ "learning_rate": 8.701657458563537e-06,
292
+ "loss": 0.0007,
293
+ "step": 680
294
+ },
295
+ {
296
+ "epoch": 5.142857142857143,
297
+ "eval_accuracy": 0.7317073170731707,
298
+ "eval_loss": 1.3207590579986572,
299
+ "eval_runtime": 6.2982,
300
+ "eval_samples_per_second": 13.02,
301
+ "eval_steps_per_second": 3.334,
302
+ "step": 690
303
+ },
304
+ {
305
+ "epoch": 6.012422360248447,
306
+ "grad_norm": 0.01116255484521389,
307
+ "learning_rate": 7.320441988950276e-06,
308
+ "loss": 0.0006,
309
+ "step": 700
310
+ },
311
+ {
312
+ "epoch": 6.037267080745342,
313
+ "grad_norm": 0.01857682503759861,
314
+ "learning_rate": 5.939226519337017e-06,
315
+ "loss": 0.0006,
316
+ "step": 720
317
+ },
318
+ {
319
+ "epoch": 6.062111801242236,
320
+ "grad_norm": 0.010129285044968128,
321
+ "learning_rate": 4.5580110497237574e-06,
322
+ "loss": 0.0006,
323
+ "step": 740
324
+ },
325
+ {
326
+ "epoch": 6.086956521739131,
327
+ "grad_norm": 0.007020850665867329,
328
+ "learning_rate": 3.1767955801104974e-06,
329
+ "loss": 0.0006,
330
+ "step": 760
331
+ },
332
+ {
333
+ "epoch": 6.111801242236025,
334
+ "grad_norm": 0.009550134651362896,
335
+ "learning_rate": 1.7955801104972376e-06,
336
+ "loss": 0.0006,
337
+ "step": 780
338
+ },
339
+ {
340
+ "epoch": 6.136645962732919,
341
+ "grad_norm": 0.013940293341875076,
342
+ "learning_rate": 4.143646408839779e-07,
343
+ "loss": 0.0007,
344
+ "step": 800
345
+ },
346
+ {
347
+ "epoch": 6.142857142857143,
348
+ "eval_accuracy": 0.7317073170731707,
349
+ "eval_loss": 1.3218822479248047,
350
+ "eval_runtime": 6.2549,
351
+ "eval_samples_per_second": 13.11,
352
+ "eval_steps_per_second": 3.357,
353
+ "step": 805
354
+ },
355
+ {
356
+ "epoch": 6.142857142857143,
357
+ "step": 805,
358
+ "total_flos": 4.012506890622075e+18,
359
+ "train_loss": 0.1384738716829036,
360
+ "train_runtime": 848.1252,
361
+ "train_samples_per_second": 3.797,
362
+ "train_steps_per_second": 0.949
363
+ },
364
+ {
365
+ "epoch": 6.142857142857143,
366
+ "eval_accuracy": 0.7804878048780488,
367
+ "eval_loss": 1.077329158782959,
368
+ "eval_runtime": 5.6706,
369
+ "eval_samples_per_second": 14.461,
370
+ "eval_steps_per_second": 3.703,
371
+ "step": 805
372
+ }
373
+ ],
374
+ "logging_steps": 20,
375
+ "max_steps": 805,
376
+ "num_input_tokens_seen": 0,
377
+ "num_train_epochs": 9223372036854775807,
378
+ "save_steps": 500,
379
+ "stateful_callbacks": {
380
+ "TrainerControl": {
381
+ "args": {
382
+ "should_epoch_stop": false,
383
+ "should_evaluate": false,
384
+ "should_log": false,
385
+ "should_save": true,
386
+ "should_training_stop": true
387
+ },
388
+ "attributes": {}
389
+ }
390
+ },
391
+ "total_flos": 4.012506890622075e+18,
392
+ "train_batch_size": 4,
393
+ "trial_name": null,
394
+ "trial_params": null
395
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81c007e31d6c153b7ddf44f7a4662355c7cec244c581e5ea1720ae0f8fc18169
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acfa06c9a4d7a0d20f474dc00057d5fe4ef6648d6c36d35f2ad4dfe9c51a8a7b
3
  size 5368