miladfa7 commited on
Commit
1fc0236
·
verified ·
1 Parent(s): 895f2d1

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 6.142857142857143,
3
- "eval_accuracy": 0.7804878048780488,
4
- "eval_loss": 1.077329158782959,
5
- "eval_runtime": 5.6706,
6
- "eval_samples_per_second": 14.461,
7
- "eval_steps_per_second": 3.703
8
  }
 
1
  {
2
  "epoch": 6.142857142857143,
3
+ "eval_accuracy": 0.8028169014084507,
4
+ "eval_loss": 1.103053331375122,
5
+ "eval_runtime": 4.9334,
6
+ "eval_samples_per_second": 14.392,
7
+ "eval_steps_per_second": 3.649
8
  }
confusion_matrix.jpg CHANGED
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59a9352d6e9e3f904cadf0c8ec4c5f4d9e71257187590e4be3a56269450864ca
3
  size 344952716
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f247e39984064745b1af387be3ee18ee964433ae6589b206412a2a943db465f
3
  size 344952716
runs/Aug11_13-45-55_prod3/events.out.tfevents.1754892098.prod3.753305.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a5d08465f532175aa78449cfb120674ef931279786f1bb4e3ee3678fe60c7b
3
+ size 411
runs/Aug11_14-13-04_prod3/events.out.tfevents.1754892793.prod3.788163.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1930c512d73dd86d2a2093b52f45f1ecce9826625f4c33bbee6eebe2f0e986d4
3
+ size 5487
runs/Aug11_14-14-16_prod3/events.out.tfevents.1754892863.prod3.789915.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32787120c0ace084a08491616cd320f007d6602d74558f7b38598207c052e965
3
+ size 6839
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 6.142857142857143,
3
- "eval_accuracy": 0.7804878048780488,
4
- "eval_loss": 1.077329158782959,
5
- "eval_runtime": 5.6706,
6
- "eval_samples_per_second": 14.461,
7
- "eval_steps_per_second": 3.703
8
  }
 
1
  {
2
  "epoch": 6.142857142857143,
3
+ "eval_accuracy": 0.8028169014084507,
4
+ "eval_loss": 1.103053331375122,
5
+ "eval_runtime": 4.9334,
6
+ "eval_samples_per_second": 14.392,
7
+ "eval_steps_per_second": 3.649
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 230,
3
- "best_metric": 0.7804878048780488,
4
- "best_model_checkpoint": "./Models/Matcha_clips_224_fintuned_5/checkpoint-230",
5
  "epoch": 6.142857142857143,
6
  "eval_steps": 500,
7
  "global_step": 805,
@@ -11,363 +11,363 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.024844720496894408,
14
- "grad_norm": 11.596962928771973,
15
  "learning_rate": 1.1728395061728396e-05,
16
- "loss": 1.6871,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.049689440993788817,
21
- "grad_norm": 15.900735855102539,
22
  "learning_rate": 2.4074074074074074e-05,
23
- "loss": 0.9615,
24
  "step": 40
25
  },
26
  {
27
  "epoch": 0.07453416149068323,
28
- "grad_norm": 4.670846939086914,
29
  "learning_rate": 3.6419753086419754e-05,
30
- "loss": 0.5851,
31
  "step": 60
32
  },
33
  {
34
  "epoch": 0.09937888198757763,
35
- "grad_norm": 6.628495216369629,
36
  "learning_rate": 4.876543209876544e-05,
37
- "loss": 0.3508,
38
  "step": 80
39
  },
40
  {
41
  "epoch": 0.12422360248447205,
42
- "grad_norm": 10.711517333984375,
43
  "learning_rate": 4.875690607734807e-05,
44
- "loss": 0.3922,
45
  "step": 100
46
  },
47
  {
48
  "epoch": 0.14285714285714285,
49
- "eval_accuracy": 0.7682926829268293,
50
- "eval_loss": 0.996225118637085,
51
- "eval_runtime": 6.4019,
52
- "eval_samples_per_second": 12.809,
53
- "eval_steps_per_second": 3.28,
54
  "step": 115
55
  },
56
  {
57
  "epoch": 1.0062111801242235,
58
- "grad_norm": 0.29712462425231934,
59
  "learning_rate": 4.737569060773481e-05,
60
- "loss": 0.1851,
61
  "step": 120
62
  },
63
  {
64
  "epoch": 1.031055900621118,
65
- "grad_norm": 3.168236494064331,
66
  "learning_rate": 4.599447513812155e-05,
67
- "loss": 0.1939,
68
  "step": 140
69
  },
70
  {
71
  "epoch": 1.0559006211180124,
72
- "grad_norm": 0.13498322665691376,
73
  "learning_rate": 4.461325966850829e-05,
74
- "loss": 0.3082,
75
  "step": 160
76
  },
77
  {
78
  "epoch": 1.0807453416149069,
79
- "grad_norm": 0.22494107484817505,
80
  "learning_rate": 4.323204419889503e-05,
81
- "loss": 0.0559,
82
  "step": 180
83
  },
84
  {
85
  "epoch": 1.1055900621118013,
86
- "grad_norm": 0.5178938508033752,
87
  "learning_rate": 4.1850828729281773e-05,
88
- "loss": 0.2148,
89
  "step": 200
90
  },
91
  {
92
  "epoch": 1.1304347826086956,
93
- "grad_norm": 0.1849067360162735,
94
  "learning_rate": 4.046961325966851e-05,
95
- "loss": 0.208,
96
  "step": 220
97
  },
98
  {
99
  "epoch": 1.1428571428571428,
100
- "eval_accuracy": 0.7804878048780488,
101
- "eval_loss": 1.0773290395736694,
102
- "eval_runtime": 6.2229,
103
- "eval_samples_per_second": 13.177,
104
- "eval_steps_per_second": 3.375,
105
  "step": 230
106
  },
107
  {
108
  "epoch": 2.012422360248447,
109
- "grad_norm": 2.8872368335723877,
110
  "learning_rate": 3.9088397790055245e-05,
111
- "loss": 0.1527,
112
  "step": 240
113
  },
114
  {
115
  "epoch": 2.0372670807453415,
116
- "grad_norm": 0.04912768676877022,
117
  "learning_rate": 3.770718232044199e-05,
118
- "loss": 0.0707,
119
  "step": 260
120
  },
121
  {
122
  "epoch": 2.062111801242236,
123
- "grad_norm": 0.0381060354411602,
124
  "learning_rate": 3.632596685082873e-05,
125
- "loss": 0.0086,
126
  "step": 280
127
  },
128
  {
129
  "epoch": 2.0869565217391304,
130
- "grad_norm": 42.93180465698242,
131
  "learning_rate": 3.4944751381215476e-05,
132
- "loss": 0.022,
133
  "step": 300
134
  },
135
  {
136
  "epoch": 2.111801242236025,
137
- "grad_norm": 9.570910453796387,
138
  "learning_rate": 3.3563535911602215e-05,
139
- "loss": 0.0579,
140
  "step": 320
141
  },
142
  {
143
  "epoch": 2.1366459627329193,
144
- "grad_norm": 0.1904195100069046,
145
  "learning_rate": 3.218232044198895e-05,
146
- "loss": 0.0868,
147
  "step": 340
148
  },
149
  {
150
  "epoch": 2.142857142857143,
151
- "eval_accuracy": 0.7317073170731707,
152
- "eval_loss": 1.2267667055130005,
153
- "eval_runtime": 6.118,
154
- "eval_samples_per_second": 13.403,
155
- "eval_steps_per_second": 3.432,
156
  "step": 345
157
  },
158
  {
159
  "epoch": 3.018633540372671,
160
- "grad_norm": 0.011533539742231369,
161
  "learning_rate": 3.0801104972375693e-05,
162
- "loss": 0.0023,
163
  "step": 360
164
  },
165
  {
166
  "epoch": 3.0434782608695654,
167
- "grad_norm": 0.018733657896518707,
168
  "learning_rate": 2.9419889502762433e-05,
169
- "loss": 0.0034,
170
  "step": 380
171
  },
172
  {
173
  "epoch": 3.0683229813664594,
174
- "grad_norm": 0.02364545315504074,
175
  "learning_rate": 2.8038674033149172e-05,
176
- "loss": 0.0041,
177
  "step": 400
178
  },
179
  {
180
  "epoch": 3.093167701863354,
181
- "grad_norm": 0.011309165507555008,
182
  "learning_rate": 2.6657458563535914e-05,
183
- "loss": 0.0014,
184
  "step": 420
185
  },
186
  {
187
  "epoch": 3.1180124223602483,
188
- "grad_norm": 0.01748155988752842,
189
  "learning_rate": 2.5276243093922653e-05,
190
- "loss": 0.0042,
191
  "step": 440
192
  },
193
  {
194
  "epoch": 3.142857142857143,
195
- "grad_norm": 0.025904180482029915,
196
  "learning_rate": 2.3895027624309393e-05,
197
- "loss": 0.0032,
198
  "step": 460
199
  },
200
  {
201
  "epoch": 3.142857142857143,
202
- "eval_accuracy": 0.7317073170731707,
203
- "eval_loss": 1.3057693243026733,
204
- "eval_runtime": 6.2762,
205
- "eval_samples_per_second": 13.065,
206
- "eval_steps_per_second": 3.346,
207
  "step": 460
208
  },
209
  {
210
  "epoch": 4.024844720496894,
211
- "grad_norm": 0.04104848951101303,
212
  "learning_rate": 2.2513812154696135e-05,
213
  "loss": 0.0012,
214
  "step": 480
215
  },
216
  {
217
  "epoch": 4.049689440993789,
218
- "grad_norm": 0.013835583813488483,
219
  "learning_rate": 2.1132596685082874e-05,
220
  "loss": 0.0009,
221
  "step": 500
222
  },
223
  {
224
  "epoch": 4.074534161490683,
225
- "grad_norm": 0.016223575919866562,
226
  "learning_rate": 1.9751381215469613e-05,
227
- "loss": 0.001,
228
  "step": 520
229
  },
230
  {
231
  "epoch": 4.099378881987578,
232
- "grad_norm": 0.010322828777134418,
233
  "learning_rate": 1.8370165745856356e-05,
234
- "loss": 0.0008,
235
  "step": 540
236
  },
237
  {
238
  "epoch": 4.124223602484472,
239
- "grad_norm": 0.012116617523133755,
240
  "learning_rate": 1.6988950276243095e-05,
241
- "loss": 0.0014,
242
  "step": 560
243
  },
244
  {
245
  "epoch": 4.142857142857143,
246
- "eval_accuracy": 0.7317073170731707,
247
- "eval_loss": 1.3161958456039429,
248
- "eval_runtime": 6.3153,
249
- "eval_samples_per_second": 12.984,
250
- "eval_steps_per_second": 3.325,
251
  "step": 575
252
  },
253
  {
254
  "epoch": 5.0062111801242235,
255
- "grad_norm": 0.012491249479353428,
256
  "learning_rate": 1.5607734806629834e-05,
257
- "loss": 0.0008,
258
  "step": 580
259
  },
260
  {
261
  "epoch": 5.031055900621118,
262
- "grad_norm": 0.040033698081970215,
263
  "learning_rate": 1.4226519337016575e-05,
264
  "loss": 0.0007,
265
  "step": 600
266
  },
267
  {
268
  "epoch": 5.055900621118012,
269
- "grad_norm": 0.010547437705099583,
270
  "learning_rate": 1.2845303867403316e-05,
271
- "loss": 0.0006,
272
  "step": 620
273
  },
274
  {
275
  "epoch": 5.080745341614906,
276
- "grad_norm": 0.016931477934122086,
277
  "learning_rate": 1.1464088397790055e-05,
278
- "loss": 0.0009,
279
  "step": 640
280
  },
281
  {
282
  "epoch": 5.105590062111801,
283
- "grad_norm": 0.021733667701482773,
284
  "learning_rate": 1.0082872928176797e-05,
285
  "loss": 0.0006,
286
  "step": 660
287
  },
288
  {
289
  "epoch": 5.130434782608695,
290
- "grad_norm": 0.053632985800504684,
291
  "learning_rate": 8.701657458563537e-06,
292
- "loss": 0.0007,
293
  "step": 680
294
  },
295
  {
296
  "epoch": 5.142857142857143,
297
- "eval_accuracy": 0.7317073170731707,
298
- "eval_loss": 1.3207590579986572,
299
- "eval_runtime": 6.2982,
300
- "eval_samples_per_second": 13.02,
301
- "eval_steps_per_second": 3.334,
302
  "step": 690
303
  },
304
  {
305
  "epoch": 6.012422360248447,
306
- "grad_norm": 0.01116255484521389,
307
  "learning_rate": 7.320441988950276e-06,
308
  "loss": 0.0006,
309
  "step": 700
310
  },
311
  {
312
  "epoch": 6.037267080745342,
313
- "grad_norm": 0.01857682503759861,
314
  "learning_rate": 5.939226519337017e-06,
315
  "loss": 0.0006,
316
  "step": 720
317
  },
318
  {
319
  "epoch": 6.062111801242236,
320
- "grad_norm": 0.010129285044968128,
321
  "learning_rate": 4.5580110497237574e-06,
322
  "loss": 0.0006,
323
  "step": 740
324
  },
325
  {
326
  "epoch": 6.086956521739131,
327
- "grad_norm": 0.007020850665867329,
328
  "learning_rate": 3.1767955801104974e-06,
329
  "loss": 0.0006,
330
  "step": 760
331
  },
332
  {
333
  "epoch": 6.111801242236025,
334
- "grad_norm": 0.009550134651362896,
335
  "learning_rate": 1.7955801104972376e-06,
336
  "loss": 0.0006,
337
  "step": 780
338
  },
339
  {
340
  "epoch": 6.136645962732919,
341
- "grad_norm": 0.013940293341875076,
342
  "learning_rate": 4.143646408839779e-07,
343
  "loss": 0.0007,
344
  "step": 800
345
  },
346
  {
347
  "epoch": 6.142857142857143,
348
- "eval_accuracy": 0.7317073170731707,
349
- "eval_loss": 1.3218822479248047,
350
- "eval_runtime": 6.2549,
351
- "eval_samples_per_second": 13.11,
352
- "eval_steps_per_second": 3.357,
353
  "step": 805
354
  },
355
  {
356
  "epoch": 6.142857142857143,
357
  "step": 805,
358
  "total_flos": 4.012506890622075e+18,
359
- "train_loss": 0.1384738716829036,
360
- "train_runtime": 848.1252,
361
- "train_samples_per_second": 3.797,
362
- "train_steps_per_second": 0.949
363
  },
364
  {
365
  "epoch": 6.142857142857143,
366
- "eval_accuracy": 0.7804878048780488,
367
- "eval_loss": 1.077329158782959,
368
- "eval_runtime": 5.6706,
369
- "eval_samples_per_second": 14.461,
370
- "eval_steps_per_second": 3.703,
371
  "step": 805
372
  }
373
  ],
 
1
  {
2
+ "best_global_step": 460,
3
+ "best_metric": 0.782051282051282,
4
+ "best_model_checkpoint": "./Models/Matcha_clips_224_fintuned_5/checkpoint-460",
5
  "epoch": 6.142857142857143,
6
  "eval_steps": 500,
7
  "global_step": 805,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.024844720496894408,
14
+ "grad_norm": 11.83416748046875,
15
  "learning_rate": 1.1728395061728396e-05,
16
+ "loss": 1.7545,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.049689440993788817,
21
+ "grad_norm": 14.771072387695312,
22
  "learning_rate": 2.4074074074074074e-05,
23
+ "loss": 1.0393,
24
  "step": 40
25
  },
26
  {
27
  "epoch": 0.07453416149068323,
28
+ "grad_norm": 3.698537826538086,
29
  "learning_rate": 3.6419753086419754e-05,
30
+ "loss": 0.6203,
31
  "step": 60
32
  },
33
  {
34
  "epoch": 0.09937888198757763,
35
+ "grad_norm": 1.2401633262634277,
36
  "learning_rate": 4.876543209876544e-05,
37
+ "loss": 0.3541,
38
  "step": 80
39
  },
40
  {
41
  "epoch": 0.12422360248447205,
42
+ "grad_norm": 27.550607681274414,
43
  "learning_rate": 4.875690607734807e-05,
44
+ "loss": 0.4705,
45
  "step": 100
46
  },
47
  {
48
  "epoch": 0.14285714285714285,
49
+ "eval_accuracy": 0.7317073170731707,
50
+ "eval_loss": 1.0046272277832031,
51
+ "eval_runtime": 6.2101,
52
+ "eval_samples_per_second": 13.204,
53
+ "eval_steps_per_second": 3.382,
54
  "step": 115
55
  },
56
  {
57
  "epoch": 1.0062111801242235,
58
+ "grad_norm": 0.25678861141204834,
59
  "learning_rate": 4.737569060773481e-05,
60
+ "loss": 0.224,
61
  "step": 120
62
  },
63
  {
64
  "epoch": 1.031055900621118,
65
+ "grad_norm": 0.9883336424827576,
66
  "learning_rate": 4.599447513812155e-05,
67
+ "loss": 0.1258,
68
  "step": 140
69
  },
70
  {
71
  "epoch": 1.0559006211180124,
72
+ "grad_norm": 0.18707028031349182,
73
  "learning_rate": 4.461325966850829e-05,
74
+ "loss": 0.2609,
75
  "step": 160
76
  },
77
  {
78
  "epoch": 1.0807453416149069,
79
+ "grad_norm": 0.08376210927963257,
80
  "learning_rate": 4.323204419889503e-05,
81
+ "loss": 0.049,
82
  "step": 180
83
  },
84
  {
85
  "epoch": 1.1055900621118013,
86
+ "grad_norm": 0.08791894465684891,
87
  "learning_rate": 4.1850828729281773e-05,
88
+ "loss": 0.1955,
89
  "step": 200
90
  },
91
  {
92
  "epoch": 1.1304347826086956,
93
+ "grad_norm": 0.12012392282485962,
94
  "learning_rate": 4.046961325966851e-05,
95
+ "loss": 0.1493,
96
  "step": 220
97
  },
98
  {
99
  "epoch": 1.1428571428571428,
100
+ "eval_accuracy": 0.7560975609756098,
101
+ "eval_loss": 1.226146936416626,
102
+ "eval_runtime": 6.1393,
103
+ "eval_samples_per_second": 13.357,
104
+ "eval_steps_per_second": 3.421,
105
  "step": 230
106
  },
107
  {
108
  "epoch": 2.012422360248447,
109
+ "grad_norm": 0.09424888342618942,
110
  "learning_rate": 3.9088397790055245e-05,
111
+ "loss": 0.1158,
112
  "step": 240
113
  },
114
  {
115
  "epoch": 2.0372670807453415,
116
+ "grad_norm": 0.05555570125579834,
117
  "learning_rate": 3.770718232044199e-05,
118
+ "loss": 0.0235,
119
  "step": 260
120
  },
121
  {
122
  "epoch": 2.062111801242236,
123
+ "grad_norm": 0.02632896415889263,
124
  "learning_rate": 3.632596685082873e-05,
125
+ "loss": 0.0072,
126
  "step": 280
127
  },
128
  {
129
  "epoch": 2.0869565217391304,
130
+ "grad_norm": 1.1457473039627075,
131
  "learning_rate": 3.4944751381215476e-05,
132
+ "loss": 0.0063,
133
  "step": 300
134
  },
135
  {
136
  "epoch": 2.111801242236025,
137
+ "grad_norm": 1.7765306234359741,
138
  "learning_rate": 3.3563535911602215e-05,
139
+ "loss": 0.0898,
140
  "step": 320
141
  },
142
  {
143
  "epoch": 2.1366459627329193,
144
+ "grad_norm": 0.04399551451206207,
145
  "learning_rate": 3.218232044198895e-05,
146
+ "loss": 0.0965,
147
  "step": 340
148
  },
149
  {
150
  "epoch": 2.142857142857143,
151
+ "eval_accuracy": 0.7692307692307693,
152
+ "eval_loss": 1.1611318588256836,
153
+ "eval_runtime": 5.888,
154
+ "eval_samples_per_second": 13.247,
155
+ "eval_steps_per_second": 3.397,
156
  "step": 345
157
  },
158
  {
159
  "epoch": 3.018633540372671,
160
+ "grad_norm": 0.022856663912534714,
161
  "learning_rate": 3.0801104972375693e-05,
162
+ "loss": 0.0181,
163
  "step": 360
164
  },
165
  {
166
  "epoch": 3.0434782608695654,
167
+ "grad_norm": 0.019706225022673607,
168
  "learning_rate": 2.9419889502762433e-05,
169
+ "loss": 0.002,
170
  "step": 380
171
  },
172
  {
173
  "epoch": 3.0683229813664594,
174
+ "grad_norm": 0.028400592505931854,
175
  "learning_rate": 2.8038674033149172e-05,
176
+ "loss": 0.0045,
177
  "step": 400
178
  },
179
  {
180
  "epoch": 3.093167701863354,
181
+ "grad_norm": 0.01030923891812563,
182
  "learning_rate": 2.6657458563535914e-05,
183
+ "loss": 0.0011,
184
  "step": 420
185
  },
186
  {
187
  "epoch": 3.1180124223602483,
188
+ "grad_norm": 0.10795993357896805,
189
  "learning_rate": 2.5276243093922653e-05,
190
+ "loss": 0.1063,
191
  "step": 440
192
  },
193
  {
194
  "epoch": 3.142857142857143,
195
+ "grad_norm": 0.020210983231663704,
196
  "learning_rate": 2.3895027624309393e-05,
197
+ "loss": 0.0018,
198
  "step": 460
199
  },
200
  {
201
  "epoch": 3.142857142857143,
202
+ "eval_accuracy": 0.782051282051282,
203
+ "eval_loss": 1.2064043283462524,
204
+ "eval_runtime": 5.8576,
205
+ "eval_samples_per_second": 13.316,
206
+ "eval_steps_per_second": 3.414,
207
  "step": 460
208
  },
209
  {
210
  "epoch": 4.024844720496894,
211
+ "grad_norm": 0.029298782348632812,
212
  "learning_rate": 2.2513812154696135e-05,
213
  "loss": 0.0012,
214
  "step": 480
215
  },
216
  {
217
  "epoch": 4.049689440993789,
218
+ "grad_norm": 0.014873038977384567,
219
  "learning_rate": 2.1132596685082874e-05,
220
  "loss": 0.0009,
221
  "step": 500
222
  },
223
  {
224
  "epoch": 4.074534161490683,
225
+ "grad_norm": 0.011718147434294224,
226
  "learning_rate": 1.9751381215469613e-05,
227
+ "loss": 0.0008,
228
  "step": 520
229
  },
230
  {
231
  "epoch": 4.099378881987578,
232
+ "grad_norm": 0.010744371451437473,
233
  "learning_rate": 1.8370165745856356e-05,
234
+ "loss": 0.0012,
235
  "step": 540
236
  },
237
  {
238
  "epoch": 4.124223602484472,
239
+ "grad_norm": 0.014341841451823711,
240
  "learning_rate": 1.6988950276243095e-05,
241
+ "loss": 0.012,
242
  "step": 560
243
  },
244
  {
245
  "epoch": 4.142857142857143,
246
+ "eval_accuracy": 0.782051282051282,
247
+ "eval_loss": 1.1255377531051636,
248
+ "eval_runtime": 6.0116,
249
+ "eval_samples_per_second": 12.975,
250
+ "eval_steps_per_second": 3.327,
251
  "step": 575
252
  },
253
  {
254
  "epoch": 5.0062111801242235,
255
+ "grad_norm": 0.016358228400349617,
256
  "learning_rate": 1.5607734806629834e-05,
257
+ "loss": 0.0007,
258
  "step": 580
259
  },
260
  {
261
  "epoch": 5.031055900621118,
262
+ "grad_norm": 0.03938188776373863,
263
  "learning_rate": 1.4226519337016575e-05,
264
  "loss": 0.0007,
265
  "step": 600
266
  },
267
  {
268
  "epoch": 5.055900621118012,
269
+ "grad_norm": 0.00977695919573307,
270
  "learning_rate": 1.2845303867403316e-05,
271
+ "loss": 0.0007,
272
  "step": 620
273
  },
274
  {
275
  "epoch": 5.080745341614906,
276
+ "grad_norm": 0.014728990383446217,
277
  "learning_rate": 1.1464088397790055e-05,
278
+ "loss": 0.0008,
279
  "step": 640
280
  },
281
  {
282
  "epoch": 5.105590062111801,
283
+ "grad_norm": 0.016683587804436684,
284
  "learning_rate": 1.0082872928176797e-05,
285
  "loss": 0.0006,
286
  "step": 660
287
  },
288
  {
289
  "epoch": 5.130434782608695,
290
+ "grad_norm": 0.038530658930540085,
291
  "learning_rate": 8.701657458563537e-06,
292
+ "loss": 0.0009,
293
  "step": 680
294
  },
295
  {
296
  "epoch": 5.142857142857143,
297
+ "eval_accuracy": 0.782051282051282,
298
+ "eval_loss": 1.0889217853546143,
299
+ "eval_runtime": 6.0113,
300
+ "eval_samples_per_second": 12.976,
301
+ "eval_steps_per_second": 3.327,
302
  "step": 690
303
  },
304
  {
305
  "epoch": 6.012422360248447,
306
+ "grad_norm": 0.007233364041894674,
307
  "learning_rate": 7.320441988950276e-06,
308
  "loss": 0.0006,
309
  "step": 700
310
  },
311
  {
312
  "epoch": 6.037267080745342,
313
+ "grad_norm": 0.01591545157134533,
314
  "learning_rate": 5.939226519337017e-06,
315
  "loss": 0.0006,
316
  "step": 720
317
  },
318
  {
319
  "epoch": 6.062111801242236,
320
+ "grad_norm": 0.010462045669555664,
321
  "learning_rate": 4.5580110497237574e-06,
322
  "loss": 0.0006,
323
  "step": 740
324
  },
325
  {
326
  "epoch": 6.086956521739131,
327
+ "grad_norm": 0.006039341911673546,
328
  "learning_rate": 3.1767955801104974e-06,
329
  "loss": 0.0006,
330
  "step": 760
331
  },
332
  {
333
  "epoch": 6.111801242236025,
334
+ "grad_norm": 0.010201127268373966,
335
  "learning_rate": 1.7955801104972376e-06,
336
  "loss": 0.0006,
337
  "step": 780
338
  },
339
  {
340
  "epoch": 6.136645962732919,
341
+ "grad_norm": 0.013513385318219662,
342
  "learning_rate": 4.143646408839779e-07,
343
  "loss": 0.0007,
344
  "step": 800
345
  },
346
  {
347
  "epoch": 6.142857142857143,
348
+ "eval_accuracy": 0.7702702702702703,
349
+ "eval_loss": 1.14006769657135,
350
+ "eval_runtime": 5.5871,
351
+ "eval_samples_per_second": 13.245,
352
+ "eval_steps_per_second": 3.401,
353
  "step": 805
354
  },
355
  {
356
  "epoch": 6.142857142857143,
357
  "step": 805,
358
  "total_flos": 4.012506890622075e+18,
359
+ "train_loss": 0.1426203187803813,
360
+ "train_runtime": 889.4005,
361
+ "train_samples_per_second": 3.62,
362
+ "train_steps_per_second": 0.905
363
  },
364
  {
365
  "epoch": 6.142857142857143,
366
+ "eval_accuracy": 0.8028169014084507,
367
+ "eval_loss": 1.103053331375122,
368
+ "eval_runtime": 4.9334,
369
+ "eval_samples_per_second": 14.392,
370
+ "eval_steps_per_second": 3.649,
371
  "step": 805
372
  }
373
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acfa06c9a4d7a0d20f474dc00057d5fe4ef6648d6c36d35f2ad4dfe9c51a8a7b
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cadfece94be4587a2a84d50343b8e2befa8b9d8af2d646cdedf5a702a909129
3
  size 5368