DanJoshua committed on
Commit
461a6c5
·
verified ·
1 Parent(s): b83c3f7

End of training

Browse files
README.md CHANGED
@@ -18,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.5304
22
- - Accuracy: 0.8438
23
- - F1: 0.8410
24
- - Precision: 0.8692
25
 
26
  ## Model description
27
 
 
18
 
19
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.3164
22
+ - Accuracy: 0.895
23
+ - F1: 0.8950
24
+ - Precision: 0.8952
25
 
26
  ## Model description
27
 
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.1,
3
+ "eval_accuracy": 0.895,
4
+ "eval_f1": 0.894989498949895,
5
+ "eval_loss": 0.3164408206939697,
6
+ "eval_precision": 0.8951580632252901,
7
+ "eval_runtime": 659.8661,
8
+ "eval_samples_per_second": 1.212,
9
+ "eval_steps_per_second": 0.152,
10
+ "total_flos": 0.0,
11
+ "train_loss": 0.6979074507727658,
12
+ "train_runtime": 5881.3964,
13
+ "train_samples_per_second": 2.584,
14
+ "train_steps_per_second": 0.323
15
+ }
runs/Aug23_21-13-25_6d69d878f876/events.out.tfevents.1724454156.6d69d878f876.352.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b63ef8942b0193caa0a1528edd66965ee6e68e617a646778609f09dc6de10b5
3
+ size 510
test_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.1,
3
+ "eval_accuracy": 0.895,
4
+ "eval_f1": 0.894989498949895,
5
+ "eval_loss": 0.3164408206939697,
6
+ "eval_precision": 0.8951580632252901,
7
+ "eval_runtime": 659.8661,
8
+ "eval_samples_per_second": 1.212,
9
+ "eval_steps_per_second": 0.152
10
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.1,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.6979074507727658,
5
+ "train_runtime": 5881.3964,
6
+ "train_samples_per_second": 2.584,
7
+ "train_steps_per_second": 0.323
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1070 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.84375,
3
+ "best_model_checkpoint": "mvit_v1_b_Kinetics400_transf_c_rwf2000/checkpoint-760",
4
+ "epoch": 6.1,
5
+ "eval_steps": 500,
6
+ "global_step": 1330,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.005263157894736842,
13
+ "grad_norm": 20.39082908630371,
14
+ "learning_rate": 9.947368421052632e-06,
15
+ "loss": 6.5828,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.010526315789473684,
20
+ "grad_norm": 30.1745548248291,
21
+ "learning_rate": 9.894736842105264e-06,
22
+ "loss": 6.3269,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.015789473684210527,
27
+ "grad_norm": 21.304548263549805,
28
+ "learning_rate": 9.842105263157896e-06,
29
+ "loss": 5.5401,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.021052631578947368,
34
+ "grad_norm": 34.847957611083984,
35
+ "learning_rate": 9.789473684210527e-06,
36
+ "loss": 5.1716,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.02631578947368421,
41
+ "grad_norm": 29.04662322998047,
42
+ "learning_rate": 9.736842105263159e-06,
43
+ "loss": 4.3721,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.031578947368421054,
48
+ "grad_norm": 31.47679901123047,
49
+ "learning_rate": 9.68421052631579e-06,
50
+ "loss": 4.0561,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.03684210526315789,
55
+ "grad_norm": 30.9488468170166,
56
+ "learning_rate": 9.631578947368422e-06,
57
+ "loss": 3.3662,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.042105263157894736,
62
+ "grad_norm": 23.594987869262695,
63
+ "learning_rate": 9.578947368421054e-06,
64
+ "loss": 1.9555,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.04736842105263158,
69
+ "grad_norm": 29.11677360534668,
70
+ "learning_rate": 9.526315789473684e-06,
71
+ "loss": 1.8569,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.05263157894736842,
76
+ "grad_norm": 37.965518951416016,
77
+ "learning_rate": 9.473684210526315e-06,
78
+ "loss": 1.514,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.05789473684210526,
83
+ "grad_norm": 13.468791961669922,
84
+ "learning_rate": 9.421052631578949e-06,
85
+ "loss": 0.9838,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.06315789473684211,
90
+ "grad_norm": 30.70915412902832,
91
+ "learning_rate": 9.36842105263158e-06,
92
+ "loss": 0.7295,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.06842105263157895,
97
+ "grad_norm": 13.8804292678833,
98
+ "learning_rate": 9.315789473684212e-06,
99
+ "loss": 0.6841,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.07368421052631578,
104
+ "grad_norm": 17.479394912719727,
105
+ "learning_rate": 9.263157894736842e-06,
106
+ "loss": 0.9459,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.07894736842105263,
111
+ "grad_norm": 8.364581108093262,
112
+ "learning_rate": 9.210526315789474e-06,
113
+ "loss": 0.7621,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.08421052631578947,
118
+ "grad_norm": 15.02173137664795,
119
+ "learning_rate": 9.157894736842105e-06,
120
+ "loss": 0.6913,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.08947368421052632,
125
+ "grad_norm": 23.994060516357422,
126
+ "learning_rate": 9.105263157894739e-06,
127
+ "loss": 0.6554,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.09473684210526316,
132
+ "grad_norm": 13.736618041992188,
133
+ "learning_rate": 9.05263157894737e-06,
134
+ "loss": 0.746,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.1,
139
+ "grad_norm": 7.4077558517456055,
140
+ "learning_rate": 9e-06,
141
+ "loss": 0.4837,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.1,
146
+ "eval_accuracy": 0.66875,
147
+ "eval_f1": 0.6374674019922192,
148
+ "eval_loss": 0.9726358652114868,
149
+ "eval_precision": 0.7576950608446671,
150
+ "eval_runtime": 70.973,
151
+ "eval_samples_per_second": 2.254,
152
+ "eval_steps_per_second": 0.282,
153
+ "step": 190
154
+ },
155
+ {
156
+ "epoch": 1.0052631578947369,
157
+ "grad_norm": 29.950836181640625,
158
+ "learning_rate": 8.947368421052632e-06,
159
+ "loss": 0.7972,
160
+ "step": 200
161
+ },
162
+ {
163
+ "epoch": 1.0105263157894737,
164
+ "grad_norm": 7.4079694747924805,
165
+ "learning_rate": 8.894736842105264e-06,
166
+ "loss": 0.5114,
167
+ "step": 210
168
+ },
169
+ {
170
+ "epoch": 1.0157894736842106,
171
+ "grad_norm": 13.153223991394043,
172
+ "learning_rate": 8.842105263157895e-06,
173
+ "loss": 0.427,
174
+ "step": 220
175
+ },
176
+ {
177
+ "epoch": 1.0210526315789474,
178
+ "grad_norm": 23.603761672973633,
179
+ "learning_rate": 8.789473684210527e-06,
180
+ "loss": 0.8155,
181
+ "step": 230
182
+ },
183
+ {
184
+ "epoch": 1.0263157894736843,
185
+ "grad_norm": 12.597455024719238,
186
+ "learning_rate": 8.736842105263158e-06,
187
+ "loss": 0.7769,
188
+ "step": 240
189
+ },
190
+ {
191
+ "epoch": 1.0315789473684212,
192
+ "grad_norm": 12.40769100189209,
193
+ "learning_rate": 8.68421052631579e-06,
194
+ "loss": 0.6717,
195
+ "step": 250
196
+ },
197
+ {
198
+ "epoch": 1.0368421052631578,
199
+ "grad_norm": 7.567124366760254,
200
+ "learning_rate": 8.631578947368422e-06,
201
+ "loss": 0.5306,
202
+ "step": 260
203
+ },
204
+ {
205
+ "epoch": 1.0421052631578946,
206
+ "grad_norm": 16.219642639160156,
207
+ "learning_rate": 8.578947368421053e-06,
208
+ "loss": 0.4013,
209
+ "step": 270
210
+ },
211
+ {
212
+ "epoch": 1.0473684210526315,
213
+ "grad_norm": 15.891722679138184,
214
+ "learning_rate": 8.526315789473685e-06,
215
+ "loss": 0.473,
216
+ "step": 280
217
+ },
218
+ {
219
+ "epoch": 1.0526315789473684,
220
+ "grad_norm": 12.381929397583008,
221
+ "learning_rate": 8.473684210526317e-06,
222
+ "loss": 0.5054,
223
+ "step": 290
224
+ },
225
+ {
226
+ "epoch": 1.0578947368421052,
227
+ "grad_norm": 3.8112006187438965,
228
+ "learning_rate": 8.421052631578948e-06,
229
+ "loss": 0.4146,
230
+ "step": 300
231
+ },
232
+ {
233
+ "epoch": 1.063157894736842,
234
+ "grad_norm": 12.637545585632324,
235
+ "learning_rate": 8.36842105263158e-06,
236
+ "loss": 0.483,
237
+ "step": 310
238
+ },
239
+ {
240
+ "epoch": 1.068421052631579,
241
+ "grad_norm": 16.767824172973633,
242
+ "learning_rate": 8.315789473684212e-06,
243
+ "loss": 0.4881,
244
+ "step": 320
245
+ },
246
+ {
247
+ "epoch": 1.0736842105263158,
248
+ "grad_norm": 20.965835571289062,
249
+ "learning_rate": 8.263157894736843e-06,
250
+ "loss": 0.4277,
251
+ "step": 330
252
+ },
253
+ {
254
+ "epoch": 1.0789473684210527,
255
+ "grad_norm": 15.695181846618652,
256
+ "learning_rate": 8.210526315789475e-06,
257
+ "loss": 0.4985,
258
+ "step": 340
259
+ },
260
+ {
261
+ "epoch": 1.0842105263157895,
262
+ "grad_norm": 5.402801513671875,
263
+ "learning_rate": 8.157894736842106e-06,
264
+ "loss": 0.6222,
265
+ "step": 350
266
+ },
267
+ {
268
+ "epoch": 1.0894736842105264,
269
+ "grad_norm": 13.765841484069824,
270
+ "learning_rate": 8.105263157894736e-06,
271
+ "loss": 0.4847,
272
+ "step": 360
273
+ },
274
+ {
275
+ "epoch": 1.0947368421052632,
276
+ "grad_norm": 12.656335830688477,
277
+ "learning_rate": 8.052631578947368e-06,
278
+ "loss": 0.4337,
279
+ "step": 370
280
+ },
281
+ {
282
+ "epoch": 1.1,
283
+ "grad_norm": 9.778788566589355,
284
+ "learning_rate": 8.000000000000001e-06,
285
+ "loss": 0.4771,
286
+ "step": 380
287
+ },
288
+ {
289
+ "epoch": 1.1,
290
+ "eval_accuracy": 0.8375,
291
+ "eval_f1": 0.8365807668133248,
292
+ "eval_loss": 0.4597933888435364,
293
+ "eval_precision": 0.8452685421994885,
294
+ "eval_runtime": 70.3913,
295
+ "eval_samples_per_second": 2.273,
296
+ "eval_steps_per_second": 0.284,
297
+ "step": 380
298
+ },
299
+ {
300
+ "epoch": 2.0052631578947366,
301
+ "grad_norm": 3.884115695953369,
302
+ "learning_rate": 7.947368421052633e-06,
303
+ "loss": 0.4156,
304
+ "step": 390
305
+ },
306
+ {
307
+ "epoch": 2.0105263157894737,
308
+ "grad_norm": 13.59132194519043,
309
+ "learning_rate": 7.894736842105265e-06,
310
+ "loss": 0.4021,
311
+ "step": 400
312
+ },
313
+ {
314
+ "epoch": 2.0157894736842104,
315
+ "grad_norm": 8.34617805480957,
316
+ "learning_rate": 7.842105263157895e-06,
317
+ "loss": 0.3354,
318
+ "step": 410
319
+ },
320
+ {
321
+ "epoch": 2.0210526315789474,
322
+ "grad_norm": 8.853759765625,
323
+ "learning_rate": 7.789473684210526e-06,
324
+ "loss": 0.4284,
325
+ "step": 420
326
+ },
327
+ {
328
+ "epoch": 2.026315789473684,
329
+ "grad_norm": 2.9599461555480957,
330
+ "learning_rate": 7.736842105263158e-06,
331
+ "loss": 0.399,
332
+ "step": 430
333
+ },
334
+ {
335
+ "epoch": 2.031578947368421,
336
+ "grad_norm": 8.018070220947266,
337
+ "learning_rate": 7.68421052631579e-06,
338
+ "loss": 0.5831,
339
+ "step": 440
340
+ },
341
+ {
342
+ "epoch": 2.036842105263158,
343
+ "grad_norm": 6.710073947906494,
344
+ "learning_rate": 7.631578947368423e-06,
345
+ "loss": 0.2981,
346
+ "step": 450
347
+ },
348
+ {
349
+ "epoch": 2.042105263157895,
350
+ "grad_norm": 2.070084810256958,
351
+ "learning_rate": 7.578947368421054e-06,
352
+ "loss": 0.4386,
353
+ "step": 460
354
+ },
355
+ {
356
+ "epoch": 2.0473684210526315,
357
+ "grad_norm": 5.736349105834961,
358
+ "learning_rate": 7.526315789473685e-06,
359
+ "loss": 0.3822,
360
+ "step": 470
361
+ },
362
+ {
363
+ "epoch": 2.0526315789473686,
364
+ "grad_norm": 11.180822372436523,
365
+ "learning_rate": 7.473684210526316e-06,
366
+ "loss": 0.3862,
367
+ "step": 480
368
+ },
369
+ {
370
+ "epoch": 2.057894736842105,
371
+ "grad_norm": 8.966133117675781,
372
+ "learning_rate": 7.421052631578948e-06,
373
+ "loss": 0.5257,
374
+ "step": 490
375
+ },
376
+ {
377
+ "epoch": 2.0631578947368423,
378
+ "grad_norm": 14.565701484680176,
379
+ "learning_rate": 7.368421052631579e-06,
380
+ "loss": 0.3742,
381
+ "step": 500
382
+ },
383
+ {
384
+ "epoch": 2.068421052631579,
385
+ "grad_norm": 10.042845726013184,
386
+ "learning_rate": 7.315789473684212e-06,
387
+ "loss": 0.3909,
388
+ "step": 510
389
+ },
390
+ {
391
+ "epoch": 2.0736842105263156,
392
+ "grad_norm": 10.890286445617676,
393
+ "learning_rate": 7.263157894736843e-06,
394
+ "loss": 0.5469,
395
+ "step": 520
396
+ },
397
+ {
398
+ "epoch": 2.0789473684210527,
399
+ "grad_norm": 5.286346435546875,
400
+ "learning_rate": 7.210526315789474e-06,
401
+ "loss": 0.5734,
402
+ "step": 530
403
+ },
404
+ {
405
+ "epoch": 2.0842105263157893,
406
+ "grad_norm": 11.605768203735352,
407
+ "learning_rate": 7.157894736842106e-06,
408
+ "loss": 0.3376,
409
+ "step": 540
410
+ },
411
+ {
412
+ "epoch": 2.0894736842105264,
413
+ "grad_norm": 15.367936134338379,
414
+ "learning_rate": 7.1052631578947375e-06,
415
+ "loss": 0.4938,
416
+ "step": 550
417
+ },
418
+ {
419
+ "epoch": 2.094736842105263,
420
+ "grad_norm": 16.802827835083008,
421
+ "learning_rate": 7.052631578947369e-06,
422
+ "loss": 0.341,
423
+ "step": 560
424
+ },
425
+ {
426
+ "epoch": 2.1,
427
+ "grad_norm": 17.05912208557129,
428
+ "learning_rate": 7e-06,
429
+ "loss": 0.2694,
430
+ "step": 570
431
+ },
432
+ {
433
+ "epoch": 2.1,
434
+ "eval_accuracy": 0.83125,
435
+ "eval_f1": 0.8297536945812809,
436
+ "eval_loss": 0.46257010102272034,
437
+ "eval_precision": 0.84331983805668,
438
+ "eval_runtime": 69.7179,
439
+ "eval_samples_per_second": 2.295,
440
+ "eval_steps_per_second": 0.287,
441
+ "step": 570
442
+ },
443
+ {
444
+ "epoch": 3.0052631578947366,
445
+ "grad_norm": 3.188321113586426,
446
+ "learning_rate": 6.947368421052632e-06,
447
+ "loss": 0.3871,
448
+ "step": 580
449
+ },
450
+ {
451
+ "epoch": 3.0105263157894737,
452
+ "grad_norm": 2.308584451675415,
453
+ "learning_rate": 6.894736842105264e-06,
454
+ "loss": 0.4008,
455
+ "step": 590
456
+ },
457
+ {
458
+ "epoch": 3.0157894736842104,
459
+ "grad_norm": 5.290454387664795,
460
+ "learning_rate": 6.842105263157896e-06,
461
+ "loss": 0.4617,
462
+ "step": 600
463
+ },
464
+ {
465
+ "epoch": 3.0210526315789474,
466
+ "grad_norm": 5.937681674957275,
467
+ "learning_rate": 6.789473684210527e-06,
468
+ "loss": 0.3444,
469
+ "step": 610
470
+ },
471
+ {
472
+ "epoch": 3.026315789473684,
473
+ "grad_norm": 4.868968963623047,
474
+ "learning_rate": 6.736842105263158e-06,
475
+ "loss": 0.3037,
476
+ "step": 620
477
+ },
478
+ {
479
+ "epoch": 3.031578947368421,
480
+ "grad_norm": 6.8114824295043945,
481
+ "learning_rate": 6.68421052631579e-06,
482
+ "loss": 0.2949,
483
+ "step": 630
484
+ },
485
+ {
486
+ "epoch": 3.036842105263158,
487
+ "grad_norm": 13.931032180786133,
488
+ "learning_rate": 6.631578947368421e-06,
489
+ "loss": 0.4389,
490
+ "step": 640
491
+ },
492
+ {
493
+ "epoch": 3.042105263157895,
494
+ "grad_norm": 18.7010555267334,
495
+ "learning_rate": 6.578947368421054e-06,
496
+ "loss": 0.4782,
497
+ "step": 650
498
+ },
499
+ {
500
+ "epoch": 3.0473684210526315,
501
+ "grad_norm": 5.578834533691406,
502
+ "learning_rate": 6.526315789473685e-06,
503
+ "loss": 0.3549,
504
+ "step": 660
505
+ },
506
+ {
507
+ "epoch": 3.0526315789473686,
508
+ "grad_norm": 24.610780715942383,
509
+ "learning_rate": 6.473684210526316e-06,
510
+ "loss": 0.4142,
511
+ "step": 670
512
+ },
513
+ {
514
+ "epoch": 3.057894736842105,
515
+ "grad_norm": 2.36200213432312,
516
+ "learning_rate": 6.421052631578948e-06,
517
+ "loss": 0.284,
518
+ "step": 680
519
+ },
520
+ {
521
+ "epoch": 3.0631578947368423,
522
+ "grad_norm": 5.415768623352051,
523
+ "learning_rate": 6.3684210526315795e-06,
524
+ "loss": 0.3369,
525
+ "step": 690
526
+ },
527
+ {
528
+ "epoch": 3.068421052631579,
529
+ "grad_norm": 1.3822064399719238,
530
+ "learning_rate": 6.31578947368421e-06,
531
+ "loss": 0.3621,
532
+ "step": 700
533
+ },
534
+ {
535
+ "epoch": 3.0736842105263156,
536
+ "grad_norm": 7.259896278381348,
537
+ "learning_rate": 6.263157894736842e-06,
538
+ "loss": 0.421,
539
+ "step": 710
540
+ },
541
+ {
542
+ "epoch": 3.0789473684210527,
543
+ "grad_norm": 29.047313690185547,
544
+ "learning_rate": 6.2105263157894745e-06,
545
+ "loss": 0.5442,
546
+ "step": 720
547
+ },
548
+ {
549
+ "epoch": 3.0842105263157893,
550
+ "grad_norm": 7.356611728668213,
551
+ "learning_rate": 6.157894736842106e-06,
552
+ "loss": 0.3321,
553
+ "step": 730
554
+ },
555
+ {
556
+ "epoch": 3.0894736842105264,
557
+ "grad_norm": 17.060741424560547,
558
+ "learning_rate": 6.105263157894738e-06,
559
+ "loss": 0.3558,
560
+ "step": 740
561
+ },
562
+ {
563
+ "epoch": 3.094736842105263,
564
+ "grad_norm": 9.117140769958496,
565
+ "learning_rate": 6.0526315789473685e-06,
566
+ "loss": 0.4388,
567
+ "step": 750
568
+ },
569
+ {
570
+ "epoch": 3.1,
571
+ "grad_norm": 20.096881866455078,
572
+ "learning_rate": 6e-06,
573
+ "loss": 0.4363,
574
+ "step": 760
575
+ },
576
+ {
577
+ "epoch": 3.1,
578
+ "eval_accuracy": 0.84375,
579
+ "eval_f1": 0.8435972629521016,
580
+ "eval_loss": 0.3822278082370758,
581
+ "eval_precision": 0.8450980392156863,
582
+ "eval_runtime": 67.3361,
583
+ "eval_samples_per_second": 2.376,
584
+ "eval_steps_per_second": 0.297,
585
+ "step": 760
586
+ },
587
+ {
588
+ "epoch": 4.005263157894737,
589
+ "grad_norm": 11.768166542053223,
590
+ "learning_rate": 5.947368421052632e-06,
591
+ "loss": 0.5369,
592
+ "step": 770
593
+ },
594
+ {
595
+ "epoch": 4.010526315789473,
596
+ "grad_norm": 13.316620826721191,
597
+ "learning_rate": 5.8947368421052634e-06,
598
+ "loss": 0.2579,
599
+ "step": 780
600
+ },
601
+ {
602
+ "epoch": 4.015789473684211,
603
+ "grad_norm": 9.412049293518066,
604
+ "learning_rate": 5.842105263157896e-06,
605
+ "loss": 0.239,
606
+ "step": 790
607
+ },
608
+ {
609
+ "epoch": 4.021052631578947,
610
+ "grad_norm": 2.2383475303649902,
611
+ "learning_rate": 5.789473684210527e-06,
612
+ "loss": 0.3057,
613
+ "step": 800
614
+ },
615
+ {
616
+ "epoch": 4.026315789473684,
617
+ "grad_norm": 20.319499969482422,
618
+ "learning_rate": 5.736842105263158e-06,
619
+ "loss": 0.2473,
620
+ "step": 810
621
+ },
622
+ {
623
+ "epoch": 4.031578947368421,
624
+ "grad_norm": 2.4087274074554443,
625
+ "learning_rate": 5.68421052631579e-06,
626
+ "loss": 0.3685,
627
+ "step": 820
628
+ },
629
+ {
630
+ "epoch": 4.036842105263158,
631
+ "grad_norm": 19.121938705444336,
632
+ "learning_rate": 5.631578947368422e-06,
633
+ "loss": 0.2928,
634
+ "step": 830
635
+ },
636
+ {
637
+ "epoch": 4.042105263157895,
638
+ "grad_norm": 23.854658126831055,
639
+ "learning_rate": 5.578947368421052e-06,
640
+ "loss": 0.4694,
641
+ "step": 840
642
+ },
643
+ {
644
+ "epoch": 4.0473684210526315,
645
+ "grad_norm": 14.487116813659668,
646
+ "learning_rate": 5.526315789473685e-06,
647
+ "loss": 0.4689,
648
+ "step": 850
649
+ },
650
+ {
651
+ "epoch": 4.052631578947368,
652
+ "grad_norm": 10.47803020477295,
653
+ "learning_rate": 5.4736842105263165e-06,
654
+ "loss": 0.3579,
655
+ "step": 860
656
+ },
657
+ {
658
+ "epoch": 4.057894736842106,
659
+ "grad_norm": 6.377836227416992,
660
+ "learning_rate": 5.421052631578948e-06,
661
+ "loss": 0.3246,
662
+ "step": 870
663
+ },
664
+ {
665
+ "epoch": 4.063157894736842,
666
+ "grad_norm": 10.962491989135742,
667
+ "learning_rate": 5.36842105263158e-06,
668
+ "loss": 0.2626,
669
+ "step": 880
670
+ },
671
+ {
672
+ "epoch": 4.068421052631579,
673
+ "grad_norm": 15.242006301879883,
674
+ "learning_rate": 5.315789473684211e-06,
675
+ "loss": 0.4136,
676
+ "step": 890
677
+ },
678
+ {
679
+ "epoch": 4.073684210526316,
680
+ "grad_norm": 13.032598495483398,
681
+ "learning_rate": 5.263157894736842e-06,
682
+ "loss": 0.4153,
683
+ "step": 900
684
+ },
685
+ {
686
+ "epoch": 4.078947368421052,
687
+ "grad_norm": 17.038894653320312,
688
+ "learning_rate": 5.210526315789474e-06,
689
+ "loss": 0.3384,
690
+ "step": 910
691
+ },
692
+ {
693
+ "epoch": 4.08421052631579,
694
+ "grad_norm": 6.975522518157959,
695
+ "learning_rate": 5.157894736842106e-06,
696
+ "loss": 0.2313,
697
+ "step": 920
698
+ },
699
+ {
700
+ "epoch": 4.089473684210526,
701
+ "grad_norm": 17.41042137145996,
702
+ "learning_rate": 5.105263157894738e-06,
703
+ "loss": 0.2923,
704
+ "step": 930
705
+ },
706
+ {
707
+ "epoch": 4.094736842105263,
708
+ "grad_norm": 8.513750076293945,
709
+ "learning_rate": 5.052631578947369e-06,
710
+ "loss": 0.1948,
711
+ "step": 940
712
+ },
713
+ {
714
+ "epoch": 4.1,
715
+ "grad_norm": 9.249213218688965,
716
+ "learning_rate": 5e-06,
717
+ "loss": 0.4551,
718
+ "step": 950
719
+ },
720
+ {
721
+ "epoch": 4.1,
722
+ "eval_accuracy": 0.8375,
723
+ "eval_f1": 0.8337595907928389,
724
+ "eval_loss": 0.567510724067688,
725
+ "eval_precision": 0.8708791208791208,
726
+ "eval_runtime": 69.8935,
727
+ "eval_samples_per_second": 2.289,
728
+ "eval_steps_per_second": 0.286,
729
+ "step": 950
730
+ },
731
+ {
732
+ "epoch": 5.005263157894737,
733
+ "grad_norm": 12.310464859008789,
734
+ "learning_rate": 4.947368421052632e-06,
735
+ "loss": 0.4447,
736
+ "step": 960
737
+ },
738
+ {
739
+ "epoch": 5.010526315789473,
740
+ "grad_norm": 6.919663429260254,
741
+ "learning_rate": 4.894736842105264e-06,
742
+ "loss": 0.3302,
743
+ "step": 970
744
+ },
745
+ {
746
+ "epoch": 5.015789473684211,
747
+ "grad_norm": 5.0730133056640625,
748
+ "learning_rate": 4.842105263157895e-06,
749
+ "loss": 0.1817,
750
+ "step": 980
751
+ },
752
+ {
753
+ "epoch": 5.021052631578947,
754
+ "grad_norm": 25.77090835571289,
755
+ "learning_rate": 4.789473684210527e-06,
756
+ "loss": 0.3793,
757
+ "step": 990
758
+ },
759
+ {
760
+ "epoch": 5.026315789473684,
761
+ "grad_norm": 19.587158203125,
762
+ "learning_rate": 4.736842105263158e-06,
763
+ "loss": 0.2926,
764
+ "step": 1000
765
+ },
766
+ {
767
+ "epoch": 5.031578947368421,
768
+ "grad_norm": 17.99262809753418,
769
+ "learning_rate": 4.68421052631579e-06,
770
+ "loss": 0.5002,
771
+ "step": 1010
772
+ },
773
+ {
774
+ "epoch": 5.036842105263158,
775
+ "grad_norm": 9.718439102172852,
776
+ "learning_rate": 4.631578947368421e-06,
777
+ "loss": 0.2441,
778
+ "step": 1020
779
+ },
780
+ {
781
+ "epoch": 5.042105263157895,
782
+ "grad_norm": 17.285890579223633,
783
+ "learning_rate": 4.578947368421053e-06,
784
+ "loss": 0.4185,
785
+ "step": 1030
786
+ },
787
+ {
788
+ "epoch": 5.0473684210526315,
789
+ "grad_norm": 9.232149124145508,
790
+ "learning_rate": 4.526315789473685e-06,
791
+ "loss": 0.2818,
792
+ "step": 1040
793
+ },
794
+ {
795
+ "epoch": 5.052631578947368,
796
+ "grad_norm": 22.14645004272461,
797
+ "learning_rate": 4.473684210526316e-06,
798
+ "loss": 0.3087,
799
+ "step": 1050
800
+ },
801
+ {
802
+ "epoch": 5.057894736842106,
803
+ "grad_norm": 11.559210777282715,
804
+ "learning_rate": 4.4210526315789476e-06,
805
+ "loss": 0.4542,
806
+ "step": 1060
807
+ },
808
+ {
809
+ "epoch": 5.063157894736842,
810
+ "grad_norm": 13.807418823242188,
811
+ "learning_rate": 4.368421052631579e-06,
812
+ "loss": 0.3877,
813
+ "step": 1070
814
+ },
815
+ {
816
+ "epoch": 5.068421052631579,
817
+ "grad_norm": 13.337950706481934,
818
+ "learning_rate": 4.315789473684211e-06,
819
+ "loss": 0.4411,
820
+ "step": 1080
821
+ },
822
+ {
823
+ "epoch": 5.073684210526316,
824
+ "grad_norm": 14.076698303222656,
825
+ "learning_rate": 4.2631578947368425e-06,
826
+ "loss": 0.2447,
827
+ "step": 1090
828
+ },
829
+ {
830
+ "epoch": 5.078947368421052,
831
+ "grad_norm": 12.362092971801758,
832
+ "learning_rate": 4.210526315789474e-06,
833
+ "loss": 0.5224,
834
+ "step": 1100
835
+ },
836
+ {
837
+ "epoch": 5.08421052631579,
838
+ "grad_norm": 2.9585299491882324,
839
+ "learning_rate": 4.157894736842106e-06,
840
+ "loss": 0.2479,
841
+ "step": 1110
842
+ },
843
+ {
844
+ "epoch": 5.089473684210526,
845
+ "grad_norm": 19.757415771484375,
846
+ "learning_rate": 4.105263157894737e-06,
847
+ "loss": 0.3588,
848
+ "step": 1120
849
+ },
850
+ {
851
+ "epoch": 5.094736842105263,
852
+ "grad_norm": 23.674379348754883,
853
+ "learning_rate": 4.052631578947368e-06,
854
+ "loss": 0.4141,
855
+ "step": 1130
856
+ },
857
+ {
858
+ "epoch": 5.1,
859
+ "grad_norm": 20.916202545166016,
860
+ "learning_rate": 4.000000000000001e-06,
861
+ "loss": 0.2914,
862
+ "step": 1140
863
+ },
864
+ {
865
+ "epoch": 5.1,
866
+ "eval_accuracy": 0.81875,
867
+ "eval_f1": 0.8134373366571509,
868
+ "eval_loss": 0.6118867993354797,
869
+ "eval_precision": 0.8597249162405219,
870
+ "eval_runtime": 71.0105,
871
+ "eval_samples_per_second": 2.253,
872
+ "eval_steps_per_second": 0.282,
873
+ "step": 1140
874
+ },
875
+ {
876
+ "epoch": 6.005263157894737,
877
+ "grad_norm": 20.181005477905273,
878
+ "learning_rate": 3.947368421052632e-06,
879
+ "loss": 0.3204,
880
+ "step": 1150
881
+ },
882
+ {
883
+ "epoch": 6.010526315789473,
884
+ "grad_norm": 21.946392059326172,
885
+ "learning_rate": 3.894736842105263e-06,
886
+ "loss": 0.4591,
887
+ "step": 1160
888
+ },
889
+ {
890
+ "epoch": 6.015789473684211,
891
+ "grad_norm": 16.40288543701172,
892
+ "learning_rate": 3.842105263157895e-06,
893
+ "loss": 0.2527,
894
+ "step": 1170
895
+ },
896
+ {
897
+ "epoch": 6.021052631578947,
898
+ "grad_norm": 22.849788665771484,
899
+ "learning_rate": 3.789473684210527e-06,
900
+ "loss": 0.419,
901
+ "step": 1180
902
+ },
903
+ {
904
+ "epoch": 6.026315789473684,
905
+ "grad_norm": 4.809814453125,
906
+ "learning_rate": 3.736842105263158e-06,
907
+ "loss": 0.2078,
908
+ "step": 1190
909
+ },
910
+ {
911
+ "epoch": 6.031578947368421,
912
+ "grad_norm": 19.384782791137695,
913
+ "learning_rate": 3.6842105263157896e-06,
914
+ "loss": 0.3582,
915
+ "step": 1200
916
+ },
917
+ {
918
+ "epoch": 6.036842105263158,
919
+ "grad_norm": 23.3424129486084,
920
+ "learning_rate": 3.6315789473684217e-06,
921
+ "loss": 0.3567,
922
+ "step": 1210
923
+ },
924
+ {
925
+ "epoch": 6.042105263157895,
926
+ "grad_norm": 3.446596145629883,
927
+ "learning_rate": 3.578947368421053e-06,
928
+ "loss": 0.2239,
929
+ "step": 1220
930
+ },
931
+ {
932
+ "epoch": 6.0473684210526315,
933
+ "grad_norm": 20.072446823120117,
934
+ "learning_rate": 3.5263157894736846e-06,
935
+ "loss": 0.3906,
936
+ "step": 1230
937
+ },
938
+ {
939
+ "epoch": 6.052631578947368,
940
+ "grad_norm": 7.216496467590332,
941
+ "learning_rate": 3.473684210526316e-06,
942
+ "loss": 0.3055,
943
+ "step": 1240
944
+ },
945
+ {
946
+ "epoch": 6.057894736842106,
947
+ "grad_norm": 8.636588096618652,
948
+ "learning_rate": 3.421052631578948e-06,
949
+ "loss": 0.1265,
950
+ "step": 1250
951
+ },
952
+ {
953
+ "epoch": 6.063157894736842,
954
+ "grad_norm": 0.8843567371368408,
955
+ "learning_rate": 3.368421052631579e-06,
956
+ "loss": 0.3457,
957
+ "step": 1260
958
+ },
959
+ {
960
+ "epoch": 6.068421052631579,
961
+ "grad_norm": 20.05299949645996,
962
+ "learning_rate": 3.3157894736842107e-06,
963
+ "loss": 0.3391,
964
+ "step": 1270
965
+ },
966
+ {
967
+ "epoch": 6.073684210526316,
968
+ "grad_norm": 18.296667098999023,
969
+ "learning_rate": 3.2631578947368423e-06,
970
+ "loss": 0.4391,
971
+ "step": 1280
972
+ },
973
+ {
974
+ "epoch": 6.078947368421052,
975
+ "grad_norm": 12.995136260986328,
976
+ "learning_rate": 3.210526315789474e-06,
977
+ "loss": 0.396,
978
+ "step": 1290
979
+ },
980
+ {
981
+ "epoch": 6.08421052631579,
982
+ "grad_norm": 16.825014114379883,
983
+ "learning_rate": 3.157894736842105e-06,
984
+ "loss": 0.3864,
985
+ "step": 1300
986
+ },
987
+ {
988
+ "epoch": 6.089473684210526,
989
+ "grad_norm": 0.5085676908493042,
990
+ "learning_rate": 3.1052631578947372e-06,
991
+ "loss": 0.323,
992
+ "step": 1310
993
+ },
994
+ {
995
+ "epoch": 6.094736842105263,
996
+ "grad_norm": 26.47535514831543,
997
+ "learning_rate": 3.052631578947369e-06,
998
+ "loss": 0.4595,
999
+ "step": 1320
1000
+ },
1001
+ {
1002
+ "epoch": 6.1,
1003
+ "grad_norm": 18.22189712524414,
1004
+ "learning_rate": 3e-06,
1005
+ "loss": 0.5212,
1006
+ "step": 1330
1007
+ },
1008
+ {
1009
+ "epoch": 6.1,
1010
+ "eval_accuracy": 0.84375,
1011
+ "eval_f1": 0.8410111689653803,
1012
+ "eval_loss": 0.5304452180862427,
1013
+ "eval_precision": 0.8691894613190133,
1014
+ "eval_runtime": 70.9081,
1015
+ "eval_samples_per_second": 2.256,
1016
+ "eval_steps_per_second": 0.282,
1017
+ "step": 1330
1018
+ },
1019
+ {
1020
+ "epoch": 6.1,
1021
+ "step": 1330,
1022
+ "total_flos": 0.0,
1023
+ "train_loss": 0.6979074507727658,
1024
+ "train_runtime": 5881.3964,
1025
+ "train_samples_per_second": 2.584,
1026
+ "train_steps_per_second": 0.323
1027
+ },
1028
+ {
1029
+ "epoch": 6.1,
1030
+ "eval_accuracy": 0.895,
1031
+ "eval_f1": 0.894989498949895,
1032
+ "eval_loss": 0.3164408206939697,
1033
+ "eval_precision": 0.8951580632252901,
1034
+ "eval_runtime": 659.8661,
1035
+ "eval_samples_per_second": 1.212,
1036
+ "eval_steps_per_second": 0.152,
1037
+ "step": 1330
1038
+ }
1039
+ ],
1040
+ "logging_steps": 10,
1041
+ "max_steps": 1900,
1042
+ "num_input_tokens_seen": 0,
1043
+ "num_train_epochs": 9223372036854775807,
1044
+ "save_steps": 500,
1045
+ "stateful_callbacks": {
1046
+ "EarlyStoppingCallback": {
1047
+ "args": {
1048
+ "early_stopping_patience": 3,
1049
+ "early_stopping_threshold": 0.005
1050
+ },
1051
+ "attributes": {
1052
+ "early_stopping_patience_counter": 0
1053
+ }
1054
+ },
1055
+ "TrainerControl": {
1056
+ "args": {
1057
+ "should_epoch_stop": false,
1058
+ "should_evaluate": false,
1059
+ "should_log": false,
1060
+ "should_save": true,
1061
+ "should_training_stop": true
1062
+ },
1063
+ "attributes": {}
1064
+ }
1065
+ },
1066
+ "total_flos": 0.0,
1067
+ "train_batch_size": 8,
1068
+ "trial_name": null,
1069
+ "trial_params": null
1070
+ }