RobertoMDLP committed on
Commit
ba84a80
·
verified ·
1 Parent(s): 68af918

🍻 cheers

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
  - imagefolder
@@ -18,7 +19,7 @@ model-index:
18
  name: Image Classification
19
  type: image-classification
20
  dataset:
21
- name: imagefolder
22
  type: imagefolder
23
  config: default
24
  split: validation
@@ -26,16 +27,16 @@ model-index:
26
  metrics:
27
  - name: Accuracy
28
  type: accuracy
29
- value: 0.9810526315789474
30
  - name: F1
31
  type: f1
32
- value: 0.9802102573360923
33
  - name: Precision
34
  type: precision
35
- value: 0.9766839378238341
36
  - name: Recall
37
  type: recall
38
- value: 0.9845360824742269
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->
43
 
44
  # vit-tom-jerry-model
45
 
46
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
47
  It achieves the following results on the evaluation set:
48
- - Loss: 0.0855
49
- - Accuracy: 0.9811
50
- - F1: 0.9802
51
- - Precision: 0.9767
52
- - Recall: 0.9845
53
 
54
  ## Model description
55
 
 
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
6
+ - image-classification
7
  - generated_from_trainer
8
  datasets:
9
  - imagefolder
 
19
  name: Image Classification
20
  type: image-classification
21
  dataset:
22
+ name: tom_and_jerry
23
  type: imagefolder
24
  config: default
25
  split: validation
 
27
  metrics:
28
  - name: Accuracy
29
  type: accuracy
30
+ value: 0.991578947368421
31
  - name: F1
32
  type: f1
33
+ value: 0.9911287912744658
34
  - name: Precision
35
  type: precision
36
+ value: 0.9911287912744658
37
  - name: Recall
38
  type: recall
39
+ value: 0.9911287912744658
40
  ---
41
 
42
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
44
 
45
  # vit-tom-jerry-model
46
 
47
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the tom_and_jerry dataset.
48
  It achieves the following results on the evaluation set:
49
+ - Loss: 0.0403
50
+ - Accuracy: 0.9916
51
+ - F1: 0.9911
52
+ - Precision: 0.9911
53
+ - Recall: 0.9911
54
 
55
  ## Model description
56
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.158273381294964,
3
+ "eval_accuracy": 0.991578947368421,
4
+ "eval_f1": 0.9911287912744658,
5
+ "eval_loss": 0.0403330959379673,
6
+ "eval_precision": 0.9911287912744658,
7
+ "eval_recall": 0.9911287912744658,
8
+ "eval_runtime": 6.3358,
9
+ "eval_samples_per_second": 74.97,
10
+ "eval_steps_per_second": 9.47,
11
+ "total_flos": 3.711866302538957e+17,
12
+ "train_loss": 0.03943447194062173,
13
+ "train_runtime": 362.7821,
14
+ "train_samples_per_second": 30.583,
15
+ "train_steps_per_second": 3.832
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.158273381294964,
3
+ "eval_accuracy": 0.991578947368421,
4
+ "eval_f1": 0.9911287912744658,
5
+ "eval_loss": 0.0403330959379673,
6
+ "eval_precision": 0.9911287912744658,
7
+ "eval_recall": 0.9911287912744658,
8
+ "eval_runtime": 6.3358,
9
+ "eval_samples_per_second": 74.97,
10
+ "eval_steps_per_second": 9.47
11
+ }
runs/Aug14_21-07-55_d6feb5af6e29/events.out.tfevents.1755206129.d6feb5af6e29.604.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde1bb4fc51e0558d14865eca52c8a362ae4087d398bdd4c7bef90b4abbabb0a
3
+ size 560
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.158273381294964,
3
+ "total_flos": 3.711866302538957e+17,
4
+ "train_loss": 0.03943447194062173,
5
+ "train_runtime": 362.7821,
6
+ "train_samples_per_second": 30.583,
7
+ "train_steps_per_second": 3.832
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 300,
3
+ "best_metric": 0.991578947368421,
4
+ "best_model_checkpoint": "./vit-tom-jerry-model/checkpoint-300",
5
+ "epoch": 2.158273381294964,
6
+ "eval_steps": 100,
7
+ "global_step": 600,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.03597122302158273,
14
+ "grad_norm": 0.00730751920491457,
15
+ "learning_rate": 0.00019870503597122302,
16
+ "loss": 0.0425,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.07194244604316546,
21
+ "grad_norm": 0.005511483643203974,
22
+ "learning_rate": 0.00019726618705035972,
23
+ "loss": 0.0008,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.1079136690647482,
28
+ "grad_norm": 0.004138847813010216,
29
+ "learning_rate": 0.00019582733812949641,
30
+ "loss": 0.0006,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.14388489208633093,
35
+ "grad_norm": 0.003660564310848713,
36
+ "learning_rate": 0.00019453237410071942,
37
+ "loss": 0.025,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.17985611510791366,
42
+ "grad_norm": 0.002617336343973875,
43
+ "learning_rate": 0.00019309352517985612,
44
+ "loss": 0.0005,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.2158273381294964,
49
+ "grad_norm": 20.51651382446289,
50
+ "learning_rate": 0.0001916546762589928,
51
+ "loss": 0.0691,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.2517985611510791,
56
+ "grad_norm": 1.180568814277649,
57
+ "learning_rate": 0.0001902158273381295,
58
+ "loss": 0.3193,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.28776978417266186,
63
+ "grad_norm": 0.008585783652961254,
64
+ "learning_rate": 0.00018892086330935253,
65
+ "loss": 0.0207,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.3237410071942446,
70
+ "grad_norm": 0.009863360784947872,
71
+ "learning_rate": 0.00018748201438848923,
72
+ "loss": 0.105,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.3597122302158273,
77
+ "grad_norm": 0.12301458418369293,
78
+ "learning_rate": 0.0001860431654676259,
79
+ "loss": 0.0808,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.3597122302158273,
84
+ "eval_accuracy": 0.9705263157894737,
85
+ "eval_f1": 0.9693514490081853,
86
+ "eval_loss": 0.1167958602309227,
87
+ "eval_precision": 0.9646464646464646,
88
+ "eval_recall": 0.9759450171821306,
89
+ "eval_runtime": 5.9396,
90
+ "eval_samples_per_second": 79.972,
91
+ "eval_steps_per_second": 10.102,
92
+ "step": 100
93
+ },
94
+ {
95
+ "epoch": 0.39568345323741005,
96
+ "grad_norm": 0.0395534411072731,
97
+ "learning_rate": 0.0001846043165467626,
98
+ "loss": 0.0784,
99
+ "step": 110
100
+ },
101
+ {
102
+ "epoch": 0.4316546762589928,
103
+ "grad_norm": 0.007597455754876137,
104
+ "learning_rate": 0.0001831654676258993,
105
+ "loss": 0.1622,
106
+ "step": 120
107
+ },
108
+ {
109
+ "epoch": 0.4676258992805755,
110
+ "grad_norm": 0.008104625158011913,
111
+ "learning_rate": 0.000181726618705036,
112
+ "loss": 0.0327,
113
+ "step": 130
114
+ },
115
+ {
116
+ "epoch": 0.5035971223021583,
117
+ "grad_norm": 0.012004414573311806,
118
+ "learning_rate": 0.0001802877697841727,
119
+ "loss": 0.0899,
120
+ "step": 140
121
+ },
122
+ {
123
+ "epoch": 0.539568345323741,
124
+ "grad_norm": 0.023231692612171173,
125
+ "learning_rate": 0.00017884892086330936,
126
+ "loss": 0.153,
127
+ "step": 150
128
+ },
129
+ {
130
+ "epoch": 0.5755395683453237,
131
+ "grad_norm": 0.04413440078496933,
132
+ "learning_rate": 0.00017741007194244606,
133
+ "loss": 0.1071,
134
+ "step": 160
135
+ },
136
+ {
137
+ "epoch": 0.6115107913669064,
138
+ "grad_norm": 0.032176461070775986,
139
+ "learning_rate": 0.00017597122302158273,
140
+ "loss": 0.0832,
141
+ "step": 170
142
+ },
143
+ {
144
+ "epoch": 0.6474820143884892,
145
+ "grad_norm": 0.01023666001856327,
146
+ "learning_rate": 0.00017453237410071943,
147
+ "loss": 0.0437,
148
+ "step": 180
149
+ },
150
+ {
151
+ "epoch": 0.6834532374100719,
152
+ "grad_norm": 1.4527229070663452,
153
+ "learning_rate": 0.00017309352517985612,
154
+ "loss": 0.0117,
155
+ "step": 190
156
+ },
157
+ {
158
+ "epoch": 0.7194244604316546,
159
+ "grad_norm": 0.05816151946783066,
160
+ "learning_rate": 0.0001716546762589928,
161
+ "loss": 0.212,
162
+ "step": 200
163
+ },
164
+ {
165
+ "epoch": 0.7194244604316546,
166
+ "eval_accuracy": 0.9705263157894737,
167
+ "eval_f1": 0.9691306446821153,
168
+ "eval_loss": 0.12086797505617142,
169
+ "eval_precision": 0.9666666666666666,
170
+ "eval_recall": 0.9719483041984163,
171
+ "eval_runtime": 5.9309,
172
+ "eval_samples_per_second": 80.089,
173
+ "eval_steps_per_second": 10.116,
174
+ "step": 200
175
+ },
176
+ {
177
+ "epoch": 0.7553956834532374,
178
+ "grad_norm": 0.013493811711668968,
179
+ "learning_rate": 0.00017021582733812952,
180
+ "loss": 0.1312,
181
+ "step": 210
182
+ },
183
+ {
184
+ "epoch": 0.7913669064748201,
185
+ "grad_norm": 0.01136984582990408,
186
+ "learning_rate": 0.0001687769784172662,
187
+ "loss": 0.0076,
188
+ "step": 220
189
+ },
190
+ {
191
+ "epoch": 0.8273381294964028,
192
+ "grad_norm": 0.010255228728055954,
193
+ "learning_rate": 0.0001673381294964029,
194
+ "loss": 0.0016,
195
+ "step": 230
196
+ },
197
+ {
198
+ "epoch": 0.8633093525179856,
199
+ "grad_norm": 0.006183688063174486,
200
+ "learning_rate": 0.0001658992805755396,
201
+ "loss": 0.0021,
202
+ "step": 240
203
+ },
204
+ {
205
+ "epoch": 0.8992805755395683,
206
+ "grad_norm": 0.006337467581033707,
207
+ "learning_rate": 0.00016446043165467626,
208
+ "loss": 0.055,
209
+ "step": 250
210
+ },
211
+ {
212
+ "epoch": 0.935251798561151,
213
+ "grad_norm": 0.005062475800514221,
214
+ "learning_rate": 0.00016302158273381296,
215
+ "loss": 0.0747,
216
+ "step": 260
217
+ },
218
+ {
219
+ "epoch": 0.9712230215827338,
220
+ "grad_norm": 0.02078184485435486,
221
+ "learning_rate": 0.00016158273381294963,
222
+ "loss": 0.0664,
223
+ "step": 270
224
+ },
225
+ {
226
+ "epoch": 1.0071942446043165,
227
+ "grad_norm": 0.008888340555131435,
228
+ "learning_rate": 0.00016014388489208632,
229
+ "loss": 0.0019,
230
+ "step": 280
231
+ },
232
+ {
233
+ "epoch": 1.0431654676258992,
234
+ "grad_norm": 0.008093398064374924,
235
+ "learning_rate": 0.00015870503597122305,
236
+ "loss": 0.0011,
237
+ "step": 290
238
+ },
239
+ {
240
+ "epoch": 1.079136690647482,
241
+ "grad_norm": 0.004925600253045559,
242
+ "learning_rate": 0.00015726618705035972,
243
+ "loss": 0.0008,
244
+ "step": 300
245
+ },
246
+ {
247
+ "epoch": 1.079136690647482,
248
+ "eval_accuracy": 0.991578947368421,
249
+ "eval_f1": 0.9911287912744658,
250
+ "eval_loss": 0.0403330959379673,
251
+ "eval_precision": 0.9911287912744658,
252
+ "eval_recall": 0.9911287912744658,
253
+ "eval_runtime": 6.5468,
254
+ "eval_samples_per_second": 72.555,
255
+ "eval_steps_per_second": 9.165,
256
+ "step": 300
257
+ },
258
+ {
259
+ "epoch": 1.1151079136690647,
260
+ "grad_norm": 0.0044704582542181015,
261
+ "learning_rate": 0.00015582733812949642,
262
+ "loss": 0.0005,
263
+ "step": 310
264
+ },
265
+ {
266
+ "epoch": 1.1510791366906474,
267
+ "grad_norm": 0.005276895128190517,
268
+ "learning_rate": 0.0001543884892086331,
269
+ "loss": 0.0006,
270
+ "step": 320
271
+ },
272
+ {
273
+ "epoch": 1.1870503597122302,
274
+ "grad_norm": 0.004525118041783571,
275
+ "learning_rate": 0.0001529496402877698,
276
+ "loss": 0.0261,
277
+ "step": 330
278
+ },
279
+ {
280
+ "epoch": 1.223021582733813,
281
+ "grad_norm": 0.1290452778339386,
282
+ "learning_rate": 0.00015151079136690649,
283
+ "loss": 0.0008,
284
+ "step": 340
285
+ },
286
+ {
287
+ "epoch": 1.2589928057553956,
288
+ "grad_norm": 0.09148821979761124,
289
+ "learning_rate": 0.00015007194244604316,
290
+ "loss": 0.0008,
291
+ "step": 350
292
+ },
293
+ {
294
+ "epoch": 1.2949640287769784,
295
+ "grad_norm": 0.0038220672868192196,
296
+ "learning_rate": 0.00014863309352517985,
297
+ "loss": 0.0008,
298
+ "step": 360
299
+ },
300
+ {
301
+ "epoch": 1.330935251798561,
302
+ "grad_norm": 0.0043488466180861,
303
+ "learning_rate": 0.00014719424460431655,
304
+ "loss": 0.0536,
305
+ "step": 370
306
+ },
307
+ {
308
+ "epoch": 1.3669064748201438,
309
+ "grad_norm": 0.007121060974895954,
310
+ "learning_rate": 0.00014575539568345325,
311
+ "loss": 0.01,
312
+ "step": 380
313
+ },
314
+ {
315
+ "epoch": 1.4028776978417266,
316
+ "grad_norm": 0.0033652805723249912,
317
+ "learning_rate": 0.00014431654676258995,
318
+ "loss": 0.0023,
319
+ "step": 390
320
+ },
321
+ {
322
+ "epoch": 1.4388489208633093,
323
+ "grad_norm": 9.669677734375,
324
+ "learning_rate": 0.00014287769784172662,
325
+ "loss": 0.0041,
326
+ "step": 400
327
+ },
328
+ {
329
+ "epoch": 1.4388489208633093,
330
+ "eval_accuracy": 0.9894736842105263,
331
+ "eval_f1": 0.9889220062596495,
332
+ "eval_loss": 0.04642796143889427,
333
+ "eval_precision": 0.9884436160298229,
334
+ "eval_recall": 0.9894105782160466,
335
+ "eval_runtime": 6.793,
336
+ "eval_samples_per_second": 69.925,
337
+ "eval_steps_per_second": 8.833,
338
+ "step": 400
339
+ },
340
+ {
341
+ "epoch": 1.474820143884892,
342
+ "grad_norm": 0.003309508552774787,
343
+ "learning_rate": 0.00014143884892086332,
344
+ "loss": 0.0004,
345
+ "step": 410
346
+ },
347
+ {
348
+ "epoch": 1.5107913669064748,
349
+ "grad_norm": 0.0035187567118555307,
350
+ "learning_rate": 0.00014,
351
+ "loss": 0.0005,
352
+ "step": 420
353
+ },
354
+ {
355
+ "epoch": 1.5467625899280577,
356
+ "grad_norm": 0.0034841764718294144,
357
+ "learning_rate": 0.00013856115107913669,
358
+ "loss": 0.0005,
359
+ "step": 430
360
+ },
361
+ {
362
+ "epoch": 1.5827338129496402,
363
+ "grad_norm": 0.04978319630026817,
364
+ "learning_rate": 0.00013712230215827338,
365
+ "loss": 0.0006,
366
+ "step": 440
367
+ },
368
+ {
369
+ "epoch": 1.6187050359712232,
370
+ "grad_norm": 11.15152645111084,
371
+ "learning_rate": 0.00013568345323741008,
372
+ "loss": 0.0305,
373
+ "step": 450
374
+ },
375
+ {
376
+ "epoch": 1.6546762589928057,
377
+ "grad_norm": 0.0030082084704190493,
378
+ "learning_rate": 0.00013424460431654678,
379
+ "loss": 0.0003,
380
+ "step": 460
381
+ },
382
+ {
383
+ "epoch": 1.6906474820143886,
384
+ "grad_norm": 0.002781275659799576,
385
+ "learning_rate": 0.00013280575539568345,
386
+ "loss": 0.0003,
387
+ "step": 470
388
+ },
389
+ {
390
+ "epoch": 1.7266187050359711,
391
+ "grad_norm": 0.0027191194240003824,
392
+ "learning_rate": 0.00013136690647482015,
393
+ "loss": 0.0005,
394
+ "step": 480
395
+ },
396
+ {
397
+ "epoch": 1.762589928057554,
398
+ "grad_norm": 0.0029242518357932568,
399
+ "learning_rate": 0.00012992805755395685,
400
+ "loss": 0.0494,
401
+ "step": 490
402
+ },
403
+ {
404
+ "epoch": 1.7985611510791366,
405
+ "grad_norm": 0.0035123827401548624,
406
+ "learning_rate": 0.00012848920863309352,
407
+ "loss": 0.0004,
408
+ "step": 500
409
+ },
410
+ {
411
+ "epoch": 1.7985611510791366,
412
+ "eval_accuracy": 0.968421052631579,
413
+ "eval_f1": 0.9671339412977595,
414
+ "eval_loss": 0.1312914788722992,
415
+ "eval_precision": 0.962668443925063,
416
+ "eval_recall": 0.9732276258777828,
417
+ "eval_runtime": 6.6073,
418
+ "eval_samples_per_second": 71.89,
419
+ "eval_steps_per_second": 9.081,
420
+ "step": 500
421
+ },
422
+ {
423
+ "epoch": 1.8345323741007196,
424
+ "grad_norm": 0.0036016402300447226,
425
+ "learning_rate": 0.00012705035971223022,
426
+ "loss": 0.0997,
427
+ "step": 510
428
+ },
429
+ {
430
+ "epoch": 1.870503597122302,
431
+ "grad_norm": 0.004752186127007008,
432
+ "learning_rate": 0.0001256115107913669,
433
+ "loss": 0.0297,
434
+ "step": 520
435
+ },
436
+ {
437
+ "epoch": 1.906474820143885,
438
+ "grad_norm": 0.005383871030062437,
439
+ "learning_rate": 0.0001241726618705036,
440
+ "loss": 0.0007,
441
+ "step": 530
442
+ },
443
+ {
444
+ "epoch": 1.9424460431654675,
445
+ "grad_norm": 0.0036646986845880747,
446
+ "learning_rate": 0.0001227338129496403,
447
+ "loss": 0.004,
448
+ "step": 540
449
+ },
450
+ {
451
+ "epoch": 1.9784172661870505,
452
+ "grad_norm": 0.00526112737134099,
453
+ "learning_rate": 0.00012129496402877698,
454
+ "loss": 0.0652,
455
+ "step": 550
456
+ },
457
+ {
458
+ "epoch": 2.014388489208633,
459
+ "grad_norm": 0.004464145749807358,
460
+ "learning_rate": 0.00011985611510791368,
461
+ "loss": 0.0006,
462
+ "step": 560
463
+ },
464
+ {
465
+ "epoch": 2.050359712230216,
466
+ "grad_norm": 0.006042866501957178,
467
+ "learning_rate": 0.00011841726618705036,
468
+ "loss": 0.0007,
469
+ "step": 570
470
+ },
471
+ {
472
+ "epoch": 2.0863309352517985,
473
+ "grad_norm": 0.1046941950917244,
474
+ "learning_rate": 0.00011697841726618706,
475
+ "loss": 0.001,
476
+ "step": 580
477
+ },
478
+ {
479
+ "epoch": 2.1223021582733814,
480
+ "grad_norm": 0.0033078379929065704,
481
+ "learning_rate": 0.00011553956834532376,
482
+ "loss": 0.0004,
483
+ "step": 590
484
+ },
485
+ {
486
+ "epoch": 2.158273381294964,
487
+ "grad_norm": 0.016841504722833633,
488
+ "learning_rate": 0.00011410071942446043,
489
+ "loss": 0.0005,
490
+ "step": 600
491
+ },
492
+ {
493
+ "epoch": 2.158273381294964,
494
+ "eval_accuracy": 0.9810526315789474,
495
+ "eval_f1": 0.9802102573360923,
496
+ "eval_loss": 0.08548920601606369,
497
+ "eval_precision": 0.9766839378238341,
498
+ "eval_recall": 0.9845360824742269,
499
+ "eval_runtime": 6.6607,
500
+ "eval_samples_per_second": 71.314,
501
+ "eval_steps_per_second": 9.008,
502
+ "step": 600
503
+ },
504
+ {
505
+ "epoch": 2.158273381294964,
506
+ "step": 600,
507
+ "total_flos": 3.711866302538957e+17,
508
+ "train_loss": 0.03943447194062173,
509
+ "train_runtime": 362.7821,
510
+ "train_samples_per_second": 30.583,
511
+ "train_steps_per_second": 3.832
512
+ }
513
+ ],
514
+ "logging_steps": 10,
515
+ "max_steps": 1390,
516
+ "num_input_tokens_seen": 0,
517
+ "num_train_epochs": 5,
518
+ "save_steps": 100,
519
+ "stateful_callbacks": {
520
+ "EarlyStoppingCallback": {
521
+ "args": {
522
+ "early_stopping_patience": 3,
523
+ "early_stopping_threshold": 0.0
524
+ },
525
+ "attributes": {
526
+ "early_stopping_patience_counter": 3
527
+ }
528
+ },
529
+ "TrainerControl": {
530
+ "args": {
531
+ "should_epoch_stop": false,
532
+ "should_evaluate": false,
533
+ "should_log": false,
534
+ "should_save": true,
535
+ "should_training_stop": true
536
+ },
537
+ "attributes": {}
538
+ }
539
+ },
540
+ "total_flos": 3.711866302538957e+17,
541
+ "train_batch_size": 8,
542
+ "trial_name": null,
543
+ "trial_params": null
544
+ }