panda992 commited on
Commit
13395df
·
verified ·
1 Parent(s): 68847df

🍻 cheers

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
 
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
@@ -16,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # fish_disease_datasets
18
 
19
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.0824
22
  - Accuracy: 0.9783
 
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
6
+ - image-classification
7
  - generated_from_trainer
8
  metrics:
9
  - accuracy
 
17
 
18
  # fish_disease_datasets
19
 
20
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the fish_disease_datasets dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.0824
23
  - Accuracy: 0.9783
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9809782608695652,
4
- "eval_loss": 0.0674990862607956,
5
- "eval_runtime": 1.7056,
6
- "eval_samples_per_second": 215.763,
7
- "eval_steps_per_second": 26.97,
8
  "total_flos": 6.45382209997357e+17,
9
- "train_loss": 0.13892418100633694,
10
- "train_runtime": 142.587,
11
- "train_samples_per_second": 58.406,
12
- "train_steps_per_second": 3.675
13
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9782608695652174,
4
+ "eval_loss": 0.08242087066173553,
5
+ "eval_runtime": 1.5232,
6
+ "eval_samples_per_second": 241.595,
7
+ "eval_steps_per_second": 30.199,
8
  "total_flos": 6.45382209997357e+17,
9
+ "train_loss": 0.26906703446657604,
10
+ "train_runtime": 211.2966,
11
+ "train_samples_per_second": 39.414,
12
+ "train_steps_per_second": 2.48
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9809782608695652,
4
- "eval_loss": 0.0674990862607956,
5
- "eval_runtime": 1.7056,
6
- "eval_samples_per_second": 215.763,
7
- "eval_steps_per_second": 26.97
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9782608695652174,
4
+ "eval_loss": 0.08242087066173553,
5
+ "eval_runtime": 1.5232,
6
+ "eval_samples_per_second": 241.595,
7
+ "eval_steps_per_second": 30.199
8
  }
runs/May22_12-46-10_ef8175b0da48/events.out.tfevents.1747918461.ef8175b0da48.11741.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff1df53df78f1c48ff0609e91ae93f3386d665f6aac93793c66eaa0b41ea223c
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 6.45382209997357e+17,
4
- "train_loss": 0.13892418100633694,
5
- "train_runtime": 142.587,
6
- "train_samples_per_second": 58.406,
7
- "train_steps_per_second": 3.675
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 6.45382209997357e+17,
4
+ "train_loss": 0.26906703446657604,
5
+ "train_runtime": 211.2966,
6
+ "train_samples_per_second": 39.414,
7
+ "train_steps_per_second": 2.48
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 500,
3
- "best_metric": 0.0674990862607956,
4
  "best_model_checkpoint": "fish_disease_datasets/checkpoint-500",
5
  "epoch": 4.0,
6
  "eval_steps": 100,
@@ -11,421 +11,421 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.07633587786259542,
14
- "grad_norm": 8.140775680541992,
15
  "learning_rate": 0.00019656488549618322,
16
- "loss": 0.2332,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.15267175572519084,
21
- "grad_norm": 5.731508731842041,
22
  "learning_rate": 0.00019274809160305345,
23
- "loss": 0.2397,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.22900763358778625,
28
- "grad_norm": 4.3719563484191895,
29
  "learning_rate": 0.00018893129770992367,
30
- "loss": 0.2881,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.3053435114503817,
35
- "grad_norm": 2.272369623184204,
36
  "learning_rate": 0.0001851145038167939,
37
- "loss": 0.2598,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.3816793893129771,
42
- "grad_norm": 3.4309451580047607,
43
  "learning_rate": 0.00018129770992366412,
44
- "loss": 0.1827,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.4580152671755725,
49
- "grad_norm": 1.1197692155838013,
50
  "learning_rate": 0.00017748091603053437,
51
- "loss": 0.152,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.5343511450381679,
56
- "grad_norm": 4.275225639343262,
57
  "learning_rate": 0.0001736641221374046,
58
- "loss": 0.2208,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.6106870229007634,
63
- "grad_norm": 3.2157912254333496,
64
  "learning_rate": 0.00016984732824427482,
65
- "loss": 0.3555,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.6870229007633588,
70
- "grad_norm": 5.242416858673096,
71
  "learning_rate": 0.00016603053435114505,
72
- "loss": 0.163,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.7633587786259542,
77
- "grad_norm": 0.5541224479675293,
78
  "learning_rate": 0.00016221374045801527,
79
- "loss": 0.2052,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.7633587786259542,
84
- "eval_accuracy": 0.9483695652173914,
85
- "eval_loss": 0.1684044897556305,
86
- "eval_runtime": 1.6021,
87
- "eval_samples_per_second": 229.704,
88
- "eval_steps_per_second": 28.713,
89
  "step": 100
90
  },
91
  {
92
  "epoch": 0.8396946564885496,
93
- "grad_norm": 0.14907874166965485,
94
  "learning_rate": 0.0001583969465648855,
95
- "loss": 0.1426,
96
  "step": 110
97
  },
98
  {
99
  "epoch": 0.916030534351145,
100
- "grad_norm": 1.8745291233062744,
101
  "learning_rate": 0.00015458015267175574,
102
- "loss": 0.1216,
103
  "step": 120
104
  },
105
  {
106
  "epoch": 0.9923664122137404,
107
- "grad_norm": 5.384662628173828,
108
  "learning_rate": 0.00015076335877862594,
109
- "loss": 0.17,
110
  "step": 130
111
  },
112
  {
113
  "epoch": 1.0687022900763359,
114
- "grad_norm": 4.790631294250488,
115
  "learning_rate": 0.0001469465648854962,
116
- "loss": 0.0845,
117
  "step": 140
118
  },
119
  {
120
  "epoch": 1.1450381679389312,
121
- "grad_norm": 4.6353230476379395,
122
  "learning_rate": 0.00014312977099236642,
123
- "loss": 0.1092,
124
  "step": 150
125
  },
126
  {
127
  "epoch": 1.2213740458015268,
128
- "grad_norm": 0.8423321843147278,
129
  "learning_rate": 0.00013931297709923664,
130
- "loss": 0.126,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 1.297709923664122,
135
- "grad_norm": 0.4985540211200714,
136
  "learning_rate": 0.0001354961832061069,
137
- "loss": 0.1103,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 1.3740458015267176,
142
- "grad_norm": 4.837319850921631,
143
  "learning_rate": 0.0001316793893129771,
144
- "loss": 0.1849,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 1.450381679389313,
149
- "grad_norm": 4.57224702835083,
150
  "learning_rate": 0.00012786259541984734,
151
- "loss": 0.195,
152
  "step": 190
153
  },
154
  {
155
  "epoch": 1.5267175572519083,
156
- "grad_norm": 0.5998224020004272,
157
  "learning_rate": 0.00012404580152671757,
158
- "loss": 0.05,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 1.5267175572519083,
163
- "eval_accuracy": 0.9592391304347826,
164
- "eval_loss": 0.16003353893756866,
165
- "eval_runtime": 1.5924,
166
- "eval_samples_per_second": 231.101,
167
- "eval_steps_per_second": 28.888,
168
  "step": 200
169
  },
170
  {
171
  "epoch": 1.6030534351145038,
172
- "grad_norm": 2.8493752479553223,
173
  "learning_rate": 0.0001202290076335878,
174
- "loss": 0.3001,
175
  "step": 210
176
  },
177
  {
178
  "epoch": 1.6793893129770994,
179
- "grad_norm": 0.12870310246944427,
180
  "learning_rate": 0.00011641221374045803,
181
- "loss": 0.0944,
182
  "step": 220
183
  },
184
  {
185
  "epoch": 1.7557251908396947,
186
- "grad_norm": 6.482563018798828,
187
  "learning_rate": 0.00011259541984732824,
188
- "loss": 0.0905,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 1.83206106870229,
193
- "grad_norm": 1.615530252456665,
194
  "learning_rate": 0.00010877862595419848,
195
- "loss": 0.1326,
196
  "step": 240
197
  },
198
  {
199
  "epoch": 1.9083969465648853,
200
- "grad_norm": 2.059842586517334,
201
  "learning_rate": 0.00010496183206106871,
202
- "loss": 0.1274,
203
  "step": 250
204
  },
205
  {
206
  "epoch": 1.984732824427481,
207
- "grad_norm": 2.750730276107788,
208
  "learning_rate": 0.00010114503816793894,
209
- "loss": 0.2225,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 2.0610687022900764,
214
- "grad_norm": 0.04552418366074562,
215
  "learning_rate": 9.732824427480916e-05,
216
- "loss": 0.0418,
217
  "step": 270
218
  },
219
  {
220
  "epoch": 2.1374045801526718,
221
- "grad_norm": 3.4131999015808105,
222
  "learning_rate": 9.351145038167939e-05,
223
- "loss": 0.1439,
224
  "step": 280
225
  },
226
  {
227
  "epoch": 2.213740458015267,
228
- "grad_norm": 1.8281826972961426,
229
  "learning_rate": 8.969465648854962e-05,
230
- "loss": 0.1084,
231
  "step": 290
232
  },
233
  {
234
  "epoch": 2.2900763358778624,
235
- "grad_norm": 0.03664281591773033,
236
  "learning_rate": 8.587786259541986e-05,
237
- "loss": 0.0579,
238
  "step": 300
239
  },
240
  {
241
  "epoch": 2.2900763358778624,
242
- "eval_accuracy": 0.9510869565217391,
243
- "eval_loss": 0.161894753575325,
244
- "eval_runtime": 1.5917,
245
- "eval_samples_per_second": 231.199,
246
- "eval_steps_per_second": 28.9,
247
  "step": 300
248
  },
249
  {
250
  "epoch": 2.366412213740458,
251
- "grad_norm": 10.416833877563477,
252
  "learning_rate": 8.206106870229007e-05,
253
- "loss": 0.1502,
254
  "step": 310
255
  },
256
  {
257
  "epoch": 2.4427480916030535,
258
- "grad_norm": 2.426678419113159,
259
  "learning_rate": 7.824427480916031e-05,
260
- "loss": 0.2334,
261
  "step": 320
262
  },
263
  {
264
  "epoch": 2.519083969465649,
265
- "grad_norm": 5.191472053527832,
266
  "learning_rate": 7.442748091603053e-05,
267
- "loss": 0.0829,
268
  "step": 330
269
  },
270
  {
271
  "epoch": 2.595419847328244,
272
- "grad_norm": 2.1241517066955566,
273
  "learning_rate": 7.061068702290077e-05,
274
- "loss": 0.066,
275
  "step": 340
276
  },
277
  {
278
  "epoch": 2.67175572519084,
279
- "grad_norm": 0.10309349745512009,
280
  "learning_rate": 6.6793893129771e-05,
281
- "loss": 0.1356,
282
  "step": 350
283
  },
284
  {
285
  "epoch": 2.7480916030534353,
286
- "grad_norm": 0.8078840374946594,
287
  "learning_rate": 6.297709923664122e-05,
288
- "loss": 0.09,
289
  "step": 360
290
  },
291
  {
292
  "epoch": 2.8244274809160306,
293
- "grad_norm": 0.7653095126152039,
294
  "learning_rate": 5.916030534351146e-05,
295
- "loss": 0.0678,
296
  "step": 370
297
  },
298
  {
299
  "epoch": 2.900763358778626,
300
- "grad_norm": 0.1392490267753601,
301
  "learning_rate": 5.534351145038168e-05,
302
- "loss": 0.0532,
303
  "step": 380
304
  },
305
  {
306
  "epoch": 2.9770992366412212,
307
- "grad_norm": 7.884419918060303,
308
  "learning_rate": 5.152671755725191e-05,
309
- "loss": 0.1382,
310
  "step": 390
311
  },
312
  {
313
  "epoch": 3.053435114503817,
314
- "grad_norm": 10.337115287780762,
315
  "learning_rate": 4.7709923664122144e-05,
316
- "loss": 0.1181,
317
  "step": 400
318
  },
319
  {
320
  "epoch": 3.053435114503817,
321
- "eval_accuracy": 0.9809782608695652,
322
- "eval_loss": 0.06854937225580215,
323
- "eval_runtime": 2.5796,
324
- "eval_samples_per_second": 142.659,
325
- "eval_steps_per_second": 17.832,
326
  "step": 400
327
  },
328
  {
329
  "epoch": 3.1297709923664123,
330
- "grad_norm": 0.06403453648090363,
331
  "learning_rate": 4.389312977099237e-05,
332
- "loss": 0.1245,
333
  "step": 410
334
  },
335
  {
336
  "epoch": 3.2061068702290076,
337
- "grad_norm": 3.0858404636383057,
338
  "learning_rate": 4.00763358778626e-05,
339
- "loss": 0.0901,
340
  "step": 420
341
  },
342
  {
343
  "epoch": 3.282442748091603,
344
- "grad_norm": 1.1947516202926636,
345
  "learning_rate": 3.625954198473282e-05,
346
- "loss": 0.1319,
347
  "step": 430
348
  },
349
  {
350
  "epoch": 3.3587786259541983,
351
- "grad_norm": 2.4845542907714844,
352
  "learning_rate": 3.2442748091603054e-05,
353
- "loss": 0.0932,
354
  "step": 440
355
  },
356
  {
357
  "epoch": 3.435114503816794,
358
- "grad_norm": 0.7602748870849609,
359
  "learning_rate": 2.862595419847328e-05,
360
- "loss": 0.0749,
361
  "step": 450
362
  },
363
  {
364
  "epoch": 3.5114503816793894,
365
- "grad_norm": 3.669577121734619,
366
  "learning_rate": 2.4809160305343512e-05,
367
- "loss": 0.1067,
368
  "step": 460
369
  },
370
  {
371
  "epoch": 3.5877862595419847,
372
- "grad_norm": 0.02125421166419983,
373
  "learning_rate": 2.099236641221374e-05,
374
- "loss": 0.0278,
375
  "step": 470
376
  },
377
  {
378
  "epoch": 3.66412213740458,
379
- "grad_norm": 0.025344278663396835,
380
  "learning_rate": 1.717557251908397e-05,
381
- "loss": 0.0717,
382
  "step": 480
383
  },
384
  {
385
  "epoch": 3.7404580152671754,
386
- "grad_norm": 0.454380601644516,
387
  "learning_rate": 1.3358778625954198e-05,
388
- "loss": 0.1241,
389
  "step": 490
390
  },
391
  {
392
  "epoch": 3.816793893129771,
393
- "grad_norm": 0.49031591415405273,
394
  "learning_rate": 9.541984732824428e-06,
395
- "loss": 0.1321,
396
  "step": 500
397
  },
398
  {
399
  "epoch": 3.816793893129771,
400
- "eval_accuracy": 0.9809782608695652,
401
- "eval_loss": 0.0674990862607956,
402
- "eval_runtime": 1.6192,
403
- "eval_samples_per_second": 227.276,
404
- "eval_steps_per_second": 28.409,
405
  "step": 500
406
  },
407
  {
408
  "epoch": 3.8931297709923665,
409
- "grad_norm": 7.950806617736816,
410
  "learning_rate": 5.725190839694657e-06,
411
- "loss": 0.1726,
412
  "step": 510
413
  },
414
  {
415
  "epoch": 3.969465648854962,
416
- "grad_norm": 0.29777708649635315,
417
  "learning_rate": 1.908396946564886e-06,
418
- "loss": 0.0653,
419
  "step": 520
420
  },
421
  {
422
  "epoch": 4.0,
423
  "step": 524,
424
  "total_flos": 6.45382209997357e+17,
425
- "train_loss": 0.13892418100633694,
426
- "train_runtime": 142.587,
427
- "train_samples_per_second": 58.406,
428
- "train_steps_per_second": 3.675
429
  }
430
  ],
431
  "logging_steps": 10,
 
1
  {
2
  "best_global_step": 500,
3
+ "best_metric": 0.08242087066173553,
4
  "best_model_checkpoint": "fish_disease_datasets/checkpoint-500",
5
  "epoch": 4.0,
6
  "eval_steps": 100,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.07633587786259542,
14
+ "grad_norm": 2.050231695175171,
15
  "learning_rate": 0.00019656488549618322,
16
+ "loss": 1.8355,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.15267175572519084,
21
+ "grad_norm": 2.2453360557556152,
22
  "learning_rate": 0.00019274809160305345,
23
+ "loss": 1.5538,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.22900763358778625,
28
+ "grad_norm": 1.8408141136169434,
29
  "learning_rate": 0.00018893129770992367,
30
+ "loss": 1.354,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.3053435114503817,
35
+ "grad_norm": 2.0277440547943115,
36
  "learning_rate": 0.0001851145038167939,
37
+ "loss": 1.035,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.3816793893129771,
42
+ "grad_norm": 2.367299795150757,
43
  "learning_rate": 0.00018129770992366412,
44
+ "loss": 0.9501,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.4580152671755725,
49
+ "grad_norm": 3.103821277618408,
50
  "learning_rate": 0.00017748091603053437,
51
+ "loss": 0.6932,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.5343511450381679,
56
+ "grad_norm": 2.1959078311920166,
57
  "learning_rate": 0.0001736641221374046,
58
+ "loss": 0.5865,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.6106870229007634,
63
+ "grad_norm": 3.5142126083374023,
64
  "learning_rate": 0.00016984732824427482,
65
+ "loss": 0.5495,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.6870229007633588,
70
+ "grad_norm": 3.4135642051696777,
71
  "learning_rate": 0.00016603053435114505,
72
+ "loss": 0.5116,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.7633587786259542,
77
+ "grad_norm": 1.4583014249801636,
78
  "learning_rate": 0.00016221374045801527,
79
+ "loss": 0.4914,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.7633587786259542,
84
+ "eval_accuracy": 0.9130434782608695,
85
+ "eval_loss": 0.37967830896377563,
86
+ "eval_runtime": 1.6211,
87
+ "eval_samples_per_second": 227.007,
88
+ "eval_steps_per_second": 28.376,
89
  "step": 100
90
  },
91
  {
92
  "epoch": 0.8396946564885496,
93
+ "grad_norm": 3.2363483905792236,
94
  "learning_rate": 0.0001583969465648855,
95
+ "loss": 0.3807,
96
  "step": 110
97
  },
98
  {
99
  "epoch": 0.916030534351145,
100
+ "grad_norm": 2.342712879180908,
101
  "learning_rate": 0.00015458015267175574,
102
+ "loss": 0.4461,
103
  "step": 120
104
  },
105
  {
106
  "epoch": 0.9923664122137404,
107
+ "grad_norm": 3.4489521980285645,
108
  "learning_rate": 0.00015076335877862594,
109
+ "loss": 0.3225,
110
  "step": 130
111
  },
112
  {
113
  "epoch": 1.0687022900763359,
114
+ "grad_norm": 1.5788570642471313,
115
  "learning_rate": 0.0001469465648854962,
116
+ "loss": 0.2013,
117
  "step": 140
118
  },
119
  {
120
  "epoch": 1.1450381679389312,
121
+ "grad_norm": 5.834712982177734,
122
  "learning_rate": 0.00014312977099236642,
123
+ "loss": 0.2111,
124
  "step": 150
125
  },
126
  {
127
  "epoch": 1.2213740458015268,
128
+ "grad_norm": 0.4217844009399414,
129
  "learning_rate": 0.00013931297709923664,
130
+ "loss": 0.1577,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 1.297709923664122,
135
+ "grad_norm": 0.20651423931121826,
136
  "learning_rate": 0.0001354961832061069,
137
+ "loss": 0.1269,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 1.3740458015267176,
142
+ "grad_norm": 4.183597087860107,
143
  "learning_rate": 0.0001316793893129771,
144
+ "loss": 0.1539,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 1.450381679389313,
149
+ "grad_norm": 3.6589787006378174,
150
  "learning_rate": 0.00012786259541984734,
151
+ "loss": 0.1826,
152
  "step": 190
153
  },
154
  {
155
  "epoch": 1.5267175572519083,
156
+ "grad_norm": 0.8545241951942444,
157
  "learning_rate": 0.00012404580152671757,
158
+ "loss": 0.0893,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 1.5267175572519083,
163
+ "eval_accuracy": 0.9565217391304348,
164
+ "eval_loss": 0.17774879932403564,
165
+ "eval_runtime": 1.3742,
166
+ "eval_samples_per_second": 267.782,
167
+ "eval_steps_per_second": 33.473,
168
  "step": 200
169
  },
170
  {
171
  "epoch": 1.6030534351145038,
172
+ "grad_norm": 6.76841926574707,
173
  "learning_rate": 0.0001202290076335878,
174
+ "loss": 0.1503,
175
  "step": 210
176
  },
177
  {
178
  "epoch": 1.6793893129770994,
179
+ "grad_norm": 3.40751314163208,
180
  "learning_rate": 0.00011641221374045803,
181
+ "loss": 0.1792,
182
  "step": 220
183
  },
184
  {
185
  "epoch": 1.7557251908396947,
186
+ "grad_norm": 0.26832714676856995,
187
  "learning_rate": 0.00011259541984732824,
188
+ "loss": 0.164,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 1.83206106870229,
193
+ "grad_norm": 0.2684068977832794,
194
  "learning_rate": 0.00010877862595419848,
195
+ "loss": 0.1942,
196
  "step": 240
197
  },
198
  {
199
  "epoch": 1.9083969465648853,
200
+ "grad_norm": 1.6055045127868652,
201
  "learning_rate": 0.00010496183206106871,
202
+ "loss": 0.0789,
203
  "step": 250
204
  },
205
  {
206
  "epoch": 1.984732824427481,
207
+ "grad_norm": 1.8824135065078735,
208
  "learning_rate": 0.00010114503816793894,
209
+ "loss": 0.1053,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 2.0610687022900764,
214
+ "grad_norm": 0.2628338932991028,
215
  "learning_rate": 9.732824427480916e-05,
216
+ "loss": 0.0604,
217
  "step": 270
218
  },
219
  {
220
  "epoch": 2.1374045801526718,
221
+ "grad_norm": 4.945899486541748,
222
  "learning_rate": 9.351145038167939e-05,
223
+ "loss": 0.0896,
224
  "step": 280
225
  },
226
  {
227
  "epoch": 2.213740458015267,
228
+ "grad_norm": 0.10353419184684753,
229
  "learning_rate": 8.969465648854962e-05,
230
+ "loss": 0.1232,
231
  "step": 290
232
  },
233
  {
234
  "epoch": 2.2900763358778624,
235
+ "grad_norm": 0.1373486965894699,
236
  "learning_rate": 8.587786259541986e-05,
237
+ "loss": 0.0733,
238
  "step": 300
239
  },
240
  {
241
  "epoch": 2.2900763358778624,
242
+ "eval_accuracy": 0.9619565217391305,
243
+ "eval_loss": 0.1245102658867836,
244
+ "eval_runtime": 1.3915,
245
+ "eval_samples_per_second": 264.465,
246
+ "eval_steps_per_second": 33.058,
247
  "step": 300
248
  },
249
  {
250
  "epoch": 2.366412213740458,
251
+ "grad_norm": 0.08756294846534729,
252
  "learning_rate": 8.206106870229007e-05,
253
+ "loss": 0.0652,
254
  "step": 310
255
  },
256
  {
257
  "epoch": 2.4427480916030535,
258
+ "grad_norm": 0.12176764756441116,
259
  "learning_rate": 7.824427480916031e-05,
260
+ "loss": 0.0502,
261
  "step": 320
262
  },
263
  {
264
  "epoch": 2.519083969465649,
265
+ "grad_norm": 0.09179558604955673,
266
  "learning_rate": 7.442748091603053e-05,
267
+ "loss": 0.0686,
268
  "step": 330
269
  },
270
  {
271
  "epoch": 2.595419847328244,
272
+ "grad_norm": 0.6511502861976624,
273
  "learning_rate": 7.061068702290077e-05,
274
+ "loss": 0.0435,
275
  "step": 340
276
  },
277
  {
278
  "epoch": 2.67175572519084,
279
+ "grad_norm": 0.38830748200416565,
280
  "learning_rate": 6.6793893129771e-05,
281
+ "loss": 0.0416,
282
  "step": 350
283
  },
284
  {
285
  "epoch": 2.7480916030534353,
286
+ "grad_norm": 0.0798097625374794,
287
  "learning_rate": 6.297709923664122e-05,
288
+ "loss": 0.1125,
289
  "step": 360
290
  },
291
  {
292
  "epoch": 2.8244274809160306,
293
+ "grad_norm": 2.4349348545074463,
294
  "learning_rate": 5.916030534351146e-05,
295
+ "loss": 0.0514,
296
  "step": 370
297
  },
298
  {
299
  "epoch": 2.900763358778626,
300
+ "grad_norm": 0.07260897010564804,
301
  "learning_rate": 5.534351145038168e-05,
302
+ "loss": 0.0704,
303
  "step": 380
304
  },
305
  {
306
  "epoch": 2.9770992366412212,
307
+ "grad_norm": 0.08120035380125046,
308
  "learning_rate": 5.152671755725191e-05,
309
+ "loss": 0.0562,
310
  "step": 390
311
  },
312
  {
313
  "epoch": 3.053435114503817,
314
+ "grad_norm": 0.06785257160663605,
315
  "learning_rate": 4.7709923664122144e-05,
316
+ "loss": 0.0384,
317
  "step": 400
318
  },
319
  {
320
  "epoch": 3.053435114503817,
321
+ "eval_accuracy": 0.970108695652174,
322
+ "eval_loss": 0.10201143473386765,
323
+ "eval_runtime": 1.5213,
324
+ "eval_samples_per_second": 241.899,
325
+ "eval_steps_per_second": 30.237,
326
  "step": 400
327
  },
328
  {
329
  "epoch": 3.1297709923664123,
330
+ "grad_norm": 0.6524462103843689,
331
  "learning_rate": 4.389312977099237e-05,
332
+ "loss": 0.0515,
333
  "step": 410
334
  },
335
  {
336
  "epoch": 3.2061068702290076,
337
+ "grad_norm": 0.07161343842744827,
338
  "learning_rate": 4.00763358778626e-05,
339
+ "loss": 0.0377,
340
  "step": 420
341
  },
342
  {
343
  "epoch": 3.282442748091603,
344
+ "grad_norm": 0.06215129420161247,
345
  "learning_rate": 3.625954198473282e-05,
346
+ "loss": 0.0259,
347
  "step": 430
348
  },
349
  {
350
  "epoch": 3.3587786259541983,
351
+ "grad_norm": 0.05841224268078804,
352
  "learning_rate": 3.2442748091603054e-05,
353
+ "loss": 0.0221,
354
  "step": 440
355
  },
356
  {
357
  "epoch": 3.435114503816794,
358
+ "grad_norm": 1.0506515502929688,
359
  "learning_rate": 2.862595419847328e-05,
360
+ "loss": 0.0561,
361
  "step": 450
362
  },
363
  {
364
  "epoch": 3.5114503816793894,
365
+ "grad_norm": 6.415824890136719,
366
  "learning_rate": 2.4809160305343512e-05,
367
+ "loss": 0.058,
368
  "step": 460
369
  },
370
  {
371
  "epoch": 3.5877862595419847,
372
+ "grad_norm": 0.056393466889858246,
373
  "learning_rate": 2.099236641221374e-05,
374
+ "loss": 0.0336,
375
  "step": 470
376
  },
377
  {
378
  "epoch": 3.66412213740458,
379
+ "grad_norm": 0.2498386651277542,
380
  "learning_rate": 1.717557251908397e-05,
381
+ "loss": 0.0231,
382
  "step": 480
383
  },
384
  {
385
  "epoch": 3.7404580152671754,
386
+ "grad_norm": 0.06095988303422928,
387
  "learning_rate": 1.3358778625954198e-05,
388
+ "loss": 0.0184,
389
  "step": 490
390
  },
391
  {
392
  "epoch": 3.816793893129771,
393
+ "grad_norm": 0.06443587690591812,
394
  "learning_rate": 9.541984732824428e-06,
395
+ "loss": 0.0189,
396
  "step": 500
397
  },
398
  {
399
  "epoch": 3.816793893129771,
400
+ "eval_accuracy": 0.9782608695652174,
401
+ "eval_loss": 0.08242087066173553,
402
+ "eval_runtime": 1.3838,
403
+ "eval_samples_per_second": 265.933,
404
+ "eval_steps_per_second": 33.242,
405
  "step": 500
406
  },
407
  {
408
  "epoch": 3.8931297709923665,
409
+ "grad_norm": 0.09356739372015,
410
  "learning_rate": 5.725190839694657e-06,
411
+ "loss": 0.0363,
412
  "step": 510
413
  },
414
  {
415
  "epoch": 3.969465648854962,
416
+ "grad_norm": 0.0693662166595459,
417
  "learning_rate": 1.908396946564886e-06,
418
+ "loss": 0.0379,
419
  "step": 520
420
  },
421
  {
422
  "epoch": 4.0,
423
  "step": 524,
424
  "total_flos": 6.45382209997357e+17,
425
+ "train_loss": 0.26906703446657604,
426
+ "train_runtime": 211.2966,
427
+ "train_samples_per_second": 39.414,
428
+ "train_steps_per_second": 2.48
429
  }
430
  ],
431
  "logging_steps": 10,