fuji12345 committed on
Commit
4d59c26
·
verified ·
1 Parent(s): 02dddee

End of training

Browse files
Files changed (5) hide show
  1. README.md +21 -1
  2. all_results.json +13 -0
  3. eval_results.json +8 -0
  4. train_results.json +8 -0
  5. trainer_state.json +722 -0
README.md CHANGED
@@ -3,12 +3,29 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
 
 
6
  - generated_from_trainer
7
  datasets:
8
  - imagefolder
 
 
9
  model-index:
10
  - name: vit-base-anime-e100
11
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -17,6 +34,9 @@ should probably proofread and complete it, then remove this comment. -->
17
  # vit-base-anime-e100
18
 
19
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
 
 
 
20
 
21
  ## Model description
22
 
 
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
6
+ - image-classification
7
+ - vision
8
  - generated_from_trainer
9
  datasets:
10
  - imagefolder
11
+ metrics:
12
+ - accuracy
13
  model-index:
14
  - name: vit-base-anime-e100
15
+ results:
16
+ - task:
17
+ name: Image Classification
18
+ type: image-classification
19
+ dataset:
20
+ name: imagefolder
21
+ type: imagefolder
22
+ config: default
23
+ split: train
24
+ args: default
25
+ metrics:
26
+ - name: Accuracy
27
+ type: accuracy
28
+ value: 0.9804063860667634
29
  ---
30
 
31
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
34
  # vit-base-anime-e100
35
 
36
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
37
+ It achieves the following results on the evaluation set:
38
+ - Loss: 0.0757
39
+ - Accuracy: 0.9804
40
 
41
  ## Model description
42
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.9804063860667634,
4
+ "eval_loss": 0.07571936398744583,
5
+ "eval_runtime": 57.28,
6
+ "eval_samples_per_second": 24.057,
7
+ "eval_steps_per_second": 3.02,
8
+ "total_flos": 6.049024709315052e+17,
9
+ "train_loss": 0.1105953664289879,
10
+ "train_runtime": 396.2703,
11
+ "train_samples_per_second": 19.699,
12
+ "train_steps_per_second": 2.463
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.9804063860667634,
4
+ "eval_loss": 0.07571936398744583,
5
+ "eval_runtime": 57.28,
6
+ "eval_samples_per_second": 24.057,
7
+ "eval_steps_per_second": 3.02
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 6.049024709315052e+17,
4
+ "train_loss": 0.1105953664289879,
5
+ "train_runtime": 396.2703,
6
+ "train_samples_per_second": 19.699,
7
+ "train_steps_per_second": 2.463
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,722 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 976,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.010245901639344262,
14
+ "grad_norm": 2.1242284774780273,
15
+ "learning_rate": 4.9538934426229514e-05,
16
+ "loss": 0.6555,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.020491803278688523,
21
+ "grad_norm": 1.8792022466659546,
22
+ "learning_rate": 4.90266393442623e-05,
23
+ "loss": 0.56,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.030737704918032786,
28
+ "grad_norm": 2.08386492729187,
29
+ "learning_rate": 4.8514344262295086e-05,
30
+ "loss": 0.4735,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.040983606557377046,
35
+ "grad_norm": 4.066464424133301,
36
+ "learning_rate": 4.800204918032787e-05,
37
+ "loss": 0.3951,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.05122950819672131,
42
+ "grad_norm": 3.497546672821045,
43
+ "learning_rate": 4.748975409836066e-05,
44
+ "loss": 0.4149,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.06147540983606557,
49
+ "grad_norm": 1.3420535326004028,
50
+ "learning_rate": 4.6977459016393445e-05,
51
+ "loss": 0.1828,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.07172131147540983,
56
+ "grad_norm": 15.952448844909668,
57
+ "learning_rate": 4.646516393442623e-05,
58
+ "loss": 0.1975,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.08196721311475409,
63
+ "grad_norm": 0.9606868624687195,
64
+ "learning_rate": 4.595286885245902e-05,
65
+ "loss": 0.129,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.09221311475409837,
70
+ "grad_norm": 6.09004020690918,
71
+ "learning_rate": 4.5440573770491804e-05,
72
+ "loss": 0.1198,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.10245901639344263,
77
+ "grad_norm": 0.18688400089740753,
78
+ "learning_rate": 4.49282786885246e-05,
79
+ "loss": 0.1869,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.11270491803278689,
84
+ "grad_norm": 0.9326221942901611,
85
+ "learning_rate": 4.4415983606557376e-05,
86
+ "loss": 0.1971,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.12295081967213115,
91
+ "grad_norm": 5.729072093963623,
92
+ "learning_rate": 4.390368852459016e-05,
93
+ "loss": 0.1302,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.13319672131147542,
98
+ "grad_norm": 8.113044738769531,
99
+ "learning_rate": 4.339139344262295e-05,
100
+ "loss": 0.1809,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.14344262295081966,
105
+ "grad_norm": 4.81787109375,
106
+ "learning_rate": 4.287909836065574e-05,
107
+ "loss": 0.2034,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.15368852459016394,
112
+ "grad_norm": 0.1648479700088501,
113
+ "learning_rate": 4.236680327868853e-05,
114
+ "loss": 0.1142,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.16393442622950818,
119
+ "grad_norm": 8.948138236999512,
120
+ "learning_rate": 4.1854508196721314e-05,
121
+ "loss": 0.2157,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.17418032786885246,
126
+ "grad_norm": 0.9185877442359924,
127
+ "learning_rate": 4.13422131147541e-05,
128
+ "loss": 0.1465,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.18442622950819673,
133
+ "grad_norm": 17.619211196899414,
134
+ "learning_rate": 4.0829918032786886e-05,
135
+ "loss": 0.0859,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.19467213114754098,
140
+ "grad_norm": 0.09351029992103577,
141
+ "learning_rate": 4.031762295081967e-05,
142
+ "loss": 0.1218,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.20491803278688525,
147
+ "grad_norm": 0.09682345390319824,
148
+ "learning_rate": 3.980532786885246e-05,
149
+ "loss": 0.1478,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.2151639344262295,
154
+ "grad_norm": 7.680890083312988,
155
+ "learning_rate": 3.9293032786885245e-05,
156
+ "loss": 0.1526,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.22540983606557377,
161
+ "grad_norm": 0.10127613693475723,
162
+ "learning_rate": 3.878073770491804e-05,
163
+ "loss": 0.1307,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.23565573770491804,
168
+ "grad_norm": 0.1154651865363121,
169
+ "learning_rate": 3.8268442622950824e-05,
170
+ "loss": 0.0716,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.2459016393442623,
175
+ "grad_norm": 9.724747657775879,
176
+ "learning_rate": 3.775614754098361e-05,
177
+ "loss": 0.0654,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.25614754098360654,
182
+ "grad_norm": 0.11626848578453064,
183
+ "learning_rate": 3.724385245901639e-05,
184
+ "loss": 0.0677,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.26639344262295084,
189
+ "grad_norm": 0.1878882795572281,
190
+ "learning_rate": 3.673155737704918e-05,
191
+ "loss": 0.0664,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.2766393442622951,
196
+ "grad_norm": 1.5728862285614014,
197
+ "learning_rate": 3.621926229508197e-05,
198
+ "loss": 0.1312,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.28688524590163933,
203
+ "grad_norm": 16.9953670501709,
204
+ "learning_rate": 3.5706967213114755e-05,
205
+ "loss": 0.0881,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.29713114754098363,
210
+ "grad_norm": 0.6894469261169434,
211
+ "learning_rate": 3.519467213114754e-05,
212
+ "loss": 0.0983,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.3073770491803279,
217
+ "grad_norm": 10.199541091918945,
218
+ "learning_rate": 3.4682377049180334e-05,
219
+ "loss": 0.1839,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 0.3176229508196721,
224
+ "grad_norm": 0.07533033937215805,
225
+ "learning_rate": 3.417008196721312e-05,
226
+ "loss": 0.0689,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 0.32786885245901637,
231
+ "grad_norm": 0.1446436196565628,
232
+ "learning_rate": 3.36577868852459e-05,
233
+ "loss": 0.0669,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 0.33811475409836067,
238
+ "grad_norm": 0.07076974213123322,
239
+ "learning_rate": 3.3145491803278686e-05,
240
+ "loss": 0.0638,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 0.3483606557377049,
245
+ "grad_norm": 0.13626152276992798,
246
+ "learning_rate": 3.263319672131148e-05,
247
+ "loss": 0.0965,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 0.35860655737704916,
252
+ "grad_norm": 0.09723177552223206,
253
+ "learning_rate": 3.2120901639344265e-05,
254
+ "loss": 0.0836,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 0.36885245901639346,
259
+ "grad_norm": 0.0948810800909996,
260
+ "learning_rate": 3.160860655737705e-05,
261
+ "loss": 0.1774,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 0.3790983606557377,
266
+ "grad_norm": 0.17850804328918457,
267
+ "learning_rate": 3.109631147540984e-05,
268
+ "loss": 0.048,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 0.38934426229508196,
273
+ "grad_norm": 0.18865767121315002,
274
+ "learning_rate": 3.0584016393442624e-05,
275
+ "loss": 0.0181,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 0.39959016393442626,
280
+ "grad_norm": 0.05496485158801079,
281
+ "learning_rate": 3.007172131147541e-05,
282
+ "loss": 0.1625,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 0.4098360655737705,
287
+ "grad_norm": 0.39744535088539124,
288
+ "learning_rate": 2.9559426229508196e-05,
289
+ "loss": 0.0092,
290
+ "step": 400
291
+ },
292
+ {
293
+ "epoch": 0.42008196721311475,
294
+ "grad_norm": 0.33696502447128296,
295
+ "learning_rate": 2.9047131147540986e-05,
296
+ "loss": 0.096,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 0.430327868852459,
301
+ "grad_norm": 1.563730239868164,
302
+ "learning_rate": 2.8534836065573772e-05,
303
+ "loss": 0.1656,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 0.4405737704918033,
308
+ "grad_norm": 1.198880672454834,
309
+ "learning_rate": 2.802254098360656e-05,
310
+ "loss": 0.1266,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 0.45081967213114754,
315
+ "grad_norm": 0.12829680740833282,
316
+ "learning_rate": 2.7510245901639348e-05,
317
+ "loss": 0.0509,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 0.4610655737704918,
322
+ "grad_norm": 0.11783476918935776,
323
+ "learning_rate": 2.699795081967213e-05,
324
+ "loss": 0.0318,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 0.4713114754098361,
329
+ "grad_norm": 0.06775141507387161,
330
+ "learning_rate": 2.6485655737704917e-05,
331
+ "loss": 0.0595,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 0.48155737704918034,
336
+ "grad_norm": 0.0522720031440258,
337
+ "learning_rate": 2.5973360655737707e-05,
338
+ "loss": 0.0096,
339
+ "step": 470
340
+ },
341
+ {
342
+ "epoch": 0.4918032786885246,
343
+ "grad_norm": 0.0782211571931839,
344
+ "learning_rate": 2.5461065573770493e-05,
345
+ "loss": 0.0938,
346
+ "step": 480
347
+ },
348
+ {
349
+ "epoch": 0.5020491803278688,
350
+ "grad_norm": 0.07287899404764175,
351
+ "learning_rate": 2.494877049180328e-05,
352
+ "loss": 0.0084,
353
+ "step": 490
354
+ },
355
+ {
356
+ "epoch": 0.5122950819672131,
357
+ "grad_norm": 0.09823473542928696,
358
+ "learning_rate": 2.4436475409836065e-05,
359
+ "loss": 0.0621,
360
+ "step": 500
361
+ },
362
+ {
363
+ "epoch": 0.5225409836065574,
364
+ "grad_norm": 4.648950099945068,
365
+ "learning_rate": 2.392418032786885e-05,
366
+ "loss": 0.1338,
367
+ "step": 510
368
+ },
369
+ {
370
+ "epoch": 0.5327868852459017,
371
+ "grad_norm": 0.06128643825650215,
372
+ "learning_rate": 2.341188524590164e-05,
373
+ "loss": 0.1891,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 0.5430327868852459,
378
+ "grad_norm": 0.14310023188591003,
379
+ "learning_rate": 2.2899590163934427e-05,
380
+ "loss": 0.0636,
381
+ "step": 530
382
+ },
383
+ {
384
+ "epoch": 0.5532786885245902,
385
+ "grad_norm": 0.10368062555789948,
386
+ "learning_rate": 2.2387295081967214e-05,
387
+ "loss": 0.1269,
388
+ "step": 540
389
+ },
390
+ {
391
+ "epoch": 0.5635245901639344,
392
+ "grad_norm": 1.8076454401016235,
393
+ "learning_rate": 2.1875e-05,
394
+ "loss": 0.0999,
395
+ "step": 550
396
+ },
397
+ {
398
+ "epoch": 0.5737704918032787,
399
+ "grad_norm": 0.11112195998430252,
400
+ "learning_rate": 2.136270491803279e-05,
401
+ "loss": 0.0599,
402
+ "step": 560
403
+ },
404
+ {
405
+ "epoch": 0.5840163934426229,
406
+ "grad_norm": 0.09928332269191742,
407
+ "learning_rate": 2.0850409836065572e-05,
408
+ "loss": 0.0574,
409
+ "step": 570
410
+ },
411
+ {
412
+ "epoch": 0.5942622950819673,
413
+ "grad_norm": 2.169119119644165,
414
+ "learning_rate": 2.0338114754098362e-05,
415
+ "loss": 0.1283,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 0.6045081967213115,
420
+ "grad_norm": 28.740341186523438,
421
+ "learning_rate": 1.9825819672131148e-05,
422
+ "loss": 0.122,
423
+ "step": 590
424
+ },
425
+ {
426
+ "epoch": 0.6147540983606558,
427
+ "grad_norm": 0.2639801800251007,
428
+ "learning_rate": 1.9313524590163938e-05,
429
+ "loss": 0.0113,
430
+ "step": 600
431
+ },
432
+ {
433
+ "epoch": 0.625,
434
+ "grad_norm": 0.04671436920762062,
435
+ "learning_rate": 1.880122950819672e-05,
436
+ "loss": 0.0747,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 0.6352459016393442,
441
+ "grad_norm": 2.0250561237335205,
442
+ "learning_rate": 1.828893442622951e-05,
443
+ "loss": 0.0979,
444
+ "step": 620
445
+ },
446
+ {
447
+ "epoch": 0.6454918032786885,
448
+ "grad_norm": 0.05262044072151184,
449
+ "learning_rate": 1.7776639344262296e-05,
450
+ "loss": 0.0063,
451
+ "step": 630
452
+ },
453
+ {
454
+ "epoch": 0.6557377049180327,
455
+ "grad_norm": 5.259546279907227,
456
+ "learning_rate": 1.7264344262295082e-05,
457
+ "loss": 0.1173,
458
+ "step": 640
459
+ },
460
+ {
461
+ "epoch": 0.6659836065573771,
462
+ "grad_norm": 0.04738146439194679,
463
+ "learning_rate": 1.675204918032787e-05,
464
+ "loss": 0.0063,
465
+ "step": 650
466
+ },
467
+ {
468
+ "epoch": 0.6762295081967213,
469
+ "grad_norm": 0.04586351662874222,
470
+ "learning_rate": 1.6239754098360658e-05,
471
+ "loss": 0.1245,
472
+ "step": 660
473
+ },
474
+ {
475
+ "epoch": 0.6864754098360656,
476
+ "grad_norm": 8.384217262268066,
477
+ "learning_rate": 1.572745901639344e-05,
478
+ "loss": 0.0712,
479
+ "step": 670
480
+ },
481
+ {
482
+ "epoch": 0.6967213114754098,
483
+ "grad_norm": 0.053021881729364395,
484
+ "learning_rate": 1.5215163934426229e-05,
485
+ "loss": 0.0071,
486
+ "step": 680
487
+ },
488
+ {
489
+ "epoch": 0.7069672131147541,
490
+ "grad_norm": 0.1454160213470459,
491
+ "learning_rate": 1.4702868852459017e-05,
492
+ "loss": 0.0644,
493
+ "step": 690
494
+ },
495
+ {
496
+ "epoch": 0.7172131147540983,
497
+ "grad_norm": 0.09849333763122559,
498
+ "learning_rate": 1.4190573770491805e-05,
499
+ "loss": 0.0221,
500
+ "step": 700
501
+ },
502
+ {
503
+ "epoch": 0.7274590163934426,
504
+ "grad_norm": 0.056653790175914764,
505
+ "learning_rate": 1.367827868852459e-05,
506
+ "loss": 0.1279,
507
+ "step": 710
508
+ },
509
+ {
510
+ "epoch": 0.7377049180327869,
511
+ "grad_norm": 0.393472284078598,
512
+ "learning_rate": 1.3165983606557377e-05,
513
+ "loss": 0.0057,
514
+ "step": 720
515
+ },
516
+ {
517
+ "epoch": 0.7479508196721312,
518
+ "grad_norm": 0.5363253355026245,
519
+ "learning_rate": 1.2653688524590165e-05,
520
+ "loss": 0.0065,
521
+ "step": 730
522
+ },
523
+ {
524
+ "epoch": 0.7581967213114754,
525
+ "grad_norm": 0.046929918229579926,
526
+ "learning_rate": 1.2141393442622951e-05,
527
+ "loss": 0.1832,
528
+ "step": 740
529
+ },
530
+ {
531
+ "epoch": 0.7684426229508197,
532
+ "grad_norm": 0.050630487501621246,
533
+ "learning_rate": 1.1629098360655737e-05,
534
+ "loss": 0.0058,
535
+ "step": 750
536
+ },
537
+ {
538
+ "epoch": 0.7786885245901639,
539
+ "grad_norm": 0.042579714208841324,
540
+ "learning_rate": 1.1116803278688525e-05,
541
+ "loss": 0.0081,
542
+ "step": 760
543
+ },
544
+ {
545
+ "epoch": 0.7889344262295082,
546
+ "grad_norm": 0.04666388779878616,
547
+ "learning_rate": 1.0604508196721312e-05,
548
+ "loss": 0.0088,
549
+ "step": 770
550
+ },
551
+ {
552
+ "epoch": 0.7991803278688525,
553
+ "grad_norm": 0.06457391381263733,
554
+ "learning_rate": 1.0092213114754098e-05,
555
+ "loss": 0.0062,
556
+ "step": 780
557
+ },
558
+ {
559
+ "epoch": 0.8094262295081968,
560
+ "grad_norm": 0.05809812247753143,
561
+ "learning_rate": 9.579918032786886e-06,
562
+ "loss": 0.0647,
563
+ "step": 790
564
+ },
565
+ {
566
+ "epoch": 0.819672131147541,
567
+ "grad_norm": 0.06629171967506409,
568
+ "learning_rate": 9.067622950819672e-06,
569
+ "loss": 0.0058,
570
+ "step": 800
571
+ },
572
+ {
573
+ "epoch": 0.8299180327868853,
574
+ "grad_norm": 0.050829801708459854,
575
+ "learning_rate": 8.55532786885246e-06,
576
+ "loss": 0.1274,
577
+ "step": 810
578
+ },
579
+ {
580
+ "epoch": 0.8401639344262295,
581
+ "grad_norm": 0.05237673223018646,
582
+ "learning_rate": 8.043032786885246e-06,
583
+ "loss": 0.1201,
584
+ "step": 820
585
+ },
586
+ {
587
+ "epoch": 0.8504098360655737,
588
+ "grad_norm": 0.04731244966387749,
589
+ "learning_rate": 7.530737704918032e-06,
590
+ "loss": 0.0356,
591
+ "step": 830
592
+ },
593
+ {
594
+ "epoch": 0.860655737704918,
595
+ "grad_norm": 0.04225335642695427,
596
+ "learning_rate": 7.01844262295082e-06,
597
+ "loss": 0.1301,
598
+ "step": 840
599
+ },
600
+ {
601
+ "epoch": 0.8709016393442623,
602
+ "grad_norm": 0.08855666220188141,
603
+ "learning_rate": 6.506147540983606e-06,
604
+ "loss": 0.0259,
605
+ "step": 850
606
+ },
607
+ {
608
+ "epoch": 0.8811475409836066,
609
+ "grad_norm": 0.0881812572479248,
610
+ "learning_rate": 5.993852459016393e-06,
611
+ "loss": 0.0407,
612
+ "step": 860
613
+ },
614
+ {
615
+ "epoch": 0.8913934426229508,
616
+ "grad_norm": 0.04759250208735466,
617
+ "learning_rate": 5.4815573770491805e-06,
618
+ "loss": 0.2223,
619
+ "step": 870
620
+ },
621
+ {
622
+ "epoch": 0.9016393442622951,
623
+ "grad_norm": 0.08196867257356644,
624
+ "learning_rate": 4.9692622950819675e-06,
625
+ "loss": 0.0079,
626
+ "step": 880
627
+ },
628
+ {
629
+ "epoch": 0.9118852459016393,
630
+ "grad_norm": 0.1053190603852272,
631
+ "learning_rate": 4.4569672131147546e-06,
632
+ "loss": 0.1755,
633
+ "step": 890
634
+ },
635
+ {
636
+ "epoch": 0.9221311475409836,
637
+ "grad_norm": 0.10274066776037216,
638
+ "learning_rate": 3.944672131147542e-06,
639
+ "loss": 0.0128,
640
+ "step": 900
641
+ },
642
+ {
643
+ "epoch": 0.9323770491803278,
644
+ "grad_norm": 0.08095156401395798,
645
+ "learning_rate": 3.4323770491803283e-06,
646
+ "loss": 0.0778,
647
+ "step": 910
648
+ },
649
+ {
650
+ "epoch": 0.9426229508196722,
651
+ "grad_norm": 0.07761359214782715,
652
+ "learning_rate": 2.920081967213115e-06,
653
+ "loss": 0.0528,
654
+ "step": 920
655
+ },
656
+ {
657
+ "epoch": 0.9528688524590164,
658
+ "grad_norm": 0.24262677133083344,
659
+ "learning_rate": 2.4077868852459015e-06,
660
+ "loss": 0.091,
661
+ "step": 930
662
+ },
663
+ {
664
+ "epoch": 0.9631147540983607,
665
+ "grad_norm": 0.09831801801919937,
666
+ "learning_rate": 1.8954918032786886e-06,
667
+ "loss": 0.0617,
668
+ "step": 940
669
+ },
670
+ {
671
+ "epoch": 0.9733606557377049,
672
+ "grad_norm": 0.7775574326515198,
673
+ "learning_rate": 1.3831967213114754e-06,
674
+ "loss": 0.0262,
675
+ "step": 950
676
+ },
677
+ {
678
+ "epoch": 0.9836065573770492,
679
+ "grad_norm": 0.285656213760376,
680
+ "learning_rate": 8.709016393442623e-07,
681
+ "loss": 0.0083,
682
+ "step": 960
683
+ },
684
+ {
685
+ "epoch": 0.9938524590163934,
686
+ "grad_norm": 0.04368609935045242,
687
+ "learning_rate": 3.586065573770492e-07,
688
+ "loss": 0.1532,
689
+ "step": 970
690
+ },
691
+ {
692
+ "epoch": 1.0,
693
+ "step": 976,
694
+ "total_flos": 6.049024709315052e+17,
695
+ "train_loss": 0.1105953664289879,
696
+ "train_runtime": 396.2703,
697
+ "train_samples_per_second": 19.699,
698
+ "train_steps_per_second": 2.463
699
+ }
700
+ ],
701
+ "logging_steps": 10,
702
+ "max_steps": 976,
703
+ "num_input_tokens_seen": 0,
704
+ "num_train_epochs": 1,
705
+ "save_steps": 500,
706
+ "stateful_callbacks": {
707
+ "TrainerControl": {
708
+ "args": {
709
+ "should_epoch_stop": false,
710
+ "should_evaluate": false,
711
+ "should_log": false,
712
+ "should_save": true,
713
+ "should_training_stop": true
714
+ },
715
+ "attributes": {}
716
+ }
717
+ },
718
+ "total_flos": 6.049024709315052e+17,
719
+ "train_batch_size": 8,
720
+ "trial_name": null,
721
+ "trial_params": null
722
+ }