End of training

Browse files

Files changed (5) hide show

README.md +21 -1
all_results.json +13 -0
eval_results.json +8 -0
train_results.json +8 -0
trainer_state.json +722 -0

README.md CHANGED Viewed

@@ -3,12 +3,29 @@ library_name: transformers
 license: apache-2.0
 base_model: google/vit-base-patch16-224-in21k
 tags:
 - generated_from_trainer
 datasets:
 - imagefolder
 model-index:
 - name: vit-base-anime-e100
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -17,6 +34,9 @@ should probably proofread and complete it, then remove this comment. -->
 # vit-base-anime-e100
 This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
 ## Model description

 license: apache-2.0
 base_model: google/vit-base-patch16-224-in21k
 tags:
+- image-classification
+- vision
 - generated_from_trainer
 datasets:
 - imagefolder
+metrics:
+- accuracy
 model-index:
 - name: vit-base-anime-e100
+  results:
+  - task:
+      name: Image Classification
+      type: image-classification
+    dataset:
+      name: imagefolder
+      type: imagefolder
+      config: default
+      split: train
+      args: default
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.9804063860667634
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 # vit-base-anime-e100
 This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0757
+- Accuracy: 0.9804
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 1.0,
+    "eval_accuracy": 0.9804063860667634,
+    "eval_loss": 0.07571936398744583,
+    "eval_runtime": 57.28,
+    "eval_samples_per_second": 24.057,
+    "eval_steps_per_second": 3.02,
+    "total_flos": 6.049024709315052e+17,
+    "train_loss": 0.1105953664289879,
+    "train_runtime": 396.2703,
+    "train_samples_per_second": 19.699,
+    "train_steps_per_second": 2.463
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "eval_accuracy": 0.9804063860667634,
+    "eval_loss": 0.07571936398744583,
+    "eval_runtime": 57.28,
+    "eval_samples_per_second": 24.057,
+    "eval_steps_per_second": 3.02
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 6.049024709315052e+17,
+    "train_loss": 0.1105953664289879,
+    "train_runtime": 396.2703,
+    "train_samples_per_second": 19.699,
+    "train_steps_per_second": 2.463
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,722 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 976,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.010245901639344262,
+      "grad_norm": 2.1242284774780273,
+      "learning_rate": 4.9538934426229514e-05,
+      "loss": 0.6555,
+      "step": 10
+    },
+    {
+      "epoch": 0.020491803278688523,
+      "grad_norm": 1.8792022466659546,
+      "learning_rate": 4.90266393442623e-05,
+      "loss": 0.56,
+      "step": 20
+    },
+    {
+      "epoch": 0.030737704918032786,
+      "grad_norm": 2.08386492729187,
+      "learning_rate": 4.8514344262295086e-05,
+      "loss": 0.4735,
+      "step": 30
+    },
+    {
+      "epoch": 0.040983606557377046,
+      "grad_norm": 4.066464424133301,
+      "learning_rate": 4.800204918032787e-05,
+      "loss": 0.3951,
+      "step": 40
+    },
+    {
+      "epoch": 0.05122950819672131,
+      "grad_norm": 3.497546672821045,
+      "learning_rate": 4.748975409836066e-05,
+      "loss": 0.4149,
+      "step": 50
+    },
+    {
+      "epoch": 0.06147540983606557,
+      "grad_norm": 1.3420535326004028,
+      "learning_rate": 4.6977459016393445e-05,
+      "loss": 0.1828,
+      "step": 60
+    },
+    {
+      "epoch": 0.07172131147540983,
+      "grad_norm": 15.952448844909668,
+      "learning_rate": 4.646516393442623e-05,
+      "loss": 0.1975,
+      "step": 70
+    },
+    {
+      "epoch": 0.08196721311475409,
+      "grad_norm": 0.9606868624687195,
+      "learning_rate": 4.595286885245902e-05,
+      "loss": 0.129,
+      "step": 80
+    },
+    {
+      "epoch": 0.09221311475409837,
+      "grad_norm": 6.09004020690918,
+      "learning_rate": 4.5440573770491804e-05,
+      "loss": 0.1198,
+      "step": 90
+    },
+    {
+      "epoch": 0.10245901639344263,
+      "grad_norm": 0.18688400089740753,
+      "learning_rate": 4.49282786885246e-05,
+      "loss": 0.1869,
+      "step": 100
+    },
+    {
+      "epoch": 0.11270491803278689,
+      "grad_norm": 0.9326221942901611,
+      "learning_rate": 4.4415983606557376e-05,
+      "loss": 0.1971,
+      "step": 110
+    },
+    {
+      "epoch": 0.12295081967213115,
+      "grad_norm": 5.729072093963623,
+      "learning_rate": 4.390368852459016e-05,
+      "loss": 0.1302,
+      "step": 120
+    },
+    {
+      "epoch": 0.13319672131147542,
+      "grad_norm": 8.113044738769531,
+      "learning_rate": 4.339139344262295e-05,
+      "loss": 0.1809,
+      "step": 130
+    },
+    {
+      "epoch": 0.14344262295081966,
+      "grad_norm": 4.81787109375,
+      "learning_rate": 4.287909836065574e-05,
+      "loss": 0.2034,
+      "step": 140
+    },
+    {
+      "epoch": 0.15368852459016394,
+      "grad_norm": 0.1648479700088501,
+      "learning_rate": 4.236680327868853e-05,
+      "loss": 0.1142,
+      "step": 150
+    },
+    {
+      "epoch": 0.16393442622950818,
+      "grad_norm": 8.948138236999512,
+      "learning_rate": 4.1854508196721314e-05,
+      "loss": 0.2157,
+      "step": 160
+    },
+    {
+      "epoch": 0.17418032786885246,
+      "grad_norm": 0.9185877442359924,
+      "learning_rate": 4.13422131147541e-05,
+      "loss": 0.1465,
+      "step": 170
+    },
+    {
+      "epoch": 0.18442622950819673,
+      "grad_norm": 17.619211196899414,
+      "learning_rate": 4.0829918032786886e-05,
+      "loss": 0.0859,
+      "step": 180
+    },
+    {
+      "epoch": 0.19467213114754098,
+      "grad_norm": 0.09351029992103577,
+      "learning_rate": 4.031762295081967e-05,
+      "loss": 0.1218,
+      "step": 190
+    },
+    {
+      "epoch": 0.20491803278688525,
+      "grad_norm": 0.09682345390319824,
+      "learning_rate": 3.980532786885246e-05,
+      "loss": 0.1478,
+      "step": 200
+    },
+    {
+      "epoch": 0.2151639344262295,
+      "grad_norm": 7.680890083312988,
+      "learning_rate": 3.9293032786885245e-05,
+      "loss": 0.1526,
+      "step": 210
+    },
+    {
+      "epoch": 0.22540983606557377,
+      "grad_norm": 0.10127613693475723,
+      "learning_rate": 3.878073770491804e-05,
+      "loss": 0.1307,
+      "step": 220
+    },
+    {
+      "epoch": 0.23565573770491804,
+      "grad_norm": 0.1154651865363121,
+      "learning_rate": 3.8268442622950824e-05,
+      "loss": 0.0716,
+      "step": 230
+    },
+    {
+      "epoch": 0.2459016393442623,
+      "grad_norm": 9.724747657775879,
+      "learning_rate": 3.775614754098361e-05,
+      "loss": 0.0654,
+      "step": 240
+    },
+    {
+      "epoch": 0.25614754098360654,
+      "grad_norm": 0.11626848578453064,
+      "learning_rate": 3.724385245901639e-05,
+      "loss": 0.0677,
+      "step": 250
+    },
+    {
+      "epoch": 0.26639344262295084,
+      "grad_norm": 0.1878882795572281,
+      "learning_rate": 3.673155737704918e-05,
+      "loss": 0.0664,
+      "step": 260
+    },
+    {
+      "epoch": 0.2766393442622951,
+      "grad_norm": 1.5728862285614014,
+      "learning_rate": 3.621926229508197e-05,
+      "loss": 0.1312,
+      "step": 270
+    },
+    {
+      "epoch": 0.28688524590163933,
+      "grad_norm": 16.9953670501709,
+      "learning_rate": 3.5706967213114755e-05,
+      "loss": 0.0881,
+      "step": 280
+    },
+    {
+      "epoch": 0.29713114754098363,
+      "grad_norm": 0.6894469261169434,
+      "learning_rate": 3.519467213114754e-05,
+      "loss": 0.0983,
+      "step": 290
+    },
+    {
+      "epoch": 0.3073770491803279,
+      "grad_norm": 10.199541091918945,
+      "learning_rate": 3.4682377049180334e-05,
+      "loss": 0.1839,
+      "step": 300
+    },
+    {
+      "epoch": 0.3176229508196721,
+      "grad_norm": 0.07533033937215805,
+      "learning_rate": 3.417008196721312e-05,
+      "loss": 0.0689,
+      "step": 310
+    },
+    {
+      "epoch": 0.32786885245901637,
+      "grad_norm": 0.1446436196565628,
+      "learning_rate": 3.36577868852459e-05,
+      "loss": 0.0669,
+      "step": 320
+    },
+    {
+      "epoch": 0.33811475409836067,
+      "grad_norm": 0.07076974213123322,
+      "learning_rate": 3.3145491803278686e-05,
+      "loss": 0.0638,
+      "step": 330
+    },
+    {
+      "epoch": 0.3483606557377049,
+      "grad_norm": 0.13626152276992798,
+      "learning_rate": 3.263319672131148e-05,
+      "loss": 0.0965,
+      "step": 340
+    },
+    {
+      "epoch": 0.35860655737704916,
+      "grad_norm": 0.09723177552223206,
+      "learning_rate": 3.2120901639344265e-05,
+      "loss": 0.0836,
+      "step": 350
+    },
+    {
+      "epoch": 0.36885245901639346,
+      "grad_norm": 0.0948810800909996,
+      "learning_rate": 3.160860655737705e-05,
+      "loss": 0.1774,
+      "step": 360
+    },
+    {
+      "epoch": 0.3790983606557377,
+      "grad_norm": 0.17850804328918457,
+      "learning_rate": 3.109631147540984e-05,
+      "loss": 0.048,
+      "step": 370
+    },
+    {
+      "epoch": 0.38934426229508196,
+      "grad_norm": 0.18865767121315002,
+      "learning_rate": 3.0584016393442624e-05,
+      "loss": 0.0181,
+      "step": 380
+    },
+    {
+      "epoch": 0.39959016393442626,
+      "grad_norm": 0.05496485158801079,
+      "learning_rate": 3.007172131147541e-05,
+      "loss": 0.1625,
+      "step": 390
+    },
+    {
+      "epoch": 0.4098360655737705,
+      "grad_norm": 0.39744535088539124,
+      "learning_rate": 2.9559426229508196e-05,
+      "loss": 0.0092,
+      "step": 400
+    },
+    {
+      "epoch": 0.42008196721311475,
+      "grad_norm": 0.33696502447128296,
+      "learning_rate": 2.9047131147540986e-05,
+      "loss": 0.096,
+      "step": 410
+    },
+    {
+      "epoch": 0.430327868852459,
+      "grad_norm": 1.563730239868164,
+      "learning_rate": 2.8534836065573772e-05,
+      "loss": 0.1656,
+      "step": 420
+    },
+    {
+      "epoch": 0.4405737704918033,
+      "grad_norm": 1.198880672454834,
+      "learning_rate": 2.802254098360656e-05,
+      "loss": 0.1266,
+      "step": 430
+    },
+    {
+      "epoch": 0.45081967213114754,
+      "grad_norm": 0.12829680740833282,
+      "learning_rate": 2.7510245901639348e-05,
+      "loss": 0.0509,
+      "step": 440
+    },
+    {
+      "epoch": 0.4610655737704918,
+      "grad_norm": 0.11783476918935776,
+      "learning_rate": 2.699795081967213e-05,
+      "loss": 0.0318,
+      "step": 450
+    },
+    {
+      "epoch": 0.4713114754098361,
+      "grad_norm": 0.06775141507387161,
+      "learning_rate": 2.6485655737704917e-05,
+      "loss": 0.0595,
+      "step": 460
+    },
+    {
+      "epoch": 0.48155737704918034,
+      "grad_norm": 0.0522720031440258,
+      "learning_rate": 2.5973360655737707e-05,
+      "loss": 0.0096,
+      "step": 470
+    },
+    {
+      "epoch": 0.4918032786885246,
+      "grad_norm": 0.0782211571931839,
+      "learning_rate": 2.5461065573770493e-05,
+      "loss": 0.0938,
+      "step": 480
+    },
+    {
+      "epoch": 0.5020491803278688,
+      "grad_norm": 0.07287899404764175,
+      "learning_rate": 2.494877049180328e-05,
+      "loss": 0.0084,
+      "step": 490
+    },
+    {
+      "epoch": 0.5122950819672131,
+      "grad_norm": 0.09823473542928696,
+      "learning_rate": 2.4436475409836065e-05,
+      "loss": 0.0621,
+      "step": 500
+    },
+    {
+      "epoch": 0.5225409836065574,
+      "grad_norm": 4.648950099945068,
+      "learning_rate": 2.392418032786885e-05,
+      "loss": 0.1338,
+      "step": 510
+    },
+    {
+      "epoch": 0.5327868852459017,
+      "grad_norm": 0.06128643825650215,
+      "learning_rate": 2.341188524590164e-05,
+      "loss": 0.1891,
+      "step": 520
+    },
+    {
+      "epoch": 0.5430327868852459,
+      "grad_norm": 0.14310023188591003,
+      "learning_rate": 2.2899590163934427e-05,
+      "loss": 0.0636,
+      "step": 530
+    },
+    {
+      "epoch": 0.5532786885245902,
+      "grad_norm": 0.10368062555789948,
+      "learning_rate": 2.2387295081967214e-05,
+      "loss": 0.1269,
+      "step": 540
+    },
+    {
+      "epoch": 0.5635245901639344,
+      "grad_norm": 1.8076454401016235,
+      "learning_rate": 2.1875e-05,
+      "loss": 0.0999,
+      "step": 550
+    },
+    {
+      "epoch": 0.5737704918032787,
+      "grad_norm": 0.11112195998430252,
+      "learning_rate": 2.136270491803279e-05,
+      "loss": 0.0599,
+      "step": 560
+    },
+    {
+      "epoch": 0.5840163934426229,
+      "grad_norm": 0.09928332269191742,
+      "learning_rate": 2.0850409836065572e-05,
+      "loss": 0.0574,
+      "step": 570
+    },
+    {
+      "epoch": 0.5942622950819673,
+      "grad_norm": 2.169119119644165,
+      "learning_rate": 2.0338114754098362e-05,
+      "loss": 0.1283,
+      "step": 580
+    },
+    {
+      "epoch": 0.6045081967213115,
+      "grad_norm": 28.740341186523438,
+      "learning_rate": 1.9825819672131148e-05,
+      "loss": 0.122,
+      "step": 590
+    },
+    {
+      "epoch": 0.6147540983606558,
+      "grad_norm": 0.2639801800251007,
+      "learning_rate": 1.9313524590163938e-05,
+      "loss": 0.0113,
+      "step": 600
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 0.04671436920762062,
+      "learning_rate": 1.880122950819672e-05,
+      "loss": 0.0747,
+      "step": 610
+    },
+    {
+      "epoch": 0.6352459016393442,
+      "grad_norm": 2.0250561237335205,
+      "learning_rate": 1.828893442622951e-05,
+      "loss": 0.0979,
+      "step": 620
+    },
+    {
+      "epoch": 0.6454918032786885,
+      "grad_norm": 0.05262044072151184,
+      "learning_rate": 1.7776639344262296e-05,
+      "loss": 0.0063,
+      "step": 630
+    },
+    {
+      "epoch": 0.6557377049180327,
+      "grad_norm": 5.259546279907227,
+      "learning_rate": 1.7264344262295082e-05,
+      "loss": 0.1173,
+      "step": 640
+    },
+    {
+      "epoch": 0.6659836065573771,
+      "grad_norm": 0.04738146439194679,
+      "learning_rate": 1.675204918032787e-05,
+      "loss": 0.0063,
+      "step": 650
+    },
+    {
+      "epoch": 0.6762295081967213,
+      "grad_norm": 0.04586351662874222,
+      "learning_rate": 1.6239754098360658e-05,
+      "loss": 0.1245,
+      "step": 660
+    },
+    {
+      "epoch": 0.6864754098360656,
+      "grad_norm": 8.384217262268066,
+      "learning_rate": 1.572745901639344e-05,
+      "loss": 0.0712,
+      "step": 670
+    },
+    {
+      "epoch": 0.6967213114754098,
+      "grad_norm": 0.053021881729364395,
+      "learning_rate": 1.5215163934426229e-05,
+      "loss": 0.0071,
+      "step": 680
+    },
+    {
+      "epoch": 0.7069672131147541,
+      "grad_norm": 0.1454160213470459,
+      "learning_rate": 1.4702868852459017e-05,
+      "loss": 0.0644,
+      "step": 690
+    },
+    {
+      "epoch": 0.7172131147540983,
+      "grad_norm": 0.09849333763122559,
+      "learning_rate": 1.4190573770491805e-05,
+      "loss": 0.0221,
+      "step": 700
+    },
+    {
+      "epoch": 0.7274590163934426,
+      "grad_norm": 0.056653790175914764,
+      "learning_rate": 1.367827868852459e-05,
+      "loss": 0.1279,
+      "step": 710
+    },
+    {
+      "epoch": 0.7377049180327869,
+      "grad_norm": 0.393472284078598,
+      "learning_rate": 1.3165983606557377e-05,
+      "loss": 0.0057,
+      "step": 720
+    },
+    {
+      "epoch": 0.7479508196721312,
+      "grad_norm": 0.5363253355026245,
+      "learning_rate": 1.2653688524590165e-05,
+      "loss": 0.0065,
+      "step": 730
+    },
+    {
+      "epoch": 0.7581967213114754,
+      "grad_norm": 0.046929918229579926,
+      "learning_rate": 1.2141393442622951e-05,
+      "loss": 0.1832,
+      "step": 740
+    },
+    {
+      "epoch": 0.7684426229508197,
+      "grad_norm": 0.050630487501621246,
+      "learning_rate": 1.1629098360655737e-05,
+      "loss": 0.0058,
+      "step": 750
+    },
+    {
+      "epoch": 0.7786885245901639,
+      "grad_norm": 0.042579714208841324,
+      "learning_rate": 1.1116803278688525e-05,
+      "loss": 0.0081,
+      "step": 760
+    },
+    {
+      "epoch": 0.7889344262295082,
+      "grad_norm": 0.04666388779878616,
+      "learning_rate": 1.0604508196721312e-05,
+      "loss": 0.0088,
+      "step": 770
+    },
+    {
+      "epoch": 0.7991803278688525,
+      "grad_norm": 0.06457391381263733,
+      "learning_rate": 1.0092213114754098e-05,
+      "loss": 0.0062,
+      "step": 780
+    },
+    {
+      "epoch": 0.8094262295081968,
+      "grad_norm": 0.05809812247753143,
+      "learning_rate": 9.579918032786886e-06,
+      "loss": 0.0647,
+      "step": 790
+    },
+    {
+      "epoch": 0.819672131147541,
+      "grad_norm": 0.06629171967506409,
+      "learning_rate": 9.067622950819672e-06,
+      "loss": 0.0058,
+      "step": 800
+    },
+    {
+      "epoch": 0.8299180327868853,
+      "grad_norm": 0.050829801708459854,
+      "learning_rate": 8.55532786885246e-06,
+      "loss": 0.1274,
+      "step": 810
+    },
+    {
+      "epoch": 0.8401639344262295,
+      "grad_norm": 0.05237673223018646,
+      "learning_rate": 8.043032786885246e-06,
+      "loss": 0.1201,
+      "step": 820
+    },
+    {
+      "epoch": 0.8504098360655737,
+      "grad_norm": 0.04731244966387749,
+      "learning_rate": 7.530737704918032e-06,
+      "loss": 0.0356,
+      "step": 830
+    },
+    {
+      "epoch": 0.860655737704918,
+      "grad_norm": 0.04225335642695427,
+      "learning_rate": 7.01844262295082e-06,
+      "loss": 0.1301,
+      "step": 840
+    },
+    {
+      "epoch": 0.8709016393442623,
+      "grad_norm": 0.08855666220188141,
+      "learning_rate": 6.506147540983606e-06,
+      "loss": 0.0259,
+      "step": 850
+    },
+    {
+      "epoch": 0.8811475409836066,
+      "grad_norm": 0.0881812572479248,
+      "learning_rate": 5.993852459016393e-06,
+      "loss": 0.0407,
+      "step": 860
+    },
+    {
+      "epoch": 0.8913934426229508,
+      "grad_norm": 0.04759250208735466,
+      "learning_rate": 5.4815573770491805e-06,
+      "loss": 0.2223,
+      "step": 870
+    },
+    {
+      "epoch": 0.9016393442622951,
+      "grad_norm": 0.08196867257356644,
+      "learning_rate": 4.9692622950819675e-06,
+      "loss": 0.0079,
+      "step": 880
+    },
+    {
+      "epoch": 0.9118852459016393,
+      "grad_norm": 0.1053190603852272,
+      "learning_rate": 4.4569672131147546e-06,
+      "loss": 0.1755,
+      "step": 890
+    },
+    {
+      "epoch": 0.9221311475409836,
+      "grad_norm": 0.10274066776037216,
+      "learning_rate": 3.944672131147542e-06,
+      "loss": 0.0128,
+      "step": 900
+    },
+    {
+      "epoch": 0.9323770491803278,
+      "grad_norm": 0.08095156401395798,
+      "learning_rate": 3.4323770491803283e-06,
+      "loss": 0.0778,
+      "step": 910
+    },
+    {
+      "epoch": 0.9426229508196722,
+      "grad_norm": 0.07761359214782715,
+      "learning_rate": 2.920081967213115e-06,
+      "loss": 0.0528,
+      "step": 920
+    },
+    {
+      "epoch": 0.9528688524590164,
+      "grad_norm": 0.24262677133083344,
+      "learning_rate": 2.4077868852459015e-06,
+      "loss": 0.091,
+      "step": 930
+    },
+    {
+      "epoch": 0.9631147540983607,
+      "grad_norm": 0.09831801801919937,
+      "learning_rate": 1.8954918032786886e-06,
+      "loss": 0.0617,
+      "step": 940
+    },
+    {
+      "epoch": 0.9733606557377049,
+      "grad_norm": 0.7775574326515198,
+      "learning_rate": 1.3831967213114754e-06,
+      "loss": 0.0262,
+      "step": 950
+    },
+    {
+      "epoch": 0.9836065573770492,
+      "grad_norm": 0.285656213760376,
+      "learning_rate": 8.709016393442623e-07,
+      "loss": 0.0083,
+      "step": 960
+    },
+    {
+      "epoch": 0.9938524590163934,
+      "grad_norm": 0.04368609935045242,
+      "learning_rate": 3.586065573770492e-07,
+      "loss": 0.1532,
+      "step": 970
+    },
+    {
+      "epoch": 1.0,
+      "step": 976,
+      "total_flos": 6.049024709315052e+17,
+      "train_loss": 0.1105953664289879,
+      "train_runtime": 396.2703,
+      "train_samples_per_second": 19.699,
+      "train_steps_per_second": 2.463
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 976,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.049024709315052e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}