End of training

Browse files

Files changed (6) hide show

README.md +5 -3
all_results.json +11 -11
eval_results.json +6 -6
runs/May23_21-28-08_Edus-MacBook-Pro.local/events.out.tfevents.1716496939.Edus-MacBook-Pro.local +3 -0
train_results.json +6 -6
trainer_state.json +328 -555

README.md CHANGED Viewed

@@ -2,6 +2,8 @@
 license: apache-2.0
 base_model: google/vit-base-patch16-224-in21k
 tags:
 - generated_from_trainer
 metrics:
 - accuracy
@@ -15,10 +17,10 @@ should probably proofread and complete it, then remove this comment. -->
 # vit-base-beans
-This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0797
-- Accuracy: 0.9774
 ## Model description

 license: apache-2.0
 base_model: google/vit-base-patch16-224-in21k
 tags:
+- image-classification
+- vision
 - generated_from_trainer
 metrics:
 - accuracy
 # vit-base-beans
+This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the beans dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.0627
+- Accuracy: 0.9925
 ## Model description

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 2.0,
-    "eval_accuracy": 0.9699248120300752,
-    "eval_loss": 0.10282660275697708,
-    "eval_runtime": 9.7767,
-    "eval_samples_per_second": 13.604,
-    "eval_steps_per_second": 6.853,
-    "total_flos": 1.602548708238213e+17,
-    "train_loss": 0.2494486983959172,
-    "train_runtime": 527.8899,
-    "train_samples_per_second": 3.917,
-    "train_steps_per_second": 1.959
 }

 {
+    "epoch": 5.0,
+    "eval_accuracy": 0.9924812030075187,
+    "eval_loss": 0.06271149218082428,
+    "eval_runtime": 2.0152,
+    "eval_samples_per_second": 66.0,
+    "eval_steps_per_second": 8.436,
+    "total_flos": 4.006371770595533e+17,
+    "train_loss": 0.2178187003502479,
+    "train_runtime": 261.0621,
+    "train_samples_per_second": 19.804,
+    "train_steps_per_second": 2.49
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 2.0,
-    "eval_accuracy": 0.9699248120300752,
-    "eval_loss": 0.10282660275697708,
-    "eval_runtime": 9.7767,
-    "eval_samples_per_second": 13.604,
-    "eval_steps_per_second": 6.853
 }

 {
+    "epoch": 5.0,
+    "eval_accuracy": 0.9924812030075187,
+    "eval_loss": 0.06271149218082428,
+    "eval_runtime": 2.0152,
+    "eval_samples_per_second": 66.0,
+    "eval_steps_per_second": 8.436
 }

runs/May23_21-28-08_Edus-MacBook-Pro.local/events.out.tfevents.1716496939.Edus-MacBook-Pro.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:97aa9bed90c56b9f4cce0a647462eb252cfecba65d0c6d9540025841fc42bae4
+size 253

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 2.0,
-    "total_flos": 1.602548708238213e+17,
-    "train_loss": 0.2494486983959172,
-    "train_runtime": 527.8899,
-    "train_samples_per_second": 3.917,
-    "train_steps_per_second": 1.959
 }

 {
+    "epoch": 5.0,
+    "total_flos": 4.006371770595533e+17,
+    "train_loss": 0.2178187003502479,
+    "train_runtime": 261.0621,
+    "train_samples_per_second": 19.804,
+    "train_steps_per_second": 2.49
 }

trainer_state.json CHANGED Viewed

@@ -1,769 +1,542 @@
 {
-  "best_metric": 0.10282659530639648,
-  "best_model_checkpoint": "./beans_outputs/checkpoint-517",
-  "epoch": 2.0,
   "eval_steps": 500,
-  "global_step": 1034,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.019342359767891684,
-      "grad_norm": 3.192776918411255,
-      "learning_rate": 1.9806576402321086e-05,
-      "loss": 1.0424,
       "step": 10
     },
     {
-      "epoch": 0.03868471953578337,
-      "grad_norm": 4.558914661407471,
-      "learning_rate": 1.961315280464217e-05,
-      "loss": 0.9602,
       "step": 20
     },
     {
-      "epoch": 0.058027079303675046,
-      "grad_norm": 4.579104423522949,
-      "learning_rate": 1.941972920696325e-05,
-      "loss": 0.8902,
       "step": 30
     },
     {
-      "epoch": 0.07736943907156674,
-      "grad_norm": 7.477652072906494,
-      "learning_rate": 1.9226305609284334e-05,
-      "loss": 0.9063,
       "step": 40
     },
     {
-      "epoch": 0.09671179883945841,
-      "grad_norm": 5.673726558685303,
-      "learning_rate": 1.9032882011605418e-05,
-      "loss": 0.8961,
       "step": 50
     },
     {
-      "epoch": 0.11605415860735009,
-      "grad_norm": 4.80955696105957,
-      "learning_rate": 1.8839458413926502e-05,
-      "loss": 0.8157,
       "step": 60
     },
     {
-      "epoch": 0.13539651837524178,
-      "grad_norm": 2.8267390727996826,
-      "learning_rate": 1.8646034816247586e-05,
-      "loss": 0.7936,
       "step": 70
     },
     {
-      "epoch": 0.15473887814313347,
-      "grad_norm": 3.271488904953003,
-      "learning_rate": 1.8452611218568667e-05,
-      "loss": 0.6614,
       "step": 80
     },
     {
-      "epoch": 0.17408123791102514,
-      "grad_norm": 3.2059671878814697,
-      "learning_rate": 1.825918762088975e-05,
-      "loss": 0.616,
       "step": 90
     },
     {
-      "epoch": 0.19342359767891681,
-      "grad_norm": 4.123504161834717,
-      "learning_rate": 1.806576402321083e-05,
-      "loss": 0.6023,
       "step": 100
     },
     {
-      "epoch": 0.2127659574468085,
-      "grad_norm": 2.275813579559326,
-      "learning_rate": 1.7872340425531915e-05,
-      "loss": 0.5268,
       "step": 110
     },
     {
-      "epoch": 0.23210831721470018,
-      "grad_norm": 4.621743679046631,
-      "learning_rate": 1.7678916827853e-05,
-      "loss": 0.5247,
       "step": 120
     },
     {
-      "epoch": 0.2514506769825919,
-      "grad_norm": 5.684010028839111,
-      "learning_rate": 1.7485493230174083e-05,
-      "loss": 0.4011,
       "step": 130
     },
     {
-      "epoch": 0.27079303675048355,
-      "grad_norm": 1.714890956878662,
-      "learning_rate": 1.7292069632495167e-05,
-      "loss": 0.354,
       "step": 140
     },
     {
-      "epoch": 0.2901353965183752,
-      "grad_norm": 1.8230618238449097,
-      "learning_rate": 1.7098646034816248e-05,
-      "loss": 0.3116,
       "step": 150
     },
     {
-      "epoch": 0.30947775628626695,
-      "grad_norm": 1.1684730052947998,
-      "learning_rate": 1.690522243713733e-05,
-      "loss": 0.3653,
       "step": 160
     },
     {
-      "epoch": 0.3288201160541586,
-      "grad_norm": 3.569240093231201,
-      "learning_rate": 1.6711798839458415e-05,
-      "loss": 0.3585,
       "step": 170
     },
     {
-      "epoch": 0.3481624758220503,
-      "grad_norm": 1.3044307231903076,
-      "learning_rate": 1.65183752417795e-05,
-      "loss": 0.3891,
       "step": 180
     },
     {
-      "epoch": 0.36750483558994196,
-      "grad_norm": 0.8555651903152466,
-      "learning_rate": 1.6324951644100583e-05,
-      "loss": 0.368,
       "step": 190
     },
     {
-      "epoch": 0.38684719535783363,
-      "grad_norm": 24.579862594604492,
-      "learning_rate": 1.6131528046421664e-05,
-      "loss": 0.3181,
       "step": 200
     },
     {
-      "epoch": 0.40618955512572535,
-      "grad_norm": 2.9059534072875977,
-      "learning_rate": 1.5938104448742748e-05,
-      "loss": 0.3285,
       "step": 210
     },
     {
-      "epoch": 0.425531914893617,
-      "grad_norm": 8.594366073608398,
-      "learning_rate": 1.5744680851063832e-05,
-      "loss": 0.4461,
       "step": 220
     },
     {
-      "epoch": 0.4448742746615087,
-      "grad_norm": 1.1595553159713745,
-      "learning_rate": 1.5551257253384916e-05,
-      "loss": 0.1521,
       "step": 230
     },
     {
-      "epoch": 0.46421663442940037,
-      "grad_norm": 14.084031105041504,
-      "learning_rate": 1.5357833655706e-05,
-      "loss": 0.1935,
       "step": 240
     },
     {
-      "epoch": 0.4835589941972921,
-      "grad_norm": 14.068747520446777,
-      "learning_rate": 1.5164410058027082e-05,
-      "loss": 0.5199,
       "step": 250
     },
     {
-      "epoch": 0.5029013539651838,
-      "grad_norm": 20.944473266601562,
-      "learning_rate": 1.4970986460348164e-05,
-      "loss": 0.1387,
       "step": 260
     },
     {
-      "epoch": 0.5222437137330754,
-      "grad_norm": 14.65572452545166,
-      "learning_rate": 1.4777562862669247e-05,
-      "loss": 0.6002,
       "step": 270
     },
     {
-      "epoch": 0.5415860735009671,
-      "grad_norm": 0.8473848104476929,
-      "learning_rate": 1.4584139264990329e-05,
-      "loss": 0.1309,
       "step": 280
     },
     {
-      "epoch": 0.5609284332688588,
-      "grad_norm": 2.6289236545562744,
-      "learning_rate": 1.4390715667311413e-05,
-      "loss": 0.0999,
       "step": 290
     },
     {
-      "epoch": 0.5802707930367504,
-      "grad_norm": 0.45386189222335815,
-      "learning_rate": 1.4197292069632495e-05,
-      "loss": 0.2121,
       "step": 300
     },
     {
-      "epoch": 0.5996131528046421,
-      "grad_norm": 0.400414377450943,
-      "learning_rate": 1.4003868471953579e-05,
-      "loss": 0.1389,
       "step": 310
     },
     {
-      "epoch": 0.6189555125725339,
-      "grad_norm": 0.4177948534488678,
-      "learning_rate": 1.3810444874274663e-05,
-      "loss": 0.0817,
       "step": 320
     },
     {
-      "epoch": 0.6382978723404256,
-      "grad_norm": 3.3589959144592285,
-      "learning_rate": 1.3617021276595745e-05,
-      "loss": 0.1227,
       "step": 330
     },
     {
-      "epoch": 0.6576402321083172,
-      "grad_norm": 0.5038989782333374,
-      "learning_rate": 1.342359767891683e-05,
-      "loss": 0.3583,
       "step": 340
     },
     {
-      "epoch": 0.6769825918762089,
-      "grad_norm": 15.520181655883789,
-      "learning_rate": 1.3230174081237912e-05,
-      "loss": 0.1876,
       "step": 350
     },
     {
-      "epoch": 0.6963249516441006,
-      "grad_norm": 0.2799462676048279,
-      "learning_rate": 1.3036750483558995e-05,
-      "loss": 0.0677,
       "step": 360
     },
     {
-      "epoch": 0.7156673114119922,
-      "grad_norm": 4.5798869132995605,
-      "learning_rate": 1.2843326885880078e-05,
-      "loss": 0.0657,
       "step": 370
     },
     {
-      "epoch": 0.7350096711798839,
-      "grad_norm": 1.6752430200576782,
-      "learning_rate": 1.2649903288201162e-05,
-      "loss": 0.1983,
       "step": 380
     },
     {
-      "epoch": 0.7543520309477756,
-      "grad_norm": 0.23511852324008942,
-      "learning_rate": 1.2456479690522246e-05,
-      "loss": 0.1278,
       "step": 390
     },
     {
-      "epoch": 0.7736943907156673,
-      "grad_norm": 0.3272879719734192,
-      "learning_rate": 1.2263056092843328e-05,
-      "loss": 0.138,
       "step": 400
     },
     {
-      "epoch": 0.793036750483559,
-      "grad_norm": 0.34640178084373474,
-      "learning_rate": 1.2069632495164412e-05,
-      "loss": 0.3669,
       "step": 410
     },
     {
-      "epoch": 0.8123791102514507,
-      "grad_norm": 0.3155074715614319,
-      "learning_rate": 1.1876208897485494e-05,
-      "loss": 0.1772,
       "step": 420
     },
     {
-      "epoch": 0.8317214700193424,
-      "grad_norm": 0.30559900403022766,
-      "learning_rate": 1.1682785299806578e-05,
-      "loss": 0.3992,
       "step": 430
     },
     {
-      "epoch": 0.851063829787234,
-      "grad_norm": 0.4976309835910797,
-      "learning_rate": 1.1489361702127662e-05,
-      "loss": 0.0475,
       "step": 440
     },
     {
-      "epoch": 0.8704061895551257,
-      "grad_norm": 0.40445780754089355,
-      "learning_rate": 1.1295938104448743e-05,
-      "loss": 0.0504,
       "step": 450
     },
     {
-      "epoch": 0.8897485493230174,
-      "grad_norm": 6.139995098114014,
-      "learning_rate": 1.1102514506769827e-05,
-      "loss": 0.0773,
       "step": 460
     },
     {
-      "epoch": 0.9090909090909091,
-      "grad_norm": 0.5419360995292664,
-      "learning_rate": 1.0909090909090909e-05,
-      "loss": 0.046,
       "step": 470
     },
     {
-      "epoch": 0.9284332688588007,
-      "grad_norm": 2.6806490421295166,
-      "learning_rate": 1.0715667311411993e-05,
-      "loss": 0.2814,
       "step": 480
     },
     {
-      "epoch": 0.9477756286266924,
-      "grad_norm": 6.263265132904053,
-      "learning_rate": 1.0522243713733075e-05,
-      "loss": 0.2632,
       "step": 490
     },
     {
-      "epoch": 0.9671179883945842,
-      "grad_norm": 0.29741278290748596,
-      "learning_rate": 1.0328820116054159e-05,
-      "loss": 0.355,
       "step": 500
     },
     {
-      "epoch": 0.9864603481624759,
-      "grad_norm": 2.9817466735839844,
-      "learning_rate": 1.0135396518375243e-05,
-      "loss": 0.056,
       "step": 510
     },
     {
-      "epoch": 1.0,
-      "eval_accuracy": 0.9699248120300752,
-      "eval_loss": 0.10282659530639648,
-      "eval_runtime": 9.3712,
-      "eval_samples_per_second": 14.192,
-      "eval_steps_per_second": 7.15,
-      "step": 517
     },
     {
-      "epoch": 1.0058027079303675,
-      "grad_norm": 0.93055659532547,
-      "learning_rate": 9.941972920696325e-06,
-      "loss": 0.0659,
       "step": 520
     },
     {
-      "epoch": 1.0251450676982592,
-      "grad_norm": 0.2567221522331238,
-      "learning_rate": 9.74854932301741e-06,
-      "loss": 0.1785,
       "step": 530
     },
     {
-      "epoch": 1.0444874274661509,
-      "grad_norm": 0.9089804291725159,
-      "learning_rate": 9.555125725338492e-06,
-      "loss": 0.0413,
       "step": 540
     },
     {
-      "epoch": 1.0638297872340425,
-      "grad_norm": 0.8061901926994324,
-      "learning_rate": 9.361702127659576e-06,
-      "loss": 0.0424,
       "step": 550
     },
     {
-      "epoch": 1.0831721470019342,
-      "grad_norm": 0.21418491005897522,
-      "learning_rate": 9.16827852998066e-06,
-      "loss": 0.0448,
       "step": 560
     },
     {
-      "epoch": 1.1025145067698259,
-      "grad_norm": 0.20464298129081726,
-      "learning_rate": 8.974854932301742e-06,
-      "loss": 0.0388,
       "step": 570
     },
     {
-      "epoch": 1.1218568665377175,
-      "grad_norm": 0.19184868037700653,
-      "learning_rate": 8.781431334622824e-06,
-      "loss": 0.4538,
       "step": 580
     },
     {
-      "epoch": 1.1411992263056092,
-      "grad_norm": 0.19347575306892395,
-      "learning_rate": 8.588007736943908e-06,
-      "loss": 0.0361,
       "step": 590
     },
     {
-      "epoch": 1.1605415860735009,
-      "grad_norm": 5.5723772048950195,
-      "learning_rate": 8.39458413926499e-06,
-      "loss": 0.126,
       "step": 600
     },
     {
-      "epoch": 1.1798839458413926,
-      "grad_norm": 8.067544937133789,
-      "learning_rate": 8.201160541586074e-06,
-      "loss": 0.1137,
       "step": 610
     },
     {
-      "epoch": 1.1992263056092844,
-      "grad_norm": 0.20713689923286438,
-      "learning_rate": 8.007736943907156e-06,
-      "loss": 0.0514,
       "step": 620
     },
     {
-      "epoch": 1.218568665377176,
-      "grad_norm": 0.21297387778759003,
-      "learning_rate": 7.81431334622824e-06,
-      "loss": 0.0538,
       "step": 630
     },
     {
-      "epoch": 1.2379110251450678,
-      "grad_norm": 6.387426376342773,
-      "learning_rate": 7.6208897485493236e-06,
-      "loss": 0.0757,
       "step": 640
     },
     {
-      "epoch": 1.2572533849129595,
-      "grad_norm": 0.20734897255897522,
-      "learning_rate": 7.4274661508704075e-06,
-      "loss": 0.3696,
       "step": 650
     },
     {
-      "epoch": 1.2765957446808511,
-      "grad_norm": 0.3389629125595093,
-      "learning_rate": 7.234042553191491e-06,
-      "loss": 0.0789,
-      "step": 660
-    },
-    {
-      "epoch": 1.2959381044487428,
-      "grad_norm": 0.26222383975982666,
-      "learning_rate": 7.040618955512573e-06,
-      "loss": 0.3863,
-      "step": 670
-    },
-    {
-      "epoch": 1.3152804642166345,
-      "grad_norm": 0.40525734424591064,
-      "learning_rate": 6.847195357833656e-06,
-      "loss": 0.0357,
-      "step": 680
-    },
-    {
-      "epoch": 1.3346228239845261,
-      "grad_norm": 0.341835081577301,
-      "learning_rate": 6.653771760154739e-06,
-      "loss": 0.1815,
-      "step": 690
-    },
-    {
-      "epoch": 1.3539651837524178,
-      "grad_norm": 0.17171211540699005,
-      "learning_rate": 6.460348162475822e-06,
-      "loss": 0.0815,
-      "step": 700
-    },
-    {
-      "epoch": 1.3733075435203095,
-      "grad_norm": 0.23651224374771118,
-      "learning_rate": 6.266924564796905e-06,
-      "loss": 0.1392,
-      "step": 710
-    },
-    {
-      "epoch": 1.3926499032882012,
-      "grad_norm": 0.19646570086479187,
-      "learning_rate": 6.073500967117989e-06,
-      "loss": 0.0331,
-      "step": 720
-    },
-    {
-      "epoch": 1.4119922630560928,
-      "grad_norm": 7.1733574867248535,
-      "learning_rate": 5.8800773694390724e-06,
-      "loss": 0.0638,
-      "step": 730
-    },
-    {
-      "epoch": 1.4313346228239845,
-      "grad_norm": 0.18651318550109863,
-      "learning_rate": 5.6866537717601556e-06,
-      "loss": 0.0352,
-      "step": 740
-    },
-    {
-      "epoch": 1.4506769825918762,
-      "grad_norm": 0.1645909547805786,
-      "learning_rate": 5.493230174081239e-06,
-      "loss": 0.2837,
-      "step": 750
-    },
-    {
-      "epoch": 1.4700193423597678,
-      "grad_norm": 0.16515417397022247,
-      "learning_rate": 5.299806576402321e-06,
-      "loss": 0.108,
-      "step": 760
-    },
-    {
-      "epoch": 1.4893617021276595,
-      "grad_norm": 0.17802861332893372,
-      "learning_rate": 5.106382978723404e-06,
-      "loss": 0.5353,
-      "step": 770
-    },
-    {
-      "epoch": 1.5087040618955512,
-      "grad_norm": 0.20135052502155304,
-      "learning_rate": 4.912959381044487e-06,
-      "loss": 0.1647,
-      "step": 780
-    },
-    {
-      "epoch": 1.528046421663443,
-      "grad_norm": 0.16655394434928894,
-      "learning_rate": 4.719535783365571e-06,
-      "loss": 0.2726,
-      "step": 790
-    },
-    {
-      "epoch": 1.5473887814313345,
-      "grad_norm": 0.25633805990219116,
-      "learning_rate": 4.526112185686654e-06,
-      "loss": 0.029,
-      "step": 800
-    },
-    {
-      "epoch": 1.5667311411992264,
-      "grad_norm": 27.006752014160156,
-      "learning_rate": 4.332688588007737e-06,
-      "loss": 0.0478,
-      "step": 810
-    },
-    {
-      "epoch": 1.5860735009671179,
-      "grad_norm": 60.104461669921875,
-      "learning_rate": 4.1392649903288205e-06,
-      "loss": 0.1252,
-      "step": 820
-    },
-    {
-      "epoch": 1.6054158607350097,
-      "grad_norm": 0.22381837666034698,
-      "learning_rate": 3.945841392649904e-06,
-      "loss": 0.2842,
-      "step": 830
-    },
-    {
-      "epoch": 1.6247582205029012,
-      "grad_norm": 0.20553378760814667,
-      "learning_rate": 3.7524177949709867e-06,
-      "loss": 0.06,
-      "step": 840
-    },
-    {
-      "epoch": 1.644100580270793,
-      "grad_norm": 0.18213896453380585,
-      "learning_rate": 3.55899419729207e-06,
-      "loss": 0.031,
-      "step": 850
-    },
-    {
-      "epoch": 1.6634429400386848,
-      "grad_norm": 0.18769198656082153,
-      "learning_rate": 3.3655705996131534e-06,
-      "loss": 0.4103,
-      "step": 860
-    },
-    {
-      "epoch": 1.6827852998065764,
-      "grad_norm": 26.75303077697754,
-      "learning_rate": 3.172147001934236e-06,
-      "loss": 0.4095,
-      "step": 870
-    },
-    {
-      "epoch": 1.702127659574468,
-      "grad_norm": 1.4275975227355957,
-      "learning_rate": 2.978723404255319e-06,
-      "loss": 0.0442,
-      "step": 880
-    },
-    {
-      "epoch": 1.7214700193423598,
-      "grad_norm": 0.7911761999130249,
-      "learning_rate": 2.7852998065764027e-06,
-      "loss": 0.0295,
-      "step": 890
-    },
-    {
-      "epoch": 1.7408123791102514,
-      "grad_norm": 0.1537817418575287,
-      "learning_rate": 2.591876208897486e-06,
-      "loss": 0.2648,
-      "step": 900
-    },
-    {
-      "epoch": 1.760154738878143,
-      "grad_norm": 0.19490283727645874,
-      "learning_rate": 2.398452611218569e-06,
-      "loss": 0.0336,
-      "step": 910
-    },
-    {
-      "epoch": 1.7794970986460348,
-      "grad_norm": 67.34917449951172,
-      "learning_rate": 2.205029013539652e-06,
-      "loss": 0.1109,
-      "step": 920
-    },
-    {
-      "epoch": 1.7988394584139265,
-      "grad_norm": 0.6602293252944946,
-      "learning_rate": 2.011605415860735e-06,
-      "loss": 0.0386,
-      "step": 930
-    },
-    {
-      "epoch": 1.8181818181818183,
-      "grad_norm": 0.20363681018352509,
-      "learning_rate": 1.8181818181818183e-06,
-      "loss": 0.1173,
-      "step": 940
-    },
-    {
-      "epoch": 1.8375241779497098,
-      "grad_norm": 0.1697886437177658,
-      "learning_rate": 1.6247582205029014e-06,
-      "loss": 0.1885,
-      "step": 950
-    },
-    {
-      "epoch": 1.8568665377176017,
-      "grad_norm": 0.1814304143190384,
-      "learning_rate": 1.4313346228239847e-06,
-      "loss": 0.2553,
-      "step": 960
-    },
-    {
-      "epoch": 1.8762088974854931,
-      "grad_norm": 0.3075575530529022,
-      "learning_rate": 1.2379110251450678e-06,
-      "loss": 0.2235,
-      "step": 970
-    },
-    {
-      "epoch": 1.895551257253385,
-      "grad_norm": 0.17250248789787292,
-      "learning_rate": 1.044487427466151e-06,
-      "loss": 0.0648,
-      "step": 980
-    },
-    {
-      "epoch": 1.9148936170212765,
-      "grad_norm": 0.18552158772945404,
-      "learning_rate": 8.510638297872341e-07,
-      "loss": 0.0266,
-      "step": 990
-    },
-    {
-      "epoch": 1.9342359767891684,
-      "grad_norm": 21.213985443115234,
-      "learning_rate": 6.576402321083172e-07,
-      "loss": 0.2269,
-      "step": 1000
-    },
-    {
-      "epoch": 1.9535783365570598,
-      "grad_norm": 0.20198415219783783,
-      "learning_rate": 4.6421663442940047e-07,
-      "loss": 0.0441,
-      "step": 1010
-    },
-    {
-      "epoch": 1.9729206963249517,
-      "grad_norm": 8.638155937194824,
-      "learning_rate": 2.707930367504836e-07,
-      "loss": 0.0328,
-      "step": 1020
-    },
-    {
-      "epoch": 1.9922630560928434,
-      "grad_norm": 0.18748199939727783,
-      "learning_rate": 7.736943907156674e-08,
-      "loss": 0.0627,
-      "step": 1030
-    },
-    {
-      "epoch": 2.0,
-      "eval_accuracy": 0.9699248120300752,
-      "eval_loss": 0.10996536910533905,
-      "eval_runtime": 9.2747,
-      "eval_samples_per_second": 14.34,
-      "eval_steps_per_second": 7.224,
-      "step": 1034
     },
     {
-      "epoch": 2.0,
-      "step": 1034,
-      "total_flos": 1.602548708238213e+17,
-      "train_loss": 0.2494486983959172,
-      "train_runtime": 527.8899,
-      "train_samples_per_second": 3.917,
-      "train_steps_per_second": 1.959
     }
   ],
   "logging_steps": 10,
-  "max_steps": 1034,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 2,
   "save_steps": 500,
-  "total_flos": 1.602548708238213e+17,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 0.06271149218082428,
+  "best_model_checkpoint": "./beans_outputs/checkpoint-520",
+  "epoch": 5.0,
   "eval_steps": 500,
+  "global_step": 650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.07692307692307693,
+      "grad_norm": 2.1943726539611816,
+      "learning_rate": 1.9692307692307696e-05,
+      "loss": 1.0249,
       "step": 10
     },
     {
+      "epoch": 0.15384615384615385,
+      "grad_norm": 1.8665084838867188,
+      "learning_rate": 1.9384615384615386e-05,
+      "loss": 0.9463,
       "step": 20
     },
     {
+      "epoch": 0.23076923076923078,
+      "grad_norm": 2.0366761684417725,
+      "learning_rate": 1.907692307692308e-05,
+      "loss": 0.8401,
       "step": 30
     },
     {
+      "epoch": 0.3076923076923077,
+      "grad_norm": 2.6267013549804688,
+      "learning_rate": 1.876923076923077e-05,
+      "loss": 0.6974,
       "step": 40
     },
     {
+      "epoch": 0.38461538461538464,
+      "grad_norm": 4.034597396850586,
+      "learning_rate": 1.8461538461538465e-05,
+      "loss": 0.664,
       "step": 50
     },
     {
+      "epoch": 0.46153846153846156,
+      "grad_norm": 2.0909483432769775,
+      "learning_rate": 1.8153846153846155e-05,
+      "loss": 0.5928,
       "step": 60
     },
     {
+      "epoch": 0.5384615384615384,
+      "grad_norm": 1.782705307006836,
+      "learning_rate": 1.784615384615385e-05,
+      "loss": 0.5262,
       "step": 70
     },
     {
+      "epoch": 0.6153846153846154,
+      "grad_norm": 1.3501217365264893,
+      "learning_rate": 1.753846153846154e-05,
+      "loss": 0.4015,
       "step": 80
     },
     {
+      "epoch": 0.6923076923076923,
+      "grad_norm": 2.443540573120117,
+      "learning_rate": 1.7230769230769234e-05,
+      "loss": 0.3581,
       "step": 90
     },
     {
+      "epoch": 0.7692307692307693,
+      "grad_norm": 3.095242738723755,
+      "learning_rate": 1.6923076923076924e-05,
+      "loss": 0.3728,
       "step": 100
     },
     {
+      "epoch": 0.8461538461538461,
+      "grad_norm": 2.682217836380005,
+      "learning_rate": 1.6615384615384618e-05,
+      "loss": 0.3364,
       "step": 110
     },
     {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 2.5428781509399414,
+      "learning_rate": 1.630769230769231e-05,
+      "loss": 0.2471,
       "step": 120
     },
     {
+      "epoch": 1.0,
+      "grad_norm": 6.824251651763916,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 0.281,
       "step": 130
     },
     {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9624060150375939,
+      "eval_loss": 0.2193511426448822,
+      "eval_runtime": 2.0821,
+      "eval_samples_per_second": 63.878,
+      "eval_steps_per_second": 8.165,
+      "step": 130
+    },
+    {
+      "epoch": 1.0769230769230769,
+      "grad_norm": 1.4056776762008667,
+      "learning_rate": 1.5692307692307693e-05,
+      "loss": 0.2017,
       "step": 140
     },
     {
+      "epoch": 1.1538461538461537,
+      "grad_norm": 2.9976863861083984,
+      "learning_rate": 1.5384615384615387e-05,
+      "loss": 0.225,
       "step": 150
     },
     {
+      "epoch": 1.2307692307692308,
+      "grad_norm": 1.4828065633773804,
+      "learning_rate": 1.5076923076923078e-05,
+      "loss": 0.2011,
       "step": 160
     },
     {
+      "epoch": 1.3076923076923077,
+      "grad_norm": 0.5160149335861206,
+      "learning_rate": 1.4769230769230772e-05,
+      "loss": 0.216,
       "step": 170
     },
     {
+      "epoch": 1.3846153846153846,
+      "grad_norm": 0.606515645980835,
+      "learning_rate": 1.4461538461538462e-05,
+      "loss": 0.2028,
       "step": 180
     },
     {
+      "epoch": 1.4615384615384617,
+      "grad_norm": 4.552361011505127,
+      "learning_rate": 1.4153846153846156e-05,
+      "loss": 0.1627,
       "step": 190
     },
     {
+      "epoch": 1.5384615384615383,
+      "grad_norm": 0.44607630372047424,
+      "learning_rate": 1.3846153846153847e-05,
+      "loss": 0.2465,
       "step": 200
     },
     {
+      "epoch": 1.6153846153846154,
+      "grad_norm": 0.686824381351471,
+      "learning_rate": 1.353846153846154e-05,
+      "loss": 0.1797,
       "step": 210
     },
     {
+      "epoch": 1.6923076923076923,
+      "grad_norm": 2.306919574737549,
+      "learning_rate": 1.3230769230769231e-05,
+      "loss": 0.2161,
       "step": 220
     },
     {
+      "epoch": 1.7692307692307692,
+      "grad_norm": 2.050942897796631,
+      "learning_rate": 1.2923076923076925e-05,
+      "loss": 0.1433,
       "step": 230
     },
     {
+      "epoch": 1.8461538461538463,
+      "grad_norm": 7.248354434967041,
+      "learning_rate": 1.2615384615384616e-05,
+      "loss": 0.2762,
       "step": 240
     },
     {
+      "epoch": 1.9230769230769231,
+      "grad_norm": 0.40929391980171204,
+      "learning_rate": 1.230769230769231e-05,
+      "loss": 0.1286,
       "step": 250
     },
     {
+      "epoch": 2.0,
+      "grad_norm": 0.3473702669143677,
+      "learning_rate": 1.2e-05,
+      "loss": 0.1268,
       "step": 260
     },
     {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9699248120300752,
+      "eval_loss": 0.12887412309646606,
+      "eval_runtime": 2.0415,
+      "eval_samples_per_second": 65.15,
+      "eval_steps_per_second": 8.327,
+      "step": 260
+    },
+    {
+      "epoch": 2.076923076923077,
+      "grad_norm": 0.6053388118743896,
+      "learning_rate": 1.1692307692307694e-05,
+      "loss": 0.0969,
       "step": 270
     },
     {
+      "epoch": 2.1538461538461537,
+      "grad_norm": 0.3397085666656494,
+      "learning_rate": 1.1384615384615385e-05,
+      "loss": 0.1396,
       "step": 280
     },
     {
+      "epoch": 2.230769230769231,
+      "grad_norm": 7.664570331573486,
+      "learning_rate": 1.1076923076923079e-05,
+      "loss": 0.1308,
       "step": 290
     },
     {
+      "epoch": 2.3076923076923075,
+      "grad_norm": 9.331936836242676,
+      "learning_rate": 1.076923076923077e-05,
+      "loss": 0.1427,
       "step": 300
     },
     {
+      "epoch": 2.3846153846153846,
+      "grad_norm": 6.082279682159424,
+      "learning_rate": 1.0461538461538463e-05,
+      "loss": 0.1687,
       "step": 310
     },
     {
+      "epoch": 2.4615384615384617,
+      "grad_norm": 0.3458414375782013,
+      "learning_rate": 1.0153846153846154e-05,
+      "loss": 0.1621,
       "step": 320
     },
     {
+      "epoch": 2.5384615384615383,
+      "grad_norm": 5.815878391265869,
+      "learning_rate": 9.846153846153848e-06,
+      "loss": 0.1863,
       "step": 330
     },
     {
+      "epoch": 2.6153846153846154,
+      "grad_norm": 1.0016790628433228,
+      "learning_rate": 9.53846153846154e-06,
+      "loss": 0.0756,
       "step": 340
     },
     {
+      "epoch": 2.6923076923076925,
+      "grad_norm": 0.35424068570137024,
+      "learning_rate": 9.230769230769232e-06,
+      "loss": 0.1243,
       "step": 350
     },
     {
+      "epoch": 2.769230769230769,
+      "grad_norm": 13.017029762268066,
+      "learning_rate": 8.923076923076925e-06,
+      "loss": 0.1215,
       "step": 360
     },
     {
+      "epoch": 2.8461538461538463,
+      "grad_norm": 0.30303019285202026,
+      "learning_rate": 8.615384615384617e-06,
+      "loss": 0.1552,
       "step": 370
     },
     {
+      "epoch": 2.9230769230769234,
+      "grad_norm": 1.2382951974868774,
+      "learning_rate": 8.307692307692309e-06,
+      "loss": 0.1296,
       "step": 380
     },
     {
+      "epoch": 3.0,
+      "grad_norm": 0.29275208711624146,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.1385,
+      "step": 390
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.9699248120300752,
+      "eval_loss": 0.09677492827177048,
+      "eval_runtime": 2.1789,
+      "eval_samples_per_second": 61.041,
+      "eval_steps_per_second": 7.802,
       "step": 390
     },
     {
+      "epoch": 3.076923076923077,
+      "grad_norm": 2.064225673675537,
+      "learning_rate": 7.692307692307694e-06,
+      "loss": 0.1348,
       "step": 400
     },
     {
+      "epoch": 3.1538461538461537,
+      "grad_norm": 6.044634819030762,
+      "learning_rate": 7.384615384615386e-06,
+      "loss": 0.1496,
       "step": 410
     },
     {
+      "epoch": 3.230769230769231,
+      "grad_norm": 9.788597106933594,
+      "learning_rate": 7.076923076923078e-06,
+      "loss": 0.142,
       "step": 420
     },
     {
+      "epoch": 3.3076923076923075,
+      "grad_norm": 0.23322972655296326,
+      "learning_rate": 6.76923076923077e-06,
+      "loss": 0.0788,
       "step": 430
     },
     {
+      "epoch": 3.3846153846153846,
+      "grad_norm": 2.807680368423462,
+      "learning_rate": 6.461538461538463e-06,
+      "loss": 0.1074,
       "step": 440
     },
     {
+      "epoch": 3.4615384615384617,
+      "grad_norm": 4.750285625457764,
+      "learning_rate": 6.153846153846155e-06,
+      "loss": 0.0719,
       "step": 450
     },
     {
+      "epoch": 3.5384615384615383,
+      "grad_norm": 5.354732990264893,
+      "learning_rate": 5.846153846153847e-06,
+      "loss": 0.0966,
       "step": 460
     },
     {
+      "epoch": 3.6153846153846154,
+      "grad_norm": 8.170781135559082,
+      "learning_rate": 5.538461538461539e-06,
+      "loss": 0.0753,
       "step": 470
     },
     {
+      "epoch": 3.6923076923076925,
+      "grad_norm": 0.22035281360149384,
+      "learning_rate": 5.230769230769232e-06,
+      "loss": 0.0902,
       "step": 480
     },
     {
+      "epoch": 3.769230769230769,
+      "grad_norm": 0.2130032181739807,
+      "learning_rate": 4.923076923076924e-06,
+      "loss": 0.077,
       "step": 490
     },
     {
+      "epoch": 3.8461538461538463,
+      "grad_norm": 0.9436342716217041,
+      "learning_rate": 4.615384615384616e-06,
+      "loss": 0.1247,
       "step": 500
     },
     {
+      "epoch": 3.9230769230769234,
+      "grad_norm": 0.2058902233839035,
+      "learning_rate": 4.307692307692308e-06,
+      "loss": 0.1309,
       "step": 510
     },
     {
+      "epoch": 4.0,
+      "grad_norm": 0.30152463912963867,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.0749,
+      "step": 520
     },
     {
+      "epoch": 4.0,
+      "eval_accuracy": 0.9924812030075187,
+      "eval_loss": 0.06271149218082428,
+      "eval_runtime": 1.9746,
+      "eval_samples_per_second": 67.355,
+      "eval_steps_per_second": 8.609,
       "step": 520
     },
     {
+      "epoch": 4.076923076923077,
+      "grad_norm": 2.3252828121185303,
+      "learning_rate": 3.692307692307693e-06,
+      "loss": 0.0948,
       "step": 530
     },
     {
+      "epoch": 4.153846153846154,
+      "grad_norm": 2.0056347846984863,
+      "learning_rate": 3.384615384615385e-06,
+      "loss": 0.1599,
       "step": 540
     },
     {
+      "epoch": 4.230769230769231,
+      "grad_norm": 0.3621722161769867,
+      "learning_rate": 3.0769230769230774e-06,
+      "loss": 0.1402,
       "step": 550
     },
     {
+      "epoch": 4.3076923076923075,
+      "grad_norm": 1.570186734199524,
+      "learning_rate": 2.7692307692307697e-06,
+      "loss": 0.0493,
       "step": 560
     },
     {
+      "epoch": 4.384615384615385,
+      "grad_norm": 0.31239092350006104,
+      "learning_rate": 2.461538461538462e-06,
+      "loss": 0.1283,
       "step": 570
     },
     {
+      "epoch": 4.461538461538462,
+      "grad_norm": 0.39084771275520325,
+      "learning_rate": 2.153846153846154e-06,
+      "loss": 0.0804,
       "step": 580
     },
     {
+      "epoch": 4.538461538461538,
+      "grad_norm": 9.627459526062012,
+      "learning_rate": 1.8461538461538465e-06,
+      "loss": 0.064,
       "step": 590
     },
     {
+      "epoch": 4.615384615384615,
+      "grad_norm": 3.2321269512176514,
+      "learning_rate": 1.5384615384615387e-06,
+      "loss": 0.0828,
       "step": 600
     },
     {
+      "epoch": 4.6923076923076925,
+      "grad_norm": 5.409714221954346,
+      "learning_rate": 1.230769230769231e-06,
+      "loss": 0.0899,
       "step": 610
     },
     {
+      "epoch": 4.769230769230769,
+      "grad_norm": 0.2573850750923157,
+      "learning_rate": 9.230769230769232e-07,
+      "loss": 0.0794,
       "step": 620
     },
     {
+      "epoch": 4.846153846153846,
+      "grad_norm": 0.21551428735256195,
+      "learning_rate": 6.153846153846155e-07,
+      "loss": 0.0616,
       "step": 630
     },
     {
+      "epoch": 4.923076923076923,
+      "grad_norm": 0.23159781098365784,
+      "learning_rate": 3.0769230769230774e-07,
+      "loss": 0.0776,
       "step": 640
     },
     {
+      "epoch": 5.0,
+      "grad_norm": 0.7027397155761719,
+      "learning_rate": 0.0,
+      "loss": 0.1089,
       "step": 650
     },
     {
+      "epoch": 5.0,
+      "eval_accuracy": 0.9774436090225563,
+      "eval_loss": 0.07966959476470947,
+      "eval_runtime": 1.9647,
+      "eval_samples_per_second": 67.695,
+      "eval_steps_per_second": 8.653,
+      "step": 650
     },
     {
+      "epoch": 5.0,
+      "step": 650,
+      "total_flos": 4.006371770595533e+17,
+      "train_loss": 0.2178187003502479,
+      "train_runtime": 261.0621,
+      "train_samples_per_second": 19.804,
+      "train_steps_per_second": 2.49
     }
   ],
   "logging_steps": 10,
+  "max_steps": 650,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.006371770595533e+17,
+  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }