End of training

Browse files

Files changed (5) hide show

README.md +4 -2
all_results.json +13 -0
eval_results.json +8 -0
train_results.json +8 -0
trainer_state.json +769 -0

README.md CHANGED Viewed

@@ -2,6 +2,8 @@
 license: apache-2.0
 base_model: google/vit-base-patch16-224-in21k
 tags:
 - generated_from_trainer
 metrics:
 - accuracy
@@ -15,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
 # vit-base-beans
-This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1100
 - Accuracy: 0.9699
 ## Model description

 license: apache-2.0
 base_model: google/vit-base-patch16-224-in21k
 tags:
+- image-classification
+- vision
 - generated_from_trainer
 metrics:
 - accuracy
 # vit-base-beans
+This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the beans dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1028
 - Accuracy: 0.9699
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 2.0,
+    "eval_accuracy": 0.9699248120300752,
+    "eval_loss": 0.10282660275697708,
+    "eval_runtime": 9.7767,
+    "eval_samples_per_second": 13.604,
+    "eval_steps_per_second": 6.853,
+    "total_flos": 1.602548708238213e+17,
+    "train_loss": 0.2494486983959172,
+    "train_runtime": 527.8899,
+    "train_samples_per_second": 3.917,
+    "train_steps_per_second": 1.959
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.0,
+    "eval_accuracy": 0.9699248120300752,
+    "eval_loss": 0.10282660275697708,
+    "eval_runtime": 9.7767,
+    "eval_samples_per_second": 13.604,
+    "eval_steps_per_second": 6.853
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.0,
+    "total_flos": 1.602548708238213e+17,
+    "train_loss": 0.2494486983959172,
+    "train_runtime": 527.8899,
+    "train_samples_per_second": 3.917,
+    "train_steps_per_second": 1.959
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,769 @@

+{
+  "best_metric": 0.10282659530639648,
+  "best_model_checkpoint": "./beans_outputs/checkpoint-517",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1034,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.019342359767891684,
+      "grad_norm": 3.192776918411255,
+      "learning_rate": 1.9806576402321086e-05,
+      "loss": 1.0424,
+      "step": 10
+    },
+    {
+      "epoch": 0.03868471953578337,
+      "grad_norm": 4.558914661407471,
+      "learning_rate": 1.961315280464217e-05,
+      "loss": 0.9602,
+      "step": 20
+    },
+    {
+      "epoch": 0.058027079303675046,
+      "grad_norm": 4.579104423522949,
+      "learning_rate": 1.941972920696325e-05,
+      "loss": 0.8902,
+      "step": 30
+    },
+    {
+      "epoch": 0.07736943907156674,
+      "grad_norm": 7.477652072906494,
+      "learning_rate": 1.9226305609284334e-05,
+      "loss": 0.9063,
+      "step": 40
+    },
+    {
+      "epoch": 0.09671179883945841,
+      "grad_norm": 5.673726558685303,
+      "learning_rate": 1.9032882011605418e-05,
+      "loss": 0.8961,
+      "step": 50
+    },
+    {
+      "epoch": 0.11605415860735009,
+      "grad_norm": 4.80955696105957,
+      "learning_rate": 1.8839458413926502e-05,
+      "loss": 0.8157,
+      "step": 60
+    },
+    {
+      "epoch": 0.13539651837524178,
+      "grad_norm": 2.8267390727996826,
+      "learning_rate": 1.8646034816247586e-05,
+      "loss": 0.7936,
+      "step": 70
+    },
+    {
+      "epoch": 0.15473887814313347,
+      "grad_norm": 3.271488904953003,
+      "learning_rate": 1.8452611218568667e-05,
+      "loss": 0.6614,
+      "step": 80
+    },
+    {
+      "epoch": 0.17408123791102514,
+      "grad_norm": 3.2059671878814697,
+      "learning_rate": 1.825918762088975e-05,
+      "loss": 0.616,
+      "step": 90
+    },
+    {
+      "epoch": 0.19342359767891681,
+      "grad_norm": 4.123504161834717,
+      "learning_rate": 1.806576402321083e-05,
+      "loss": 0.6023,
+      "step": 100
+    },
+    {
+      "epoch": 0.2127659574468085,
+      "grad_norm": 2.275813579559326,
+      "learning_rate": 1.7872340425531915e-05,
+      "loss": 0.5268,
+      "step": 110
+    },
+    {
+      "epoch": 0.23210831721470018,
+      "grad_norm": 4.621743679046631,
+      "learning_rate": 1.7678916827853e-05,
+      "loss": 0.5247,
+      "step": 120
+    },
+    {
+      "epoch": 0.2514506769825919,
+      "grad_norm": 5.684010028839111,
+      "learning_rate": 1.7485493230174083e-05,
+      "loss": 0.4011,
+      "step": 130
+    },
+    {
+      "epoch": 0.27079303675048355,
+      "grad_norm": 1.714890956878662,
+      "learning_rate": 1.7292069632495167e-05,
+      "loss": 0.354,
+      "step": 140
+    },
+    {
+      "epoch": 0.2901353965183752,
+      "grad_norm": 1.8230618238449097,
+      "learning_rate": 1.7098646034816248e-05,
+      "loss": 0.3116,
+      "step": 150
+    },
+    {
+      "epoch": 0.30947775628626695,
+      "grad_norm": 1.1684730052947998,
+      "learning_rate": 1.690522243713733e-05,
+      "loss": 0.3653,
+      "step": 160
+    },
+    {
+      "epoch": 0.3288201160541586,
+      "grad_norm": 3.569240093231201,
+      "learning_rate": 1.6711798839458415e-05,
+      "loss": 0.3585,
+      "step": 170
+    },
+    {
+      "epoch": 0.3481624758220503,
+      "grad_norm": 1.3044307231903076,
+      "learning_rate": 1.65183752417795e-05,
+      "loss": 0.3891,
+      "step": 180
+    },
+    {
+      "epoch": 0.36750483558994196,
+      "grad_norm": 0.8555651903152466,
+      "learning_rate": 1.6324951644100583e-05,
+      "loss": 0.368,
+      "step": 190
+    },
+    {
+      "epoch": 0.38684719535783363,
+      "grad_norm": 24.579862594604492,
+      "learning_rate": 1.6131528046421664e-05,
+      "loss": 0.3181,
+      "step": 200
+    },
+    {
+      "epoch": 0.40618955512572535,
+      "grad_norm": 2.9059534072875977,
+      "learning_rate": 1.5938104448742748e-05,
+      "loss": 0.3285,
+      "step": 210
+    },
+    {
+      "epoch": 0.425531914893617,
+      "grad_norm": 8.594366073608398,
+      "learning_rate": 1.5744680851063832e-05,
+      "loss": 0.4461,
+      "step": 220
+    },
+    {
+      "epoch": 0.4448742746615087,
+      "grad_norm": 1.1595553159713745,
+      "learning_rate": 1.5551257253384916e-05,
+      "loss": 0.1521,
+      "step": 230
+    },
+    {
+      "epoch": 0.46421663442940037,
+      "grad_norm": 14.084031105041504,
+      "learning_rate": 1.5357833655706e-05,
+      "loss": 0.1935,
+      "step": 240
+    },
+    {
+      "epoch": 0.4835589941972921,
+      "grad_norm": 14.068747520446777,
+      "learning_rate": 1.5164410058027082e-05,
+      "loss": 0.5199,
+      "step": 250
+    },
+    {
+      "epoch": 0.5029013539651838,
+      "grad_norm": 20.944473266601562,
+      "learning_rate": 1.4970986460348164e-05,
+      "loss": 0.1387,
+      "step": 260
+    },
+    {
+      "epoch": 0.5222437137330754,
+      "grad_norm": 14.65572452545166,
+      "learning_rate": 1.4777562862669247e-05,
+      "loss": 0.6002,
+      "step": 270
+    },
+    {
+      "epoch": 0.5415860735009671,
+      "grad_norm": 0.8473848104476929,
+      "learning_rate": 1.4584139264990329e-05,
+      "loss": 0.1309,
+      "step": 280
+    },
+    {
+      "epoch": 0.5609284332688588,
+      "grad_norm": 2.6289236545562744,
+      "learning_rate": 1.4390715667311413e-05,
+      "loss": 0.0999,
+      "step": 290
+    },
+    {
+      "epoch": 0.5802707930367504,
+      "grad_norm": 0.45386189222335815,
+      "learning_rate": 1.4197292069632495e-05,
+      "loss": 0.2121,
+      "step": 300
+    },
+    {
+      "epoch": 0.5996131528046421,
+      "grad_norm": 0.400414377450943,
+      "learning_rate": 1.4003868471953579e-05,
+      "loss": 0.1389,
+      "step": 310
+    },
+    {
+      "epoch": 0.6189555125725339,
+      "grad_norm": 0.4177948534488678,
+      "learning_rate": 1.3810444874274663e-05,
+      "loss": 0.0817,
+      "step": 320
+    },
+    {
+      "epoch": 0.6382978723404256,
+      "grad_norm": 3.3589959144592285,
+      "learning_rate": 1.3617021276595745e-05,
+      "loss": 0.1227,
+      "step": 330
+    },
+    {
+      "epoch": 0.6576402321083172,
+      "grad_norm": 0.5038989782333374,
+      "learning_rate": 1.342359767891683e-05,
+      "loss": 0.3583,
+      "step": 340
+    },
+    {
+      "epoch": 0.6769825918762089,
+      "grad_norm": 15.520181655883789,
+      "learning_rate": 1.3230174081237912e-05,
+      "loss": 0.1876,
+      "step": 350
+    },
+    {
+      "epoch": 0.6963249516441006,
+      "grad_norm": 0.2799462676048279,
+      "learning_rate": 1.3036750483558995e-05,
+      "loss": 0.0677,
+      "step": 360
+    },
+    {
+      "epoch": 0.7156673114119922,
+      "grad_norm": 4.5798869132995605,
+      "learning_rate": 1.2843326885880078e-05,
+      "loss": 0.0657,
+      "step": 370
+    },
+    {
+      "epoch": 0.7350096711798839,
+      "grad_norm": 1.6752430200576782,
+      "learning_rate": 1.2649903288201162e-05,
+      "loss": 0.1983,
+      "step": 380
+    },
+    {
+      "epoch": 0.7543520309477756,
+      "grad_norm": 0.23511852324008942,
+      "learning_rate": 1.2456479690522246e-05,
+      "loss": 0.1278,
+      "step": 390
+    },
+    {
+      "epoch": 0.7736943907156673,
+      "grad_norm": 0.3272879719734192,
+      "learning_rate": 1.2263056092843328e-05,
+      "loss": 0.138,
+      "step": 400
+    },
+    {
+      "epoch": 0.793036750483559,
+      "grad_norm": 0.34640178084373474,
+      "learning_rate": 1.2069632495164412e-05,
+      "loss": 0.3669,
+      "step": 410
+    },
+    {
+      "epoch": 0.8123791102514507,
+      "grad_norm": 0.3155074715614319,
+      "learning_rate": 1.1876208897485494e-05,
+      "loss": 0.1772,
+      "step": 420
+    },
+    {
+      "epoch": 0.8317214700193424,
+      "grad_norm": 0.30559900403022766,
+      "learning_rate": 1.1682785299806578e-05,
+      "loss": 0.3992,
+      "step": 430
+    },
+    {
+      "epoch": 0.851063829787234,
+      "grad_norm": 0.4976309835910797,
+      "learning_rate": 1.1489361702127662e-05,
+      "loss": 0.0475,
+      "step": 440
+    },
+    {
+      "epoch": 0.8704061895551257,
+      "grad_norm": 0.40445780754089355,
+      "learning_rate": 1.1295938104448743e-05,
+      "loss": 0.0504,
+      "step": 450
+    },
+    {
+      "epoch": 0.8897485493230174,
+      "grad_norm": 6.139995098114014,
+      "learning_rate": 1.1102514506769827e-05,
+      "loss": 0.0773,
+      "step": 460
+    },
+    {
+      "epoch": 0.9090909090909091,
+      "grad_norm": 0.5419360995292664,
+      "learning_rate": 1.0909090909090909e-05,
+      "loss": 0.046,
+      "step": 470
+    },
+    {
+      "epoch": 0.9284332688588007,
+      "grad_norm": 2.6806490421295166,
+      "learning_rate": 1.0715667311411993e-05,
+      "loss": 0.2814,
+      "step": 480
+    },
+    {
+      "epoch": 0.9477756286266924,
+      "grad_norm": 6.263265132904053,
+      "learning_rate": 1.0522243713733075e-05,
+      "loss": 0.2632,
+      "step": 490
+    },
+    {
+      "epoch": 0.9671179883945842,
+      "grad_norm": 0.29741278290748596,
+      "learning_rate": 1.0328820116054159e-05,
+      "loss": 0.355,
+      "step": 500
+    },
+    {
+      "epoch": 0.9864603481624759,
+      "grad_norm": 2.9817466735839844,
+      "learning_rate": 1.0135396518375243e-05,
+      "loss": 0.056,
+      "step": 510
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9699248120300752,
+      "eval_loss": 0.10282659530639648,
+      "eval_runtime": 9.3712,
+      "eval_samples_per_second": 14.192,
+      "eval_steps_per_second": 7.15,
+      "step": 517
+    },
+    {
+      "epoch": 1.0058027079303675,
+      "grad_norm": 0.93055659532547,
+      "learning_rate": 9.941972920696325e-06,
+      "loss": 0.0659,
+      "step": 520
+    },
+    {
+      "epoch": 1.0251450676982592,
+      "grad_norm": 0.2567221522331238,
+      "learning_rate": 9.74854932301741e-06,
+      "loss": 0.1785,
+      "step": 530
+    },
+    {
+      "epoch": 1.0444874274661509,
+      "grad_norm": 0.9089804291725159,
+      "learning_rate": 9.555125725338492e-06,
+      "loss": 0.0413,
+      "step": 540
+    },
+    {
+      "epoch": 1.0638297872340425,
+      "grad_norm": 0.8061901926994324,
+      "learning_rate": 9.361702127659576e-06,
+      "loss": 0.0424,
+      "step": 550
+    },
+    {
+      "epoch": 1.0831721470019342,
+      "grad_norm": 0.21418491005897522,
+      "learning_rate": 9.16827852998066e-06,
+      "loss": 0.0448,
+      "step": 560
+    },
+    {
+      "epoch": 1.1025145067698259,
+      "grad_norm": 0.20464298129081726,
+      "learning_rate": 8.974854932301742e-06,
+      "loss": 0.0388,
+      "step": 570
+    },
+    {
+      "epoch": 1.1218568665377175,
+      "grad_norm": 0.19184868037700653,
+      "learning_rate": 8.781431334622824e-06,
+      "loss": 0.4538,
+      "step": 580
+    },
+    {
+      "epoch": 1.1411992263056092,
+      "grad_norm": 0.19347575306892395,
+      "learning_rate": 8.588007736943908e-06,
+      "loss": 0.0361,
+      "step": 590
+    },
+    {
+      "epoch": 1.1605415860735009,
+      "grad_norm": 5.5723772048950195,
+      "learning_rate": 8.39458413926499e-06,
+      "loss": 0.126,
+      "step": 600
+    },
+    {
+      "epoch": 1.1798839458413926,
+      "grad_norm": 8.067544937133789,
+      "learning_rate": 8.201160541586074e-06,
+      "loss": 0.1137,
+      "step": 610
+    },
+    {
+      "epoch": 1.1992263056092844,
+      "grad_norm": 0.20713689923286438,
+      "learning_rate": 8.007736943907156e-06,
+      "loss": 0.0514,
+      "step": 620
+    },
+    {
+      "epoch": 1.218568665377176,
+      "grad_norm": 0.21297387778759003,
+      "learning_rate": 7.81431334622824e-06,
+      "loss": 0.0538,
+      "step": 630
+    },
+    {
+      "epoch": 1.2379110251450678,
+      "grad_norm": 6.387426376342773,
+      "learning_rate": 7.6208897485493236e-06,
+      "loss": 0.0757,
+      "step": 640
+    },
+    {
+      "epoch": 1.2572533849129595,
+      "grad_norm": 0.20734897255897522,
+      "learning_rate": 7.4274661508704075e-06,
+      "loss": 0.3696,
+      "step": 650
+    },
+    {
+      "epoch": 1.2765957446808511,
+      "grad_norm": 0.3389629125595093,
+      "learning_rate": 7.234042553191491e-06,
+      "loss": 0.0789,
+      "step": 660
+    },
+    {
+      "epoch": 1.2959381044487428,
+      "grad_norm": 0.26222383975982666,
+      "learning_rate": 7.040618955512573e-06,
+      "loss": 0.3863,
+      "step": 670
+    },
+    {
+      "epoch": 1.3152804642166345,
+      "grad_norm": 0.40525734424591064,
+      "learning_rate": 6.847195357833656e-06,
+      "loss": 0.0357,
+      "step": 680
+    },
+    {
+      "epoch": 1.3346228239845261,
+      "grad_norm": 0.341835081577301,
+      "learning_rate": 6.653771760154739e-06,
+      "loss": 0.1815,
+      "step": 690
+    },
+    {
+      "epoch": 1.3539651837524178,
+      "grad_norm": 0.17171211540699005,
+      "learning_rate": 6.460348162475822e-06,
+      "loss": 0.0815,
+      "step": 700
+    },
+    {
+      "epoch": 1.3733075435203095,
+      "grad_norm": 0.23651224374771118,
+      "learning_rate": 6.266924564796905e-06,
+      "loss": 0.1392,
+      "step": 710
+    },
+    {
+      "epoch": 1.3926499032882012,
+      "grad_norm": 0.19646570086479187,
+      "learning_rate": 6.073500967117989e-06,
+      "loss": 0.0331,
+      "step": 720
+    },
+    {
+      "epoch": 1.4119922630560928,
+      "grad_norm": 7.1733574867248535,
+      "learning_rate": 5.8800773694390724e-06,
+      "loss": 0.0638,
+      "step": 730
+    },
+    {
+      "epoch": 1.4313346228239845,
+      "grad_norm": 0.18651318550109863,
+      "learning_rate": 5.6866537717601556e-06,
+      "loss": 0.0352,
+      "step": 740
+    },
+    {
+      "epoch": 1.4506769825918762,
+      "grad_norm": 0.1645909547805786,
+      "learning_rate": 5.493230174081239e-06,
+      "loss": 0.2837,
+      "step": 750
+    },
+    {
+      "epoch": 1.4700193423597678,
+      "grad_norm": 0.16515417397022247,
+      "learning_rate": 5.299806576402321e-06,
+      "loss": 0.108,
+      "step": 760
+    },
+    {
+      "epoch": 1.4893617021276595,
+      "grad_norm": 0.17802861332893372,
+      "learning_rate": 5.106382978723404e-06,
+      "loss": 0.5353,
+      "step": 770
+    },
+    {
+      "epoch": 1.5087040618955512,
+      "grad_norm": 0.20135052502155304,
+      "learning_rate": 4.912959381044487e-06,
+      "loss": 0.1647,
+      "step": 780
+    },
+    {
+      "epoch": 1.528046421663443,
+      "grad_norm": 0.16655394434928894,
+      "learning_rate": 4.719535783365571e-06,
+      "loss": 0.2726,
+      "step": 790
+    },
+    {
+      "epoch": 1.5473887814313345,
+      "grad_norm": 0.25633805990219116,
+      "learning_rate": 4.526112185686654e-06,
+      "loss": 0.029,
+      "step": 800
+    },
+    {
+      "epoch": 1.5667311411992264,
+      "grad_norm": 27.006752014160156,
+      "learning_rate": 4.332688588007737e-06,
+      "loss": 0.0478,
+      "step": 810
+    },
+    {
+      "epoch": 1.5860735009671179,
+      "grad_norm": 60.104461669921875,
+      "learning_rate": 4.1392649903288205e-06,
+      "loss": 0.1252,
+      "step": 820
+    },
+    {
+      "epoch": 1.6054158607350097,
+      "grad_norm": 0.22381837666034698,
+      "learning_rate": 3.945841392649904e-06,
+      "loss": 0.2842,
+      "step": 830
+    },
+    {
+      "epoch": 1.6247582205029012,
+      "grad_norm": 0.20553378760814667,
+      "learning_rate": 3.7524177949709867e-06,
+      "loss": 0.06,
+      "step": 840
+    },
+    {
+      "epoch": 1.644100580270793,
+      "grad_norm": 0.18213896453380585,
+      "learning_rate": 3.55899419729207e-06,
+      "loss": 0.031,
+      "step": 850
+    },
+    {
+      "epoch": 1.6634429400386848,
+      "grad_norm": 0.18769198656082153,
+      "learning_rate": 3.3655705996131534e-06,
+      "loss": 0.4103,
+      "step": 860
+    },
+    {
+      "epoch": 1.6827852998065764,
+      "grad_norm": 26.75303077697754,
+      "learning_rate": 3.172147001934236e-06,
+      "loss": 0.4095,
+      "step": 870
+    },
+    {
+      "epoch": 1.702127659574468,
+      "grad_norm": 1.4275975227355957,
+      "learning_rate": 2.978723404255319e-06,
+      "loss": 0.0442,
+      "step": 880
+    },
+    {
+      "epoch": 1.7214700193423598,
+      "grad_norm": 0.7911761999130249,
+      "learning_rate": 2.7852998065764027e-06,
+      "loss": 0.0295,
+      "step": 890
+    },
+    {
+      "epoch": 1.7408123791102514,
+      "grad_norm": 0.1537817418575287,
+      "learning_rate": 2.591876208897486e-06,
+      "loss": 0.2648,
+      "step": 900
+    },
+    {
+      "epoch": 1.760154738878143,
+      "grad_norm": 0.19490283727645874,
+      "learning_rate": 2.398452611218569e-06,
+      "loss": 0.0336,
+      "step": 910
+    },
+    {
+      "epoch": 1.7794970986460348,
+      "grad_norm": 67.34917449951172,
+      "learning_rate": 2.205029013539652e-06,
+      "loss": 0.1109,
+      "step": 920
+    },
+    {
+      "epoch": 1.7988394584139265,
+      "grad_norm": 0.6602293252944946,
+      "learning_rate": 2.011605415860735e-06,
+      "loss": 0.0386,
+      "step": 930
+    },
+    {
+      "epoch": 1.8181818181818183,
+      "grad_norm": 0.20363681018352509,
+      "learning_rate": 1.8181818181818183e-06,
+      "loss": 0.1173,
+      "step": 940
+    },
+    {
+      "epoch": 1.8375241779497098,
+      "grad_norm": 0.1697886437177658,
+      "learning_rate": 1.6247582205029014e-06,
+      "loss": 0.1885,
+      "step": 950
+    },
+    {
+      "epoch": 1.8568665377176017,
+      "grad_norm": 0.1814304143190384,
+      "learning_rate": 1.4313346228239847e-06,
+      "loss": 0.2553,
+      "step": 960
+    },
+    {
+      "epoch": 1.8762088974854931,
+      "grad_norm": 0.3075575530529022,
+      "learning_rate": 1.2379110251450678e-06,
+      "loss": 0.2235,
+      "step": 970
+    },
+    {
+      "epoch": 1.895551257253385,
+      "grad_norm": 0.17250248789787292,
+      "learning_rate": 1.044487427466151e-06,
+      "loss": 0.0648,
+      "step": 980
+    },
+    {
+      "epoch": 1.9148936170212765,
+      "grad_norm": 0.18552158772945404,
+      "learning_rate": 8.510638297872341e-07,
+      "loss": 0.0266,
+      "step": 990
+    },
+    {
+      "epoch": 1.9342359767891684,
+      "grad_norm": 21.213985443115234,
+      "learning_rate": 6.576402321083172e-07,
+      "loss": 0.2269,
+      "step": 1000
+    },
+    {
+      "epoch": 1.9535783365570598,
+      "grad_norm": 0.20198415219783783,
+      "learning_rate": 4.6421663442940047e-07,
+      "loss": 0.0441,
+      "step": 1010
+    },
+    {
+      "epoch": 1.9729206963249517,
+      "grad_norm": 8.638155937194824,
+      "learning_rate": 2.707930367504836e-07,
+      "loss": 0.0328,
+      "step": 1020
+    },
+    {
+      "epoch": 1.9922630560928434,
+      "grad_norm": 0.18748199939727783,
+      "learning_rate": 7.736943907156674e-08,
+      "loss": 0.0627,
+      "step": 1030
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9699248120300752,
+      "eval_loss": 0.10996536910533905,
+      "eval_runtime": 9.2747,
+      "eval_samples_per_second": 14.34,
+      "eval_steps_per_second": 7.224,
+      "step": 1034
+    },
+    {
+      "epoch": 2.0,
+      "step": 1034,
+      "total_flos": 1.602548708238213e+17,
+      "train_loss": 0.2494486983959172,
+      "train_runtime": 527.8899,
+      "train_samples_per_second": 3.917,
+      "train_steps_per_second": 1.959
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1034,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "total_flos": 1.602548708238213e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}