Training in progress, step 1900, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +712 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c674bf9cdeb23c80b289ac43407765b1e78aedb4884b1385a152cf6a92469ca9
 size 528526760

 version https://git-lfs.github.com/spec/v1
+oid sha256:d06847da158ce168fb93e55cc61bf6bea8dd7967293bd7a3c8e8402b075b8fe4
 size 528526760

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:085e11b95cb9886558bd81f7c18c71d642195ef4464bfe7249b85b4efd5f0b49
 size 141172038

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2dadc0a09d187a8011aebcd91d84a82f37564642f08657fd83aef7232779b1b
 size 141172038

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:206426ce70c7f4de66ff8448310cacccd08ef2e69a6fa5935e72dbe8ba27401e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc0fc3759c3826ef1f53a065691416b3f84d400156909e771c89c5340e089c66
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a80a4389f665fb0206d9e4c906beb4f87034c401ec9e85d64dbadeb28ef38e37
 size 2080

 version https://git-lfs.github.com/spec/v1
+oid sha256:69bc8f012b013c017f6532a85f380ecf6f76a1db060525a9d4f033160c892e6c
 size 2080

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.5684272050857544,
   "best_model_checkpoint": "miner_id_24/checkpoint-1600",
-  "epoch": 0.3378061368114854,
   "eval_steps": 100,
-  "global_step": 1800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12759,6 +12759,714 @@
       "eval_samples_per_second": 3.53,
       "eval_steps_per_second": 1.773,
       "step": 1800
     }
   ],
   "logging_steps": 1,
@@ -12773,7 +13481,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -12787,7 +13495,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.236871551229235e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.5684272050857544,
   "best_model_checkpoint": "miner_id_24/checkpoint-1600",
+  "epoch": 0.3565731444121235,
   "eval_steps": 100,
+  "global_step": 1900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.53,
       "eval_steps_per_second": 1.773,
       "step": 1800
+    },
+    {
+      "epoch": 0.3379938068874918,
+      "grad_norm": 7.328483581542969,
+      "learning_rate": 0.00019398047879803926,
+      "loss": 1.9055,
+      "step": 1801
+    },
+    {
+      "epoch": 0.33818147696349815,
+      "grad_norm": 6.055893421173096,
+      "learning_rate": 0.00019397375581320375,
+      "loss": 2.4346,
+      "step": 1802
+    },
+    {
+      "epoch": 0.33836914703950455,
+      "grad_norm": 8.04631519317627,
+      "learning_rate": 0.00019396701827645302,
+      "loss": 1.5086,
+      "step": 1803
+    },
+    {
+      "epoch": 0.33855681711551094,
+      "grad_norm": 6.279934406280518,
+      "learning_rate": 0.00019396026618778706,
+      "loss": 2.2011,
+      "step": 1804
+    },
+    {
+      "epoch": 0.3387444871915173,
+      "grad_norm": 7.011415481567383,
+      "learning_rate": 0.00019395352865103632,
+      "loss": 2.5766,
+      "step": 1805
+    },
+    {
+      "epoch": 0.3389321572675237,
+      "grad_norm": 11.007534980773926,
+      "learning_rate": 0.0001939467911142856,
+      "loss": 3.25,
+      "step": 1806
+    },
+    {
+      "epoch": 0.3391198273435301,
+      "grad_norm": 4.763912200927734,
+      "learning_rate": 0.00019394003902561963,
+      "loss": 1.7699,
+      "step": 1807
+    },
+    {
+      "epoch": 0.3393074974195365,
+      "grad_norm": 7.067093849182129,
+      "learning_rate": 0.00019393328693695366,
+      "loss": 2.0153,
+      "step": 1808
+    },
+    {
+      "epoch": 0.3394951674955428,
+      "grad_norm": 6.018016338348389,
+      "learning_rate": 0.0001939265348482877,
+      "loss": 1.0305,
+      "step": 1809
+    },
+    {
+      "epoch": 0.3396828375715492,
+      "grad_norm": 5.589052677154541,
+      "learning_rate": 0.0001939197682077065,
+      "loss": 1.623,
+      "step": 1810
+    },
+    {
+      "epoch": 0.3398705076475556,
+      "grad_norm": 4.106575965881348,
+      "learning_rate": 0.00019391300156712532,
+      "loss": 2.3932,
+      "step": 1811
+    },
+    {
+      "epoch": 0.340058177723562,
+      "grad_norm": 6.3381171226501465,
+      "learning_rate": 0.00019390624947845936,
+      "loss": 1.859,
+      "step": 1812
+    },
+    {
+      "epoch": 0.34024584779956835,
+      "grad_norm": 7.823512554168701,
+      "learning_rate": 0.00019389946828596294,
+      "loss": 2.0446,
+      "step": 1813
+    },
+    {
+      "epoch": 0.34043351787557474,
+      "grad_norm": 4.890605449676514,
+      "learning_rate": 0.00019389268709346652,
+      "loss": 0.8467,
+      "step": 1814
+    },
+    {
+      "epoch": 0.34062118795158114,
+      "grad_norm": 7.945042610168457,
+      "learning_rate": 0.00019388592045288533,
+      "loss": 2.6165,
+      "step": 1815
+    },
+    {
+      "epoch": 0.3408088580275875,
+      "grad_norm": 4.810164451599121,
+      "learning_rate": 0.0001938791392603889,
+      "loss": 1.1764,
+      "step": 1816
+    },
+    {
+      "epoch": 0.3409965281035939,
+      "grad_norm": 6.62808895111084,
+      "learning_rate": 0.0001938723580678925,
+      "loss": 2.3356,
+      "step": 1817
+    },
+    {
+      "epoch": 0.3411841981796003,
+      "grad_norm": 8.076144218444824,
+      "learning_rate": 0.00019386556232348084,
+      "loss": 2.2133,
+      "step": 1818
+    },
+    {
+      "epoch": 0.34137186825560667,
+      "grad_norm": 10.573699951171875,
+      "learning_rate": 0.0001938587665790692,
+      "loss": 2.3251,
+      "step": 1819
+    },
+    {
+      "epoch": 0.341559538331613,
+      "grad_norm": 5.872825622558594,
+      "learning_rate": 0.00019385197083465755,
+      "loss": 1.8493,
+      "step": 1820
+    },
+    {
+      "epoch": 0.3417472084076194,
+      "grad_norm": 3.5163869857788086,
+      "learning_rate": 0.0001938451750902459,
+      "loss": 1.1675,
+      "step": 1821
+    },
+    {
+      "epoch": 0.3419348784836258,
+      "grad_norm": 6.068939685821533,
+      "learning_rate": 0.00019383836479391903,
+      "loss": 1.7621,
+      "step": 1822
+    },
+    {
+      "epoch": 0.34212254855963214,
+      "grad_norm": 5.792971611022949,
+      "learning_rate": 0.00019383155449759215,
+      "loss": 1.8668,
+      "step": 1823
+    },
+    {
+      "epoch": 0.34231021863563854,
+      "grad_norm": 8.612492561340332,
+      "learning_rate": 0.00019382474420126528,
+      "loss": 2.2727,
+      "step": 1824
+    },
+    {
+      "epoch": 0.34249788871164494,
+      "grad_norm": 5.430174827575684,
+      "learning_rate": 0.0001938179339049384,
+      "loss": 1.7228,
+      "step": 1825
+    },
+    {
+      "epoch": 0.34268555878765133,
+      "grad_norm": 6.0315375328063965,
+      "learning_rate": 0.0001938111090566963,
+      "loss": 2.283,
+      "step": 1826
+    },
+    {
+      "epoch": 0.3428732288636577,
+      "grad_norm": 5.934972763061523,
+      "learning_rate": 0.00019380429876036942,
+      "loss": 1.37,
+      "step": 1827
+    },
+    {
+      "epoch": 0.34306089893966407,
+      "grad_norm": 8.644633293151855,
+      "learning_rate": 0.00019379747391212732,
+      "loss": 1.7698,
+      "step": 1828
+    },
+    {
+      "epoch": 0.34324856901567047,
+      "grad_norm": 6.068434238433838,
+      "learning_rate": 0.0001937906490638852,
+      "loss": 2.4035,
+      "step": 1829
+    },
+    {
+      "epoch": 0.3434362390916768,
+      "grad_norm": 5.352113723754883,
+      "learning_rate": 0.00019378380966372788,
+      "loss": 1.4043,
+      "step": 1830
+    },
+    {
+      "epoch": 0.3436239091676832,
+      "grad_norm": 5.741805553436279,
+      "learning_rate": 0.00019377697026357055,
+      "loss": 2.3867,
+      "step": 1831
+    },
+    {
+      "epoch": 0.3438115792436896,
+      "grad_norm": 5.221137523651123,
+      "learning_rate": 0.00019377013086341321,
+      "loss": 2.5909,
+      "step": 1832
+    },
+    {
+      "epoch": 0.343999249319696,
+      "grad_norm": 5.222947597503662,
+      "learning_rate": 0.00019376329146325588,
+      "loss": 1.8876,
+      "step": 1833
+    },
+    {
+      "epoch": 0.34418691939570234,
+      "grad_norm": 4.082976341247559,
+      "learning_rate": 0.00019375643751118332,
+      "loss": 1.4823,
+      "step": 1834
+    },
+    {
+      "epoch": 0.34437458947170874,
+      "grad_norm": 4.595718860626221,
+      "learning_rate": 0.00019374958355911076,
+      "loss": 1.3744,
+      "step": 1835
+    },
+    {
+      "epoch": 0.34456225954771513,
+      "grad_norm": 5.016329765319824,
+      "learning_rate": 0.0001937427296070382,
+      "loss": 2.3715,
+      "step": 1836
+    },
+    {
+      "epoch": 0.3447499296237215,
+      "grad_norm": 4.979654312133789,
+      "learning_rate": 0.00019373587565496564,
+      "loss": 1.7598,
+      "step": 1837
+    },
+    {
+      "epoch": 0.34493759969972787,
+      "grad_norm": 5.692010879516602,
+      "learning_rate": 0.00019372900715097785,
+      "loss": 1.5982,
+      "step": 1838
+    },
+    {
+      "epoch": 0.34512526977573427,
+      "grad_norm": 6.913799285888672,
+      "learning_rate": 0.0001937221531989053,
+      "loss": 1.9036,
+      "step": 1839
+    },
+    {
+      "epoch": 0.34531293985174066,
+      "grad_norm": 5.701316833496094,
+      "learning_rate": 0.0001937152846949175,
+      "loss": 1.9009,
+      "step": 1840
+    },
+    {
+      "epoch": 0.345500609927747,
+      "grad_norm": 19.9186954498291,
+      "learning_rate": 0.0001937084161909297,
+      "loss": 2.4767,
+      "step": 1841
+    },
+    {
+      "epoch": 0.3456882800037534,
+      "grad_norm": 5.613144874572754,
+      "learning_rate": 0.0001937015331350267,
+      "loss": 3.6305,
+      "step": 1842
+    },
+    {
+      "epoch": 0.3458759500797598,
+      "grad_norm": 3.7392101287841797,
+      "learning_rate": 0.0001936946646310389,
+      "loss": 1.1532,
+      "step": 1843
+    },
+    {
+      "epoch": 0.34606362015576614,
+      "grad_norm": 7.338042259216309,
+      "learning_rate": 0.00019368776702322066,
+      "loss": 1.95,
+      "step": 1844
+    },
+    {
+      "epoch": 0.34625129023177253,
+      "grad_norm": 5.003443717956543,
+      "learning_rate": 0.00019368088396731764,
+      "loss": 1.7028,
+      "step": 1845
+    },
+    {
+      "epoch": 0.34643896030777893,
+      "grad_norm": 6.806427001953125,
+      "learning_rate": 0.00019367400091141462,
+      "loss": 2.4638,
+      "step": 1846
+    },
+    {
+      "epoch": 0.34662663038378533,
+      "grad_norm": 5.704411506652832,
+      "learning_rate": 0.00019366710330359638,
+      "loss": 2.7797,
+      "step": 1847
+    },
+    {
+      "epoch": 0.34681430045979167,
+      "grad_norm": 3.765601873397827,
+      "learning_rate": 0.00019366020569577813,
+      "loss": 1.8054,
+      "step": 1848
+    },
+    {
+      "epoch": 0.34700197053579807,
+      "grad_norm": 5.865424156188965,
+      "learning_rate": 0.00019365330808795989,
+      "loss": 2.2234,
+      "step": 1849
+    },
+    {
+      "epoch": 0.34718964061180446,
+      "grad_norm": 9.001687049865723,
+      "learning_rate": 0.0001936463959282264,
+      "loss": 2.0534,
+      "step": 1850
+    },
+    {
+      "epoch": 0.3473773106878108,
+      "grad_norm": 3.3067872524261475,
+      "learning_rate": 0.00019363948376849294,
+      "loss": 1.7267,
+      "step": 1851
+    },
+    {
+      "epoch": 0.3475649807638172,
+      "grad_norm": 6.117793560028076,
+      "learning_rate": 0.00019363257160875946,
+      "loss": 1.721,
+      "step": 1852
+    },
+    {
+      "epoch": 0.3477526508398236,
+      "grad_norm": 9.610057830810547,
+      "learning_rate": 0.000193625659449026,
+      "loss": 4.0058,
+      "step": 1853
+    },
+    {
+      "epoch": 0.34794032091583,
+      "grad_norm": 5.996690273284912,
+      "learning_rate": 0.00019361873273737729,
+      "loss": 1.9939,
+      "step": 1854
+    },
+    {
+      "epoch": 0.34812799099183633,
+      "grad_norm": 4.617966175079346,
+      "learning_rate": 0.00019361180602572858,
+      "loss": 1.1395,
+      "step": 1855
+    },
+    {
+      "epoch": 0.34831566106784273,
+      "grad_norm": 6.157725811004639,
+      "learning_rate": 0.00019360487931407988,
+      "loss": 1.5481,
+      "step": 1856
+    },
+    {
+      "epoch": 0.3485033311438491,
+      "grad_norm": 4.873600482940674,
+      "learning_rate": 0.00019359795260243118,
+      "loss": 1.944,
+      "step": 1857
+    },
+    {
+      "epoch": 0.34869100121985547,
+      "grad_norm": 4.755199432373047,
+      "learning_rate": 0.00019359101133886725,
+      "loss": 1.2533,
+      "step": 1858
+    },
+    {
+      "epoch": 0.34887867129586186,
+      "grad_norm": 6.5496392250061035,
+      "learning_rate": 0.00019358408462721854,
+      "loss": 2.6968,
+      "step": 1859
+    },
+    {
+      "epoch": 0.34906634137186826,
+      "grad_norm": 8.573905944824219,
+      "learning_rate": 0.0001935771433636546,
+      "loss": 2.1588,
+      "step": 1860
+    },
+    {
+      "epoch": 0.34925401144787466,
+      "grad_norm": 10.172856330871582,
+      "learning_rate": 0.00019357018754817545,
+      "loss": 2.2443,
+      "step": 1861
+    },
+    {
+      "epoch": 0.349441681523881,
+      "grad_norm": 9.657448768615723,
+      "learning_rate": 0.00019356324628461152,
+      "loss": 2.5489,
+      "step": 1862
+    },
+    {
+      "epoch": 0.3496293515998874,
+      "grad_norm": 4.509249210357666,
+      "learning_rate": 0.00019355629046913236,
+      "loss": 1.3726,
+      "step": 1863
+    },
+    {
+      "epoch": 0.3498170216758938,
+      "grad_norm": 3.9321093559265137,
+      "learning_rate": 0.00019354934920556843,
+      "loss": 2.2097,
+      "step": 1864
+    },
+    {
+      "epoch": 0.35000469175190013,
+      "grad_norm": 5.736708641052246,
+      "learning_rate": 0.00019354237883817405,
+      "loss": 1.9158,
+      "step": 1865
+    },
+    {
+      "epoch": 0.35019236182790653,
+      "grad_norm": 7.1537089347839355,
+      "learning_rate": 0.00019353542302269489,
+      "loss": 2.3103,
+      "step": 1866
+    },
+    {
+      "epoch": 0.3503800319039129,
+      "grad_norm": 17.57890510559082,
+      "learning_rate": 0.00019352843810338527,
+      "loss": 2.7007,
+      "step": 1867
+    },
+    {
+      "epoch": 0.3505677019799193,
+      "grad_norm": 4.420182228088379,
+      "learning_rate": 0.0001935214822879061,
+      "loss": 1.8044,
+      "step": 1868
+    },
+    {
+      "epoch": 0.35075537205592566,
+      "grad_norm": 5.2922234535217285,
+      "learning_rate": 0.00019351451192051172,
+      "loss": 1.9718,
+      "step": 1869
+    },
+    {
+      "epoch": 0.35094304213193206,
+      "grad_norm": 6.406688690185547,
+      "learning_rate": 0.0001935075270012021,
+      "loss": 2.7322,
+      "step": 1870
+    },
+    {
+      "epoch": 0.35113071220793846,
+      "grad_norm": 5.1606974601745605,
+      "learning_rate": 0.00019350055663380772,
+      "loss": 1.9587,
+      "step": 1871
+    },
+    {
+      "epoch": 0.35131838228394485,
+      "grad_norm": 6.796870708465576,
+      "learning_rate": 0.00019349355716258287,
+      "loss": 2.0894,
+      "step": 1872
+    },
+    {
+      "epoch": 0.3515060523599512,
+      "grad_norm": 6.20542573928833,
+      "learning_rate": 0.00019348657224327326,
+      "loss": 2.7715,
+      "step": 1873
+    },
+    {
+      "epoch": 0.3516937224359576,
+      "grad_norm": 5.50722599029541,
+      "learning_rate": 0.00019347958732396364,
+      "loss": 1.8027,
+      "step": 1874
+    },
+    {
+      "epoch": 0.351881392511964,
+      "grad_norm": 7.180240631103516,
+      "learning_rate": 0.0001934725878527388,
+      "loss": 2.0733,
+      "step": 1875
+    },
+    {
+      "epoch": 0.3520690625879703,
+      "grad_norm": 6.353847980499268,
+      "learning_rate": 0.00019346558838151395,
+      "loss": 2.6648,
+      "step": 1876
+    },
+    {
+      "epoch": 0.3522567326639767,
+      "grad_norm": 5.271063327789307,
+      "learning_rate": 0.00019345857435837388,
+      "loss": 1.5801,
+      "step": 1877
+    },
+    {
+      "epoch": 0.3524444027399831,
+      "grad_norm": 5.9332380294799805,
+      "learning_rate": 0.00019345157488714904,
+      "loss": 2.1235,
+      "step": 1878
+    },
+    {
+      "epoch": 0.3526320728159895,
+      "grad_norm": 8.27168083190918,
+      "learning_rate": 0.00019344456086400896,
+      "loss": 2.6918,
+      "step": 1879
+    },
+    {
+      "epoch": 0.35281974289199586,
+      "grad_norm": 5.08257532119751,
+      "learning_rate": 0.0001934375468408689,
+      "loss": 1.2914,
+      "step": 1880
+    },
+    {
+      "epoch": 0.35300741296800225,
+      "grad_norm": 7.454222679138184,
+      "learning_rate": 0.00019343053281772882,
+      "loss": 2.7986,
+      "step": 1881
+    },
+    {
+      "epoch": 0.35319508304400865,
+      "grad_norm": 3.519970417022705,
+      "learning_rate": 0.00019342350424267352,
+      "loss": 1.4717,
+      "step": 1882
+    },
+    {
+      "epoch": 0.353382753120015,
+      "grad_norm": 7.176670074462891,
+      "learning_rate": 0.00019341649021953344,
+      "loss": 2.6793,
+      "step": 1883
+    },
+    {
+      "epoch": 0.3535704231960214,
+      "grad_norm": 9.557621002197266,
+      "learning_rate": 0.00019340946164447814,
+      "loss": 3.08,
+      "step": 1884
+    },
+    {
+      "epoch": 0.3537580932720278,
+      "grad_norm": 6.917932510375977,
+      "learning_rate": 0.00019340243306942284,
+      "loss": 2.3398,
+      "step": 1885
+    },
+    {
+      "epoch": 0.3539457633480342,
+      "grad_norm": 4.977582931518555,
+      "learning_rate": 0.0001933953899424523,
+      "loss": 1.6738,
+      "step": 1886
+    },
+    {
+      "epoch": 0.3541334334240405,
+      "grad_norm": 5.958339691162109,
+      "learning_rate": 0.00019338834681548178,
+      "loss": 1.9964,
+      "step": 1887
+    },
+    {
+      "epoch": 0.3543211035000469,
+      "grad_norm": 7.204160213470459,
+      "learning_rate": 0.00019338130368851125,
+      "loss": 2.4598,
+      "step": 1888
+    },
+    {
+      "epoch": 0.3545087735760533,
+      "grad_norm": 7.081577777862549,
+      "learning_rate": 0.00019337426056154072,
+      "loss": 2.799,
+      "step": 1889
+    },
+    {
+      "epoch": 0.35469644365205966,
+      "grad_norm": 6.637358665466309,
+      "learning_rate": 0.0001933672174345702,
+      "loss": 2.0836,
+      "step": 1890
+    },
+    {
+      "epoch": 0.35488411372806605,
+      "grad_norm": 10.060577392578125,
+      "learning_rate": 0.00019336015975568444,
+      "loss": 1.6455,
+      "step": 1891
+    },
+    {
+      "epoch": 0.35507178380407245,
+      "grad_norm": 6.906970024108887,
+      "learning_rate": 0.00019335310207679868,
+      "loss": 1.9957,
+      "step": 1892
+    },
+    {
+      "epoch": 0.35525945388007885,
+      "grad_norm": 7.135964870452881,
+      "learning_rate": 0.0001933460298459977,
+      "loss": 1.2429,
+      "step": 1893
+    },
+    {
+      "epoch": 0.3554471239560852,
+      "grad_norm": 7.937412261962891,
+      "learning_rate": 0.00019333897216711193,
+      "loss": 2.0331,
+      "step": 1894
+    },
+    {
+      "epoch": 0.3556347940320916,
+      "grad_norm": 5.553816318511963,
+      "learning_rate": 0.00019333189993631095,
+      "loss": 1.6946,
+      "step": 1895
+    },
+    {
+      "epoch": 0.355822464108098,
+      "grad_norm": 7.168800354003906,
+      "learning_rate": 0.0001933248422574252,
+      "loss": 2.1942,
+      "step": 1896
+    },
+    {
+      "epoch": 0.3560101341841043,
+      "grad_norm": 7.036073684692383,
+      "learning_rate": 0.00019331775547470897,
+      "loss": 2.5773,
+      "step": 1897
+    },
+    {
+      "epoch": 0.3561978042601107,
+      "grad_norm": 12.46209716796875,
+      "learning_rate": 0.00019331066869199276,
+      "loss": 2.8978,
+      "step": 1898
+    },
+    {
+      "epoch": 0.3563854743361171,
+      "grad_norm": 6.11179780960083,
+      "learning_rate": 0.00019330359646119177,
+      "loss": 2.1355,
+      "step": 1899
+    },
+    {
+      "epoch": 0.3565731444121235,
+      "grad_norm": 5.809276580810547,
+      "learning_rate": 0.00019329650967847556,
+      "loss": 1.7289,
+      "step": 1900
+    },
+    {
+      "epoch": 0.3565731444121235,
+      "eval_loss": 0.5747910141944885,
+      "eval_runtime": 60.9369,
+      "eval_samples_per_second": 3.528,
+      "eval_steps_per_second": 1.772,
+      "step": 1900
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 3.420104552257413e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null