Training in progress, step 1900, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +711 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96c7361330baef883a1cefd1a914f4dfd222e1942ded5aa53b0020e1631fa6a0
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:a97696a92cabb745ba6cadf2fcc30794ffbc03c91397a016c61944f979c42d0e
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:607344293b376b43b5ea9dcc72b5b6a4a1aec6d2f1618a8d50a1ba12a30b5cac
 size 179316182

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad721c15307c50724acd6ac4b18952a24ff74a5cf21f65080c486168ba9fcdd8
 size 179316182

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:744d24dfd442d827b2147e9ae7ffb1ebb0081316f3e80b03e398055f18dbb8c1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:431a2334446ca149d087ee53da191415d4c95211b38709b7a8404d805dee4327
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d92677bc2060a4e9fdf63252b2a62db0c583efdb9e75a15d0327906fb2aa5af3
 size 2080

 version https://git-lfs.github.com/spec/v1
+oid sha256:536e3a5de3004cb57f6febbf7190ef605673677b3a7bfb0620dc7718281289d6
 size 2080

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4691164972634871,
   "eval_steps": 100,
-  "global_step": 1800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12759,6 +12759,714 @@
       "eval_samples_per_second": 2.868,
       "eval_steps_per_second": 1.434,
       "step": 1800
     }
   ],
   "logging_steps": 1,
@@ -12778,7 +13486,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.218854769338614e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.49517852488923636,
   "eval_steps": 100,
+  "global_step": 1900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.868,
       "eval_steps_per_second": 1.434,
       "step": 1800
+    },
+    {
+      "epoch": 0.4693771175397446,
+      "grad_norm": 10.37785816192627,
+      "learning_rate": 0.00018848585023079067,
+      "loss": 5.1423,
+      "step": 1801
+    },
+    {
+      "epoch": 0.4696377378160021,
+      "grad_norm": 13.957540512084961,
+      "learning_rate": 0.00018847310275305063,
+      "loss": 4.527,
+      "step": 1802
+    },
+    {
+      "epoch": 0.4698983580922596,
+      "grad_norm": 16.043025970458984,
+      "learning_rate": 0.00018846034072339535,
+      "loss": 5.4377,
+      "step": 1803
+    },
+    {
+      "epoch": 0.47015897836851706,
+      "grad_norm": 12.010665893554688,
+      "learning_rate": 0.00018844757869374007,
+      "loss": 4.7116,
+      "step": 1804
+    },
+    {
+      "epoch": 0.47041959864477456,
+      "grad_norm": 14.77712631225586,
+      "learning_rate": 0.0001884348166640848,
+      "loss": 5.9034,
+      "step": 1805
+    },
+    {
+      "epoch": 0.47068021892103207,
+      "grad_norm": 10.91681957244873,
+      "learning_rate": 0.00018842202553059906,
+      "loss": 5.0634,
+      "step": 1806
+    },
+    {
+      "epoch": 0.4709408391972896,
+      "grad_norm": 12.498564720153809,
+      "learning_rate": 0.00018840924894902855,
+      "loss": 5.6897,
+      "step": 1807
+    },
+    {
+      "epoch": 0.471201459473547,
+      "grad_norm": 15.501856803894043,
+      "learning_rate": 0.00018839645781554282,
+      "loss": 3.1788,
+      "step": 1808
+    },
+    {
+      "epoch": 0.47146207974980453,
+      "grad_norm": 13.191068649291992,
+      "learning_rate": 0.00018838365213014185,
+      "loss": 4.8908,
+      "step": 1809
+    },
+    {
+      "epoch": 0.47172270002606204,
+      "grad_norm": 18.41266441345215,
+      "learning_rate": 0.00018837086099665612,
+      "loss": 5.3714,
+      "step": 1810
+    },
+    {
+      "epoch": 0.47198332030231954,
+      "grad_norm": 15.97313117980957,
+      "learning_rate": 0.00018835805531125516,
+      "loss": 5.7231,
+      "step": 1811
+    },
+    {
+      "epoch": 0.472243940578577,
+      "grad_norm": 12.212700843811035,
+      "learning_rate": 0.00018834523507393897,
+      "loss": 5.0301,
+      "step": 1812
+    },
+    {
+      "epoch": 0.4725045608548345,
+      "grad_norm": 218.50746154785156,
+      "learning_rate": 0.00018833241483662277,
+      "loss": 6.6723,
+      "step": 1813
+    },
+    {
+      "epoch": 0.472765181131092,
+      "grad_norm": 12.388707160949707,
+      "learning_rate": 0.00018831958004739136,
+      "loss": 5.1241,
+      "step": 1814
+    },
+    {
+      "epoch": 0.4730258014073495,
+      "grad_norm": 18.206995010375977,
+      "learning_rate": 0.00018830674525815994,
+      "loss": 4.8303,
+      "step": 1815
+    },
+    {
+      "epoch": 0.47328642168360696,
+      "grad_norm": 72.17833709716797,
+      "learning_rate": 0.0001882938959170133,
+      "loss": 4.8661,
+      "step": 1816
+    },
+    {
+      "epoch": 0.47354704195986447,
+      "grad_norm": 18.09402847290039,
+      "learning_rate": 0.00018828104657586664,
+      "loss": 5.473,
+      "step": 1817
+    },
+    {
+      "epoch": 0.473807662236122,
+      "grad_norm": 241.85777282714844,
+      "learning_rate": 0.00018826819723472,
+      "loss": 5.6158,
+      "step": 1818
+    },
+    {
+      "epoch": 0.4740682825123795,
+      "grad_norm": 12.830618858337402,
+      "learning_rate": 0.00018825533334165812,
+      "loss": 4.5027,
+      "step": 1819
+    },
+    {
+      "epoch": 0.47432890278863693,
+      "grad_norm": 18.599620819091797,
+      "learning_rate": 0.000188242454896681,
+      "loss": 5.6648,
+      "step": 1820
+    },
+    {
+      "epoch": 0.47458952306489444,
+      "grad_norm": 13.098050117492676,
+      "learning_rate": 0.0001882295764517039,
+      "loss": 5.0379,
+      "step": 1821
+    },
+    {
+      "epoch": 0.47485014334115194,
+      "grad_norm": 9.492166519165039,
+      "learning_rate": 0.0001882166980067268,
+      "loss": 5.198,
+      "step": 1822
+    },
+    {
+      "epoch": 0.47511076361740945,
+      "grad_norm": 10.473576545715332,
+      "learning_rate": 0.00018820380500983447,
+      "loss": 5.303,
+      "step": 1823
+    },
+    {
+      "epoch": 0.47537138389366695,
+      "grad_norm": 9.241297721862793,
+      "learning_rate": 0.00018819091201294214,
+      "loss": 5.0623,
+      "step": 1824
+    },
+    {
+      "epoch": 0.4756320041699244,
+      "grad_norm": 17.09585189819336,
+      "learning_rate": 0.00018817800446413457,
+      "loss": 4.8895,
+      "step": 1825
+    },
+    {
+      "epoch": 0.4758926244461819,
+      "grad_norm": 10.225650787353516,
+      "learning_rate": 0.000188165096915327,
+      "loss": 4.9848,
+      "step": 1826
+    },
+    {
+      "epoch": 0.4761532447224394,
+      "grad_norm": 10.421326637268066,
+      "learning_rate": 0.00018815218936651945,
+      "loss": 4.5205,
+      "step": 1827
+    },
+    {
+      "epoch": 0.4764138649986969,
+      "grad_norm": 13.143168449401855,
+      "learning_rate": 0.00018813925271388143,
+      "loss": 5.2166,
+      "step": 1828
+    },
+    {
+      "epoch": 0.4766744852749544,
+      "grad_norm": 12.645442962646484,
+      "learning_rate": 0.00018812633061315864,
+      "loss": 5.1552,
+      "step": 1829
+    },
+    {
+      "epoch": 0.4769351055512119,
+      "grad_norm": 11.635645866394043,
+      "learning_rate": 0.00018811339396052063,
+      "loss": 4.7997,
+      "step": 1830
+    },
+    {
+      "epoch": 0.4771957258274694,
+      "grad_norm": 13.22658920288086,
+      "learning_rate": 0.0001881004573078826,
+      "loss": 4.2591,
+      "step": 1831
+    },
+    {
+      "epoch": 0.4774563461037269,
+      "grad_norm": 14.026827812194824,
+      "learning_rate": 0.00018808750610332936,
+      "loss": 5.8879,
+      "step": 1832
+    },
+    {
+      "epoch": 0.47771696637998434,
+      "grad_norm": 12.296269416809082,
+      "learning_rate": 0.00018807455489877611,
+      "loss": 4.9644,
+      "step": 1833
+    },
+    {
+      "epoch": 0.47797758665624185,
+      "grad_norm": 10.826774597167969,
+      "learning_rate": 0.00018806158914230764,
+      "loss": 4.0063,
+      "step": 1834
+    },
+    {
+      "epoch": 0.47823820693249935,
+      "grad_norm": 11.467571258544922,
+      "learning_rate": 0.00018804862338583916,
+      "loss": 4.6543,
+      "step": 1835
+    },
+    {
+      "epoch": 0.47849882720875686,
+      "grad_norm": 11.653413772583008,
+      "learning_rate": 0.00018803564307745546,
+      "loss": 5.013,
+      "step": 1836
+    },
+    {
+      "epoch": 0.4787594474850143,
+      "grad_norm": 10.89033317565918,
+      "learning_rate": 0.00018802266276907176,
+      "loss": 4.8319,
+      "step": 1837
+    },
+    {
+      "epoch": 0.4790200677612718,
+      "grad_norm": 12.89120101928711,
+      "learning_rate": 0.00018800966790877283,
+      "loss": 4.5967,
+      "step": 1838
+    },
+    {
+      "epoch": 0.4792806880375293,
+      "grad_norm": 12.160778045654297,
+      "learning_rate": 0.00018799668760038912,
+      "loss": 5.7447,
+      "step": 1839
+    },
+    {
+      "epoch": 0.47954130831378683,
+      "grad_norm": 13.569367408752441,
+      "learning_rate": 0.00018798367818817496,
+      "loss": 5.7456,
+      "step": 1840
+    },
+    {
+      "epoch": 0.4798019285900443,
+      "grad_norm": 16.512102127075195,
+      "learning_rate": 0.0001879706687759608,
+      "loss": 5.2688,
+      "step": 1841
+    },
+    {
+      "epoch": 0.4800625488663018,
+      "grad_norm": 15.978020668029785,
+      "learning_rate": 0.00018795764481183141,
+      "loss": 5.4303,
+      "step": 1842
+    },
+    {
+      "epoch": 0.4803231691425593,
+      "grad_norm": 9.993230819702148,
+      "learning_rate": 0.00018794463539961725,
+      "loss": 5.0611,
+      "step": 1843
+    },
+    {
+      "epoch": 0.4805837894188168,
+      "grad_norm": 16.74565315246582,
+      "learning_rate": 0.00018793161143548787,
+      "loss": 5.321,
+      "step": 1844
+    },
+    {
+      "epoch": 0.4808444096950743,
+      "grad_norm": 12.556840896606445,
+      "learning_rate": 0.00018791855836752802,
+      "loss": 5.2619,
+      "step": 1845
+    },
+    {
+      "epoch": 0.48110502997133175,
+      "grad_norm": 11.444070816040039,
+      "learning_rate": 0.0001879055198514834,
+      "loss": 5.0738,
+      "step": 1846
+    },
+    {
+      "epoch": 0.48136565024758926,
+      "grad_norm": 23.41417121887207,
+      "learning_rate": 0.0001878924813354388,
+      "loss": 4.5046,
+      "step": 1847
+    },
+    {
+      "epoch": 0.48162627052384677,
+      "grad_norm": 10.137743949890137,
+      "learning_rate": 0.00018787942826747894,
+      "loss": 4.5996,
+      "step": 1848
+    },
+    {
+      "epoch": 0.4818868908001043,
+      "grad_norm": 11.530888557434082,
+      "learning_rate": 0.00018786636064760387,
+      "loss": 4.9895,
+      "step": 1849
+    },
+    {
+      "epoch": 0.4821475110763617,
+      "grad_norm": 10.694371223449707,
+      "learning_rate": 0.0001878532930277288,
+      "loss": 4.4927,
+      "step": 1850
+    },
+    {
+      "epoch": 0.48240813135261923,
+      "grad_norm": 11.96599292755127,
+      "learning_rate": 0.00018784022540785372,
+      "loss": 4.7774,
+      "step": 1851
+    },
+    {
+      "epoch": 0.48266875162887674,
+      "grad_norm": 9.961639404296875,
+      "learning_rate": 0.00018782714323606342,
+      "loss": 4.7172,
+      "step": 1852
+    },
+    {
+      "epoch": 0.48292937190513424,
+      "grad_norm": 16.102052688598633,
+      "learning_rate": 0.0001878140465123579,
+      "loss": 5.5745,
+      "step": 1853
+    },
+    {
+      "epoch": 0.4831899921813917,
+      "grad_norm": 13.830510139465332,
+      "learning_rate": 0.00018780094978865236,
+      "loss": 5.3226,
+      "step": 1854
+    },
+    {
+      "epoch": 0.4834506124576492,
+      "grad_norm": 13.367227554321289,
+      "learning_rate": 0.00018778785306494683,
+      "loss": 4.7316,
+      "step": 1855
+    },
+    {
+      "epoch": 0.4837112327339067,
+      "grad_norm": 17.57742691040039,
+      "learning_rate": 0.00018777474178932607,
+      "loss": 5.3159,
+      "step": 1856
+    },
+    {
+      "epoch": 0.4839718530101642,
+      "grad_norm": 9.331389427185059,
+      "learning_rate": 0.0001877616305137053,
+      "loss": 5.1851,
+      "step": 1857
+    },
+    {
+      "epoch": 0.48423247328642166,
+      "grad_norm": 14.18066120147705,
+      "learning_rate": 0.00018774850468616933,
+      "loss": 4.0067,
+      "step": 1858
+    },
+    {
+      "epoch": 0.48449309356267917,
+      "grad_norm": 16.757022857666016,
+      "learning_rate": 0.00018773537885863334,
+      "loss": 5.3286,
+      "step": 1859
+    },
+    {
+      "epoch": 0.4847537138389367,
+      "grad_norm": 9.797025680541992,
+      "learning_rate": 0.00018772225303109735,
+      "loss": 4.3471,
+      "step": 1860
+    },
+    {
+      "epoch": 0.4850143341151942,
+      "grad_norm": 10.527436256408691,
+      "learning_rate": 0.00018770911265164614,
+      "loss": 4.6839,
+      "step": 1861
+    },
+    {
+      "epoch": 0.48527495439145163,
+      "grad_norm": 12.43220329284668,
+      "learning_rate": 0.0001876959577202797,
+      "loss": 4.8527,
+      "step": 1862
+    },
+    {
+      "epoch": 0.48553557466770914,
+      "grad_norm": 28.21805763244629,
+      "learning_rate": 0.00018768281734082848,
+      "loss": 5.3501,
+      "step": 1863
+    },
+    {
+      "epoch": 0.48579619494396664,
+      "grad_norm": 11.163954734802246,
+      "learning_rate": 0.0001876696478575468,
+      "loss": 4.8881,
+      "step": 1864
+    },
+    {
+      "epoch": 0.48605681522022415,
+      "grad_norm": 10.474588394165039,
+      "learning_rate": 0.00018765649292618036,
+      "loss": 5.016,
+      "step": 1865
+    },
+    {
+      "epoch": 0.48631743549648165,
+      "grad_norm": 9.17603588104248,
+      "learning_rate": 0.0001876433234428987,
+      "loss": 5.0946,
+      "step": 1866
+    },
+    {
+      "epoch": 0.4865780557727391,
+      "grad_norm": 9.54752254486084,
+      "learning_rate": 0.0001876301394077018,
+      "loss": 5.6084,
+      "step": 1867
+    },
+    {
+      "epoch": 0.4868386760489966,
+      "grad_norm": 14.107392311096191,
+      "learning_rate": 0.0001876169553725049,
+      "loss": 5.037,
+      "step": 1868
+    },
+    {
+      "epoch": 0.4870992963252541,
+      "grad_norm": 10.658012390136719,
+      "learning_rate": 0.00018760375678539276,
+      "loss": 5.0839,
+      "step": 1869
+    },
+    {
+      "epoch": 0.4873599166015116,
+      "grad_norm": 14.332069396972656,
+      "learning_rate": 0.00018759055819828063,
+      "loss": 5.2982,
+      "step": 1870
+    },
+    {
+      "epoch": 0.4876205368777691,
+      "grad_norm": 10.385351181030273,
+      "learning_rate": 0.00018757734505925328,
+      "loss": 5.3114,
+      "step": 1871
+    },
+    {
+      "epoch": 0.4878811571540266,
+      "grad_norm": 10.018989562988281,
+      "learning_rate": 0.00018756414647214115,
+      "loss": 5.7433,
+      "step": 1872
+    },
+    {
+      "epoch": 0.4881417774302841,
+      "grad_norm": 14.237773895263672,
+      "learning_rate": 0.00018755091878119856,
+      "loss": 5.8772,
+      "step": 1873
+    },
+    {
+      "epoch": 0.4884023977065416,
+      "grad_norm": 12.690217971801758,
+      "learning_rate": 0.0001875377056421712,
+      "loss": 5.134,
+      "step": 1874
+    },
+    {
+      "epoch": 0.48866301798279904,
+      "grad_norm": 13.357400894165039,
+      "learning_rate": 0.0001875244633993134,
+      "loss": 5.1887,
+      "step": 1875
+    },
+    {
+      "epoch": 0.48892363825905655,
+      "grad_norm": 13.582716941833496,
+      "learning_rate": 0.00018751122115645558,
+      "loss": 4.3156,
+      "step": 1876
+    },
+    {
+      "epoch": 0.48918425853531405,
+      "grad_norm": 15.98849868774414,
+      "learning_rate": 0.00018749797891359776,
+      "loss": 5.4427,
+      "step": 1877
+    },
+    {
+      "epoch": 0.48944487881157156,
+      "grad_norm": 11.24950122833252,
+      "learning_rate": 0.00018748472211882472,
+      "loss": 3.6912,
+      "step": 1878
+    },
+    {
+      "epoch": 0.489705499087829,
+      "grad_norm": 12.458173751831055,
+      "learning_rate": 0.00018747146532405168,
+      "loss": 4.5879,
+      "step": 1879
+    },
+    {
+      "epoch": 0.4899661193640865,
+      "grad_norm": 11.879009246826172,
+      "learning_rate": 0.00018745820852927864,
+      "loss": 5.0259,
+      "step": 1880
+    },
+    {
+      "epoch": 0.490226739640344,
+      "grad_norm": 12.405200958251953,
+      "learning_rate": 0.00018744493718259037,
+      "loss": 4.2239,
+      "step": 1881
+    },
+    {
+      "epoch": 0.49048735991660153,
+      "grad_norm": 16.421504974365234,
+      "learning_rate": 0.00018743165128398687,
+      "loss": 4.807,
+      "step": 1882
+    },
+    {
+      "epoch": 0.490747980192859,
+      "grad_norm": 14.629226684570312,
+      "learning_rate": 0.0001874183799372986,
+      "loss": 5.0214,
+      "step": 1883
+    },
+    {
+      "epoch": 0.4910086004691165,
+      "grad_norm": 14.704447746276855,
+      "learning_rate": 0.00018740507948677987,
+      "loss": 5.0339,
+      "step": 1884
+    },
+    {
+      "epoch": 0.491269220745374,
+      "grad_norm": 12.018187522888184,
+      "learning_rate": 0.00018739179358817637,
+      "loss": 5.4619,
+      "step": 1885
+    },
+    {
+      "epoch": 0.4915298410216315,
+      "grad_norm": 19.699617385864258,
+      "learning_rate": 0.00018737847858574241,
+      "loss": 5.2781,
+      "step": 1886
+    },
+    {
+      "epoch": 0.491790461297889,
+      "grad_norm": 13.180678367614746,
+      "learning_rate": 0.00018736516358330846,
+      "loss": 5.0445,
+      "step": 1887
+    },
+    {
+      "epoch": 0.49205108157414645,
+      "grad_norm": 15.855171203613281,
+      "learning_rate": 0.0001873518485808745,
+      "loss": 4.6027,
+      "step": 1888
+    },
+    {
+      "epoch": 0.49231170185040396,
+      "grad_norm": 11.354763984680176,
+      "learning_rate": 0.00018733851902652532,
+      "loss": 5.392,
+      "step": 1889
+    },
+    {
+      "epoch": 0.49257232212666147,
+      "grad_norm": 10.087592124938965,
+      "learning_rate": 0.00018732520402409136,
+      "loss": 4.3135,
+      "step": 1890
+    },
+    {
+      "epoch": 0.49283294240291897,
+      "grad_norm": 11.550101280212402,
+      "learning_rate": 0.00018731185991782695,
+      "loss": 4.9187,
+      "step": 1891
+    },
+    {
+      "epoch": 0.4930935626791764,
+      "grad_norm": 13.736188888549805,
+      "learning_rate": 0.00018729851581156254,
+      "loss": 5.687,
+      "step": 1892
+    },
+    {
+      "epoch": 0.49335418295543393,
+      "grad_norm": 12.104930877685547,
+      "learning_rate": 0.00018728517170529813,
+      "loss": 5.2789,
+      "step": 1893
+    },
+    {
+      "epoch": 0.49361480323169143,
+      "grad_norm": 12.98553466796875,
+      "learning_rate": 0.00018727181304711848,
+      "loss": 4.5616,
+      "step": 1894
+    },
+    {
+      "epoch": 0.49387542350794894,
+      "grad_norm": 11.326464653015137,
+      "learning_rate": 0.00018725845438893884,
+      "loss": 4.837,
+      "step": 1895
+    },
+    {
+      "epoch": 0.4941360437842064,
+      "grad_norm": 12.19218921661377,
+      "learning_rate": 0.00018724508117884398,
+      "loss": 5.0118,
+      "step": 1896
+    },
+    {
+      "epoch": 0.4943966640604639,
+      "grad_norm": 9.611741065979004,
+      "learning_rate": 0.0001872317079687491,
+      "loss": 5.421,
+      "step": 1897
+    },
+    {
+      "epoch": 0.4946572843367214,
+      "grad_norm": 20.134742736816406,
+      "learning_rate": 0.00018721833475865424,
+      "loss": 4.8459,
+      "step": 1898
+    },
+    {
+      "epoch": 0.4949179046129789,
+      "grad_norm": 18.743206024169922,
+      "learning_rate": 0.00018720494699664414,
+      "loss": 4.4803,
+      "step": 1899
+    },
+    {
+      "epoch": 0.49517852488923636,
+      "grad_norm": 12.631272315979004,
+      "learning_rate": 0.0001871915446827188,
+      "loss": 3.5087,
+      "step": 1900
+    },
+    {
+      "epoch": 0.49517852488923636,
+      "eval_loss": 2.3786075115203857,
+      "eval_runtime": 27.2055,
+      "eval_samples_per_second": 2.867,
+      "eval_steps_per_second": 1.434,
+      "step": 1900
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.397729721788662e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null