Training in progress, step 2000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +711 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a20574280942fbd9b18ef4658c8fb9227d2bcff7e36dd3cc79031ade6e5e8e1
 size 1573038792

 version https://git-lfs.github.com/spec/v1
+oid sha256:0eff2c7a72b1bd7a2154447eca5f146b8ca5e7366b2ee2b1e1b1ba795e79c381
 size 1573038792

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89078c1ca01ec081e982c6df806045ac81a541f9ab29d1a50a914de7ee2ea830
 size 520574714

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b578c2ecf931245e13d8454c6cfafeddde4216fa06f8f438fc30d0e9c43bfeb
 size 520574714

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8c9939287798422f6d4070a9dbb459bd39b466c39d4f02359769c9cee0c74ee
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:28ffaac67b1e5975b97cbb1f1fd2ace18e322aa6d0cb09f44e7757b6245496bc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b7f58c160991363273c99c78edfec7f155cdc9d1541125d363c13bb965de0e1
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:84539daf1c6cd50735af6f3b31ba998dcf05d26804999780a028b860cd6bc73e
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.14958303728357206,
   "eval_steps": 1000,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -715,6 +715,714 @@
       "eval_samples_per_second": 6.534,
       "eval_steps_per_second": 0.847,
       "step": 1000
     }
   ],
   "logging_steps": 10,
@@ -734,7 +1442,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.473942101832745e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2991660745671441,
   "eval_steps": 1000,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.534,
       "eval_steps_per_second": 0.847,
       "step": 1000
+    },
+    {
+      "epoch": 0.15107886765640777,
+      "grad_norm": 0.4198203980922699,
+      "learning_rate": 0.0004995341055758453,
+      "loss": 7.6051,
+      "step": 1010
+    },
+    {
+      "epoch": 0.15257469802924348,
+      "grad_norm": 0.614424467086792,
+      "learning_rate": 0.0004995156620444583,
+      "loss": 7.5955,
+      "step": 1020
+    },
+    {
+      "epoch": 0.15407052840207922,
+      "grad_norm": 0.5094268918037415,
+      "learning_rate": 0.0004994968608535066,
+      "loss": 7.5807,
+      "step": 1030
+    },
+    {
+      "epoch": 0.15556635877491493,
+      "grad_norm": 0.4448883831501007,
+      "learning_rate": 0.0004994777020299404,
+      "loss": 7.6112,
+      "step": 1040
+    },
+    {
+      "epoch": 0.15706218914775064,
+      "grad_norm": 0.4672151803970337,
+      "learning_rate": 0.0004994581856012219,
+      "loss": 7.6151,
+      "step": 1050
+    },
+    {
+      "epoch": 0.15855801952058637,
+      "grad_norm": 0.4254607856273651,
+      "learning_rate": 0.0004994383115953266,
+      "loss": 7.5931,
+      "step": 1060
+    },
+    {
+      "epoch": 0.16005384989342208,
+      "grad_norm": 0.5613588094711304,
+      "learning_rate": 0.000499418080040742,
+      "loss": 7.6144,
+      "step": 1070
+    },
+    {
+      "epoch": 0.1615496802662578,
+      "grad_norm": 0.5766307711601257,
+      "learning_rate": 0.0004993974909664683,
+      "loss": 7.595,
+      "step": 1080
+    },
+    {
+      "epoch": 0.16304551063909353,
+      "grad_norm": 0.44581693410873413,
+      "learning_rate": 0.0004993765444020183,
+      "loss": 7.6035,
+      "step": 1090
+    },
+    {
+      "epoch": 0.16454134101192924,
+      "grad_norm": 0.48525750637054443,
+      "learning_rate": 0.000499355240377417,
+      "loss": 7.622,
+      "step": 1100
+    },
+    {
+      "epoch": 0.16603717138476498,
+      "grad_norm": 0.44798675179481506,
+      "learning_rate": 0.000499333578923202,
+      "loss": 7.5642,
+      "step": 1110
+    },
+    {
+      "epoch": 0.1675330017576007,
+      "grad_norm": 0.45845216512680054,
+      "learning_rate": 0.0004993115600704233,
+      "loss": 7.6037,
+      "step": 1120
+    },
+    {
+      "epoch": 0.1690288321304364,
+      "grad_norm": 0.4126313030719757,
+      "learning_rate": 0.0004992891838506429,
+      "loss": 7.6047,
+      "step": 1130
+    },
+    {
+      "epoch": 0.17052466250327214,
+      "grad_norm": 0.42024609446525574,
+      "learning_rate": 0.0004992664502959351,
+      "loss": 7.576,
+      "step": 1140
+    },
+    {
+      "epoch": 0.17202049287610785,
+      "grad_norm": 0.40088099241256714,
+      "learning_rate": 0.0004992433594388868,
+      "loss": 7.6071,
+      "step": 1150
+    },
+    {
+      "epoch": 0.17351632324894356,
+      "grad_norm": 0.48331788182258606,
+      "learning_rate": 0.0004992199113125964,
+      "loss": 7.6145,
+      "step": 1160
+    },
+    {
+      "epoch": 0.1750121536217793,
+      "grad_norm": 0.40282952785491943,
+      "learning_rate": 0.0004991961059506754,
+      "loss": 7.6324,
+      "step": 1170
+    },
+    {
+      "epoch": 0.176507983994615,
+      "grad_norm": 0.4179229736328125,
+      "learning_rate": 0.0004991719433872461,
+      "loss": 7.5516,
+      "step": 1180
+    },
+    {
+      "epoch": 0.17800381436745072,
+      "grad_norm": 0.41357994079589844,
+      "learning_rate": 0.000499147423656944,
+      "loss": 7.5959,
+      "step": 1190
+    },
+    {
+      "epoch": 0.17949964474028646,
+      "grad_norm": 0.39742010831832886,
+      "learning_rate": 0.0004991225467949157,
+      "loss": 7.5794,
+      "step": 1200
+    },
+    {
+      "epoch": 0.18099547511312217,
+      "grad_norm": 0.415096253156662,
+      "learning_rate": 0.0004990973128368205,
+      "loss": 7.5593,
+      "step": 1210
+    },
+    {
+      "epoch": 0.1824913054859579,
+      "grad_norm": 0.4735938012599945,
+      "learning_rate": 0.0004990717218188286,
+      "loss": 7.6023,
+      "step": 1220
+    },
+    {
+      "epoch": 0.18398713585879362,
+      "grad_norm": 0.4850127696990967,
+      "learning_rate": 0.000499045773777623,
+      "loss": 7.596,
+      "step": 1230
+    },
+    {
+      "epoch": 0.18548296623162933,
+      "grad_norm": 0.3725271224975586,
+      "learning_rate": 0.0004990194687503977,
+      "loss": 7.646,
+      "step": 1240
+    },
+    {
+      "epoch": 0.18697879660446506,
+      "grad_norm": 0.42448315024375916,
+      "learning_rate": 0.000498992806774859,
+      "loss": 7.6134,
+      "step": 1250
+    },
+    {
+      "epoch": 0.18847462697730077,
+      "grad_norm": 0.4265682101249695,
+      "learning_rate": 0.0004989657878892244,
+      "loss": 7.5899,
+      "step": 1260
+    },
+    {
+      "epoch": 0.18997045735013648,
+      "grad_norm": 0.4955170154571533,
+      "learning_rate": 0.0004989384121322232,
+      "loss": 7.6075,
+      "step": 1270
+    },
+    {
+      "epoch": 0.19146628772297222,
+      "grad_norm": 0.37979021668434143,
+      "learning_rate": 0.0004989106795430965,
+      "loss": 7.6068,
+      "step": 1280
+    },
+    {
+      "epoch": 0.19296211809580793,
+      "grad_norm": 0.414298415184021,
+      "learning_rate": 0.0004988825901615962,
+      "loss": 7.6005,
+      "step": 1290
+    },
+    {
+      "epoch": 0.19445794846864364,
+      "grad_norm": 0.44306114315986633,
+      "learning_rate": 0.0004988541440279862,
+      "loss": 7.5973,
+      "step": 1300
+    },
+    {
+      "epoch": 0.19595377884147938,
+      "grad_norm": 0.3580173850059509,
+      "learning_rate": 0.0004988253411830418,
+      "loss": 7.595,
+      "step": 1310
+    },
+    {
+      "epoch": 0.1974496092143151,
+      "grad_norm": 0.4121604859828949,
+      "learning_rate": 0.0004987961816680492,
+      "loss": 7.5682,
+      "step": 1320
+    },
+    {
+      "epoch": 0.19894543958715083,
+      "grad_norm": 0.38618120551109314,
+      "learning_rate": 0.0004987666655248063,
+      "loss": 7.586,
+      "step": 1330
+    },
+    {
+      "epoch": 0.20044126995998654,
+      "grad_norm": 0.38373953104019165,
+      "learning_rate": 0.0004987367927956217,
+      "loss": 7.5604,
+      "step": 1340
+    },
+    {
+      "epoch": 0.20193710033282225,
+      "grad_norm": 0.420476496219635,
+      "learning_rate": 0.0004987065635233158,
+      "loss": 7.6216,
+      "step": 1350
+    },
+    {
+      "epoch": 0.203432930705658,
+      "grad_norm": 0.34908464550971985,
+      "learning_rate": 0.0004986759777512196,
+      "loss": 7.6102,
+      "step": 1360
+    },
+    {
+      "epoch": 0.2049287610784937,
+      "grad_norm": 0.5113145112991333,
+      "learning_rate": 0.0004986450355231748,
+      "loss": 7.6339,
+      "step": 1370
+    },
+    {
+      "epoch": 0.2064245914513294,
+      "grad_norm": 0.3865181803703308,
+      "learning_rate": 0.0004986137368835351,
+      "loss": 7.6221,
+      "step": 1380
+    },
+    {
+      "epoch": 0.20792042182416515,
+      "grad_norm": 0.3864174783229828,
+      "learning_rate": 0.0004985820818771639,
+      "loss": 7.5998,
+      "step": 1390
+    },
+    {
+      "epoch": 0.20941625219700086,
+      "grad_norm": 0.5763099789619446,
+      "learning_rate": 0.0004985500705494364,
+      "loss": 7.5634,
+      "step": 1400
+    },
+    {
+      "epoch": 0.21091208256983657,
+      "grad_norm": 0.3169045150279999,
+      "learning_rate": 0.0004985177029462379,
+      "loss": 7.5592,
+      "step": 1410
+    },
+    {
+      "epoch": 0.2124079129426723,
+      "grad_norm": 0.4025494456291199,
+      "learning_rate": 0.0004984849791139646,
+      "loss": 7.5902,
+      "step": 1420
+    },
+    {
+      "epoch": 0.21390374331550802,
+      "grad_norm": 0.3893519341945648,
+      "learning_rate": 0.0004984518990995234,
+      "loss": 7.5984,
+      "step": 1430
+    },
+    {
+      "epoch": 0.21539957368834375,
+      "grad_norm": 0.3353728950023651,
+      "learning_rate": 0.0004984184629503318,
+      "loss": 7.5662,
+      "step": 1440
+    },
+    {
+      "epoch": 0.21689540406117946,
+      "grad_norm": 0.36072856187820435,
+      "learning_rate": 0.0004983846707143174,
+      "loss": 7.5784,
+      "step": 1450
+    },
+    {
+      "epoch": 0.21839123443401517,
+      "grad_norm": 0.385777086019516,
+      "learning_rate": 0.0004983505224399187,
+      "loss": 7.593,
+      "step": 1460
+    },
+    {
+      "epoch": 0.2198870648068509,
+      "grad_norm": 0.3383258581161499,
+      "learning_rate": 0.0004983160181760845,
+      "loss": 7.5652,
+      "step": 1470
+    },
+    {
+      "epoch": 0.22138289517968662,
+      "grad_norm": 0.409434050321579,
+      "learning_rate": 0.0004982811579722735,
+      "loss": 7.5578,
+      "step": 1480
+    },
+    {
+      "epoch": 0.22287872555252233,
+      "grad_norm": 0.40944933891296387,
+      "learning_rate": 0.0004982459418784549,
+      "loss": 7.5988,
+      "step": 1490
+    },
+    {
+      "epoch": 0.22437455592535807,
+      "grad_norm": 0.3934713900089264,
+      "learning_rate": 0.0004982103699451082,
+      "loss": 7.5879,
+      "step": 1500
+    },
+    {
+      "epoch": 0.22587038629819378,
+      "grad_norm": 0.3455521762371063,
+      "learning_rate": 0.0004981744422232224,
+      "loss": 7.6158,
+      "step": 1510
+    },
+    {
+      "epoch": 0.2273662166710295,
+      "grad_norm": 0.35238131880760193,
+      "learning_rate": 0.000498138158764297,
+      "loss": 7.6229,
+      "step": 1520
+    },
+    {
+      "epoch": 0.22886204704386523,
+      "grad_norm": 0.3862851858139038,
+      "learning_rate": 0.0004981015196203414,
+      "loss": 7.5969,
+      "step": 1530
+    },
+    {
+      "epoch": 0.23035787741670094,
+      "grad_norm": 0.39322030544281006,
+      "learning_rate": 0.0004980645248438745,
+      "loss": 7.5595,
+      "step": 1540
+    },
+    {
+      "epoch": 0.23185370778953668,
+      "grad_norm": 0.4199928045272827,
+      "learning_rate": 0.0004980271744879254,
+      "loss": 7.5719,
+      "step": 1550
+    },
+    {
+      "epoch": 0.2333495381623724,
+      "grad_norm": 0.6090747714042664,
+      "learning_rate": 0.0004979894686060325,
+      "loss": 7.544,
+      "step": 1560
+    },
+    {
+      "epoch": 0.2348453685352081,
+      "grad_norm": 0.3939005136489868,
+      "learning_rate": 0.0004979514072522439,
+      "loss": 7.5816,
+      "step": 1570
+    },
+    {
+      "epoch": 0.23634119890804384,
+      "grad_norm": 0.3856000006198883,
+      "learning_rate": 0.0004979129904811176,
+      "loss": 7.5489,
+      "step": 1580
+    },
+    {
+      "epoch": 0.23783702928087955,
+      "grad_norm": 0.42172351479530334,
+      "learning_rate": 0.0004978742183477206,
+      "loss": 7.5819,
+      "step": 1590
+    },
+    {
+      "epoch": 0.23933285965371526,
+      "grad_norm": 0.3501926064491272,
+      "learning_rate": 0.0004978350909076295,
+      "loss": 7.5846,
+      "step": 1600
+    },
+    {
+      "epoch": 0.240828690026551,
+      "grad_norm": 0.39298102259635925,
+      "learning_rate": 0.0004977956082169303,
+      "loss": 7.5995,
+      "step": 1610
+    },
+    {
+      "epoch": 0.2423245203993867,
+      "grad_norm": 0.430789977312088,
+      "learning_rate": 0.0004977557703322178,
+      "loss": 7.6124,
+      "step": 1620
+    },
+    {
+      "epoch": 0.24382035077222242,
+      "grad_norm": 0.4584248661994934,
+      "learning_rate": 0.0004977155773105965,
+      "loss": 7.5875,
+      "step": 1630
+    },
+    {
+      "epoch": 0.24531618114505815,
+      "grad_norm": 0.39846205711364746,
+      "learning_rate": 0.0004976750292096796,
+      "loss": 7.6027,
+      "step": 1640
+    },
+    {
+      "epoch": 0.24681201151789386,
+      "grad_norm": 0.37316691875457764,
+      "learning_rate": 0.0004976341260875894,
+      "loss": 7.6182,
+      "step": 1650
+    },
+    {
+      "epoch": 0.2483078418907296,
+      "grad_norm": 0.3936406373977661,
+      "learning_rate": 0.000497592868002957,
+      "loss": 7.5557,
+      "step": 1660
+    },
+    {
+      "epoch": 0.2498036722635653,
+      "grad_norm": 0.337677538394928,
+      "learning_rate": 0.0004975512550149224,
+      "loss": 7.5995,
+      "step": 1670
+    },
+    {
+      "epoch": 0.25129950263640105,
+      "grad_norm": 0.31229522824287415,
+      "learning_rate": 0.0004975092871831343,
+      "loss": 7.5816,
+      "step": 1680
+    },
+    {
+      "epoch": 0.25279533300923673,
+      "grad_norm": 0.31100282073020935,
+      "learning_rate": 0.00049746696456775,
+      "loss": 7.5957,
+      "step": 1690
+    },
+    {
+      "epoch": 0.25429116338207247,
+      "grad_norm": 0.3856932520866394,
+      "learning_rate": 0.0004974242872294354,
+      "loss": 7.6212,
+      "step": 1700
+    },
+    {
+      "epoch": 0.2557869937549082,
+      "grad_norm": 0.34433454275131226,
+      "learning_rate": 0.000497381255229365,
+      "loss": 7.5182,
+      "step": 1710
+    },
+    {
+      "epoch": 0.2572828241277439,
+      "grad_norm": 0.3633713722229004,
+      "learning_rate": 0.0004973378686292211,
+      "loss": 7.619,
+      "step": 1720
+    },
+    {
+      "epoch": 0.25877865450057963,
+      "grad_norm": 0.4015657901763916,
+      "learning_rate": 0.0004972941274911952,
+      "loss": 7.6097,
+      "step": 1730
+    },
+    {
+      "epoch": 0.26027448487341537,
+      "grad_norm": 0.36401477456092834,
+      "learning_rate": 0.0004972500318779863,
+      "loss": 7.586,
+      "step": 1740
+    },
+    {
+      "epoch": 0.26177031524625105,
+      "grad_norm": 0.42530539631843567,
+      "learning_rate": 0.0004972055818528017,
+      "loss": 7.5906,
+      "step": 1750
+    },
+    {
+      "epoch": 0.2632661456190868,
+      "grad_norm": 0.5568501353263855,
+      "learning_rate": 0.0004971607774793569,
+      "loss": 7.5886,
+      "step": 1760
+    },
+    {
+      "epoch": 0.2647619759919225,
+      "grad_norm": 0.33592841029167175,
+      "learning_rate": 0.0004971156188218749,
+      "loss": 7.6141,
+      "step": 1770
+    },
+    {
+      "epoch": 0.2662578063647582,
+      "grad_norm": 0.48852601647377014,
+      "learning_rate": 0.0004970701059450872,
+      "loss": 7.5809,
+      "step": 1780
+    },
+    {
+      "epoch": 0.26775363673759395,
+      "grad_norm": 0.33326566219329834,
+      "learning_rate": 0.0004970242389142322,
+      "loss": 7.6085,
+      "step": 1790
+    },
+    {
+      "epoch": 0.2692494671104297,
+      "grad_norm": 0.301401287317276,
+      "learning_rate": 0.0004969780177950568,
+      "loss": 7.583,
+      "step": 1800
+    },
+    {
+      "epoch": 0.2707452974832654,
+      "grad_norm": 0.3466792702674866,
+      "learning_rate": 0.0004969314426538147,
+      "loss": 7.5854,
+      "step": 1810
+    },
+    {
+      "epoch": 0.2722411278561011,
+      "grad_norm": 0.3221440017223358,
+      "learning_rate": 0.0004968845135572677,
+      "loss": 7.5701,
+      "step": 1820
+    },
+    {
+      "epoch": 0.27373695822893684,
+      "grad_norm": 0.35062047839164734,
+      "learning_rate": 0.0004968372305726846,
+      "loss": 7.5589,
+      "step": 1830
+    },
+    {
+      "epoch": 0.2752327886017726,
+      "grad_norm": 0.3333365321159363,
+      "learning_rate": 0.0004967895937678416,
+      "loss": 7.5638,
+      "step": 1840
+    },
+    {
+      "epoch": 0.27672861897460826,
+      "grad_norm": 0.3974228501319885,
+      "learning_rate": 0.0004967416032110219,
+      "loss": 7.5589,
+      "step": 1850
+    },
+    {
+      "epoch": 0.278224449347444,
+      "grad_norm": 0.3333839178085327,
+      "learning_rate": 0.0004966932589710161,
+      "loss": 7.5926,
+      "step": 1860
+    },
+    {
+      "epoch": 0.27972027972027974,
+      "grad_norm": 0.3255492150783539,
+      "learning_rate": 0.0004966445611171212,
+      "loss": 7.595,
+      "step": 1870
+    },
+    {
+      "epoch": 0.2812161100931154,
+      "grad_norm": 0.3025016486644745,
+      "learning_rate": 0.0004965955097191419,
+      "loss": 7.6292,
+      "step": 1880
+    },
+    {
+      "epoch": 0.28271194046595116,
+      "grad_norm": 0.3485715985298157,
+      "learning_rate": 0.0004965461048473889,
+      "loss": 7.575,
+      "step": 1890
+    },
+    {
+      "epoch": 0.2842077708387869,
+      "grad_norm": 0.34214696288108826,
+      "learning_rate": 0.00049649634657268,
+      "loss": 7.5899,
+      "step": 1900
+    },
+    {
+      "epoch": 0.2857036012116226,
+      "grad_norm": 0.2842009663581848,
+      "learning_rate": 0.0004964462349663395,
+      "loss": 7.6015,
+      "step": 1910
+    },
+    {
+      "epoch": 0.2871994315844583,
+      "grad_norm": 0.41511401534080505,
+      "learning_rate": 0.0004963957701001982,
+      "loss": 7.5954,
+      "step": 1920
+    },
+    {
+      "epoch": 0.28869526195729406,
+      "grad_norm": 0.3453192412853241,
+      "learning_rate": 0.000496344952046593,
+      "loss": 7.5533,
+      "step": 1930
+    },
+    {
+      "epoch": 0.29019109233012974,
+      "grad_norm": 0.3092595338821411,
+      "learning_rate": 0.0004962937808783675,
+      "loss": 7.5903,
+      "step": 1940
+    },
+    {
+      "epoch": 0.2916869227029655,
+      "grad_norm": 0.3328978419303894,
+      "learning_rate": 0.0004962422566688711,
+      "loss": 7.6138,
+      "step": 1950
+    },
+    {
+      "epoch": 0.2931827530758012,
+      "grad_norm": 0.34327930212020874,
+      "learning_rate": 0.0004961903794919595,
+      "loss": 7.578,
+      "step": 1960
+    },
+    {
+      "epoch": 0.2946785834486369,
+      "grad_norm": 0.3706710934638977,
+      "learning_rate": 0.0004961381494219941,
+      "loss": 7.5915,
+      "step": 1970
+    },
+    {
+      "epoch": 0.29617441382147264,
+      "grad_norm": 0.33585718274116516,
+      "learning_rate": 0.0004960855665338424,
+      "loss": 7.6123,
+      "step": 1980
+    },
+    {
+      "epoch": 0.2976702441943084,
+      "grad_norm": 0.34515875577926636,
+      "learning_rate": 0.0004960326309028775,
+      "loss": 7.5872,
+      "step": 1990
+    },
+    {
+      "epoch": 0.2991660745671441,
+      "grad_norm": 0.36676159501075745,
+      "learning_rate": 0.000495979342604978,
+      "loss": 7.5539,
+      "step": 2000
+    },
+    {
+      "epoch": 0.2991660745671441,
+      "eval_loss": 7.588481426239014,
+      "eval_runtime": 16.541,
+      "eval_samples_per_second": 6.529,
+      "eval_steps_per_second": 0.846,
+      "step": 2000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.4943906767782748e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null