Training in progress, step 2000, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +715 -6

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02fd35dd30945d2a331fcc4229d682044d2333729fe2d642c02c88a4f8950290
 size 1583480280

 version https://git-lfs.github.com/spec/v1
+oid sha256:232d8b560f57f2f3e0c9ea82b2971aa7233a2f0da7541ae72a69d5be5f7c4c0f
 size 1583480280

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2bfd5531726b88f6d30b16379bf8128025316616e6a5fdc46f82548ddef8b2f4
 size 3166958572

 version https://git-lfs.github.com/spec/v1
+oid sha256:7399efa78dd93a8dfe84c65853be63e1808d3a804e7c5d366051c082ddb0a0bf
 size 3166958572

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:172c6e9da1198fecb1104ae5588ac154055d22275bb62749b67f1d60379ff0a7
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:585a7c86a661a1c7d3b02c426dea20960cb9ee4b64c7bdd75f0ac4d7fe0b9d2f
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14ae2a2128444abab378aa06c09a61a84665f758fcc19fc46f5789b0bc1b5665
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:b825a5491bdbff2d6e4a9c3f7df2b4cc6e7db1d9df411de1f4114308ac5fa922
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68aec417c91400a5fbe9c98d7447dabd74ed3b0812272a5f21d640985e919bad
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:a368adbb52e795449b24532caf9095d64cedebc3cc6ea07dd29c30f5c86c5a8b
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-  "best_global_step": 1000,
-  "best_metric": 1.5370113849639893,
-  "best_model_checkpoint": "hieptt/vietnamese-correction-ft/checkpoint-1000",
-  "epoch": 0.024186136506554445,
   "eval_steps": 1000,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -724,6 +724,715 @@
       "eval_samples_per_second": 71.621,
       "eval_steps_per_second": 0.56,
       "step": 1000
     }
   ],
   "logging_steps": 10,
@@ -752,7 +1461,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.054005594049741e+16,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
+  "best_global_step": 2000,
+  "best_metric": 1.5096291303634644,
+  "best_model_checkpoint": "hieptt/vietnamese-correction-ft/checkpoint-2000",
+  "epoch": 0.04837227301310889,
   "eval_steps": 1000,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 71.621,
       "eval_steps_per_second": 0.56,
       "step": 1000
+    },
+    {
+      "epoch": 0.024427997871619988,
+      "grad_norm": 0.48966652154922485,
+      "learning_rate": 4.999999583799493e-05,
+      "loss": 1.5447,
+      "step": 1010
+    },
+    {
+      "epoch": 0.02466985923668553,
+      "grad_norm": 0.6714717149734497,
+      "learning_rate": 4.999998145081868e-05,
+      "loss": 1.5504,
+      "step": 1020
+    },
+    {
+      "epoch": 0.024911720601751078,
+      "grad_norm": 0.7765893340110779,
+      "learning_rate": 4.999995678709439e-05,
+      "loss": 1.5565,
+      "step": 1030
+    },
+    {
+      "epoch": 0.02515358196681662,
+      "grad_norm": 0.4742671549320221,
+      "learning_rate": 4.999992184683219e-05,
+      "loss": 1.5453,
+      "step": 1040
+    },
+    {
+      "epoch": 0.025395443331882164,
+      "grad_norm": 0.5306811332702637,
+      "learning_rate": 4.999987663004646e-05,
+      "loss": 1.5512,
+      "step": 1050
+    },
+    {
+      "epoch": 0.02563730469694771,
+      "grad_norm": 0.43038302659988403,
+      "learning_rate": 4.9999821136755766e-05,
+      "loss": 1.5513,
+      "step": 1060
+    },
+    {
+      "epoch": 0.025879166062013254,
+      "grad_norm": 0.788059413433075,
+      "learning_rate": 4.9999755366982925e-05,
+      "loss": 1.5326,
+      "step": 1070
+    },
+    {
+      "epoch": 0.026121027427078797,
+      "grad_norm": 0.4768883287906647,
+      "learning_rate": 4.999967932075499e-05,
+      "loss": 1.5526,
+      "step": 1080
+    },
+    {
+      "epoch": 0.026362888792144344,
+      "grad_norm": 0.383400559425354,
+      "learning_rate": 4.99995929981032e-05,
+      "loss": 1.5518,
+      "step": 1090
+    },
+    {
+      "epoch": 0.026604750157209887,
+      "grad_norm": 0.5224942564964294,
+      "learning_rate": 4.999949639906304e-05,
+      "loss": 1.5495,
+      "step": 1100
+    },
+    {
+      "epoch": 0.02684661152227543,
+      "grad_norm": 0.4375554025173187,
+      "learning_rate": 4.999938952367422e-05,
+      "loss": 1.5521,
+      "step": 1110
+    },
+    {
+      "epoch": 0.027088472887340977,
+      "grad_norm": 0.44675424695014954,
+      "learning_rate": 4.999927237198069e-05,
+      "loss": 1.5475,
+      "step": 1120
+    },
+    {
+      "epoch": 0.02733033425240652,
+      "grad_norm": 0.5646783709526062,
+      "learning_rate": 4.999914494403059e-05,
+      "loss": 1.539,
+      "step": 1130
+    },
+    {
+      "epoch": 0.027572195617472064,
+      "grad_norm": 0.5079995393753052,
+      "learning_rate": 4.9999007239876294e-05,
+      "loss": 1.5437,
+      "step": 1140
+    },
+    {
+      "epoch": 0.02781405698253761,
+      "grad_norm": 0.4094880223274231,
+      "learning_rate": 4.999885925957443e-05,
+      "loss": 1.5354,
+      "step": 1150
+    },
+    {
+      "epoch": 0.028055918347603154,
+      "grad_norm": 0.4403417408466339,
+      "learning_rate": 4.99987010031858e-05,
+      "loss": 1.5445,
+      "step": 1160
+    },
+    {
+      "epoch": 0.028297779712668697,
+      "grad_norm": 0.3404127061367035,
+      "learning_rate": 4.9998532470775484e-05,
+      "loss": 1.5321,
+      "step": 1170
+    },
+    {
+      "epoch": 0.028539641077734244,
+      "grad_norm": 0.4042949378490448,
+      "learning_rate": 4.999835366241274e-05,
+      "loss": 1.5442,
+      "step": 1180
+    },
+    {
+      "epoch": 0.028781502442799787,
+      "grad_norm": 0.3902073800563812,
+      "learning_rate": 4.9998164578171076e-05,
+      "loss": 1.5358,
+      "step": 1190
+    },
+    {
+      "epoch": 0.02902336380786533,
+      "grad_norm": 0.4594404399394989,
+      "learning_rate": 4.999796521812822e-05,
+      "loss": 1.5282,
+      "step": 1200
+    },
+    {
+      "epoch": 0.029265225172930877,
+      "grad_norm": 0.4223099648952484,
+      "learning_rate": 4.999775558236611e-05,
+      "loss": 1.5388,
+      "step": 1210
+    },
+    {
+      "epoch": 0.02950708653799642,
+      "grad_norm": 0.6008235812187195,
+      "learning_rate": 4.999753567097094e-05,
+      "loss": 1.5392,
+      "step": 1220
+    },
+    {
+      "epoch": 0.029748947903061963,
+      "grad_norm": 0.5003873705863953,
+      "learning_rate": 4.9997305484033085e-05,
+      "loss": 1.5434,
+      "step": 1230
+    },
+    {
+      "epoch": 0.02999080926812751,
+      "grad_norm": 0.5244422554969788,
+      "learning_rate": 4.999706502164718e-05,
+      "loss": 1.5481,
+      "step": 1240
+    },
+    {
+      "epoch": 0.030232670633193053,
+      "grad_norm": 0.36595821380615234,
+      "learning_rate": 4.999681428391207e-05,
+      "loss": 1.544,
+      "step": 1250
+    },
+    {
+      "epoch": 0.030474531998258596,
+      "grad_norm": 0.5237463116645813,
+      "learning_rate": 4.999655327093081e-05,
+      "loss": 1.5377,
+      "step": 1260
+    },
+    {
+      "epoch": 0.030716393363324143,
+      "grad_norm": 0.4382268190383911,
+      "learning_rate": 4.999628198281072e-05,
+      "loss": 1.5382,
+      "step": 1270
+    },
+    {
+      "epoch": 0.030958254728389686,
+      "grad_norm": 0.5116040706634521,
+      "learning_rate": 4.999600041966328e-05,
+      "loss": 1.5383,
+      "step": 1280
+    },
+    {
+      "epoch": 0.031200116093455233,
+      "grad_norm": 0.3517632782459259,
+      "learning_rate": 4.999570858160426e-05,
+      "loss": 1.5284,
+      "step": 1290
+    },
+    {
+      "epoch": 0.03144197745852077,
+      "grad_norm": 0.46076980233192444,
+      "learning_rate": 4.999540646875361e-05,
+      "loss": 1.5347,
+      "step": 1300
+    },
+    {
+      "epoch": 0.03168383882358632,
+      "grad_norm": 0.6168367266654968,
+      "learning_rate": 4.9995094081235524e-05,
+      "loss": 1.5387,
+      "step": 1310
+    },
+    {
+      "epoch": 0.031925700188651866,
+      "grad_norm": 0.40505921840667725,
+      "learning_rate": 4.9994771419178396e-05,
+      "loss": 1.5375,
+      "step": 1320
+    },
+    {
+      "epoch": 0.03216756155371741,
+      "grad_norm": 0.4371592104434967,
+      "learning_rate": 4.999443848271489e-05,
+      "loss": 1.5363,
+      "step": 1330
+    },
+    {
+      "epoch": 0.03240942291878295,
+      "grad_norm": 0.518997311592102,
+      "learning_rate": 4.9994095271981835e-05,
+      "loss": 1.5434,
+      "step": 1340
+    },
+    {
+      "epoch": 0.032651284283848496,
+      "grad_norm": 0.8396134972572327,
+      "learning_rate": 4.999374178712032e-05,
+      "loss": 1.5324,
+      "step": 1350
+    },
+    {
+      "epoch": 0.03289314564891404,
+      "grad_norm": 0.41988566517829895,
+      "learning_rate": 4.999337802827566e-05,
+      "loss": 1.5314,
+      "step": 1360
+    },
+    {
+      "epoch": 0.03313500701397959,
+      "grad_norm": 0.3672787845134735,
+      "learning_rate": 4.999300399559738e-05,
+      "loss": 1.525,
+      "step": 1370
+    },
+    {
+      "epoch": 0.03337686837904513,
+      "grad_norm": 0.4160480499267578,
+      "learning_rate": 4.999261968923922e-05,
+      "loss": 1.5298,
+      "step": 1380
+    },
+    {
+      "epoch": 0.033618729744110676,
+      "grad_norm": 0.5236791372299194,
+      "learning_rate": 4.999222510935915e-05,
+      "loss": 1.5306,
+      "step": 1390
+    },
+    {
+      "epoch": 0.03386059110917622,
+      "grad_norm": 0.4650459587574005,
+      "learning_rate": 4.9991820256119385e-05,
+      "loss": 1.535,
+      "step": 1400
+    },
+    {
+      "epoch": 0.03410245247424176,
+      "grad_norm": 0.39175882935523987,
+      "learning_rate": 4.999140512968634e-05,
+      "loss": 1.5302,
+      "step": 1410
+    },
+    {
+      "epoch": 0.03434431383930731,
+      "grad_norm": 0.35965096950531006,
+      "learning_rate": 4.999097973023065e-05,
+      "loss": 1.5236,
+      "step": 1420
+    },
+    {
+      "epoch": 0.034586175204372856,
+      "grad_norm": 0.3973771333694458,
+      "learning_rate": 4.999054405792718e-05,
+      "loss": 1.5261,
+      "step": 1430
+    },
+    {
+      "epoch": 0.0348280365694384,
+      "grad_norm": 0.5168911218643188,
+      "learning_rate": 4.999009811295503e-05,
+      "loss": 1.5289,
+      "step": 1440
+    },
+    {
+      "epoch": 0.03506989793450394,
+      "grad_norm": 0.4921228587627411,
+      "learning_rate": 4.998964189549751e-05,
+      "loss": 1.537,
+      "step": 1450
+    },
+    {
+      "epoch": 0.035311759299569485,
+      "grad_norm": 0.559264600276947,
+      "learning_rate": 4.9989175405742135e-05,
+      "loss": 1.5322,
+      "step": 1460
+    },
+    {
+      "epoch": 0.03555362066463503,
+      "grad_norm": 0.5126819014549255,
+      "learning_rate": 4.998869864388068e-05,
+      "loss": 1.5369,
+      "step": 1470
+    },
+    {
+      "epoch": 0.03579548202970058,
+      "grad_norm": 0.4884808361530304,
+      "learning_rate": 4.998821161010912e-05,
+      "loss": 1.5359,
+      "step": 1480
+    },
+    {
+      "epoch": 0.03603734339476612,
+      "grad_norm": 1.4691296815872192,
+      "learning_rate": 4.9987714304627655e-05,
+      "loss": 1.529,
+      "step": 1490
+    },
+    {
+      "epoch": 0.036279204759831665,
+      "grad_norm": 23.75047492980957,
+      "learning_rate": 4.9987206727640703e-05,
+      "loss": 1.9818,
+      "step": 1500
+    },
+    {
+      "epoch": 0.03652106612489721,
+      "grad_norm": 1.1937427520751953,
+      "learning_rate": 4.998668887935691e-05,
+      "loss": 2.3099,
+      "step": 1510
+    },
+    {
+      "epoch": 0.03676292748996275,
+      "grad_norm": 1.2184133529663086,
+      "learning_rate": 4.998616075998916e-05,
+      "loss": 1.9202,
+      "step": 1520
+    },
+    {
+      "epoch": 0.037004788855028295,
+      "grad_norm": 0.720676839351654,
+      "learning_rate": 4.9985622369754525e-05,
+      "loss": 1.8545,
+      "step": 1530
+    },
+    {
+      "epoch": 0.037246650220093845,
+      "grad_norm": 0.4644893705844879,
+      "learning_rate": 4.998507370887433e-05,
+      "loss": 1.6034,
+      "step": 1540
+    },
+    {
+      "epoch": 0.03748851158515939,
+      "grad_norm": 0.6309983134269714,
+      "learning_rate": 4.9984514777574085e-05,
+      "loss": 1.5414,
+      "step": 1550
+    },
+    {
+      "epoch": 0.03773037295022493,
+      "grad_norm": 0.3813267648220062,
+      "learning_rate": 4.998394557608358e-05,
+      "loss": 1.5335,
+      "step": 1560
+    },
+    {
+      "epoch": 0.037972234315290475,
+      "grad_norm": 0.7492319941520691,
+      "learning_rate": 4.998336610463677e-05,
+      "loss": 1.5299,
+      "step": 1570
+    },
+    {
+      "epoch": 0.03821409568035602,
+      "grad_norm": 0.5672308802604675,
+      "learning_rate": 4.998277636347186e-05,
+      "loss": 1.5323,
+      "step": 1580
+    },
+    {
+      "epoch": 0.03845595704542156,
+      "grad_norm": 0.3646668791770935,
+      "learning_rate": 4.998217635283127e-05,
+      "loss": 1.525,
+      "step": 1590
+    },
+    {
+      "epoch": 0.03869781841048711,
+      "grad_norm": 0.46738356351852417,
+      "learning_rate": 4.998156607296163e-05,
+      "loss": 1.5258,
+      "step": 1600
+    },
+    {
+      "epoch": 0.038939679775552655,
+      "grad_norm": 0.413133442401886,
+      "learning_rate": 4.998094552411382e-05,
+      "loss": 1.5317,
+      "step": 1610
+    },
+    {
+      "epoch": 0.0391815411406182,
+      "grad_norm": 0.9869425892829895,
+      "learning_rate": 4.9980314706542916e-05,
+      "loss": 1.5286,
+      "step": 1620
+    },
+    {
+      "epoch": 0.03942340250568374,
+      "grad_norm": 0.44352006912231445,
+      "learning_rate": 4.997967362050824e-05,
+      "loss": 1.518,
+      "step": 1630
+    },
+    {
+      "epoch": 0.039665263870749284,
+      "grad_norm": 0.33023595809936523,
+      "learning_rate": 4.997902226627329e-05,
+      "loss": 1.5239,
+      "step": 1640
+    },
+    {
+      "epoch": 0.03990712523581483,
+      "grad_norm": 0.5091515779495239,
+      "learning_rate": 4.997836064410583e-05,
+      "loss": 1.524,
+      "step": 1650
+    },
+    {
+      "epoch": 0.04014898660088038,
+      "grad_norm": 0.42869803309440613,
+      "learning_rate": 4.997768875427782e-05,
+      "loss": 1.5244,
+      "step": 1660
+    },
+    {
+      "epoch": 0.04039084796594592,
+      "grad_norm": 0.40443161129951477,
+      "learning_rate": 4.997700659706545e-05,
+      "loss": 1.5201,
+      "step": 1670
+    },
+    {
+      "epoch": 0.040632709331011464,
+      "grad_norm": 0.37971532344818115,
+      "learning_rate": 4.997631417274914e-05,
+      "loss": 1.5283,
+      "step": 1680
+    },
+    {
+      "epoch": 0.04087457069607701,
+      "grad_norm": 0.4408821165561676,
+      "learning_rate": 4.997561148161351e-05,
+      "loss": 1.5241,
+      "step": 1690
+    },
+    {
+      "epoch": 0.04111643206114255,
+      "grad_norm": 0.5017372965812683,
+      "learning_rate": 4.997489852394741e-05,
+      "loss": 1.519,
+      "step": 1700
+    },
+    {
+      "epoch": 0.0413582934262081,
+      "grad_norm": 0.3806293308734894,
+      "learning_rate": 4.997417530004391e-05,
+      "loss": 1.5278,
+      "step": 1710
+    },
+    {
+      "epoch": 0.041600154791273644,
+      "grad_norm": 1.141066312789917,
+      "learning_rate": 4.9973441810200306e-05,
+      "loss": 1.5174,
+      "step": 1720
+    },
+    {
+      "epoch": 0.04184201615633919,
+      "grad_norm": 0.3906162977218628,
+      "learning_rate": 4.997269805471809e-05,
+      "loss": 1.519,
+      "step": 1730
+    },
+    {
+      "epoch": 0.04208387752140473,
+      "grad_norm": 0.5911729335784912,
+      "learning_rate": 4.997194403390302e-05,
+      "loss": 1.536,
+      "step": 1740
+    },
+    {
+      "epoch": 0.042325738886470274,
+      "grad_norm": 0.6229117512702942,
+      "learning_rate": 4.9971179748065024e-05,
+      "loss": 1.5263,
+      "step": 1750
+    },
+    {
+      "epoch": 0.04256760025153582,
+      "grad_norm": 0.4941336512565613,
+      "learning_rate": 4.997040519751828e-05,
+      "loss": 1.5202,
+      "step": 1760
+    },
+    {
+      "epoch": 0.04280946161660137,
+      "grad_norm": 0.6714040040969849,
+      "learning_rate": 4.996962038258117e-05,
+      "loss": 1.5184,
+      "step": 1770
+    },
+    {
+      "epoch": 0.04305132298166691,
+      "grad_norm": 0.4575778841972351,
+      "learning_rate": 4.9968825303576314e-05,
+      "loss": 1.5265,
+      "step": 1780
+    },
+    {
+      "epoch": 0.043293184346732454,
+      "grad_norm": 0.3734686076641083,
+      "learning_rate": 4.996801996083052e-05,
+      "loss": 1.5223,
+      "step": 1790
+    },
+    {
+      "epoch": 0.043535045711798,
+      "grad_norm": 0.6092630624771118,
+      "learning_rate": 4.996720435467485e-05,
+      "loss": 1.5184,
+      "step": 1800
+    },
+    {
+      "epoch": 0.04377690707686354,
+      "grad_norm": 0.31611162424087524,
+      "learning_rate": 4.9966378485444567e-05,
+      "loss": 1.5201,
+      "step": 1810
+    },
+    {
+      "epoch": 0.04401876844192908,
+      "grad_norm": 0.4829297661781311,
+      "learning_rate": 4.9965542353479144e-05,
+      "loss": 1.519,
+      "step": 1820
+    },
+    {
+      "epoch": 0.044260629806994634,
+      "grad_norm": 0.4227820634841919,
+      "learning_rate": 4.9964695959122294e-05,
+      "loss": 1.5147,
+      "step": 1830
+    },
+    {
+      "epoch": 0.04450249117206018,
+      "grad_norm": 0.4444202184677124,
+      "learning_rate": 4.9963839302721936e-05,
+      "loss": 1.5241,
+      "step": 1840
+    },
+    {
+      "epoch": 0.04474435253712572,
+      "grad_norm": 0.42105644941329956,
+      "learning_rate": 4.99629723846302e-05,
+      "loss": 1.5248,
+      "step": 1850
+    },
+    {
+      "epoch": 0.04498621390219126,
+      "grad_norm": 0.34201350808143616,
+      "learning_rate": 4.996209520520346e-05,
+      "loss": 1.5097,
+      "step": 1860
+    },
+    {
+      "epoch": 0.045228075267256806,
+      "grad_norm": 0.410153865814209,
+      "learning_rate": 4.9961207764802275e-05,
+      "loss": 1.5191,
+      "step": 1870
+    },
+    {
+      "epoch": 0.04546993663232235,
+      "grad_norm": 0.38393330574035645,
+      "learning_rate": 4.996031006379145e-05,
+      "loss": 1.5119,
+      "step": 1880
+    },
+    {
+      "epoch": 0.0457117979973879,
+      "grad_norm": 0.3539496958255768,
+      "learning_rate": 4.9959402102539986e-05,
+      "loss": 1.5105,
+      "step": 1890
+    },
+    {
+      "epoch": 0.04595365936245344,
+      "grad_norm": 0.8583787679672241,
+      "learning_rate": 4.995848388142112e-05,
+      "loss": 1.5276,
+      "step": 1900
+    },
+    {
+      "epoch": 0.046195520727518986,
+      "grad_norm": 0.3652508854866028,
+      "learning_rate": 4.995755540081229e-05,
+      "loss": 1.5133,
+      "step": 1910
+    },
+    {
+      "epoch": 0.04643738209258453,
+      "grad_norm": 0.7512590885162354,
+      "learning_rate": 4.995661666109518e-05,
+      "loss": 1.5167,
+      "step": 1920
+    },
+    {
+      "epoch": 0.04667924345765007,
+      "grad_norm": 0.4336129128932953,
+      "learning_rate": 4.9955667662655636e-05,
+      "loss": 1.5171,
+      "step": 1930
+    },
+    {
+      "epoch": 0.046921104822715616,
+      "grad_norm": 0.4716378450393677,
+      "learning_rate": 4.995470840588379e-05,
+      "loss": 1.5336,
+      "step": 1940
+    },
+    {
+      "epoch": 0.047162966187781166,
+      "grad_norm": 0.3509134352207184,
+      "learning_rate": 4.995373889117393e-05,
+      "loss": 1.5282,
+      "step": 1950
+    },
+    {
+      "epoch": 0.04740482755284671,
+      "grad_norm": 0.6889932155609131,
+      "learning_rate": 4.99527591189246e-05,
+      "loss": 1.515,
+      "step": 1960
+    },
+    {
+      "epoch": 0.04764668891791225,
+      "grad_norm": 0.37906014919281006,
+      "learning_rate": 4.995176908953854e-05,
+      "loss": 1.5097,
+      "step": 1970
+    },
+    {
+      "epoch": 0.047888550282977796,
+      "grad_norm": 0.4350769519805908,
+      "learning_rate": 4.995076880342271e-05,
+      "loss": 1.5081,
+      "step": 1980
+    },
+    {
+      "epoch": 0.04813041164804334,
+      "grad_norm": 0.33059579133987427,
+      "learning_rate": 4.994975826098831e-05,
+      "loss": 1.5157,
+      "step": 1990
+    },
+    {
+      "epoch": 0.04837227301310889,
+      "grad_norm": 0.4527088701725006,
+      "learning_rate": 4.994873746265073e-05,
+      "loss": 1.5202,
+      "step": 2000
+    },
+    {
+      "epoch": 0.04837227301310889,
+      "eval_loss": 1.5096291303634644,
+      "eval_runtime": 1228.5547,
+      "eval_sacrebleu": 96.66770045228822,
+      "eval_samples_per_second": 82.248,
+      "eval_steps_per_second": 0.643,
+      "step": 2000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.116679707171226e+16,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null