Training in progress, step 572, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +1013 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b3c14f2a7cefaed118eec467a328dafde9262b2083b6c3d3b13f56930546ab7
 size 5327744

 version https://git-lfs.github.com/spec/v1
+oid sha256:83da110c38b42c8f26332874e8bb627d6ead7522e50fc51e39079d4f9c35796a
 size 5327744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eef80311cf6487d9649388953f0b17a845c1d2a58891b6fd75a929528a6c0196
 size 2857850

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b3646603ee02c3a1dc6142029b48d761a5d129039bb1a5936f191072a5323f6
 size 2857850

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:269d8e525caae1cce17fdcc8f66b6c9fe5c358eb983e1f28c6bd81602e6038b2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:691d0261780df62255146217a0fa66594f0de41a9521d0f54f49d2368cb05292
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ee1869594529a13e34b89e7bfe7be5bc83ad15c1d5f0963d178cc0ef9c1351e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:63ce17dd2c32e1042039dfe648c482c9ff0032ac68df46007019bf1f153ddc3e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.75032794053345,
   "eval_steps": 143,
-  "global_step": 429,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3042,6 +3042,1015 @@
       "eval_samples_per_second": 110.64,
       "eval_steps_per_second": 55.55,
       "step": 429
     }
   ],
   "logging_steps": 1,
@@ -3056,12 +4065,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1821492098629632.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0008745080891999,
   "eval_steps": 143,
+  "global_step": 572,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 110.64,
       "eval_steps_per_second": 55.55,
       "step": 429
+    },
+    {
+      "epoch": 0.7520769567118496,
+      "grad_norm": 6.074078559875488,
+      "learning_rate": 2.9884712134280324e-05,
+      "loss": 6.6575,
+      "step": 430
+    },
+    {
+      "epoch": 0.7538259728902492,
+      "grad_norm": 6.187041759490967,
+      "learning_rate": 2.948723496423379e-05,
+      "loss": 7.9171,
+      "step": 431
+    },
+    {
+      "epoch": 0.7555749890686488,
+      "grad_norm": 6.312707901000977,
+      "learning_rate": 2.909196119613218e-05,
+      "loss": 8.3438,
+      "step": 432
+    },
+    {
+      "epoch": 0.7573240052470486,
+      "grad_norm": 5.885359287261963,
+      "learning_rate": 2.8698903181597127e-05,
+      "loss": 7.2738,
+      "step": 433
+    },
+    {
+      "epoch": 0.7590730214254482,
+      "grad_norm": 6.485308647155762,
+      "learning_rate": 2.8308073203011663e-05,
+      "loss": 8.3997,
+      "step": 434
+    },
+    {
+      "epoch": 0.7608220376038478,
+      "grad_norm": 5.899681091308594,
+      "learning_rate": 2.7919483473136676e-05,
+      "loss": 6.9346,
+      "step": 435
+    },
+    {
+      "epoch": 0.7625710537822474,
+      "grad_norm": 6.48416805267334,
+      "learning_rate": 2.753314613472906e-05,
+      "loss": 8.0168,
+      "step": 436
+    },
+    {
+      "epoch": 0.7643200699606472,
+      "grad_norm": 6.464148998260498,
+      "learning_rate": 2.7149073260162416e-05,
+      "loss": 7.6221,
+      "step": 437
+    },
+    {
+      "epoch": 0.7660690861390468,
+      "grad_norm": 6.158417224884033,
+      "learning_rate": 2.6767276851049816e-05,
+      "loss": 8.3056,
+      "step": 438
+    },
+    {
+      "epoch": 0.7678181023174464,
+      "grad_norm": 6.386941432952881,
+      "learning_rate": 2.6387768837868597e-05,
+      "loss": 7.7409,
+      "step": 439
+    },
+    {
+      "epoch": 0.7695671184958461,
+      "grad_norm": 5.983461380004883,
+      "learning_rate": 2.6010561079587813e-05,
+      "loss": 8.2641,
+      "step": 440
+    },
+    {
+      "epoch": 0.7713161346742458,
+      "grad_norm": 6.103724479675293,
+      "learning_rate": 2.5635665363297422e-05,
+      "loss": 7.4958,
+      "step": 441
+    },
+    {
+      "epoch": 0.7730651508526454,
+      "grad_norm": 6.250931739807129,
+      "learning_rate": 2.5263093403840142e-05,
+      "loss": 8.1695,
+      "step": 442
+    },
+    {
+      "epoch": 0.774814167031045,
+      "grad_norm": 6.299392223358154,
+      "learning_rate": 2.4892856843445322e-05,
+      "loss": 8.1588,
+      "step": 443
+    },
+    {
+      "epoch": 0.7765631832094447,
+      "grad_norm": 7.157833099365234,
+      "learning_rate": 2.4524967251365026e-05,
+      "loss": 8.4203,
+      "step": 444
+    },
+    {
+      "epoch": 0.7783121993878444,
+      "grad_norm": 6.5352349281311035,
+      "learning_rate": 2.4159436123512735e-05,
+      "loss": 8.4377,
+      "step": 445
+    },
+    {
+      "epoch": 0.780061215566244,
+      "grad_norm": 5.913573265075684,
+      "learning_rate": 2.3796274882103964e-05,
+      "loss": 6.8979,
+      "step": 446
+    },
+    {
+      "epoch": 0.7818102317446436,
+      "grad_norm": 6.062626838684082,
+      "learning_rate": 2.3435494875299314e-05,
+      "loss": 9.2065,
+      "step": 447
+    },
+    {
+      "epoch": 0.7835592479230433,
+      "grad_norm": 6.175731658935547,
+      "learning_rate": 2.3077107376850005e-05,
+      "loss": 7.3399,
+      "step": 448
+    },
+    {
+      "epoch": 0.7853082641014429,
+      "grad_norm": 5.536900997161865,
+      "learning_rate": 2.2721123585745507e-05,
+      "loss": 7.0864,
+      "step": 449
+    },
+    {
+      "epoch": 0.7870572802798426,
+      "grad_norm": 6.569960117340088,
+      "learning_rate": 2.2367554625863497e-05,
+      "loss": 7.0612,
+      "step": 450
+    },
+    {
+      "epoch": 0.7888062964582422,
+      "grad_norm": 6.351140022277832,
+      "learning_rate": 2.2016411545622495e-05,
+      "loss": 7.9918,
+      "step": 451
+    },
+    {
+      "epoch": 0.7905553126366419,
+      "grad_norm": 6.007497787475586,
+      "learning_rate": 2.166770531763633e-05,
+      "loss": 8.2044,
+      "step": 452
+    },
+    {
+      "epoch": 0.7923043288150415,
+      "grad_norm": 6.685032844543457,
+      "learning_rate": 2.132144683837155e-05,
+      "loss": 8.174,
+      "step": 453
+    },
+    {
+      "epoch": 0.7940533449934412,
+      "grad_norm": 5.885077953338623,
+      "learning_rate": 2.0977646927806683e-05,
+      "loss": 7.6945,
+      "step": 454
+    },
+    {
+      "epoch": 0.7958023611718409,
+      "grad_norm": 6.740251064300537,
+      "learning_rate": 2.0636316329094317e-05,
+      "loss": 7.4873,
+      "step": 455
+    },
+    {
+      "epoch": 0.7975513773502405,
+      "grad_norm": 6.132180690765381,
+      "learning_rate": 2.0297465708225238e-05,
+      "loss": 8.0784,
+      "step": 456
+    },
+    {
+      "epoch": 0.7993003935286401,
+      "grad_norm": 5.6954779624938965,
+      "learning_rate": 1.9961105653695266e-05,
+      "loss": 8.4652,
+      "step": 457
+    },
+    {
+      "epoch": 0.8010494097070398,
+      "grad_norm": 6.492379665374756,
+      "learning_rate": 1.962724667617436e-05,
+      "loss": 8.178,
+      "step": 458
+    },
+    {
+      "epoch": 0.8027984258854395,
+      "grad_norm": 6.245090007781982,
+      "learning_rate": 1.929589920817806e-05,
+      "loss": 6.97,
+      "step": 459
+    },
+    {
+      "epoch": 0.8045474420638391,
+      "grad_norm": 5.776219367980957,
+      "learning_rate": 1.896707360374167e-05,
+      "loss": 8.1005,
+      "step": 460
+    },
+    {
+      "epoch": 0.8062964582422387,
+      "grad_norm": 6.456238746643066,
+      "learning_rate": 1.8640780138096513e-05,
+      "loss": 7.4033,
+      "step": 461
+    },
+    {
+      "epoch": 0.8080454744206383,
+      "grad_norm": 6.484393119812012,
+      "learning_rate": 1.8317029007349085e-05,
+      "loss": 8.597,
+      "step": 462
+    },
+    {
+      "epoch": 0.8097944905990381,
+      "grad_norm": 6.799935817718506,
+      "learning_rate": 1.799583032816219e-05,
+      "loss": 6.3503,
+      "step": 463
+    },
+    {
+      "epoch": 0.8115435067774377,
+      "grad_norm": 5.950386047363281,
+      "learning_rate": 1.7677194137439035e-05,
+      "loss": 7.3482,
+      "step": 464
+    },
+    {
+      "epoch": 0.8132925229558373,
+      "grad_norm": 6.03711462020874,
+      "learning_rate": 1.7361130392009407e-05,
+      "loss": 8.2516,
+      "step": 465
+    },
+    {
+      "epoch": 0.8150415391342369,
+      "grad_norm": 6.34808874130249,
+      "learning_rate": 1.7047648968318698e-05,
+      "loss": 8.1899,
+      "step": 466
+    },
+    {
+      "epoch": 0.8167905553126367,
+      "grad_norm": 5.498723030090332,
+      "learning_rate": 1.6736759662119183e-05,
+      "loss": 8.3873,
+      "step": 467
+    },
+    {
+      "epoch": 0.8185395714910363,
+      "grad_norm": 6.472886562347412,
+      "learning_rate": 1.642847218816398e-05,
+      "loss": 8.2182,
+      "step": 468
+    },
+    {
+      "epoch": 0.8202885876694359,
+      "grad_norm": 7.618271827697754,
+      "learning_rate": 1.6122796179903354e-05,
+      "loss": 8.3238,
+      "step": 469
+    },
+    {
+      "epoch": 0.8220376038478356,
+      "grad_norm": 6.079611778259277,
+      "learning_rate": 1.58197411891839e-05,
+      "loss": 7.5876,
+      "step": 470
+    },
+    {
+      "epoch": 0.8237866200262353,
+      "grad_norm": 6.017045497894287,
+      "learning_rate": 1.5519316685949903e-05,
+      "loss": 7.4218,
+      "step": 471
+    },
+    {
+      "epoch": 0.8255356362046349,
+      "grad_norm": 5.659122943878174,
+      "learning_rate": 1.5221532057947419e-05,
+      "loss": 8.3443,
+      "step": 472
+    },
+    {
+      "epoch": 0.8272846523830345,
+      "grad_norm": 7.566473484039307,
+      "learning_rate": 1.4926396610431059e-05,
+      "loss": 7.7792,
+      "step": 473
+    },
+    {
+      "epoch": 0.8290336685614342,
+      "grad_norm": 6.2454729080200195,
+      "learning_rate": 1.4633919565873033e-05,
+      "loss": 8.3766,
+      "step": 474
+    },
+    {
+      "epoch": 0.8307826847398339,
+      "grad_norm": 6.340610980987549,
+      "learning_rate": 1.4344110063675142e-05,
+      "loss": 7.4127,
+      "step": 475
+    },
+    {
+      "epoch": 0.8325317009182335,
+      "grad_norm": 5.981843948364258,
+      "learning_rate": 1.4056977159883012e-05,
+      "loss": 7.7706,
+      "step": 476
+    },
+    {
+      "epoch": 0.8342807170966331,
+      "grad_norm": 6.074410438537598,
+      "learning_rate": 1.3772529826903269e-05,
+      "loss": 7.0402,
+      "step": 477
+    },
+    {
+      "epoch": 0.8360297332750328,
+      "grad_norm": 6.144327163696289,
+      "learning_rate": 1.3490776953223105e-05,
+      "loss": 7.6445,
+      "step": 478
+    },
+    {
+      "epoch": 0.8377787494534324,
+      "grad_norm": 5.650998115539551,
+      "learning_rate": 1.321172734313244e-05,
+      "loss": 8.1351,
+      "step": 479
+    },
+    {
+      "epoch": 0.8395277656318321,
+      "grad_norm": 6.38914680480957,
+      "learning_rate": 1.2935389716448976e-05,
+      "loss": 8.9728,
+      "step": 480
+    },
+    {
+      "epoch": 0.8412767818102318,
+      "grad_norm": 6.161402225494385,
+      "learning_rate": 1.2661772708245535e-05,
+      "loss": 8.9174,
+      "step": 481
+    },
+    {
+      "epoch": 0.8430257979886314,
+      "grad_norm": 5.9002685546875,
+      "learning_rate": 1.23908848685804e-05,
+      "loss": 6.2995,
+      "step": 482
+    },
+    {
+      "epoch": 0.844774814167031,
+      "grad_norm": 5.791549205780029,
+      "learning_rate": 1.2122734662229984e-05,
+      "loss": 6.7385,
+      "step": 483
+    },
+    {
+      "epoch": 0.8465238303454307,
+      "grad_norm": 7.234724521636963,
+      "learning_rate": 1.1857330468424466e-05,
+      "loss": 6.838,
+      "step": 484
+    },
+    {
+      "epoch": 0.8482728465238304,
+      "grad_norm": 5.775229454040527,
+      "learning_rate": 1.1594680580585814e-05,
+      "loss": 8.2034,
+      "step": 485
+    },
+    {
+      "epoch": 0.85002186270223,
+      "grad_norm": 6.084395885467529,
+      "learning_rate": 1.133479320606874e-05,
+      "loss": 8.3378,
+      "step": 486
+    },
+    {
+      "epoch": 0.8517708788806296,
+      "grad_norm": 6.647040843963623,
+      "learning_rate": 1.1077676465904208e-05,
+      "loss": 8.3666,
+      "step": 487
+    },
+    {
+      "epoch": 0.8535198950590293,
+      "grad_norm": 6.034310340881348,
+      "learning_rate": 1.082333839454559e-05,
+      "loss": 8.4993,
+      "step": 488
+    },
+    {
+      "epoch": 0.855268911237429,
+      "grad_norm": 6.046447277069092,
+      "learning_rate": 1.0571786939617712e-05,
+      "loss": 6.8808,
+      "step": 489
+    },
+    {
+      "epoch": 0.8570179274158286,
+      "grad_norm": 7.073398590087891,
+      "learning_rate": 1.0323029961668462e-05,
+      "loss": 8.4804,
+      "step": 490
+    },
+    {
+      "epoch": 0.8587669435942282,
+      "grad_norm": 7.022246837615967,
+      "learning_rate": 1.0077075233923116e-05,
+      "loss": 8.4708,
+      "step": 491
+    },
+    {
+      "epoch": 0.8605159597726278,
+      "grad_norm": 6.369495868682861,
+      "learning_rate": 9.833930442041506e-06,
+      "loss": 8.9675,
+      "step": 492
+    },
+    {
+      "epoch": 0.8622649759510276,
+      "grad_norm": 7.497471809387207,
+      "learning_rate": 9.593603183877841e-06,
+      "loss": 9.2559,
+      "step": 493
+    },
+    {
+      "epoch": 0.8640139921294272,
+      "grad_norm": 6.291429042816162,
+      "learning_rate": 9.35610096924323e-06,
+      "loss": 8.7969,
+      "step": 494
+    },
+    {
+      "epoch": 0.8657630083078268,
+      "grad_norm": 5.909082412719727,
+      "learning_rate": 9.121431219671095e-06,
+      "loss": 9.1292,
+      "step": 495
+    },
+    {
+      "epoch": 0.8675120244862266,
+      "grad_norm": 6.053621292114258,
+      "learning_rate": 8.889601268185232e-06,
+      "loss": 8.1841,
+      "step": 496
+    },
+    {
+      "epoch": 0.8692610406646262,
+      "grad_norm": 7.89780855178833,
+      "learning_rate": 8.660618359070604e-06,
+      "loss": 8.2031,
+      "step": 497
+    },
+    {
+      "epoch": 0.8710100568430258,
+      "grad_norm": 5.878389835357666,
+      "learning_rate": 8.434489647647092e-06,
+      "loss": 7.5146,
+      "step": 498
+    },
+    {
+      "epoch": 0.8727590730214254,
+      "grad_norm": 6.576045513153076,
+      "learning_rate": 8.211222200045788e-06,
+      "loss": 6.9011,
+      "step": 499
+    },
+    {
+      "epoch": 0.8745080891998251,
+      "grad_norm": 6.503655433654785,
+      "learning_rate": 7.990822992988267e-06,
+      "loss": 7.2335,
+      "step": 500
+    },
+    {
+      "epoch": 0.8762571053782248,
+      "grad_norm": 6.5767998695373535,
+      "learning_rate": 7.773298913568505e-06,
+      "loss": 8.625,
+      "step": 501
+    },
+    {
+      "epoch": 0.8780061215566244,
+      "grad_norm": 6.215456485748291,
+      "learning_rate": 7.558656759037797e-06,
+      "loss": 6.6789,
+      "step": 502
+    },
+    {
+      "epoch": 0.879755137735024,
+      "grad_norm": 6.190433502197266,
+      "learning_rate": 7.346903236592162e-06,
+      "loss": 7.0194,
+      "step": 503
+    },
+    {
+      "epoch": 0.8815041539134237,
+      "grad_norm": 6.697807788848877,
+      "learning_rate": 7.13804496316296e-06,
+      "loss": 7.5084,
+      "step": 504
+    },
+    {
+      "epoch": 0.8832531700918234,
+      "grad_norm": 6.033225059509277,
+      "learning_rate": 6.9320884652099406e-06,
+      "loss": 7.5227,
+      "step": 505
+    },
+    {
+      "epoch": 0.885002186270223,
+      "grad_norm": 6.665908336639404,
+      "learning_rate": 6.729040178517454e-06,
+      "loss": 7.7323,
+      "step": 506
+    },
+    {
+      "epoch": 0.8867512024486226,
+      "grad_norm": 6.476802825927734,
+      "learning_rate": 6.528906447993288e-06,
+      "loss": 7.7431,
+      "step": 507
+    },
+    {
+      "epoch": 0.8885002186270223,
+      "grad_norm": 5.98831844329834,
+      "learning_rate": 6.331693527470306e-06,
+      "loss": 7.3852,
+      "step": 508
+    },
+    {
+      "epoch": 0.890249234805422,
+      "grad_norm": 6.885638236999512,
+      "learning_rate": 6.137407579511212e-06,
+      "loss": 7.667,
+      "step": 509
+    },
+    {
+      "epoch": 0.8919982509838216,
+      "grad_norm": 7.12498140335083,
+      "learning_rate": 5.946054675215784e-06,
+      "loss": 8.1353,
+      "step": 510
+    },
+    {
+      "epoch": 0.8937472671622213,
+      "grad_norm": 6.516235828399658,
+      "learning_rate": 5.757640794031361e-06,
+      "loss": 7.4876,
+      "step": 511
+    },
+    {
+      "epoch": 0.8954962833406209,
+      "grad_norm": 6.076444149017334,
+      "learning_rate": 5.572171823565797e-06,
+      "loss": 9.0379,
+      "step": 512
+    },
+    {
+      "epoch": 0.8972452995190205,
+      "grad_norm": 5.914377689361572,
+      "learning_rate": 5.389653559403629e-06,
+      "loss": 8.1439,
+      "step": 513
+    },
+    {
+      "epoch": 0.8989943156974202,
+      "grad_norm": 6.191368579864502,
+      "learning_rate": 5.210091704924946e-06,
+      "loss": 8.6825,
+      "step": 514
+    },
+    {
+      "epoch": 0.9007433318758199,
+      "grad_norm": 6.621984958648682,
+      "learning_rate": 5.033491871127105e-06,
+      "loss": 8.3616,
+      "step": 515
+    },
+    {
+      "epoch": 0.9024923480542195,
+      "grad_norm": 6.113316059112549,
+      "learning_rate": 4.859859576449444e-06,
+      "loss": 7.6225,
+      "step": 516
+    },
+    {
+      "epoch": 0.9042413642326191,
+      "grad_norm": 6.2419915199279785,
+      "learning_rate": 4.689200246600867e-06,
+      "loss": 7.8226,
+      "step": 517
+    },
+    {
+      "epoch": 0.9059903804110188,
+      "grad_norm": 6.267433166503906,
+      "learning_rate": 4.521519214390257e-06,
+      "loss": 7.5588,
+      "step": 518
+    },
+    {
+      "epoch": 0.9077393965894185,
+      "grad_norm": 7.470646381378174,
+      "learning_rate": 4.356821719559812e-06,
+      "loss": 7.5397,
+      "step": 519
+    },
+    {
+      "epoch": 0.9094884127678181,
+      "grad_norm": 5.909048080444336,
+      "learning_rate": 4.195112908621402e-06,
+      "loss": 8.8447,
+      "step": 520
+    },
+    {
+      "epoch": 0.9112374289462177,
+      "grad_norm": 6.860408782958984,
+      "learning_rate": 4.03639783469566e-06,
+      "loss": 8.8253,
+      "step": 521
+    },
+    {
+      "epoch": 0.9129864451246174,
+      "grad_norm": 7.22110652923584,
+      "learning_rate": 3.880681457354118e-06,
+      "loss": 7.9479,
+      "step": 522
+    },
+    {
+      "epoch": 0.9147354613030171,
+      "grad_norm": 5.659202575683594,
+      "learning_rate": 3.727968642464241e-06,
+      "loss": 7.4659,
+      "step": 523
+    },
+    {
+      "epoch": 0.9164844774814167,
+      "grad_norm": 7.78839111328125,
+      "learning_rate": 3.578264162037348e-06,
+      "loss": 7.8924,
+      "step": 524
+    },
+    {
+      "epoch": 0.9182334936598163,
+      "grad_norm": 6.77100133895874,
+      "learning_rate": 3.4315726940795433e-06,
+      "loss": 8.6822,
+      "step": 525
+    },
+    {
+      "epoch": 0.9199825098382161,
+      "grad_norm": 7.059048175811768,
+      "learning_rate": 3.2878988224454344e-06,
+      "loss": 8.3176,
+      "step": 526
+    },
+    {
+      "epoch": 0.9217315260166157,
+      "grad_norm": 5.931784152984619,
+      "learning_rate": 3.1472470366950334e-06,
+      "loss": 7.5494,
+      "step": 527
+    },
+    {
+      "epoch": 0.9234805421950153,
+      "grad_norm": 6.578647613525391,
+      "learning_rate": 3.0096217319533382e-06,
+      "loss": 8.5582,
+      "step": 528
+    },
+    {
+      "epoch": 0.9252295583734149,
+      "grad_norm": 5.951269626617432,
+      "learning_rate": 2.875027208773118e-06,
+      "loss": 7.3472,
+      "step": 529
+    },
+    {
+      "epoch": 0.9269785745518146,
+      "grad_norm": 6.003902912139893,
+      "learning_rate": 2.7434676730003884e-06,
+      "loss": 8.754,
+      "step": 530
+    },
+    {
+      "epoch": 0.9287275907302143,
+      "grad_norm": 6.373345851898193,
+      "learning_rate": 2.614947235643106e-06,
+      "loss": 7.5758,
+      "step": 531
+    },
+    {
+      "epoch": 0.9304766069086139,
+      "grad_norm": 6.780086517333984,
+      "learning_rate": 2.4894699127426367e-06,
+      "loss": 9.3402,
+      "step": 532
+    },
+    {
+      "epoch": 0.9322256230870135,
+      "grad_norm": 6.614811897277832,
+      "learning_rate": 2.367039625248302e-06,
+      "loss": 7.8778,
+      "step": 533
+    },
+    {
+      "epoch": 0.9339746392654132,
+      "grad_norm": 6.098567008972168,
+      "learning_rate": 2.2476601988947966e-06,
+      "loss": 7.928,
+      "step": 534
+    },
+    {
+      "epoch": 0.9357236554438129,
+      "grad_norm": 5.995659828186035,
+      "learning_rate": 2.1313353640827206e-06,
+      "loss": 9.0382,
+      "step": 535
+    },
+    {
+      "epoch": 0.9374726716222125,
+      "grad_norm": 7.359274387359619,
+      "learning_rate": 2.0180687557619816e-06,
+      "loss": 8.395,
+      "step": 536
+    },
+    {
+      "epoch": 0.9392216878006121,
+      "grad_norm": 6.481335639953613,
+      "learning_rate": 1.907863913318153e-06,
+      "loss": 8.3956,
+      "step": 537
+    },
+    {
+      "epoch": 0.9409707039790118,
+      "grad_norm": 5.920598983764648,
+      "learning_rate": 1.8007242804619628e-06,
+      "loss": 7.8273,
+      "step": 538
+    },
+    {
+      "epoch": 0.9427197201574115,
+      "grad_norm": 6.889771461486816,
+      "learning_rate": 1.696653205121612e-06,
+      "loss": 7.9301,
+      "step": 539
+    },
+    {
+      "epoch": 0.9444687363358111,
+      "grad_norm": 7.978548049926758,
+      "learning_rate": 1.595653939338204e-06,
+      "loss": 8.1209,
+      "step": 540
+    },
+    {
+      "epoch": 0.9462177525142108,
+      "grad_norm": 6.091990947723389,
+      "learning_rate": 1.4977296391641026e-06,
+      "loss": 7.4813,
+      "step": 541
+    },
+    {
+      "epoch": 0.9479667686926104,
+      "grad_norm": 5.8564372062683105,
+      "learning_rate": 1.4028833645643113e-06,
+      "loss": 8.299,
+      "step": 542
+    },
+    {
+      "epoch": 0.94971578487101,
+      "grad_norm": 6.623847484588623,
+      "learning_rate": 1.31111807932085e-06,
+      "loss": 7.3619,
+      "step": 543
+    },
+    {
+      "epoch": 0.9514648010494097,
+      "grad_norm": 6.130828380584717,
+      "learning_rate": 1.222436650940173e-06,
+      "loss": 8.2312,
+      "step": 544
+    },
+    {
+      "epoch": 0.9532138172278094,
+      "grad_norm": 5.993081569671631,
+      "learning_rate": 1.1368418505635302e-06,
+      "loss": 9.4749,
+      "step": 545
+    },
+    {
+      "epoch": 0.954962833406209,
+      "grad_norm": 6.714028835296631,
+      "learning_rate": 1.0543363528803696e-06,
+      "loss": 8.6106,
+      "step": 546
+    },
+    {
+      "epoch": 0.9567118495846086,
+      "grad_norm": 6.759970664978027,
+      "learning_rate": 9.749227360448143e-07,
+      "loss": 8.9938,
+      "step": 547
+    },
+    {
+      "epoch": 0.9584608657630083,
+      "grad_norm": 6.770102500915527,
+      "learning_rate": 8.986034815950172e-07,
+      "loss": 9.3766,
+      "step": 548
+    },
+    {
+      "epoch": 0.960209881941408,
+      "grad_norm": 5.988943576812744,
+      "learning_rate": 8.253809743756668e-07,
+      "loss": 8.0326,
+      "step": 549
+    },
+    {
+      "epoch": 0.9619588981198076,
+      "grad_norm": 6.561304092407227,
+      "learning_rate": 7.552575024634689e-07,
+      "loss": 7.3145,
+      "step": 550
+    },
+    {
+      "epoch": 0.9637079142982072,
+      "grad_norm": 6.857665061950684,
+      "learning_rate": 6.882352570956485e-07,
+      "loss": 8.4195,
+      "step": 551
+    },
+    {
+      "epoch": 0.9654569304766069,
+      "grad_norm": 7.414341926574707,
+      "learning_rate": 6.243163326014267e-07,
+      "loss": 7.5319,
+      "step": 552
+    },
+    {
+      "epoch": 0.9672059466550066,
+      "grad_norm": 7.155992031097412,
+      "learning_rate": 5.635027263366399e-07,
+      "loss": 7.9228,
+      "step": 553
+    },
+    {
+      "epoch": 0.9689549628334062,
+      "grad_norm": 5.8317551612854,
+      "learning_rate": 5.057963386213116e-07,
+      "loss": 8.3518,
+      "step": 554
+    },
+    {
+      "epoch": 0.9707039790118058,
+      "grad_norm": 6.625657081604004,
+      "learning_rate": 4.5119897268023347e-07,
+      "loss": 8.2842,
+      "step": 555
+    },
+    {
+      "epoch": 0.9724529951902056,
+      "grad_norm": 6.393413543701172,
+      "learning_rate": 3.9971233458665493e-07,
+      "loss": 8.9058,
+      "step": 556
+    },
+    {
+      "epoch": 0.9742020113686052,
+      "grad_norm": 6.962483882904053,
+      "learning_rate": 3.5133803320896994e-07,
+      "loss": 7.8907,
+      "step": 557
+    },
+    {
+      "epoch": 0.9759510275470048,
+      "grad_norm": 6.612078666687012,
+      "learning_rate": 3.060775801604354e-07,
+      "loss": 8.0385,
+      "step": 558
+    },
+    {
+      "epoch": 0.9777000437254044,
+      "grad_norm": 5.987767696380615,
+      "learning_rate": 2.639323897518975e-07,
+      "loss": 8.6059,
+      "step": 559
+    },
+    {
+      "epoch": 0.9794490599038042,
+      "grad_norm": 5.532016277313232,
+      "learning_rate": 2.2490377894768267e-07,
+      "loss": 7.1836,
+      "step": 560
+    },
+    {
+      "epoch": 0.9811980760822038,
+      "grad_norm": 6.169713020324707,
+      "learning_rate": 1.889929673243529e-07,
+      "loss": 7.9154,
+      "step": 561
+    },
+    {
+      "epoch": 0.9829470922606034,
+      "grad_norm": 6.610177040100098,
+      "learning_rate": 1.562010770326916e-07,
+      "loss": 7.9639,
+      "step": 562
+    },
+    {
+      "epoch": 0.984696108439003,
+      "grad_norm": 5.848151683807373,
+      "learning_rate": 1.2652913276250955e-07,
+      "loss": 7.69,
+      "step": 563
+    },
+    {
+      "epoch": 0.9864451246174027,
+      "grad_norm": 6.159895420074463,
+      "learning_rate": 9.99780617107815e-08,
+      "loss": 7.9916,
+      "step": 564
+    },
+    {
+      "epoch": 0.9881941407958024,
+      "grad_norm": 6.261981964111328,
+      "learning_rate": 7.654869355252504e-08,
+      "loss": 8.4931,
+      "step": 565
+    },
+    {
+      "epoch": 0.989943156974202,
+      "grad_norm": 6.078785419464111,
+      "learning_rate": 5.6241760414987856e-08,
+      "loss": 9.2733,
+      "step": 566
+    },
+    {
+      "epoch": 0.9916921731526016,
+      "grad_norm": 6.701304912567139,
+      "learning_rate": 3.905789685471062e-08,
+      "loss": 8.1601,
+      "step": 567
+    },
+    {
+      "epoch": 0.9934411893310013,
+      "grad_norm": 6.677995681762695,
+      "learning_rate": 2.4997639837687213e-08,
+      "loss": 7.0004,
+      "step": 568
+    },
+    {
+      "epoch": 0.995190205509401,
+      "grad_norm": 6.922513484954834,
+      "learning_rate": 1.4061428722633718e-08,
+      "loss": 8.7729,
+      "step": 569
+    },
+    {
+      "epoch": 0.9969392216878006,
+      "grad_norm": 6.470855236053467,
+      "learning_rate": 6.2496052472549304e-09,
+      "loss": 8.6932,
+      "step": 570
+    },
+    {
+      "epoch": 0.9986882378662003,
+      "grad_norm": 6.063665390014648,
+      "learning_rate": 1.5624135174974186e-09,
+      "loss": 8.2856,
+      "step": 571
+    },
+    {
+      "epoch": 1.0008745080891999,
+      "grad_norm": 6.530557155609131,
+      "learning_rate": 0.0,
+      "loss": 8.5752,
+      "step": 572
+    },
+    {
+      "epoch": 1.0008745080891999,
+      "eval_loss": 1.9150428771972656,
+      "eval_runtime": 2.1677,
+      "eval_samples_per_second": 111.18,
+      "eval_steps_per_second": 55.821,
+      "step": 572
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2428656131506176.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null