Training in progress, step 52728, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ba6684257e283f99e4654fa6c675e02d7147bd1823d28457e21845ce8ee1211c
 size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d5105fef59c6f148a4647a6d396eee790d7ef1df177c316936d04fa3cb36d6d8
 size 168150738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0349d2fe2a8c07a922fcdd7b869bf30ef13bdfa2a6c39cdb10a1e22ecb0cae68
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:195e1c218824e864593415afe68e9e8127e01aea101782826593ecf3daaaf683
 size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 4.0,
   "eval_steps": 5853,
-  "global_step":
+  "global_step": 52728,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -368926,6 +368926,363 @@
       "eval_test_samples_per_second": 12.628,
       "eval_test_steps_per_second": 0.789,
       "step": 52677
+    },
+    {
+      "epoch": 3.9962069488696708,
+      "grad_norm": 1.1055113077163696,
+      "learning_rate": 3.0008135267917343e-06,
+      "loss": 2.2484,
+      "step": 52678
+    },
+    {
+      "epoch": 3.9962828098922776,
+      "grad_norm": 1.020114541053772,
+      "learning_rate": 3.000781311159022e-06,
+      "loss": 2.2629,
+      "step": 52679
+    },
+    {
+      "epoch": 3.996358670914884,
+      "grad_norm": 0.8700923323631287,
+      "learning_rate": 3.0007497463449255e-06,
+      "loss": 2.1114,
+      "step": 52680
+    },
+    {
+      "epoch": 3.9964345319374903,
+      "grad_norm": 1.25503671169281,
+      "learning_rate": 3.0007188323495756e-06,
+      "loss": 2.1506,
+      "step": 52681
+    },
+    {
+      "epoch": 3.996510392960097,
+      "grad_norm": 1.2695883512496948,
+      "learning_rate": 3.0006885691730872e-06,
+      "loss": 2.226,
+      "step": 52682
+    },
+    {
+      "epoch": 3.996586253982704,
+      "grad_norm": 1.0582082271575928,
+      "learning_rate": 3.0006589568156268e-06,
+      "loss": 2.2531,
+      "step": 52683
+    },
+    {
+      "epoch": 3.9966621150053103,
+      "grad_norm": 0.9807246327400208,
+      "learning_rate": 3.000629995277292e-06,
+      "loss": 2.1907,
+      "step": 52684
+    },
+    {
+      "epoch": 3.9967379760279167,
+      "grad_norm": 0.9967033863067627,
+      "learning_rate": 3.0006016845582158e-06,
+      "loss": 2.0615,
+      "step": 52685
+    },
+    {
+      "epoch": 3.9968138370505235,
+      "grad_norm": 1.1809074878692627,
+      "learning_rate": 3.0005740246585456e-06,
+      "loss": 2.1231,
+      "step": 52686
+    },
+    {
+      "epoch": 3.99688969807313,
+      "grad_norm": 0.9519857168197632,
+      "learning_rate": 3.0005470155783807e-06,
+      "loss": 2.2006,
+      "step": 52687
+    },
+    {
+      "epoch": 3.9969655590957367,
+      "grad_norm": 1.0005104541778564,
+      "learning_rate": 3.000520657317853e-06,
+      "loss": 2.2421,
+      "step": 52688
+    },
+    {
+      "epoch": 3.997041420118343,
+      "grad_norm": 0.9390363097190857,
+      "learning_rate": 3.0004949498770612e-06,
+      "loss": 2.2009,
+      "step": 52689
+    },
+    {
+      "epoch": 3.99711728114095,
+      "grad_norm": 1.043871521949768,
+      "learning_rate": 3.0004698932561214e-06,
+      "loss": 2.0931,
+      "step": 52690
+    },
+    {
+      "epoch": 3.9971931421635563,
+      "grad_norm": 0.9061468839645386,
+      "learning_rate": 3.0004454874551646e-06,
+      "loss": 2.0976,
+      "step": 52691
+    },
+    {
+      "epoch": 3.997269003186163,
+      "grad_norm": 1.4057188034057617,
+      "learning_rate": 3.000421732474274e-06,
+      "loss": 2.3907,
+      "step": 52692
+    },
+    {
+      "epoch": 3.9973448642087694,
+      "grad_norm": 1.1326065063476562,
+      "learning_rate": 3.0003986283135803e-06,
+      "loss": 2.2332,
+      "step": 52693
+    },
+    {
+      "epoch": 3.9974207252313763,
+      "grad_norm": 1.048019528388977,
+      "learning_rate": 3.0003761749731514e-06,
+      "loss": 2.0218,
+      "step": 52694
+    },
+    {
+      "epoch": 3.9974965862539826,
+      "grad_norm": 0.9427198767662048,
+      "learning_rate": 3.0003543724531006e-06,
+      "loss": 2.0542,
+      "step": 52695
+    },
+    {
+      "epoch": 3.9975724472765894,
+      "grad_norm": 1.1140172481536865,
+      "learning_rate": 3.0003332207535282e-06,
+      "loss": 2.2512,
+      "step": 52696
+    },
+    {
+      "epoch": 3.997648308299196,
+      "grad_norm": 1.0869436264038086,
+      "learning_rate": 3.000312719874516e-06,
+      "loss": 2.262,
+      "step": 52697
+    },
+    {
+      "epoch": 3.9977241693218026,
+      "grad_norm": 0.9387947916984558,
+      "learning_rate": 3.000292869816164e-06,
+      "loss": 2.1539,
+      "step": 52698
+    },
+    {
+      "epoch": 3.997800030344409,
+      "grad_norm": 0.8832263946533203,
+      "learning_rate": 3.0002736705785535e-06,
+      "loss": 2.4051,
+      "step": 52699
+    },
+    {
+      "epoch": 3.9978758913670154,
+      "grad_norm": 1.0139085054397583,
+      "learning_rate": 3.0002551221617832e-06,
+      "loss": 1.9968,
+      "step": 52700
+    },
+    {
+      "epoch": 3.997951752389622,
+      "grad_norm": 1.092558741569519,
+      "learning_rate": 3.000237224565903e-06,
+      "loss": 2.2382,
+      "step": 52701
+    },
+    {
+      "epoch": 3.998027613412229,
+      "grad_norm": 1.0027174949645996,
+      "learning_rate": 3.000219977791029e-06,
+      "loss": 2.0828,
+      "step": 52702
+    },
+    {
+      "epoch": 3.9981034744348354,
+      "grad_norm": 0.9388405084609985,
+      "learning_rate": 3.00020338183721e-06,
+      "loss": 2.0502,
+      "step": 52703
+    },
+    {
+      "epoch": 3.9981793354574418,
+      "grad_norm": 0.9701755046844482,
+      "learning_rate": 3.000187436704528e-06,
+      "loss": 2.109,
+      "step": 52704
+    },
+    {
+      "epoch": 3.9982551964800486,
+      "grad_norm": 0.9503781795501709,
+      "learning_rate": 3.0001721423930496e-06,
+      "loss": 2.169,
+      "step": 52705
+    },
+    {
+      "epoch": 3.9983310575026554,
+      "grad_norm": 1.0426063537597656,
+      "learning_rate": 3.000157498902841e-06,
+      "loss": 2.1868,
+      "step": 52706
+    },
+    {
+      "epoch": 3.9984069185252618,
+      "grad_norm": 0.9852115511894226,
+      "learning_rate": 3.000143506233984e-06,
+      "loss": 2.25,
+      "step": 52707
+    },
+    {
+      "epoch": 3.998482779547868,
+      "grad_norm": 0.8936397433280945,
+      "learning_rate": 3.0001301643865117e-06,
+      "loss": 2.0757,
+      "step": 52708
+    },
+    {
+      "epoch": 3.998558640570475,
+      "grad_norm": 0.9924890398979187,
+      "learning_rate": 3.00011747336049e-06,
+      "loss": 2.2185,
+      "step": 52709
+    },
+    {
+      "epoch": 3.9986345015930813,
+      "grad_norm": 1.0129131078720093,
+      "learning_rate": 3.0001054331560014e-06,
+      "loss": 2.1824,
+      "step": 52710
+    },
+    {
+      "epoch": 3.998710362615688,
+      "grad_norm": 0.9240451455116272,
+      "learning_rate": 3.0000940437730624e-06,
+      "loss": 2.0595,
+      "step": 52711
+    },
+    {
+      "epoch": 3.9987862236382945,
+      "grad_norm": 1.037165641784668,
+      "learning_rate": 3.0000833052117394e-06,
+      "loss": 2.1509,
+      "step": 52712
+    },
+    {
+      "epoch": 3.9988620846609013,
+      "grad_norm": 0.9835069179534912,
+      "learning_rate": 3.000073217472098e-06,
+      "loss": 2.0827,
+      "step": 52713
+    },
+    {
+      "epoch": 3.9989379456835077,
+      "grad_norm": 1.0959041118621826,
+      "learning_rate": 3.000063780554138e-06,
+      "loss": 2.1468,
+      "step": 52714
+    },
+    {
+      "epoch": 3.9990138067061145,
+      "grad_norm": 1.2777659893035889,
+      "learning_rate": 3.000054994457942e-06,
+      "loss": 2.3319,
+      "step": 52715
+    },
+    {
+      "epoch": 3.999089667728721,
+      "grad_norm": 0.9920614957809448,
+      "learning_rate": 3.0000468591835265e-06,
+      "loss": 2.1972,
+      "step": 52716
+    },
+    {
+      "epoch": 3.9991655287513277,
+      "grad_norm": 1.0651792287826538,
+      "learning_rate": 3.000039374730924e-06,
+      "loss": 2.0964,
+      "step": 52717
+    },
+    {
+      "epoch": 3.999241389773934,
+      "grad_norm": 1.1940739154815674,
+      "learning_rate": 3.000032541100185e-06,
+      "loss": 1.9985,
+      "step": 52718
+    },
+    {
+      "epoch": 3.999317250796541,
+      "grad_norm": 0.9705497026443481,
+      "learning_rate": 3.0000263582913414e-06,
+      "loss": 2.008,
+      "step": 52719
+    },
+    {
+      "epoch": 3.9993931118191473,
+      "grad_norm": 1.0034539699554443,
+      "learning_rate": 3.000020826304394e-06,
+      "loss": 2.0824,
+      "step": 52720
+    },
+    {
+      "epoch": 3.999468972841754,
+      "grad_norm": 1.047110915184021,
+      "learning_rate": 3.0000159451393913e-06,
+      "loss": 2.0688,
+      "step": 52721
+    },
+    {
+      "epoch": 3.9995448338643604,
+      "grad_norm": 0.9343622922897339,
+      "learning_rate": 3.000011714796335e-06,
+      "loss": 2.2584,
+      "step": 52722
+    },
+    {
+      "epoch": 3.999620694886967,
+      "grad_norm": 1.0422899723052979,
+      "learning_rate": 3.0000081352752726e-06,
+      "loss": 2.1829,
+      "step": 52723
+    },
+    {
+      "epoch": 3.9996965559095736,
+      "grad_norm": 1.077273964881897,
+      "learning_rate": 3.0000052065761888e-06,
+      "loss": 2.0929,
+      "step": 52724
+    },
+    {
+      "epoch": 3.9997724169321804,
+      "grad_norm": 0.9545094966888428,
+      "learning_rate": 3.0000029286991165e-06,
+      "loss": 2.1265,
+      "step": 52725
+    },
+    {
+      "epoch": 3.999848277954787,
+      "grad_norm": 0.9401389956474304,
+      "learning_rate": 3.0000013016440555e-06,
+      "loss": 1.9835,
+      "step": 52726
+    },
+    {
+      "epoch": 3.999924138977393,
+      "grad_norm": 0.9990488290786743,
+      "learning_rate": 3.0000003254110053e-06,
+      "loss": 2.2038,
+      "step": 52727
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.4670413732528687,
+      "learning_rate": 2.9999999999999997e-06,
+      "loss": 1.9777,
+      "step": 52728
     }
   ],
   "logging_steps": 1,
@@ -368940,12 +369297,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.3325515079398982e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null