Training in progress, step 5760, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +424 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8c4999d1d781390bb82ba66c422d6f2218afa2b655c8715f5e1b3b2dbee6e38
 size 389074464

 version https://git-lfs.github.com/spec/v1
+oid sha256:9235f4607cf8e84409de4ce4b2847c74a696066b57d3cdb69cbc2e01b28806d0
 size 389074464

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16a21d4f04d7ead68e40367b0c1693ce79f1b8164bbeb69b1d457b3bc7519725
 size 198011700

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5ff9daa50a0a5c480018483e7ad671a0dce75147275c76f66a36458c77efbee
 size 198011700

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b822d37b79d6999b750099b6fba79edc44b929584c0eb8e06dd9d96747579de
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4570cde6877f7ecdff7adaa553bffdfd408783d3b318129cb1d03c18f4128e5c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:230f75bba14745c5a2f9522a21df2a8ff99459f0af9d260483162250deae40f5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b35e714094c6b7ca02b4593d2c4332f2835d3b2ebcdf357e02eb76c44a693fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.9804788827896118,
   "best_model_checkpoint": "miner_id_24/checkpoint-5700",
-  "epoch": 0.07667731629392971,
   "eval_steps": 150,
-  "global_step": 5700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -40219,6 +40219,426 @@
       "eval_samples_per_second": 11.75,
       "eval_steps_per_second": 5.875,
       "step": 5700
     }
   ],
   "logging_steps": 1,
@@ -40242,12 +40662,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.6339320226971648e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.9804788827896118,
   "best_model_checkpoint": "miner_id_24/checkpoint-5700",
+  "epoch": 0.07748444593912897,
   "eval_steps": 150,
+  "global_step": 5760,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.75,
       "eval_steps_per_second": 5.875,
       "step": 5700
+    },
+    {
+      "epoch": 0.07669076845468303,
+      "grad_norm": 1.586574673652649,
+      "learning_rate": 5.195178525271294e-08,
+      "loss": 1.0676,
+      "step": 5701
+    },
+    {
+      "epoch": 0.07670422061543636,
+      "grad_norm": 1.4937348365783691,
+      "learning_rate": 5.0205778298384464e-08,
+      "loss": 0.9485,
+      "step": 5702
+    },
+    {
+      "epoch": 0.07671767277618967,
+      "grad_norm": 1.4224642515182495,
+      "learning_rate": 4.848960771166588e-08,
+      "loss": 0.9619,
+      "step": 5703
+    },
+    {
+      "epoch": 0.076731124936943,
+      "grad_norm": 1.5152407884597778,
+      "learning_rate": 4.680327400486961e-08,
+      "loss": 1.1684,
+      "step": 5704
+    },
+    {
+      "epoch": 0.07674457709769632,
+      "grad_norm": 1.5055732727050781,
+      "learning_rate": 4.514677768138187e-08,
+      "loss": 1.0995,
+      "step": 5705
+    },
+    {
+      "epoch": 0.07675802925844964,
+      "grad_norm": 1.5480650663375854,
+      "learning_rate": 4.352011923569599e-08,
+      "loss": 1.0017,
+      "step": 5706
+    },
+    {
+      "epoch": 0.07677148141920295,
+      "grad_norm": 1.6415358781814575,
+      "learning_rate": 4.192329915339022e-08,
+      "loss": 0.8832,
+      "step": 5707
+    },
+    {
+      "epoch": 0.07678493357995628,
+      "grad_norm": 1.4596832990646362,
+      "learning_rate": 4.035631791113881e-08,
+      "loss": 1.0574,
+      "step": 5708
+    },
+    {
+      "epoch": 0.0767983857407096,
+      "grad_norm": 1.5040479898452759,
+      "learning_rate": 3.881917597670093e-08,
+      "loss": 0.9738,
+      "step": 5709
+    },
+    {
+      "epoch": 0.07681183790146293,
+      "grad_norm": 1.401673674583435,
+      "learning_rate": 3.731187380893175e-08,
+      "loss": 0.8504,
+      "step": 5710
+    },
+    {
+      "epoch": 0.07682529006221624,
+      "grad_norm": 1.6924898624420166,
+      "learning_rate": 3.583441185779357e-08,
+      "loss": 1.1012,
+      "step": 5711
+    },
+    {
+      "epoch": 0.07683874222296956,
+      "grad_norm": 1.5380204916000366,
+      "learning_rate": 3.438679056432248e-08,
+      "loss": 0.9772,
+      "step": 5712
+    },
+    {
+      "epoch": 0.07685219438372289,
+      "grad_norm": 1.3671621084213257,
+      "learning_rate": 3.296901036065059e-08,
+      "loss": 0.7888,
+      "step": 5713
+    },
+    {
+      "epoch": 0.07686564654447621,
+      "grad_norm": 1.4451839923858643,
+      "learning_rate": 3.1581071670006015e-08,
+      "loss": 0.8835,
+      "step": 5714
+    },
+    {
+      "epoch": 0.07687909870522953,
+      "grad_norm": 1.5461901426315308,
+      "learning_rate": 3.022297490670178e-08,
+      "loss": 1.161,
+      "step": 5715
+    },
+    {
+      "epoch": 0.07689255086598284,
+      "grad_norm": 1.5095206499099731,
+      "learning_rate": 2.8894720476158044e-08,
+      "loss": 1.0474,
+      "step": 5716
+    },
+    {
+      "epoch": 0.07690600302673617,
+      "grad_norm": 1.3444650173187256,
+      "learning_rate": 2.7596308774879842e-08,
+      "loss": 0.9731,
+      "step": 5717
+    },
+    {
+      "epoch": 0.0769194551874895,
+      "grad_norm": 1.8739641904830933,
+      "learning_rate": 2.6327740190446037e-08,
+      "loss": 1.1698,
+      "step": 5718
+    },
+    {
+      "epoch": 0.07693290734824282,
+      "grad_norm": 1.4870262145996094,
+      "learning_rate": 2.50890151015426e-08,
+      "loss": 0.902,
+      "step": 5719
+    },
+    {
+      "epoch": 0.07694635950899613,
+      "grad_norm": 1.540949821472168,
+      "learning_rate": 2.388013387796262e-08,
+      "loss": 1.1144,
+      "step": 5720
+    },
+    {
+      "epoch": 0.07695981166974945,
+      "grad_norm": 1.6155050992965698,
+      "learning_rate": 2.270109688056188e-08,
+      "loss": 0.9566,
+      "step": 5721
+    },
+    {
+      "epoch": 0.07697326383050278,
+      "grad_norm": 1.5978723764419556,
+      "learning_rate": 2.1551904461292184e-08,
+      "loss": 1.0488,
+      "step": 5722
+    },
+    {
+      "epoch": 0.0769867159912561,
+      "grad_norm": 1.4267683029174805,
+      "learning_rate": 2.043255696322355e-08,
+      "loss": 0.9771,
+      "step": 5723
+    },
+    {
+      "epoch": 0.07700016815200941,
+      "grad_norm": 1.5521541833877563,
+      "learning_rate": 1.9343054720466492e-08,
+      "loss": 1.1641,
+      "step": 5724
+    },
+    {
+      "epoch": 0.07701362031276274,
+      "grad_norm": 1.5656774044036865,
+      "learning_rate": 1.8283398058283053e-08,
+      "loss": 1.0749,
+      "step": 5725
+    },
+    {
+      "epoch": 0.07702707247351606,
+      "grad_norm": 1.6160752773284912,
+      "learning_rate": 1.725358729297577e-08,
+      "loss": 1.0366,
+      "step": 5726
+    },
+    {
+      "epoch": 0.07704052463426939,
+      "grad_norm": 1.6153086423873901,
+      "learning_rate": 1.62536227319654e-08,
+      "loss": 0.9984,
+      "step": 5727
+    },
+    {
+      "epoch": 0.0770539767950227,
+      "grad_norm": 1.3504514694213867,
+      "learning_rate": 1.5283504673757608e-08,
+      "loss": 0.8402,
+      "step": 5728
+    },
+    {
+      "epoch": 0.07706742895577602,
+      "grad_norm": 1.4485338926315308,
+      "learning_rate": 1.4343233407931866e-08,
+      "loss": 0.9682,
+      "step": 5729
+    },
+    {
+      "epoch": 0.07708088111652935,
+      "grad_norm": 1.7915130853652954,
+      "learning_rate": 1.343280921518586e-08,
+      "loss": 1.0206,
+      "step": 5730
+    },
+    {
+      "epoch": 0.07709433327728267,
+      "grad_norm": 1.4139072895050049,
+      "learning_rate": 1.2552232367279981e-08,
+      "loss": 1.0062,
+      "step": 5731
+    },
+    {
+      "epoch": 0.07710778543803598,
+      "grad_norm": 1.5041614770889282,
+      "learning_rate": 1.1701503127092838e-08,
+      "loss": 0.9514,
+      "step": 5732
+    },
+    {
+      "epoch": 0.0771212375987893,
+      "grad_norm": 1.5072709321975708,
+      "learning_rate": 1.0880621748576847e-08,
+      "loss": 0.8834,
+      "step": 5733
+    },
+    {
+      "epoch": 0.07713468975954263,
+      "grad_norm": 1.4389986991882324,
+      "learning_rate": 1.0089588476758227e-08,
+      "loss": 0.9029,
+      "step": 5734
+    },
+    {
+      "epoch": 0.07714814192029595,
+      "grad_norm": 1.4401514530181885,
+      "learning_rate": 9.32840354779252e-09,
+      "loss": 1.0095,
+      "step": 5735
+    },
+    {
+      "epoch": 0.07716159408104926,
+      "grad_norm": 1.7930731773376465,
+      "learning_rate": 8.597067188897966e-09,
+      "loss": 1.2101,
+      "step": 5736
+    },
+    {
+      "epoch": 0.07717504624180259,
+      "grad_norm": 1.6275607347488403,
+      "learning_rate": 7.895579618388827e-09,
+      "loss": 1.0339,
+      "step": 5737
+    },
+    {
+      "epoch": 0.07718849840255591,
+      "grad_norm": 1.538468837738037,
+      "learning_rate": 7.223941045664262e-09,
+      "loss": 1.0778,
+      "step": 5738
+    },
+    {
+      "epoch": 0.07720195056330924,
+      "grad_norm": 1.4641597270965576,
+      "learning_rate": 6.582151671208348e-09,
+      "loss": 0.9457,
+      "step": 5739
+    },
+    {
+      "epoch": 0.07721540272406255,
+      "grad_norm": 1.4278123378753662,
+      "learning_rate": 5.970211686623372e-09,
+      "loss": 0.9369,
+      "step": 5740
+    },
+    {
+      "epoch": 0.07722885488481587,
+      "grad_norm": 1.355145812034607,
+      "learning_rate": 5.388121274574331e-09,
+      "loss": 0.8486,
+      "step": 5741
+    },
+    {
+      "epoch": 0.0772423070455692,
+      "grad_norm": 1.5918469429016113,
+      "learning_rate": 4.83588060882223e-09,
+      "loss": 0.9758,
+      "step": 5742
+    },
+    {
+      "epoch": 0.07725575920632252,
+      "grad_norm": 1.686596155166626,
+      "learning_rate": 4.313489854212982e-09,
+      "loss": 1.0544,
+      "step": 5743
+    },
+    {
+      "epoch": 0.07726921136707583,
+      "grad_norm": 1.381110429763794,
+      "learning_rate": 3.820949166699616e-09,
+      "loss": 0.8962,
+      "step": 5744
+    },
+    {
+      "epoch": 0.07728266352782916,
+      "grad_norm": 1.5031489133834839,
+      "learning_rate": 3.3582586932978666e-09,
+      "loss": 1.0251,
+      "step": 5745
+    },
+    {
+      "epoch": 0.07729611568858248,
+      "grad_norm": 1.4953575134277344,
+      "learning_rate": 2.9254185721416803e-09,
+      "loss": 1.0684,
+      "step": 5746
+    },
+    {
+      "epoch": 0.0773095678493358,
+      "grad_norm": 1.4256117343902588,
+      "learning_rate": 2.5224289324277118e-09,
+      "loss": 0.9782,
+      "step": 5747
+    },
+    {
+      "epoch": 0.07732302001008912,
+      "grad_norm": 1.7791322469711304,
+      "learning_rate": 2.149289894459727e-09,
+      "loss": 1.2141,
+      "step": 5748
+    },
+    {
+      "epoch": 0.07733647217084244,
+      "grad_norm": 1.8340305089950562,
+      "learning_rate": 1.8060015696264032e-09,
+      "loss": 1.059,
+      "step": 5749
+    },
+    {
+      "epoch": 0.07734992433159577,
+      "grad_norm": 1.5660282373428345,
+      "learning_rate": 1.4925640603902224e-09,
+      "loss": 0.9299,
+      "step": 5750
+    },
+    {
+      "epoch": 0.07736337649234909,
+      "grad_norm": 1.551103949546814,
+      "learning_rate": 1.208977460342986e-09,
+      "loss": 1.1289,
+      "step": 5751
+    },
+    {
+      "epoch": 0.0773768286531024,
+      "grad_norm": 1.676748514175415,
+      "learning_rate": 9.552418541058928e-10,
+      "loss": 0.9262,
+      "step": 5752
+    },
+    {
+      "epoch": 0.07739028081385572,
+      "grad_norm": 1.4658743143081665,
+      "learning_rate": 7.313573174516642e-10,
+      "loss": 0.8708,
+      "step": 5753
+    },
+    {
+      "epoch": 0.07740373297460905,
+      "grad_norm": 1.6699283123016357,
+      "learning_rate": 5.37323917204624e-10,
+      "loss": 1.083,
+      "step": 5754
+    },
+    {
+      "epoch": 0.07741718513536237,
+      "grad_norm": 1.4694359302520752,
+      "learning_rate": 3.731417112740054e-10,
+      "loss": 0.8933,
+      "step": 5755
+    },
+    {
+      "epoch": 0.07743063729611568,
+      "grad_norm": 1.7645187377929688,
+      "learning_rate": 2.388107486761548e-10,
+      "loss": 1.1274,
+      "step": 5756
+    },
+    {
+      "epoch": 0.07744408945686901,
+      "grad_norm": 1.5873627662658691,
+      "learning_rate": 1.343310695234301e-10,
+      "loss": 1.0934,
+      "step": 5757
+    },
+    {
+      "epoch": 0.07745754161762233,
+      "grad_norm": 1.486417293548584,
+      "learning_rate": 5.97027049908938e-11,
+      "loss": 1.0576,
+      "step": 5758
+    },
+    {
+      "epoch": 0.07747099377837566,
+      "grad_norm": 1.636743187904358,
+      "learning_rate": 1.4925677360722036e-11,
+      "loss": 1.1294,
+      "step": 5759
+    },
+    {
+      "epoch": 0.07748444593912897,
+      "grad_norm": 1.4540817737579346,
+      "learning_rate": 0.0,
+      "loss": 0.8497,
+      "step": 5760
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.6511313071466086e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null