Training in progress, step 1050, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +118 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a1d5bad61400722a8ccce1a07c1e753482b6ea4442e6ad16a46f6bbf4840e3d
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:16a868f82febe721b8e7c2fc71c0d5a97ce0919bf160da3b9aadd26d9a44949f
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89b1275a7a904e9e12119508c852a53c2be0d59c240caced6758ed88ae0b2843
 size 243802996

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1e5f5a977713981e8555bb273c1bd513af9b33d3395312c4dbb567c2fe16fc2
 size 243802996

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d6c4fffbe9021834ba7d217ea350a66ec98cffedd34126c6852ce5eadc39d6a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a76f63d05c6042ef27038fe0b51cdcbf2f7d68ac795e368d4b91246aabe1923c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6fd237cfec80c70f9a65cfeacff021654c249bb4c822db0994192c4dfbb39cb
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd162fd330fa2cba0192d15405a21539d18e6a2ba92040c97b68159b64ce43fa
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.34348922967910767,
   "best_model_checkpoint": "miner_id_24/checkpoint-600",
-  "epoch": 0.06553557125172942,
   "eval_steps": 150,
-  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -693,6 +693,119 @@
       "eval_samples_per_second": 30.988,
       "eval_steps_per_second": 3.875,
       "step": 900
     }
   ],
   "logging_steps": 10,
@@ -707,7 +820,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -716,12 +829,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.736415831518085e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.34348922967910767,
   "best_model_checkpoint": "miner_id_24/checkpoint-600",
+  "epoch": 0.07645816646035097,
   "eval_steps": 150,
+  "global_step": 1050,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 30.988,
       "eval_steps_per_second": 3.875,
       "step": 900
+    },
+    {
+      "epoch": 0.06626374426563751,
+      "grad_norm": 0.12492916733026505,
+      "learning_rate": 8.045567009897723e-05,
+      "loss": 0.1276,
+      "step": 910
+    },
+    {
+      "epoch": 0.06699191727954562,
+      "grad_norm": 0.21166566014289856,
+      "learning_rate": 8.003165547104305e-05,
+      "loss": 0.3204,
+      "step": 920
+    },
+    {
+      "epoch": 0.06772009029345373,
+      "grad_norm": 0.27438658475875854,
+      "learning_rate": 7.960423495103467e-05,
+      "loss": 0.2867,
+      "step": 930
+    },
+    {
+      "epoch": 0.06844826330736183,
+      "grad_norm": 0.3071146607398987,
+      "learning_rate": 7.917345701274207e-05,
+      "loss": 0.4194,
+      "step": 940
+    },
+    {
+      "epoch": 0.06917643632126994,
+      "grad_norm": 0.6998862624168396,
+      "learning_rate": 7.873937051072035e-05,
+      "loss": 0.6729,
+      "step": 950
+    },
+    {
+      "epoch": 0.06990460933517804,
+      "grad_norm": 0.12480669468641281,
+      "learning_rate": 7.830202467474899e-05,
+      "loss": 0.1254,
+      "step": 960
+    },
+    {
+      "epoch": 0.07063278234908614,
+      "grad_norm": 0.24528868496418,
+      "learning_rate": 7.786146910424876e-05,
+      "loss": 0.3341,
+      "step": 970
+    },
+    {
+      "epoch": 0.07136095536299425,
+      "grad_norm": 0.3267609775066376,
+      "learning_rate": 7.741775376265667e-05,
+      "loss": 0.3441,
+      "step": 980
+    },
+    {
+      "epoch": 0.07208912837690236,
+      "grad_norm": 0.3443754315376282,
+      "learning_rate": 7.697092897175957e-05,
+      "loss": 0.4224,
+      "step": 990
+    },
+    {
+      "epoch": 0.07281730139081045,
+      "grad_norm": 0.7945559620857239,
+      "learning_rate": 7.652104540598712e-05,
+      "loss": 0.6113,
+      "step": 1000
+    },
+    {
+      "epoch": 0.07354547440471856,
+      "grad_norm": 0.1654498428106308,
+      "learning_rate": 7.606815408666493e-05,
+      "loss": 0.1899,
+      "step": 1010
+    },
+    {
+      "epoch": 0.07427364741862666,
+      "grad_norm": 0.2405194789171219,
+      "learning_rate": 7.561230637622805e-05,
+      "loss": 0.3366,
+      "step": 1020
+    },
+    {
+      "epoch": 0.07500182043253477,
+      "grad_norm": 0.2601194679737091,
+      "learning_rate": 7.515355397239613e-05,
+      "loss": 0.3054,
+      "step": 1030
+    },
+    {
+      "epoch": 0.07572999344644288,
+      "grad_norm": 0.3462647795677185,
+      "learning_rate": 7.469194890231021e-05,
+      "loss": 0.385,
+      "step": 1040
+    },
+    {
+      "epoch": 0.07645816646035097,
+      "grad_norm": 0.9731826186180115,
+      "learning_rate": 7.422754351663252e-05,
+      "loss": 0.6843,
+      "step": 1050
+    },
+    {
+      "epoch": 0.07645816646035097,
+      "eval_loss": 0.3437187075614929,
+      "eval_runtime": 373.3766,
+      "eval_samples_per_second": 30.974,
+      "eval_steps_per_second": 3.873,
+      "step": 1050
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.025225674859479e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null