Training in progress, step 4390, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/2_Dense/model.safetensors +1 -1
last-checkpoint/README.md +14 -0
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +101 -3

last-checkpoint/2_Dense/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a59bfc4cd3767747c580ac670f0d6c48bfe9e402250467b22e693fdfc61b625d
 size 3149984

 version https://git-lfs.github.com/spec/v1
+oid sha256:98502da6c4dbee1502fa8ebc31ff356b5762eb792a899d4e5339d3cd3a7c0ae4
 size 3149984

last-checkpoint/README.md CHANGED Viewed

@@ -830,6 +830,20 @@ You can finetune this model on your own dataset.
 | 9.6146 | 4230 | 0.061         | -               | -                    |
 | 9.6374 | 4240 | 0.0616        | -               | -                    |
 | 9.6601 | 4250 | 0.0613        | 0.0418          | 0.8282               |
 </details>

 | 9.6146 | 4230 | 0.061         | -               | -                    |
 | 9.6374 | 4240 | 0.0616        | -               | -                    |
 | 9.6601 | 4250 | 0.0613        | 0.0418          | 0.8282               |
+| 9.6829 | 4260 | 0.0623        | -               | -                    |
+| 9.7056 | 4270 | 0.0605        | -               | -                    |
+| 9.7284 | 4280 | 0.0637        | -               | -                    |
+| 9.7512 | 4290 | 0.0604        | -               | -                    |
+| 9.7739 | 4300 | 0.0606        | -               | -                    |
+| 9.7967 | 4310 | 0.0622        | -               | -                    |
+| 9.8195 | 4320 | 0.0598        | -               | -                    |
+| 9.8422 | 4330 | 0.0611        | -               | -                    |
+| 9.8650 | 4340 | 0.0604        | -               | -                    |
+| 9.8878 | 4350 | 0.0598        | -               | -                    |
+| 9.9105 | 4360 | 0.0626        | -               | -                    |
+| 9.9333 | 4370 | 0.0624        | -               | -                    |
+| 9.9560 | 4380 | 0.0617        | -               | -                    |
+| 9.9788 | 4390 | 0.0603        | -               | -                    |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66daefb719ad12215c08363cf07f604053315b28142583dcc866c834327eca3f
 size 735216376

 version https://git-lfs.github.com/spec/v1
+oid sha256:4533a4e396a4cedfa433a333dfd7f93b95b00042cf7dc09f5854fa0650746841
 size 735216376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:796b01c86922133da7b4702097cf156006e03e00f92d857ba3d2713e738810f2
 size 1476823354

 version https://git-lfs.github.com/spec/v1
+oid sha256:03f40734fc193c019c97cbc28b1ae04414cdc745d19240313f29ceea320dd5c0
 size 1476823354

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:451fe1a5f62f2f6eed0b67a70a5f8f0f813e8a38e58c106c948a6c2c9e79f8ef
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:bbcc6d0c1acd705e5ef3f7c1bdfb510617f9ad5f9bd9d641c051fe36f40e8b31
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.660122350263196,
   "eval_steps": 250,
-  "global_step": 4250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3288,6 +3288,104 @@
       "eval_sts_dev_spearman_manhattan": 0.7996541111809876,
       "eval_sts_dev_spearman_max": 0.8282368218808581,
       "step": 4250
     }
   ],
   "logging_steps": 10,
@@ -3302,7 +3400,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 9.97880210556267,
   "eval_steps": 250,
+  "global_step": 4390,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_sts_dev_spearman_manhattan": 0.7996541111809876,
       "eval_sts_dev_spearman_max": 0.8282368218808581,
       "step": 4250
+    },
+    {
+      "epoch": 9.682885189927443,
+      "grad_norm": 0.2565889060497284,
+      "learning_rate": 3.0299797290088556e-06,
+      "loss": 0.0623,
+      "step": 4260
+    },
+    {
+      "epoch": 9.705648029591691,
+      "grad_norm": 0.2263515293598175,
+      "learning_rate": 3.037092357480707e-06,
+      "loss": 0.0605,
+      "step": 4270
+    },
+    {
+      "epoch": 9.72841086925594,
+      "grad_norm": 0.21705535054206848,
+      "learning_rate": 3.044204985952559e-06,
+      "loss": 0.0637,
+      "step": 4280
+    },
+    {
+      "epoch": 9.751173708920188,
+      "grad_norm": 0.21649038791656494,
+      "learning_rate": 3.0513176144244106e-06,
+      "loss": 0.0604,
+      "step": 4290
+    },
+    {
+      "epoch": 9.773936548584436,
+      "grad_norm": 0.22717022895812988,
+      "learning_rate": 3.0584302428962625e-06,
+      "loss": 0.0606,
+      "step": 4300
+    },
+    {
+      "epoch": 9.796699388248683,
+      "grad_norm": 0.23610946536064148,
+      "learning_rate": 3.0655428713681145e-06,
+      "loss": 0.0622,
+      "step": 4310
+    },
+    {
+      "epoch": 9.819462227912933,
+      "grad_norm": 0.2080880105495453,
+      "learning_rate": 3.072655499839966e-06,
+      "loss": 0.0598,
+      "step": 4320
+    },
+    {
+      "epoch": 9.84222506757718,
+      "grad_norm": 0.2862449884414673,
+      "learning_rate": 3.079768128311818e-06,
+      "loss": 0.0611,
+      "step": 4330
+    },
+    {
+      "epoch": 9.864987907241428,
+      "grad_norm": 0.2211073935031891,
+      "learning_rate": 3.0868807567836695e-06,
+      "loss": 0.0604,
+      "step": 4340
+    },
+    {
+      "epoch": 9.887750746905677,
+      "grad_norm": 0.2399899959564209,
+      "learning_rate": 3.0939933852555214e-06,
+      "loss": 0.0598,
+      "step": 4350
+    },
+    {
+      "epoch": 9.910513586569925,
+      "grad_norm": 0.2330579161643982,
+      "learning_rate": 3.101106013727373e-06,
+      "loss": 0.0626,
+      "step": 4360
+    },
+    {
+      "epoch": 9.933276426234173,
+      "grad_norm": 0.23163940012454987,
+      "learning_rate": 3.108218642199225e-06,
+      "loss": 0.0624,
+      "step": 4370
+    },
+    {
+      "epoch": 9.95603926589842,
+      "grad_norm": 0.2087012380361557,
+      "learning_rate": 3.115331270671077e-06,
+      "loss": 0.0617,
+      "step": 4380
+    },
+    {
+      "epoch": 9.97880210556267,
+      "grad_norm": 0.24286577105522156,
+      "learning_rate": 3.1224438991429284e-06,
+      "loss": 0.0603,
+      "step": 4390
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }