Training in progress, step 17500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee7142eae13c2e07731af54b545ac51bda5556ea3f592995cf80a6a611f7a001
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8ffabfeec9f6eebd2a7900dd8bf8405bf07a25820f0a7c5d6b4d5a8bfa48293
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8599a0264032f7e486689cdb52a108f56cf68a52b701dfa3ff59464f8da7ad06
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c081cd44daea3b6fc16a00cbe0211572d0c0a312f0060bc10c918341394bda2
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c552c5778e7213237c153e06b58720168ca8baccd54cd6b284f96928b35e9241
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:afa26191eaed430ee9b2402f370925cf1a280b0be2f2a361324924659b56d574
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:570514209dc9b87d23b36d54c5948989ae9b03cae5fc9a097914a9ef458ad51f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e544656266b7150eb42d485eede2fd99a780796aabfa23eadeeb94220226471
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.08219126611948013,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-17000",
-  "epoch": 1.3599999999999999,
   "eval_steps": 500,
-  "global_step": 17000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2659,6 +2659,84 @@
       "eval_samples_per_second": 22.714,
       "eval_steps_per_second": 5.679,
       "step": 17000
     }
   ],
   "logging_steps": 50,
@@ -2678,7 +2756,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.140913655808e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.0821109265089035,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-17500",
+  "epoch": 1.4,
   "eval_steps": 500,
+  "global_step": 17500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.714,
       "eval_steps_per_second": 5.679,
       "step": 17000
+    },
+    {
+      "epoch": 1.3639999999999999,
+      "grad_norm": 0.19439847767353058,
+      "learning_rate": 9.5448e-06,
+      "loss": 0.0566,
+      "step": 17050
+    },
+    {
+      "epoch": 1.3679999999999999,
+      "grad_norm": 0.1604561060667038,
+      "learning_rate": 9.4848e-06,
+      "loss": 0.0532,
+      "step": 17100
+    },
+    {
+      "epoch": 1.3719999999999999,
+      "grad_norm": 0.13188685476779938,
+      "learning_rate": 9.4248e-06,
+      "loss": 0.0547,
+      "step": 17150
+    },
+    {
+      "epoch": 1.376,
+      "grad_norm": 0.09787939488887787,
+      "learning_rate": 9.3648e-06,
+      "loss": 0.0476,
+      "step": 17200
+    },
+    {
+      "epoch": 1.38,
+      "grad_norm": 0.3960016369819641,
+      "learning_rate": 9.3048e-06,
+      "loss": 0.0607,
+      "step": 17250
+    },
+    {
+      "epoch": 1.384,
+      "grad_norm": 0.17494530975818634,
+      "learning_rate": 9.2448e-06,
+      "loss": 0.0579,
+      "step": 17300
+    },
+    {
+      "epoch": 1.388,
+      "grad_norm": 0.17870362102985382,
+      "learning_rate": 9.1848e-06,
+      "loss": 0.0481,
+      "step": 17350
+    },
+    {
+      "epoch": 1.392,
+      "grad_norm": 0.1152658611536026,
+      "learning_rate": 9.1248e-06,
+      "loss": 0.0507,
+      "step": 17400
+    },
+    {
+      "epoch": 1.396,
+      "grad_norm": 0.10560191422700882,
+      "learning_rate": 9.0648e-06,
+      "loss": 0.0509,
+      "step": 17450
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 0.09202779084444046,
+      "learning_rate": 9.004799999999999e-06,
+      "loss": 0.0571,
+      "step": 17500
+    },
+    {
+      "epoch": 1.4,
+      "eval_loss": 0.0821109265089035,
+      "eval_runtime": 88.0678,
+      "eval_samples_per_second": 22.71,
+      "eval_steps_per_second": 5.677,
+      "step": 17500
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 4.26270523392e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null