Training in progress, step 1000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +2 -2
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +97 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5dac5437f6ae84224977f75f9084512e2f5a204352865d41726f02a807c53cd1
 size 1315238776

 version https://git-lfs.github.com/spec/v1
+oid sha256:73ae4eb11ccd76a846a2f705bf4d4ff76860942937dceb88845f097d06d43b4d
 size 1315238776

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f2d7876b2f76fb6a691661f937b240671a73c43c0ffaac97f498978f741b277
 size 2630727050

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5a174fb5366b53f40cfe85c697b0f93cae31a6d5b91f246c623d1cd21969a79
 size 2630727050

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:834cd886e0eeedcea906fed8c169bdb850f647da76f2202362f0874098e69d0e
-size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8a536803ce62f22b8221a8dd984affe7a0f8f81305e773e07a8fafbee4b6869
+size 14372

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7aade0a5cf98168c48557061a263a0ce94127833f42effbb1795ad03b5bd88a5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa5974852ecb19394b20e36e43d8a03c1cb27cf510d3afdab7c91e54ecd54439
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.723589001447178,
   "eval_steps": 200,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -94,6 +94,100 @@
       "learning_rate": 0.00029939999999999996,
       "loss": 624.1802,
       "step": 500
     }
   ],
   "logging_steps": 50,
@@ -113,7 +207,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.82481936005718e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.447178002894356,
   "eval_steps": 200,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00029939999999999996,
       "loss": 624.1802,
       "step": 500
+    },
+    {
+      "epoch": 0.7959479015918958,
+      "grad_norm": 5244923.0,
+      "learning_rate": 0.0002977067082683307,
+      "loss": 617.4588,
+      "step": 550
+    },
+    {
+      "epoch": 0.8683068017366136,
+      "grad_norm": 5422852.5,
+      "learning_rate": 0.00029536661466458657,
+      "loss": 610.4225,
+      "step": 600
+    },
+    {
+      "epoch": 0.8683068017366136,
+      "eval_loss": 599.3718872070312,
+      "eval_runtime": 240.859,
+      "eval_samples_per_second": 10.197,
+      "eval_steps_per_second": 0.639,
+      "step": 600
+    },
+    {
+      "epoch": 0.9406657018813314,
+      "grad_norm": 15222385.0,
+      "learning_rate": 0.0002930265210608424,
+      "loss": 605.7685,
+      "step": 650
+    },
+    {
+      "epoch": 1.0130246020260492,
+      "grad_norm": 10619589.0,
+      "learning_rate": 0.00029068642745709827,
+      "loss": 593.7761,
+      "step": 700
+    },
+    {
+      "epoch": 1.085383502170767,
+      "grad_norm": 5425615.5,
+      "learning_rate": 0.0002883463338533541,
+      "loss": 581.7033,
+      "step": 750
+    },
+    {
+      "epoch": 1.1577424023154848,
+      "grad_norm": 7700796.0,
+      "learning_rate": 0.00028600624024961,
+      "loss": 575.6007,
+      "step": 800
+    },
+    {
+      "epoch": 1.1577424023154848,
+      "eval_loss": 552.0042114257812,
+      "eval_runtime": 239.4632,
+      "eval_samples_per_second": 10.256,
+      "eval_steps_per_second": 0.643,
+      "step": 800
+    },
+    {
+      "epoch": 1.2301013024602026,
+      "grad_norm": 5580448.0,
+      "learning_rate": 0.0002836661466458658,
+      "loss": 557.8052,
+      "step": 850
+    },
+    {
+      "epoch": 1.3024602026049203,
+      "grad_norm": 5330764.0,
+      "learning_rate": 0.0002813260530421217,
+      "loss": 522.3477,
+      "step": 900
+    },
+    {
+      "epoch": 1.3748191027496381,
+      "grad_norm": 7907119.5,
+      "learning_rate": 0.00027898595943837753,
+      "loss": 499.9348,
+      "step": 950
+    },
+    {
+      "epoch": 1.447178002894356,
+      "grad_norm": 8895919.0,
+      "learning_rate": 0.0002766458658346334,
+      "loss": 470.3078,
+      "step": 1000
+    },
+    {
+      "epoch": 1.447178002894356,
+      "eval_loss": 434.424560546875,
+      "eval_runtime": 239.2094,
+      "eval_samples_per_second": 10.267,
+      "eval_steps_per_second": 0.644,
+      "step": 1000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 7.654320361699615e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null