Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +93 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:368e648350fa88e97ed24bf06f585bb7f7097580946a4a9480bef2318de437fd
 size 18257035

 version https://git-lfs.github.com/spec/v1
+oid sha256:1fbf29aa08f41e0f0601f4765837411287c24654fa0450a669320cc88dc0d507
 size 18257035

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f92bb13c8c261c5bbacd52e4713611a2458ef3c2d47986ab438b3233a082b5f
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:f39368c97fa07f86c85b709084202463eb1ea663fda6349cc12642c32aea74f1
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4393a84a3109995aa1202073b039b12062e3189ed89aa0b94ef0510ba843009
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2b5ed8d4c0db2e24674d7f125356981e2c73273d96a8f3eabaf284b99f24856
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:a68b30ede80d2355469409876134f9fb0aa83dcc5c47c7d77b992bd6fb65d11f
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.020057665789143787,
   "eval_steps": 500,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -98,6 +98,96 @@
       "mean_token_accuracy": 0.0,
       "num_tokens": 371196.0,
       "step": 100
     }
   ],
   "logging_steps": 10,
@@ -117,7 +207,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8310689070243840.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.040115331578287575,
   "eval_steps": 500,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.0,
       "num_tokens": 371196.0,
       "step": 100
+    },
+    {
+      "epoch": 0.022063432368058168,
+      "grad_norm": 0.0,
+      "learning_rate": 3.643048128342246e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 408430.0,
+      "step": 110
+    },
+    {
+      "epoch": 0.024069198946972545,
+      "grad_norm": 0.0,
+      "learning_rate": 3.9772727272727275e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 445095.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.026074965525886926,
+      "grad_norm": 0.0,
+      "learning_rate": 4.311497326203208e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 483443.0,
+      "step": 130
+    },
+    {
+      "epoch": 0.028080732104801304,
+      "grad_norm": 0.0,
+      "learning_rate": 4.64572192513369e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 519939.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.03008649868371568,
+      "grad_norm": 0.0,
+      "learning_rate": 4.979946524064171e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 557291.0,
+      "step": 150
+    },
+    {
+      "epoch": 0.03209226526263006,
+      "grad_norm": 0.0,
+      "learning_rate": 5.314171122994652e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 594842.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.03409803184154444,
+      "grad_norm": 0.0,
+      "learning_rate": 5.648395721925134e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 630588.0,
+      "step": 170
+    },
+    {
+      "epoch": 0.03610379842045882,
+      "grad_norm": 0.0,
+      "learning_rate": 5.982620320855615e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 666902.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.0381095649993732,
+      "grad_norm": 0.0,
+      "learning_rate": 6.316844919786097e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 704018.0,
+      "step": 190
+    },
+    {
+      "epoch": 0.040115331578287575,
+      "grad_norm": 0.0,
+      "learning_rate": 6.651069518716578e-06,
+      "loss": 0.0,
+      "mean_token_accuracy": 0.0,
+      "num_tokens": 741862.0,
+      "step": 200
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.660951199643648e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null