Training in progress, step 900, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9a757842f36fd94aead624d11ae735bab0021a2cf1d22b12f1d19d8eb3745df
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:f464bb34c079c49fc4894f86d25745fe89898016c82a86a608f7319e127060fb
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:864f28f2db139c235def5468478555614e1208e6c7e1636a6be1a9a8a84d2903
 size 2479955235

 version https://git-lfs.github.com/spec/v1
+oid sha256:89a0275491ae162fc646c048a499bb9160456c508f7be5abff3710269e6fdf4a
 size 2479955235

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee1ff4b6d230d52fbd75a1fdfc717e2baaa7034a01541c1dee54a5bf5dd662d6
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:504b7bc543b9e5f039f6559d07b099507a66c15c86836ff5981e4eee51792c02
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87f9876af7981b4f995b217441438c53a026fb406c344a1e30a18ad2545bd292
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:bcb268d80dcd8f32a0b291e0f300bdc2df3898cc3661ea44beb8067d70741b7b
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.32,
   "eval_steps": 500,
-  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -568,6 +568,76 @@
       "learning_rate": 3.4244372990353704e-05,
       "loss": 0.7991,
       "step": 800
     }
   ],
   "logging_steps": 10,
@@ -587,7 +657,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.442001474164736e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.36,
   "eval_steps": 500,
+  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.4244372990353704e-05,
       "loss": 0.7991,
       "step": 800
+    },
+    {
+      "epoch": 0.324,
+      "grad_norm": 25.02106475830078,
+      "learning_rate": 3.404340836012862e-05,
+      "loss": 0.7284,
+      "step": 810
+    },
+    {
+      "epoch": 0.328,
+      "grad_norm": 14.035198211669922,
+      "learning_rate": 3.384244372990354e-05,
+      "loss": 0.7589,
+      "step": 820
+    },
+    {
+      "epoch": 0.332,
+      "grad_norm": 11.368013381958008,
+      "learning_rate": 3.364147909967846e-05,
+      "loss": 0.7638,
+      "step": 830
+    },
+    {
+      "epoch": 0.336,
+      "grad_norm": 21.951080322265625,
+      "learning_rate": 3.344051446945338e-05,
+      "loss": 0.7869,
+      "step": 840
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 17.966073989868164,
+      "learning_rate": 3.32395498392283e-05,
+      "loss": 0.6792,
+      "step": 850
+    },
+    {
+      "epoch": 0.344,
+      "grad_norm": 36.02198791503906,
+      "learning_rate": 3.3038585209003216e-05,
+      "loss": 0.6968,
+      "step": 860
+    },
+    {
+      "epoch": 0.348,
+      "grad_norm": 32.43560791015625,
+      "learning_rate": 3.283762057877814e-05,
+      "loss": 0.7523,
+      "step": 870
+    },
+    {
+      "epoch": 0.352,
+      "grad_norm": 30.29490852355957,
+      "learning_rate": 3.263665594855306e-05,
+      "loss": 0.6548,
+      "step": 880
+    },
+    {
+      "epoch": 0.356,
+      "grad_norm": 8.957921981811523,
+      "learning_rate": 3.243569131832798e-05,
+      "loss": 0.7151,
+      "step": 890
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 15.583487510681152,
+      "learning_rate": 3.22347266881029e-05,
+      "loss": 0.625,
+      "step": 900
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.6191274912395264e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null