Training in progress, step 1900, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:229e19659100db88dd521b24c7f3783cb59725c077f5c58e0b1e8cbed6566cad
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:84f12537cd80e96d5b00db2adce34c918e49c02c0163196b020849bfc5dcea70
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:086d4ae403b4653b546f4d669e2f8c98a6c3bc786f7ff28201dea70b2067e4f2
 size 2479955235

 version https://git-lfs.github.com/spec/v1
+oid sha256:3bba8b74741b8f956ed154bc4ece6dfb19904a6a7c6b034624740300cde18a97
 size 2479955235

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1547aae10ac7691e1716f567b08e3b4d274fa923879a48af8c2bb55c815a28a2
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb4bb891f0ebc45b239e473bb43ab5a6e8916e99a94e990939ec84f3da08f81e
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cdd5d251a495085a19339ae2e6833dedf33f9b2050a0e70b16dd4cd5da2b7a12
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:34908dcef28f44e129bc7cf6f353b95daa97084843f3b737ac3e87c6a4beba8f
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.72,
   "eval_steps": 500,
-  "global_step": 1800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1268,6 +1268,76 @@
       "learning_rate": 1.414790996784566e-05,
       "loss": 0.5619,
       "step": 1800
     }
   ],
   "logging_steps": 10,
@@ -1287,7 +1357,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.2448823590445056e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.76,
   "eval_steps": 500,
+  "global_step": 1900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.414790996784566e-05,
       "loss": 0.5619,
       "step": 1800
+    },
+    {
+      "epoch": 0.724,
+      "grad_norm": 5.905683517456055,
+      "learning_rate": 1.3946945337620579e-05,
+      "loss": 0.3286,
+      "step": 1810
+    },
+    {
+      "epoch": 0.728,
+      "grad_norm": 10.061610221862793,
+      "learning_rate": 1.3745980707395497e-05,
+      "loss": 0.3899,
+      "step": 1820
+    },
+    {
+      "epoch": 0.732,
+      "grad_norm": 12.293854713439941,
+      "learning_rate": 1.354501607717042e-05,
+      "loss": 0.4059,
+      "step": 1830
+    },
+    {
+      "epoch": 0.736,
+      "grad_norm": 13.248871803283691,
+      "learning_rate": 1.3344051446945338e-05,
+      "loss": 0.4339,
+      "step": 1840
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 9.589434623718262,
+      "learning_rate": 1.3143086816720257e-05,
+      "loss": 0.4178,
+      "step": 1850
+    },
+    {
+      "epoch": 0.744,
+      "grad_norm": 8.538604736328125,
+      "learning_rate": 1.2942122186495179e-05,
+      "loss": 0.3152,
+      "step": 1860
+    },
+    {
+      "epoch": 0.748,
+      "grad_norm": 18.58129119873047,
+      "learning_rate": 1.2741157556270097e-05,
+      "loss": 0.4276,
+      "step": 1870
+    },
+    {
+      "epoch": 0.752,
+      "grad_norm": 8.69501781463623,
+      "learning_rate": 1.2540192926045016e-05,
+      "loss": 0.4304,
+      "step": 1880
+    },
+    {
+      "epoch": 0.756,
+      "grad_norm": 14.74836254119873,
+      "learning_rate": 1.2339228295819937e-05,
+      "loss": 0.3541,
+      "step": 1890
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 7.415429592132568,
+      "learning_rate": 1.2138263665594855e-05,
+      "loss": 0.3713,
+      "step": 1900
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.4245796106594304e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null