Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ccd1607970824f8435dff0ec490e2197f145ee38b543dfac0f375baae91f3e84
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:12de36f02475ba36424b6cbbc78a99fb5d247b1f59b0671ec136b90196dbc42e
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05aa8a7bf492a3ad20e0d6edfff8c0717a4f0bbc1219a587cbf095503cf2d00e
-size 2479122661

 version https://git-lfs.github.com/spec/v1
+oid sha256:71a973f442004b75157ae01481531805c844e77a68190e59a5218c09d8d6df94
+size 2479123301

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09b7290d8ca07041eee19d5fa227aba688d13ea17f7d6f3c0e4a7903d483d295
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc1a0da602f8abf4bf342932694d528cc1f0baa4d5027de58ad34f4d9855d085
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19ed95effe7569c75627b601fa080ba53727e518015156dc63042342eab93ca8
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:38dd85de4e747e5477e492c54af5b212cebc40d19045c2dfc5361392de0ed8a7
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.08,
   "eval_steps": 500,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -148,6 +148,76 @@
       "learning_rate": 4.617234468937876e-05,
       "loss": 1.3504,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -167,7 +237,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3595273633732608.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.12,
   "eval_steps": 500,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.617234468937876e-05,
       "loss": 1.3504,
       "step": 200
+    },
+    {
+      "epoch": 0.084,
+      "grad_norm": 33.88198471069336,
+      "learning_rate": 4.5971943887775554e-05,
+      "loss": 1.5029,
+      "step": 210
+    },
+    {
+      "epoch": 0.088,
+      "grad_norm": 24.533716201782227,
+      "learning_rate": 4.5771543086172346e-05,
+      "loss": 1.2978,
+      "step": 220
+    },
+    {
+      "epoch": 0.092,
+      "grad_norm": 27.563339233398438,
+      "learning_rate": 4.557114228456914e-05,
+      "loss": 1.7014,
+      "step": 230
+    },
+    {
+      "epoch": 0.096,
+      "grad_norm": 29.428752899169922,
+      "learning_rate": 4.5370741482965936e-05,
+      "loss": 1.3845,
+      "step": 240
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 20.272520065307617,
+      "learning_rate": 4.517034068136273e-05,
+      "loss": 1.2192,
+      "step": 250
+    },
+    {
+      "epoch": 0.104,
+      "grad_norm": 47.12469482421875,
+      "learning_rate": 4.496993987975952e-05,
+      "loss": 1.2814,
+      "step": 260
+    },
+    {
+      "epoch": 0.108,
+      "grad_norm": 18.20330238342285,
+      "learning_rate": 4.476953907815631e-05,
+      "loss": 1.2717,
+      "step": 270
+    },
+    {
+      "epoch": 0.112,
+      "grad_norm": 39.07451248168945,
+      "learning_rate": 4.456913827655311e-05,
+      "loss": 1.3291,
+      "step": 280
+    },
+    {
+      "epoch": 0.116,
+      "grad_norm": 50.37272644042969,
+      "learning_rate": 4.43687374749499e-05,
+      "loss": 1.3691,
+      "step": 290
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 23.233367919921875,
+      "learning_rate": 4.4168336673346694e-05,
+      "loss": 1.4183,
+      "step": 300
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 5404828786993152.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null