Training in progress, step 600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b7bdfa9432865811566d801973f8536379093191662a7081831a5e5a10ab7ea
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8887655314c6a7fc55cd02e4ec047c0048e2272c7fd352777bb7dacb970f435
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98b8806bdda72b37431cd7a1864b5e940d2adca924c0255d88519f24424d35ce
 size 2479955235

 version https://git-lfs.github.com/spec/v1
+oid sha256:4544c40f9c86f284c54a215c2c1aec6b854bdd8ff99351fe18c83fabae909708
 size 2479955235

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ff58b41c3672e659a0eb46d9ed11a0ca17415e7a2643a3ddfbaebb9f4e67f8f
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e35963fbe17703d43e57c264c8bf401c049828d6ea5abe6c269f936eebec007
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04a7344de2b10f0c83d4bc41d4ac3ff0903cfb5c2b6ee1654e8dd79d5d353dd0
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:2dacf7348dc62d0b10f0ec94c4cde6e6d272f324c59483c8d9d8548880fb6b96
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -358,6 +358,76 @@
       "learning_rate": 4.027331189710611e-05,
       "loss": 1.0562,
       "step": 500
     }
   ],
   "logging_steps": 10,
@@ -377,7 +447,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9020891125518336.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.24,
   "eval_steps": 500,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.027331189710611e-05,
       "loss": 1.0562,
       "step": 500
+    },
+    {
+      "epoch": 0.204,
+      "grad_norm": 13.945029258728027,
+      "learning_rate": 4.0072347266881035e-05,
+      "loss": 0.9637,
+      "step": 510
+    },
+    {
+      "epoch": 0.208,
+      "grad_norm": 16.55429458618164,
+      "learning_rate": 3.9871382636655953e-05,
+      "loss": 0.8104,
+      "step": 520
+    },
+    {
+      "epoch": 0.212,
+      "grad_norm": 24.434778213500977,
+      "learning_rate": 3.967041800643087e-05,
+      "loss": 1.1942,
+      "step": 530
+    },
+    {
+      "epoch": 0.216,
+      "grad_norm": 20.01283836364746,
+      "learning_rate": 3.946945337620579e-05,
+      "loss": 0.9209,
+      "step": 540
+    },
+    {
+      "epoch": 0.22,
+      "grad_norm": 18.98524284362793,
+      "learning_rate": 3.926848874598071e-05,
+      "loss": 0.7445,
+      "step": 550
+    },
+    {
+      "epoch": 0.224,
+      "grad_norm": 22.44414710998535,
+      "learning_rate": 3.906752411575563e-05,
+      "loss": 0.9528,
+      "step": 560
+    },
+    {
+      "epoch": 0.228,
+      "grad_norm": 19.79057502746582,
+      "learning_rate": 3.886655948553055e-05,
+      "loss": 0.9216,
+      "step": 570
+    },
+    {
+      "epoch": 0.232,
+      "grad_norm": 17.453460693359375,
+      "learning_rate": 3.866559485530547e-05,
+      "loss": 0.9834,
+      "step": 580
+    },
+    {
+      "epoch": 0.236,
+      "grad_norm": 29.218969345092773,
+      "learning_rate": 3.846463022508039e-05,
+      "loss": 0.9945,
+      "step": 590
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 17.652963638305664,
+      "learning_rate": 3.826366559485531e-05,
+      "loss": 0.9308,
+      "step": 600
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.081989562033152e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null