Training in progress, step 2000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84f12537cd80e96d5b00db2adce34c918e49c02c0163196b020849bfc5dcea70
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:bea1b48825d279d5ca7532312e7e81957e535191d5f4e4e23c6756d53ffb5dc5
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bba8b74741b8f956ed154bc4ece6dfb19904a6a7c6b034624740300cde18a97
 size 2479955235

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc649b7fa91947a37cd4744fb1a38adf59d9a1c0676e9bc59a750dc67ad53fa6
 size 2479955235

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb4bb891f0ebc45b239e473bb43ab5a6e8916e99a94e990939ec84f3da08f81e
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:53febc76262518d0519b05d74ab6f65dd5851f3bbee84bc1c2b8f6935b1f50de
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34908dcef28f44e129bc7cf6f353b95daa97084843f3b737ac3e87c6a4beba8f
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:d68747f6eb2bb192bc48db140d8e66025b016a51ccd2dd4f8273e6973eed04b3
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.76,
   "eval_steps": 500,
-  "global_step": 1900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1338,6 +1338,76 @@
       "learning_rate": 1.2138263665594855e-05,
       "loss": 0.3713,
       "step": 1900
     }
   ],
   "logging_steps": 10,
@@ -1357,7 +1427,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.4245796106594304e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8,
   "eval_steps": 500,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.2138263665594855e-05,
       "loss": 0.3713,
       "step": 1900
+    },
+    {
+      "epoch": 0.764,
+      "grad_norm": 8.78702163696289,
+      "learning_rate": 1.1937299035369776e-05,
+      "loss": 0.3025,
+      "step": 1910
+    },
+    {
+      "epoch": 0.768,
+      "grad_norm": 2.6222591400146484,
+      "learning_rate": 1.1736334405144696e-05,
+      "loss": 0.2279,
+      "step": 1920
+    },
+    {
+      "epoch": 0.772,
+      "grad_norm": 8.457213401794434,
+      "learning_rate": 1.1535369774919615e-05,
+      "loss": 0.3841,
+      "step": 1930
+    },
+    {
+      "epoch": 0.776,
+      "grad_norm": 9.097604751586914,
+      "learning_rate": 1.1334405144694535e-05,
+      "loss": 0.3436,
+      "step": 1940
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 9.933280944824219,
+      "learning_rate": 1.1133440514469454e-05,
+      "loss": 0.4276,
+      "step": 1950
+    },
+    {
+      "epoch": 0.784,
+      "grad_norm": 9.58340072631836,
+      "learning_rate": 1.0932475884244374e-05,
+      "loss": 0.281,
+      "step": 1960
+    },
+    {
+      "epoch": 0.788,
+      "grad_norm": 13.846723556518555,
+      "learning_rate": 1.0731511254019293e-05,
+      "loss": 0.2836,
+      "step": 1970
+    },
+    {
+      "epoch": 0.792,
+      "grad_norm": 30.122060775756836,
+      "learning_rate": 1.0530546623794213e-05,
+      "loss": 0.3722,
+      "step": 1980
+    },
+    {
+      "epoch": 0.796,
+      "grad_norm": 8.666303634643555,
+      "learning_rate": 1.0329581993569132e-05,
+      "loss": 0.2778,
+      "step": 1990
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 7.968908786773682,
+      "learning_rate": 1.0128617363344052e-05,
+      "loss": 0.2778,
+      "step": 2000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.604261231669248e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null